Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

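This tree's fs/aio.c (blamed below) implements the kernel side of the legacy Linux AIO interface. For orientation, here is a minimal userspace sketch that drives it through the raw io_setup()/io_submit()/io_getevents()/io_destroy() syscalls; the tiny wrappers and the file path are local to this example (not libaio), and error handling is omitted.

/* Minimal sketch: raw Linux AIO syscalls; error handling omitted. */
#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <time.h>

static long io_setup(unsigned nr, aio_context_t *ctxp)
{
	return syscall(__NR_io_setup, nr, ctxp);
}
static long io_submit(aio_context_t ctx, long n, struct iocb **iocbpp)
{
	return syscall(__NR_io_submit, ctx, n, iocbpp);
}
static long io_getevents(aio_context_t ctx, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	return syscall(__NR_io_getevents, ctx, min_nr, nr, events, timeout);
}
static long io_destroy(aio_context_t ctx)
{
	return syscall(__NR_io_destroy, ctx);
}

int main(void)
{
	aio_context_t ctx = 0;			/* must be zeroed before io_setup() */
	static char buf[4096];
	struct iocb cb, *cbs[1] = { &cb };
	struct io_event ev;
	int fd = open("/etc/os-release", O_RDONLY);

	io_setup(8, &ctx);			/* ioctx_alloc() + aio_setup_ring() below */

	memset(&cb, 0, sizeof(cb));
	cb.aio_lio_opcode = IOCB_CMD_PREAD;
	cb.aio_fildes = fd;
	cb.aio_buf = (unsigned long)buf;
	cb.aio_nbytes = sizeof(buf);
	cb.aio_offset = 0;

	io_submit(ctx, 1, cbs);			/* queue one asynchronous read */
	io_getevents(ctx, 1, 1, &ev, NULL);	/* wait for its completion event */
	printf("read %lld bytes\n", (long long)ev.res);

	io_destroy(ctx);			/* tear the context down */
	close(fd);
	return 0;
}
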
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2)  *	An async IO implementation for Linux
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  *	Written by Benjamin LaHaise <bcrl@kvack.org>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  *	Implements an efficient asynchronous io interface.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  *	Copyright 2000, 2001, 2002 Red Hat, Inc.  All Rights Reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8)  *	Copyright 2018 Christoph Hellwig.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10)  *	See ../COPYING for licensing terms.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12) #define pr_fmt(fmt) "%s: " fmt, __func__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16) #include <linux/errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17) #include <linux/time.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18) #include <linux/aio_abi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19) #include <linux/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) #include <linux/syscalls.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) #include <linux/backing-dev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) #include <linux/refcount.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) #include <linux/uio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) #include <linux/sched/signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) #include <linux/file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) #include <linux/mman.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) #include <linux/percpu.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32) #include <linux/timer.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33) #include <linux/aio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34) #include <linux/highmem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35) #include <linux/workqueue.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36) #include <linux/security.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37) #include <linux/eventfd.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38) #include <linux/blkdev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39) #include <linux/compat.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40) #include <linux/migrate.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41) #include <linux/ramfs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42) #include <linux/percpu-refcount.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) #include <linux/mount.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44) #include <linux/pseudo_fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46) #include <asm/kmap_types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47) #include <linux/uaccess.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48) #include <linux/nospec.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50) #include "internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52) #define KIOCB_KEY		0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54) #define AIO_RING_MAGIC			0xa10a10a1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55) #define AIO_RING_COMPAT_FEATURES	1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56) #define AIO_RING_INCOMPAT_FEATURES	0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57) struct aio_ring {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58) 	unsigned	id;	/* kernel internal index number */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59) 	unsigned	nr;	/* number of io_events */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60) 	unsigned	head;	/* Written to by userland or under ring_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61) 				 * mutex by aio_read_events_ring(). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62) 	unsigned	tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64) 	unsigned	magic;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65) 	unsigned	compat_features;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) 	unsigned	incompat_features;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) 	unsigned	header_length;	/* size of aio_ring */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) 	struct io_event		io_events[];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) }; /* 128 bytes + ring size */
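
/*
 * The ring header and its io_events[] live in the pages of an anonymous file
 * and are mapped into userspace by aio_setup_ring(); that mmap address doubles
 * as the io_context_t handle returned by io_setup() (ctx->user_id below).
 * While incompat_features is 0, userspace (e.g. libaio) may reap completed
 * events straight from this ring without a syscall, which is why head is
 * described above as written to by userland.
 */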
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74)  * Plugging is meant to work with larger batches of IOs. If we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75)  * have more than the threshold below, don't bother setting up a plug.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) #define AIO_PLUG_THRESHOLD	2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) #define AIO_RING_PAGES	8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) struct kioctx_table {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) 	struct rcu_head		rcu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) 	unsigned		nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) 	struct kioctx __rcu	*table[];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) 
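/*
 * Per-cpu cache of completion-ring slots: each cpu takes slots from
 * ctx->reqs_available in req_batch-sized chunks and gives them back the same
 * way, so the shared atomic is not touched on every request.
 */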
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) struct kioctx_cpu {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) 	unsigned		reqs_available;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) struct ctx_rq_wait {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) 	struct completion comp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) 	atomic_t count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) struct kioctx {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) 	struct percpu_ref	users;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) 	atomic_t		dead;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) 	struct percpu_ref	reqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) 	unsigned long		user_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) 	struct __percpu kioctx_cpu *cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) 	 * For percpu reqs_available, number of slots we move to/from global
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 	 * counter at a time:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) 	unsigned		req_batch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) 	 * This is what userspace passed to io_setup(), it's not used for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) 	 * anything but counting against the global max_reqs quota.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) 	 * The real limit is nr_events - 1, which will be larger (see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 	 * aio_setup_ring())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) 	unsigned		max_reqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 	/* Size of ringbuffer, in units of struct io_event */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 	unsigned		nr_events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 	unsigned long		mmap_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 	unsigned long		mmap_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) 	struct page		**ring_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) 	long			nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) 	struct rcu_work		free_rwork;	/* see free_ioctx() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 	 * signals when all in-flight requests are done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) 	struct ctx_rq_wait	*rq_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) 	struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) 		 * This counts the number of available slots in the ringbuffer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) 		 * so we avoid overflowing it: it's decremented (if positive)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) 		 * when allocating a kiocb and incremented when the resulting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) 		 * io_event is pulled off the ringbuffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 		 * We batch accesses to it with a percpu version.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 		atomic_t	reqs_available;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 	} ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 	struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) 		spinlock_t	ctx_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 		struct list_head active_reqs;	/* used for cancellation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 	} ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 	struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 		struct mutex	ring_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) 		wait_queue_head_t wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) 	} ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) 	struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) 		unsigned	tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) 		unsigned	completed_events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) 		spinlock_t	completion_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) 	} ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) 	struct page		*internal_pages[AIO_RING_PAGES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) 	struct file		*aio_ring_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) 	unsigned		id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171)  * First field must be the file pointer in all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172)  * iocb unions! See also 'struct kiocb' in <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) struct fsync_iocb {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) 	struct file		*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 	struct work_struct	work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) 	bool			datasync;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) 	struct cred		*creds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) struct poll_iocb {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 	struct file		*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 	struct wait_queue_head	*head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 	__poll_t		events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) 	bool			cancelled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 	bool			work_scheduled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 	bool			work_need_resched;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) 	struct wait_queue_entry	wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 	struct work_struct	work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193)  * NOTE! Each of the iocb union members has the file pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194)  * as the first entry in their struct definition. So you can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195)  * access the file pointer through any of the sub-structs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196)  * or directly as just 'ki_filp' in this struct.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) struct aio_kiocb {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) 	union {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 		struct file		*ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) 		struct kiocb		rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) 		struct fsync_iocb	fsync;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) 		struct poll_iocb	poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 	struct kioctx		*ki_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 	kiocb_cancel_fn		*ki_cancel;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) 	struct io_event		ki_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 	struct list_head	ki_list;	/* the aio core uses this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 						 * for cancellation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 	refcount_t		ki_refcnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 	 * If the aio_resfd field of the userspace iocb is not zero,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 	 * this is the underlying eventfd context to deliver events to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) 	struct eventfd_ctx	*ki_eventfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) /*------ sysctl variables----*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) static DEFINE_SPINLOCK(aio_nr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) unsigned long aio_nr;		/* current system wide number of aio requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) /*----end sysctl variables---*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) static struct kmem_cache	*kiocb_cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) static struct kmem_cache	*kioctx_cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) static struct vfsmount *aio_mnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) static const struct file_operations aio_ring_fops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) static const struct address_space_operations aio_ctx_aops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 	struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 	if (IS_ERR(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 		return ERR_CAST(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	inode->i_mapping->a_ops = &aio_ctx_aops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 	inode->i_mapping->private_data = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 	inode->i_size = PAGE_SIZE * nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 	file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 				O_RDWR, &aio_ring_fops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 	if (IS_ERR(file))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) 		iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 	return file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) static int aio_init_fs_context(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 	if (!init_pseudo(fc, AIO_RING_MAGIC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 	fc->s_iflags |= SB_I_NOEXEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) /* aio_setup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263)  *	Creates the slab caches used by the aio routines; panics on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264)  *	failure, as this is done early in the boot sequence.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) static int __init aio_setup(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 	static struct file_system_type aio_fs = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 		.name		= "aio",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 		.init_fs_context = aio_init_fs_context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 		.kill_sb	= kill_anon_super,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 	aio_mnt = kern_mount(&aio_fs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 	if (IS_ERR(aio_mnt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 		panic("Failed to create aio fs mount.");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) __initcall(aio_setup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) static void put_aio_ring_file(struct kioctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 	struct file *aio_ring_file = ctx->aio_ring_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 	struct address_space *i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 	if (aio_ring_file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 		truncate_setsize(file_inode(aio_ring_file), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 		/* Prevent further access to the kioctx from migratepages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 		i_mapping = aio_ring_file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 		spin_lock(&i_mapping->private_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 		i_mapping->private_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 		ctx->aio_ring_file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 		spin_unlock(&i_mapping->private_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 		fput(aio_ring_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) static void aio_free_ring(struct kioctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 	/* Disconnect the kioctx from the ring file.  This prevents future
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 	 * accesses to the kioctx from page migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 	put_aio_ring_file(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 	for (i = 0; i < ctx->nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 		struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 		pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 				page_count(ctx->ring_pages[i]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 		page = ctx->ring_pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 		if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 		ctx->ring_pages[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 		put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 		kfree(ctx->ring_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 		ctx->ring_pages = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) static int aio_ring_mremap(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) 	struct file *file = vma->vm_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) 	struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) 	struct kioctx_table *table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 	int i, res = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 	spin_lock(&mm->ioctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 	table = rcu_dereference(mm->ioctx_table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 	for (i = 0; i < table->nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 		struct kioctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 		ctx = rcu_dereference(table->table[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 		if (ctx && ctx->aio_ring_file == file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 			if (!atomic_read(&ctx->dead)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 				ctx->user_id = ctx->mmap_base = vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 				res = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 	spin_unlock(&mm->ioctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 	return res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) static const struct vm_operations_struct aio_ring_vm_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 	.mremap		= aio_ring_mremap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) #if IS_ENABLED(CONFIG_MMU)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 	.fault		= filemap_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 	.map_pages	= filemap_map_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 	.page_mkwrite	= filemap_page_mkwrite,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 	vma->vm_flags |= VM_DONTEXPAND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 	vma->vm_ops = &aio_ring_vm_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) static const struct file_operations aio_ring_fops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 	.mmap = aio_ring_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) #if IS_ENABLED(CONFIG_MIGRATION)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) static int aio_migratepage(struct address_space *mapping, struct page *new,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 			struct page *old, enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 	struct kioctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 	pgoff_t idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 	int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) 	 * We cannot support the _NO_COPY case here, because copy needs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) 	 * happen under the ctx->completion_lock. That does not work with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) 	 * migration workflow of MIGRATE_SYNC_NO_COPY.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 	if (mode == MIGRATE_SYNC_NO_COPY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 	rc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 	/* mapping->private_lock here protects against the kioctx teardown.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 	spin_lock(&mapping->private_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 	ctx = mapping->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) 	if (!ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 		rc = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 	/* The ring_lock mutex.  This prevents aio_read_events() from writing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 	 * to the ring's head, and prevents page migration from mucking in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 	 * a partially initialized kioctx.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 	if (!mutex_trylock(&ctx->ring_lock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 		rc = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 	idx = old->index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 	if (idx < (pgoff_t)ctx->nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 		/* Make sure the old page hasn't already been changed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 		if (ctx->ring_pages[idx] != old)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 			rc = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 		rc = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 	if (rc != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 	/* Writeback must be complete */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 	BUG_ON(PageWriteback(old));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 	get_page(new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 	rc = migrate_page_move_mapping(mapping, new, old, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 	if (rc != MIGRATEPAGE_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 		put_page(new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 	/* Take completion_lock to prevent other writes to the ring buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) 	 * while the old page is copied to the new.  This prevents new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 	 * events from being lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) 	spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 	migrate_page_copy(new, old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) 	BUG_ON(ctx->ring_pages[idx] != old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 	ctx->ring_pages[idx] = new;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 	/* The old page is no longer accessible. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 	put_page(old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 	mutex_unlock(&ctx->ring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 	spin_unlock(&mapping->private_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 	return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) static const struct address_space_operations aio_ctx_aops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 	.set_page_dirty = __set_page_dirty_no_writeback,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) #if IS_ENABLED(CONFIG_MIGRATION)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 	.migratepage	= aio_migratepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 	struct aio_ring *ring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) 	struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 	unsigned long size, unused;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 	int nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) 	/* Compensate for the ring buffer's head/tail overlap entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) 	nr_events += 2;	/* 1 is required, 2 for good luck */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 	size = sizeof(struct aio_ring);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 	size += sizeof(struct io_event) * nr_events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 	nr_pages = PFN_UP(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 	if (nr_pages < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 	file = aio_private_file(ctx, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) 	if (IS_ERR(file)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 		ctx->aio_ring_file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 	ctx->aio_ring_file = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 			/ sizeof(struct io_event);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 	ctx->ring_pages = ctx->internal_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 	if (nr_pages > AIO_RING_PAGES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 					  GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 		if (!ctx->ring_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 			put_aio_ring_file(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 	for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 		struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 		page = find_or_create_page(file->f_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 					   i, GFP_HIGHUSER | __GFP_ZERO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 		if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) 		pr_debug("pid(%d) page[%d]->count=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 			 current->pid, i, page_count(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 		SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 		unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 		ctx->ring_pages[i] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 	ctx->nr_pages = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 	if (unlikely(i != nr_pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 		aio_free_ring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 	ctx->mmap_size = nr_pages * PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 	pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 	if (mmap_write_lock_killable(mm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 		ctx->mmap_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 		aio_free_ring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) 		return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 	ctx->mmap_base = do_mmap(ctx->aio_ring_file, 0, ctx->mmap_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 				 PROT_READ | PROT_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 				 MAP_SHARED, 0, &unused, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 	mmap_write_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 	if (IS_ERR((void *)ctx->mmap_base)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 		ctx->mmap_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 		aio_free_ring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 	pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 	ctx->user_id = ctx->mmap_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 	ctx->nr_events = nr_events; /* trusted copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 	ring = kmap_atomic(ctx->ring_pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 	ring->nr = nr_events;	/* user copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 	ring->id = ~0U;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 	ring->head = ring->tail = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 	ring->magic = AIO_RING_MAGIC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 	ring->compat_features = AIO_RING_COMPAT_FEATURES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) 	ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 	ring->header_length = sizeof(struct aio_ring);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) 	kunmap_atomic(ring);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 	flush_dcache_page(ctx->ring_pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) #define AIO_EVENTS_PER_PAGE	(PAGE_SIZE / sizeof(struct io_event))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
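
/*
 * Ring page 0 loses AIO_EVENTS_OFFSET event slots to the aio_ring header, so
 * a head/tail index i maps to:
 *	page = ctx->ring_pages[(i + AIO_EVENTS_OFFSET) / AIO_EVENTS_PER_PAGE]
 *	slot = (i + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE
 * which is how the completion and event-reading paths later in this file
 * locate an io_event.
 */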
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 	struct kioctx *ctx = req->ki_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 	if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 	spin_lock_irqsave(&ctx->ctx_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 	list_add_tail(&req->ki_list, &ctx->active_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 	req->ki_cancel = cancel;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) EXPORT_SYMBOL(kiocb_set_cancel_fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579)  * free_ioctx() should be RCU delayed to synchronize against the RCU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580)  * protected lookup_ioctx() and also needs process context to call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581)  * aio_free_ring().  Use rcu_work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) static void free_ioctx(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 	struct kioctx *ctx = container_of(to_rcu_work(work), struct kioctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 					  free_rwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 	pr_debug("freeing %p\n", ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 	aio_free_ring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 	free_percpu(ctx->cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 	percpu_ref_exit(&ctx->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 	percpu_ref_exit(&ctx->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 	kmem_cache_free(kioctx_cachep, ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) static void free_ioctx_reqs(struct percpu_ref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 	/* At this point we know that there are no in-flight requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 		complete(&ctx->rq_wait->comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 	/* Synchronize against RCU protected table->table[] dereferences */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 	INIT_RCU_WORK(&ctx->free_rwork, free_ioctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 	queue_rcu_work(system_wq, &ctx->free_rwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610)  * When this function runs, the kioctx has been removed from the "hash table"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611)  * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612)  * now it's safe to cancel any that need to be.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) static void free_ioctx_users(struct percpu_ref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 	struct kioctx *ctx = container_of(ref, struct kioctx, users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 	struct aio_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 	spin_lock_irq(&ctx->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 	while (!list_empty(&ctx->active_reqs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 		req = list_first_entry(&ctx->active_reqs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 				       struct aio_kiocb, ki_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 		req->ki_cancel(&req->rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 		list_del_init(&req->ki_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 	spin_unlock_irq(&ctx->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 	percpu_ref_kill(&ctx->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 	percpu_ref_put(&ctx->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 	unsigned i, new_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 	struct kioctx_table *table, *old;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 	struct aio_ring *ring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 	spin_lock(&mm->ioctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 	table = rcu_dereference_raw(mm->ioctx_table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 	while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 		if (table)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 			for (i = 0; i < table->nr; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 				if (!rcu_access_pointer(table->table[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 					ctx->id = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 					rcu_assign_pointer(table->table[i], ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 					spin_unlock(&mm->ioctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 					/* While kioctx setup is in progress,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 					 * we are protected from page migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 					 * changing ring_pages by ->ring_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 					 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 					ring = kmap_atomic(ctx->ring_pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 					ring->id = ctx->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 					kunmap_atomic(ring);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 					return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 		new_nr = (table ? table->nr : 1) * 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 		spin_unlock(&mm->ioctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 		table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 				new_nr, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 		if (!table)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 		table->nr = new_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 		spin_lock(&mm->ioctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 		old = rcu_dereference_raw(mm->ioctx_table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 		if (!old) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 			rcu_assign_pointer(mm->ioctx_table, table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 		} else if (table->nr > old->nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 			memcpy(table->table, old->table,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 			       old->nr * sizeof(struct kioctx *));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 			rcu_assign_pointer(mm->ioctx_table, table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 			kfree_rcu(old, rcu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 			kfree(table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 			table = old;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) static void aio_nr_sub(unsigned nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 	spin_lock(&aio_nr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 	if (WARN_ON(aio_nr - nr > aio_nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 		aio_nr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 		aio_nr -= nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 	spin_unlock(&aio_nr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) /* ioctx_alloc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700)  *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) static struct kioctx *ioctx_alloc(unsigned nr_events)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 	struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 	struct kioctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 	int err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 	 * Store the original nr_events -- what userspace passed to io_setup(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 	 * for counting against the global limit -- before it changes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 	unsigned int max_reqs = nr_events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 	 * We keep track of the number of available ringbuffer slots, to prevent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 	 * overflow (reqs_available), and we also use percpu counters for this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 	 * So since up to half the slots might be on other cpus' percpu counters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 	 * and unavailable, double nr_events so userspace sees what they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 	 * expected: additionally, we move req_batch slots to/from percpu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 	 * counters at a time, so make sure that isn't 0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 	nr_events = max(nr_events, num_possible_cpus() * 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 	nr_events *= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 	/* Prevent overflows */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 	if (nr_events > (0x10000000U / sizeof(struct io_event))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 		pr_debug("ENOMEM: nr_events too high\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 	if (!nr_events || (unsigned long)max_reqs > aio_max_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 		return ERR_PTR(-EAGAIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 	if (!ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 	ctx->max_reqs = max_reqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 	spin_lock_init(&ctx->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 	spin_lock_init(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 	mutex_init(&ctx->ring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 	/* Protect against page migration throughout kioctx setup by keeping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	 * the ring_lock mutex held until setup is complete. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	mutex_lock(&ctx->ring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	init_waitqueue_head(&ctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 	INIT_LIST_HEAD(&ctx->active_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, 0, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	ctx->cpu = alloc_percpu(struct kioctx_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 	if (!ctx->cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	err = aio_setup_ring(ctx, nr_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 	if (ctx->req_batch < 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 		ctx->req_batch = 1;
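	/*
	 * For example, with 4 KiB pages and 4 possible cpus, io_setup(128)
	 * yields nr_events = 256 above; aio_setup_ring() rounds that up to a
	 * 3-page ring holding 383 events, so reqs_available starts at 382 and
	 * req_batch becomes 382 / 16 = 23.
	 */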
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 	/* limit the number of system wide aios */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 	spin_lock(&aio_nr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 	if (aio_nr + ctx->max_reqs > aio_max_nr ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 	    aio_nr + ctx->max_reqs < aio_nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 		spin_unlock(&aio_nr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 		err = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 		goto err_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	aio_nr += ctx->max_reqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 	spin_unlock(&aio_nr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 	percpu_ref_get(&ctx->users);	/* io_setup() will drop this ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 	percpu_ref_get(&ctx->reqs);	/* free_ioctx_users() will drop this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	err = ioctx_add_table(ctx, mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 		goto err_cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	/* Release the ring_lock mutex now that all setup is complete. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	mutex_unlock(&ctx->ring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 		 ctx, ctx->user_id, mm, ctx->nr_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 	return ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) err_cleanup:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 	aio_nr_sub(ctx->max_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) err_ctx:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	atomic_set(&ctx->dead, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 	if (ctx->mmap_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 		vm_munmap(ctx->mmap_base, ctx->mmap_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	aio_free_ring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	mutex_unlock(&ctx->ring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 	free_percpu(ctx->cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 	percpu_ref_exit(&ctx->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 	percpu_ref_exit(&ctx->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 	kmem_cache_free(kioctx_cachep, ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 	pr_debug("error allocating ioctx %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 	return ERR_PTR(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) /* kill_ioctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813)  *	Cancels all outstanding aio requests on an aio context.  Used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814)  *	when the processes owning a context have all exited to encourage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815)  *	the rapid destruction of the kioctx.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 		      struct ctx_rq_wait *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 	struct kioctx_table *table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 	spin_lock(&mm->ioctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	if (atomic_xchg(&ctx->dead, 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 		spin_unlock(&mm->ioctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	table = rcu_dereference_raw(mm->ioctx_table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 	RCU_INIT_POINTER(table->table[ctx->id], NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 	spin_unlock(&mm->ioctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 	/* free_ioctx_reqs() will do the necessary RCU synchronization */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 	wake_up_all(&ctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 	 * It'd be more correct to do this in free_ioctx(), after all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 	 * the outstanding kiocbs have finished - but by then io_destroy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 	 * has already returned, so io_setup() could potentially return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 	 * -EAGAIN with no ioctxs actually in use (as far as userspace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 	 *  could tell).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	aio_nr_sub(ctx->max_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 	if (ctx->mmap_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 		vm_munmap(ctx->mmap_base, ctx->mmap_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	ctx->rq_wait = wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	percpu_ref_kill(&ctx->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854)  * exit_aio: called when the last user of mm goes away.  At this point, there is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855)  * no way for any new requests to be submitted or any of the io_* syscalls to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856)  * called on the context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858)  * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859)  * them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) void exit_aio(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	struct ctx_rq_wait wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	int i, skipped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	if (!table)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	atomic_set(&wait.count, table->nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	init_completion(&wait.comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 	skipped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	for (i = 0; i < table->nr; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 		struct kioctx *ctx =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 			rcu_dereference_protected(table->table[i], true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 		if (!ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 			skipped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 		 * We don't need to bother with munmap() here - exit_mmap(mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		 * is coming and it'll unmap everything. And we simply can't:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 		 * this is not necessarily our ->mm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 		 * Since kill_ioctx() uses a non-zero ->mmap_size as an indicator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 		 * that it needs to unmap the area, just set it to 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 		ctx->mmap_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		kill_ioctx(mm, ctx, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 	if (!atomic_sub_and_test(skipped, &wait.count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 		/* Wait until all IO for the context is done. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		wait_for_completion(&wait.comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	RCU_INIT_POINTER(mm->ioctx_table, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	kfree(table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) static void put_reqs_available(struct kioctx *ctx, unsigned nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	struct kioctx_cpu *kcpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	kcpu = this_cpu_ptr(ctx->cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	kcpu->reqs_available += nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	while (kcpu->reqs_available >= ctx->req_batch * 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 		kcpu->reqs_available -= ctx->req_batch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 		atomic_add(ctx->req_batch, &ctx->reqs_available);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 	local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) static bool __get_reqs_available(struct kioctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 	struct kioctx_cpu *kcpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 	bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	kcpu = this_cpu_ptr(ctx->cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 	if (!kcpu->reqs_available) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 		int old, avail = atomic_read(&ctx->reqs_available);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 		do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 			if (avail < ctx->req_batch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 			old = avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 			avail = atomic_cmpxchg(&ctx->reqs_available,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 					       avail, avail - ctx->req_batch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 		} while (avail != old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 		kcpu->reqs_available += ctx->req_batch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 	ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	kcpu->reqs_available--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) /* refill_reqs_available
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951)  *	Updates the reqs_available reference counts used for tracking the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952)  *	number of free slots in the completion ring.  This can be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953)  *	from aio_complete() (to optimistically update reqs_available) or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954)  *	from aio_get_req() (the case where we're out of events).  It must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955)  *	called holding ctx->completion_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) static void refill_reqs_available(struct kioctx *ctx, unsigned head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958)                                   unsigned tail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 	unsigned events_in_ring, completed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 	/* Clamp head since userland can write to it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 	head %= ctx->nr_events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	if (head <= tail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 		events_in_ring = tail - head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 		events_in_ring = ctx->nr_events - (head - tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 	completed = ctx->completed_events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 	if (events_in_ring < completed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 		completed -= events_in_ring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 		completed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 	if (!completed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 	ctx->completed_events -= completed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 	put_reqs_available(ctx, completed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) }
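
/*
 * Worked example for the arithmetic above (illustrative numbers only):
 * with nr_events == 128, head == 120 and tail == 8 the ring has wrapped,
 * so events_in_ring = 128 - (120 - 8) = 16.  If completed_events is 40,
 * then 40 - 16 = 24 completions are known to have been reaped by
 * userspace, and those 24 slots are handed back via put_reqs_available().
 */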
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) /* user_refill_reqs_available
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983)  *	Called to refill reqs_available when aio_get_req() encounters an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984)  *	out-of-space condition in the completion ring.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) static void user_refill_reqs_available(struct kioctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	if (ctx->completed_events) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 		struct aio_ring *ring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 		unsigned head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 		/* Access of ring->head may race with aio_read_events_ring()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 		 * here, but that's okay: whether we read the old version or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 		 * the new version, either will be valid.  The important
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 		 * part is that head cannot pass tail since we prevent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 		 * aio_complete() from updating tail by holding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 		 * ctx->completion_lock.  Even if head is invalid, the check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 		 * against ctx->completed_events below will make sure we do the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 		 * safe/right thing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 		ring = kmap_atomic(ctx->ring_pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 		head = ring->head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 		kunmap_atomic(ring);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 		refill_reqs_available(ctx, head, ctx->tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) static bool get_reqs_available(struct kioctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	if (__get_reqs_available(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	user_refill_reqs_available(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 	return __get_reqs_available(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) }
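
/*
 * Note on the helpers above: request slots are accounted at two levels - the
 * global ctx->reqs_available pool and a per-CPU cache that is refilled and
 * drained in units of ctx->req_batch, so the common case only touches this
 * CPU's counter instead of bouncing the shared atomic on every submission
 * and completion.
 */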
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) /* aio_get_req
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021)  *	Allocate a slot for an aio request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)  * Returns NULL if no requests are free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024)  * The refcount is initialized to 2 - one for the async op completion,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)  * one for the synchronous code that does this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 	struct aio_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 	if (unlikely(!req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 	if (unlikely(!get_reqs_available(ctx))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 		kmem_cache_free(kiocb_cachep, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	percpu_ref_get(&ctx->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	req->ki_ctx = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 	INIT_LIST_HEAD(&req->ki_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 	refcount_set(&req->ki_refcnt, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	req->ki_eventfd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	return req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) }
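
/*
 * Note on the two references taken above: the completion side drops one via
 * iocb_put() (see below), and the submission path drops its "synchronous"
 * reference once the request has been queued; only when both are gone does
 * iocb_put() call aio_complete() and iocb_destroy().
 */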
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) static struct kioctx *lookup_ioctx(unsigned long ctx_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	struct aio_ring __user *ring  = (void __user *)ctx_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 	struct kioctx *ctx, *ret = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 	struct kioctx_table *table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	unsigned id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	if (get_user(id, &ring->id))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 	table = rcu_dereference(mm->ioctx_table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	if (!table || id >= table->nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	id = array_index_nospec(id, table->nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 	ctx = rcu_dereference(table->table[id]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	if (ctx && ctx->user_id == ctx_id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 		if (percpu_ref_tryget_live(&ctx->users))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 			ret = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) static inline void iocb_destroy(struct aio_kiocb *iocb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	if (iocb->ki_eventfd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 		eventfd_ctx_put(iocb->ki_eventfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 	if (iocb->ki_filp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 		fput(iocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 	percpu_ref_put(&iocb->ki_ctx->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	kmem_cache_free(kiocb_cachep, iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) /* aio_complete
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087)  *	Called when the io request on the given iocb is complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) static void aio_complete(struct aio_kiocb *iocb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	struct kioctx	*ctx = iocb->ki_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	struct aio_ring	*ring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	struct io_event	*ev_page, *event;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 	unsigned tail, pos, head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	unsigned long	flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	 * Add a completion event to the ring buffer. Must be done holding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 	 * ctx->completion_lock to prevent other code from messing with the tail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	 * pointer since we might be called from irq context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 	spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 	tail = ctx->tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 	pos = tail + AIO_EVENTS_OFFSET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	if (++tail >= ctx->nr_events)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 		tail = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	*event = iocb->ki_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 	kunmap_atomic(ev_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 		 (void __user *)(unsigned long)iocb->ki_res.obj,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 		 iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	/* after flagging the request as done, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	 * must never even look at it again
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 	smp_wmb();	/* make event visible before updating tail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 	ctx->tail = tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	ring = kmap_atomic(ctx->ring_pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 	head = ring->head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	ring->tail = tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	kunmap_atomic(ring);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	flush_dcache_page(ctx->ring_pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	ctx->completed_events++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	if (ctx->completed_events > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 		refill_reqs_available(ctx, head, tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	pr_debug("added to ring %p at [%u]\n", iocb, tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	 * Check if the user asked us to deliver the result through an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	 * eventfd. The eventfd_signal() function is safe to call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	 * from IRQ context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	if (iocb->ki_eventfd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 		eventfd_signal(iocb->ki_eventfd, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	 * We have to order our ring_info tail store above with the test
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 	 * of the wait list below, outside the wait lock.  This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 	 * like in wake_up_bit() where clearing a bit has to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 	 * ordered with the unlocked test.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	if (waitqueue_active(&ctx->wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 		wake_up(&ctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) }
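
/*
 * Illustrative userspace sketch (not part of fs/aio.c, error handling
 * omitted): requesting the eventfd notification delivered by aio_complete()
 * above by setting IOCB_FLAG_RESFD on the submitted iocb.  The fd, buffer,
 * length and eventfd descriptor are assumed to come from the caller.
 */
#if 0
#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

static long submit_with_eventfd(aio_context_t ctx, int fd, void *buf,
				size_t len, int efd)
{
	struct iocb cb;
	struct iocb *cbs[1] = { &cb };

	memset(&cb, 0, sizeof(cb));
	cb.aio_lio_opcode = IOCB_CMD_PREAD;
	cb.aio_fildes = fd;
	cb.aio_buf = (unsigned long)buf;
	cb.aio_nbytes = len;
	cb.aio_flags = IOCB_FLAG_RESFD;	/* deliver completion via eventfd */
	cb.aio_resfd = efd;		/* e.g. obtained from eventfd(0, 0) */

	/* each completion adds 1 to efd; wait for it with poll()/epoll */
	return syscall(__NR_io_submit, ctx, 1, cbs);
}
#endif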
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) static inline void iocb_put(struct aio_kiocb *iocb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 	if (refcount_dec_and_test(&iocb->ki_refcnt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 		aio_complete(iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 		iocb_destroy(iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) /* aio_read_events_ring
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)  *	Pull events off the ioctx's event ring.  Returns the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)  *	events fetched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) static long aio_read_events_ring(struct kioctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 				 struct io_event __user *event, long nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	struct aio_ring *ring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	unsigned head, tail, pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 	int copy_ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	 * The mutex can block and wake us up and that will cause
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	 * wait_event_interruptible_hrtimeout() to schedule without sleeping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	 * and repeat. This should be rare enough that it doesn't cause
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	 * performance issues. See the comment in read_events() for more detail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 	sched_annotate_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	mutex_lock(&ctx->ring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 	/* Access to ->ring_pages here is protected by ctx->ring_lock. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	ring = kmap_atomic(ctx->ring_pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 	head = ring->head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	tail = ring->tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	kunmap_atomic(ring);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	 * Ensure that once we've read the current tail pointer, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 	 * also see the events that were stored up to the tail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	if (head == tail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 	head %= ctx->nr_events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	tail %= ctx->nr_events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	while (ret < nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 		long avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 		struct io_event *ev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 		struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 		avail = (head <= tail ?  tail : ctx->nr_events) - head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 		if (head == tail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 		pos = head + AIO_EVENTS_OFFSET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 		pos %= AIO_EVENTS_PER_PAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 		avail = min(avail, nr - ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 		ev = kmap(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 		copy_ret = copy_to_user(event + ret, ev + pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 					sizeof(*ev) * avail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 		kunmap(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 		if (unlikely(copy_ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 			ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 		ret += avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 		head += avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 		head %= ctx->nr_events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	ring = kmap_atomic(ctx->ring_pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 	ring->head = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	kunmap_atomic(ring);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	flush_dcache_page(ctx->ring_pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	pr_debug("%li  h%u t%u\n", ret, head, tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 	mutex_unlock(&ctx->ring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 			    struct io_event __user *event, long *i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 	long ret = aio_read_events_ring(ctx, event + *i, nr - *i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 	if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 		*i += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	if (unlikely(atomic_read(&ctx->dead)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	if (!*i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 		*i = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 	return ret < 0 || *i >= min_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) static long read_events(struct kioctx *ctx, long min_nr, long nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 			struct io_event __user *event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 			ktime_t until)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	 * Note that aio_read_events() is being called as the conditional - i.e.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	 * we're calling it after prepare_to_wait() has set task state to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	 * TASK_INTERRUPTIBLE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	 * But aio_read_events() can block, and if it blocks it's going to flip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	 * the task state back to TASK_RUNNING.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 	 * This should be ok, provided it doesn't flip the state back to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 	 * TASK_RUNNING and return 0 too much - that causes us to spin. That
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	 * will only happen if the mutex_lock() call blocks, and we then find
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	 * the ringbuffer empty. So in practice we should be ok, but it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 	 * something to be aware of when touching this code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 	if (until == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 		aio_read_events(ctx, min_nr, nr, event, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 		wait_event_interruptible_hrtimeout(ctx->wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 				aio_read_events(ctx, min_nr, nr, event, &ret),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 				until);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) /* sys_io_setup:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301)  *	Create an aio_context capable of receiving at least nr_events.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302)  *	ctxp must not point to an aio_context that already exists, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303)  *	must be initialized to 0 prior to the call.  On successful
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)  *	creation of the aio_context, *ctxp is filled in with the resulting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)  *	handle.  May fail with -EINVAL if *ctxp is not initialized or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306)  *	if the specified nr_events exceeds internal limits.  May fail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)  *	with -EAGAIN if the specified nr_events exceeds the user's limit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308)  *	of available events.  May fail with -ENOMEM if insufficient kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309)  *	resources are available.  May fail with -EFAULT if an invalid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310)  *	pointer is passed for ctxp.  Will fail with -ENOSYS if not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)  *	implemented.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	struct kioctx *ioctx = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 	unsigned long ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 	long ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 	ret = get_user(ctx, ctxp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 	if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 	ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	if (unlikely(ctx || nr_events == 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 		pr_debug("EINVAL: ctx %lu nr_events %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 		         ctx, nr_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 	ioctx = ioctx_alloc(nr_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 	ret = PTR_ERR(ioctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	if (!IS_ERR(ioctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 		ret = put_user(ioctx->user_id, ctxp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 			kill_ioctx(current->mm, ioctx, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 		percpu_ref_put(&ioctx->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) }
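
/*
 * Illustrative userspace sketch (not part of fs/aio.c): creating and
 * destroying an AIO context with the raw syscalls documented above.  The
 * queue depth of 128 is an arbitrary example value, and error handling is
 * kept minimal.
 */
#if 0
#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	aio_context_t ctx = 0;	/* must be zero before io_setup() */

	if (syscall(__NR_io_setup, 128, &ctx) < 0) {
		perror("io_setup");	/* e.g. EAGAIN past fs.aio-max-nr */
		return 1;
	}

	/* ... submit with __NR_io_submit, reap with __NR_io_getevents ... */

	if (syscall(__NR_io_destroy, ctx) < 0)
		perror("io_destroy");
	return 0;
}
#endif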
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 	struct kioctx *ioctx = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 	unsigned long ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 	long ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 	ret = get_user(ctx, ctx32p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 	if (unlikely(ctx || nr_events == 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 		pr_debug("EINVAL: ctx %lu nr_events %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 		         ctx, nr_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 	ioctx = ioctx_alloc(nr_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 	ret = PTR_ERR(ioctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 	if (!IS_ERR(ioctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 		/* truncating is ok because it's a user address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 		ret = put_user((u32)ioctx->user_id, ctx32p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 			kill_ioctx(current->mm, ioctx, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 		percpu_ref_put(&ioctx->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) /* sys_io_destroy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377)  *	Destroy the aio_context specified.  May cancel any outstanding 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378)  *	AIOs and block on completion.  Will fail with -ENOSYS if not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379)  *	implemented.  May fail with -EINVAL if the context pointed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380)  *	is invalid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 	struct kioctx *ioctx = lookup_ioctx(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 	if (likely(NULL != ioctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 		struct ctx_rq_wait wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 		init_completion(&wait.comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 		atomic_set(&wait.count, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 		/* Pass the wait structure to kill_ioctx(), where ctx->rq_wait can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 		 * be set in a thread-safe way. If we tried to set it here we would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 		 * have a race condition if two io_destroy() calls ran simultaneously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 		ret = kill_ioctx(current->mm, ioctx, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 		percpu_ref_put(&ioctx->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 		/* Wait until all IO for the context is done. Otherwise the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 		 * kernel may keep using user-space buffers even if the user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 		 * thinks the context is destroyed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 		if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 			wait_for_completion(&wait.comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 	pr_debug("EINVAL: invalid context id\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) static void aio_remove_iocb(struct aio_kiocb *iocb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	struct kioctx *ctx = iocb->ki_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 	spin_lock_irqsave(&ctx->ctx_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 	list_del(&iocb->ki_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 	if (!list_empty_careful(&iocb->ki_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 		aio_remove_iocb(iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 	if (kiocb->ki_flags & IOCB_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 		struct inode *inode = file_inode(kiocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 		 * Tell lockdep we inherited freeze protection from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 		 * submission thread.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 		if (S_ISREG(inode->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 		file_end_write(kiocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 	iocb->ki_res.res = res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 	iocb->ki_res.res2 = res2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 	iocb_put(iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 	req->ki_complete = aio_complete_rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 	req->private = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 	req->ki_pos = iocb->aio_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 	req->ki_flags = iocb_flags(req->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 	if (iocb->aio_flags & IOCB_FLAG_RESFD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 		req->ki_flags |= IOCB_EVENTFD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 	req->ki_hint = ki_hint_validate(file_write_hint(req->ki_filp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 	if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 		 * If the IOCB_FLAG_IOPRIO flag of aio_flags is set, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 		 * aio_reqprio is interpreted as an I/O scheduling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 		 * class and priority.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 		ret = ioprio_check_cap(iocb->aio_reqprio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 		if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 			pr_debug("aio ioprio check cap error: %d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 		req->ki_ioprio = iocb->aio_reqprio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 		req->ki_ioprio = get_current_ioprio();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 	ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 	if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 	req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 		struct iovec **iovec, bool vectored, bool compat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 		struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 	size_t len = iocb->aio_nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 	if (!vectored) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 		ssize_t ret = import_single_range(rw, buf, len, *iovec, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 		*iovec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 	return __import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter, compat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	switch (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 	case -EIOCBQUEUED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 	case -ERESTARTSYS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 	case -ERESTARTNOINTR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 	case -ERESTARTNOHAND:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	case -ERESTART_RESTARTBLOCK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 		 * There's no easy way to restart the syscall since other AIOs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 		 * may already be running. Just fail this IO with EINTR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 		ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 		fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 		req->ki_complete(req, ret, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) static int aio_read(struct kiocb *req, const struct iocb *iocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 			bool vectored, bool compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 	struct iov_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 	ret = aio_prep_rw(req, iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 	file = req->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	if (unlikely(!(file->f_mode & FMODE_READ)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 	ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 	if (unlikely(!file->f_op->read_iter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 	ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 		aio_rw_done(req, call_read_iter(file, req, &iter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 	kfree(iovec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) static int aio_write(struct kiocb *req, const struct iocb *iocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 			 bool vectored, bool compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 	struct iov_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 	ret = aio_prep_rw(req, iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 	file = req->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 	if (unlikely(!(file->f_mode & FMODE_WRITE)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 	if (unlikely(!file->f_op->write_iter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 	ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 	if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 		 * Open-code file_start_write here to grab freeze protection,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 		 * which will be released by another thread in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 		 * aio_complete_rw().  Fool lockdep by telling it the lock got
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 		 * released so that it doesn't complain about the held lock when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 		 * we return to userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 		if (S_ISREG(file_inode(file)->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 			sb_start_write(file_inode(file)->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 		req->ki_flags |= IOCB_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 		aio_rw_done(req, call_write_iter(file, req, &iter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 	kfree(iovec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) }
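/*
 * For reference, a rough sketch of the release side of the freeze
 * protection taken in aio_write() above (an illustrative excerpt, not a
 * second implementation; the authoritative code is aio_complete_rw()
 * earlier in this file):
 *
 *	if (kiocb->ki_flags & IOCB_WRITE) {
 *		struct inode *inode = file_inode(kiocb->ki_filp);
 *
 *		if (S_ISREG(inode->i_mode))
 *			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
 *		file_end_write(kiocb->ki_filp);
 *	}
 *
 * i.e. lockdep is told the freeze lock was re-acquired by the completing
 * thread before file_end_write() finally drops it.
 */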
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) static void aio_fsync_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 	struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 	const struct cred *old_cred = override_creds(iocb->fsync.creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 	iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 	revert_creds(old_cred);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 	put_cred(iocb->fsync.creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 	iocb_put(iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 		     bool datasync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 	if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 			iocb->aio_rw_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 	if (unlikely(!req->file->f_op->fsync))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 	req->creds = prepare_creds();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 	if (!req->creds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 	req->datasync = datasync;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	INIT_WORK(&req->work, aio_fsync_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 	schedule_work(&req->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) static void aio_poll_put_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 	iocb_put(iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626)  * Safely lock the waitqueue which the request is on, synchronizing with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)  * case where the ->poll() provider decides to free its waitqueue early.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629)  * Returns true on success, meaning that req->head->lock was locked, req->wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630)  * is on req->head, and an RCU read lock was taken.  Returns false if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631)  * request was already removed from its waitqueue (which might no longer exist).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) static bool poll_iocb_lock_wq(struct poll_iocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 	wait_queue_head_t *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 	 * While we hold the waitqueue lock and the waitqueue is nonempty,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 	 * wake_up_pollfree() will wait for us.  However, taking the waitqueue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 	 * lock in the first place can race with the waitqueue being freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 	 * We solve this as eventpoll does: by taking advantage of the fact that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 	 * all users of wake_up_pollfree() will RCU-delay the actual free.  If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 	 * we enter rcu_read_lock() and see that the pointer to the queue is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 	 * non-NULL, we can then lock it without the memory being freed out from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 	 * under us, then check whether the request is still on the queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 	 * case the caller deletes the entry from the queue, leaving it empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 	 * In that case, only RCU prevents the queue memory from being freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 	head = smp_load_acquire(&req->head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 	if (head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 		spin_lock(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 		if (!list_empty(&req->wait.entry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 		spin_unlock(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) static void poll_iocb_unlock_wq(struct poll_iocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 	spin_unlock(&req->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) }
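/*
 * A minimal sketch of the intended lock/unlock pairing (see
 * aio_poll_complete_work(), aio_poll_cancel() and aio_poll() below for the
 * real users):
 *
 *	if (poll_iocb_lock_wq(req)) {
 *		... req->wait is still queued and req->head->lock is held,
 *		... so the waitqueue may be touched safely here
 *		poll_iocb_unlock_wq(req);
 *	} else {
 *		... POLLFREE has already detached the request; only the
 *		... iocb itself may be touched
 *	}
 */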
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) static void aio_poll_complete_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 	struct poll_table_struct pt = { ._key = req->events };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 	struct kioctx *ctx = iocb->ki_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 	__poll_t mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 	if (!READ_ONCE(req->cancelled))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 		mask = vfs_poll(req->file, &pt) & req->events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 	 * Note that ->ki_cancel callers also delete iocb from active_reqs after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 	 * calling ->ki_cancel.  We need the ctx_lock roundtrip here to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 	 * synchronize with them.  In the cancellation case the list_del_init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 	 * itself is not actually needed, but it is harmless, so we keep it to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 	 * avoid further branches in the fast path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 	spin_lock_irq(&ctx->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 	if (poll_iocb_lock_wq(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 		if (!mask && !READ_ONCE(req->cancelled)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 			 * The request isn't actually ready to be completed yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 			 * Reschedule completion if another wakeup came in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 			if (req->work_need_resched) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 				schedule_work(&req->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 				req->work_need_resched = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 				req->work_scheduled = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 			poll_iocb_unlock_wq(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 			spin_unlock_irq(&ctx->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 		list_del_init(&req->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 		poll_iocb_unlock_wq(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 	} /* else, POLLFREE has freed the waitqueue, so we must complete */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 	list_del_init(&iocb->ki_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 	iocb->ki_res.res = mangle_poll(mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 	spin_unlock_irq(&ctx->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 	iocb_put(iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) /* assumes we are called with irqs disabled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) static int aio_poll_cancel(struct kiocb *iocb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 	struct poll_iocb *req = &aiocb->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 	if (poll_iocb_lock_wq(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 		WRITE_ONCE(req->cancelled, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 		if (!req->work_scheduled) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 			schedule_work(&aiocb->poll.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 			req->work_scheduled = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 		poll_iocb_unlock_wq(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 	} /* else, the request was force-cancelled by POLLFREE already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 		void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 	__poll_t mask = key_to_poll(key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 	/* for instances that support it, check for an event match first: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 	if (mask && !(mask & req->events))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 	 * Complete the request inline if possible.  This requires that three
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 	 * conditions be met:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 	 *   1. An event mask must have been passed.  If a plain wakeup was done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 	 *	instead, then mask == 0 and we have to call vfs_poll() to get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 	 *	the events, so inline completion isn't possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 	 *   2. The completion work must not have already been scheduled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 	 *   3. ctx_lock must not be busy.  We have to use trylock because we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 	 *	already hold the waitqueue lock, so this inverts the normal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	 *	locking order.  Use irqsave/irqrestore because not all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 	 *	filesystems (e.g. fuse) call this function with IRQs disabled,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 	 *	yet IRQs have to be disabled before ctx_lock is obtained.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 	if (mask && !req->work_scheduled &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 	    spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 		struct kioctx *ctx = iocb->ki_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) 		list_del_init(&req->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 		list_del(&iocb->ki_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 		iocb->ki_res.res = mangle_poll(mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 		if (iocb->ki_eventfd && eventfd_signal_count()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 			iocb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 			INIT_WORK(&req->work, aio_poll_put_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 			schedule_work(&req->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) 		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 		if (iocb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 			iocb_put(iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 		 * Schedule the completion work if needed.  If it was already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 		 * scheduled, record that another wakeup came in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 		 * Don't remove the request from the waitqueue here, as it might
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 		 * not actually be complete yet (we won't know until vfs_poll()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 		 * is called), and we must not miss any wakeups.  POLLFREE is an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 		 * exception to this; see below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 		if (req->work_scheduled) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 			req->work_need_resched = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 			schedule_work(&req->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 			req->work_scheduled = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 		 * If the waitqueue is being freed early but we can't complete
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 		 * the request inline, we have to tear down the request as best
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 		 * we can.  That means immediately removing the request from its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 		 * waitqueue and preventing all further accesses to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 		 * waitqueue via the request.  We also need to schedule the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 		 * completion work (done above), and we mark the request as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 		 * cancelled to potentially skip an unneeded call to ->poll().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 		if (mask & POLLFREE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 			WRITE_ONCE(req->cancelled, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 			list_del_init(&req->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 			 * Careful: this *must* be the last step, since as soon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 			 * as req->head is NULL'ed out, the request can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 			 * completed and freed, since aio_poll_complete_work()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 			 * will no longer need to take the waitqueue lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 			smp_store_release(&req->head, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) struct aio_poll_table {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 	struct poll_table_struct	pt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 	struct aio_kiocb		*iocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 	bool				queued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 	int				error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 		struct poll_table_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 	struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 	/* multiple wait queues per file are not supported */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 	if (unlikely(pt->queued)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 		pt->error = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 	pt->queued = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 	pt->error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 	pt->iocb->poll.head = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 	add_wait_queue(head, &pt->iocb->poll.wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 	struct kioctx *ctx = aiocb->ki_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 	struct poll_iocb *req = &aiocb->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 	struct aio_poll_table apt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 	bool cancel = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 	__poll_t mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 	/* reject any unknown events outside the normal event mask. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 	if ((u16)iocb->aio_buf != iocb->aio_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 	/* reject fields that are not defined for poll */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 	if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 	INIT_WORK(&req->work, aio_poll_complete_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 	req->head = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 	req->cancelled = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 	req->work_scheduled = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 	req->work_need_resched = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 	apt.pt._qproc = aio_poll_queue_proc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	apt.pt._key = req->events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 	apt.iocb = aiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 	apt.queued = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 	apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 	/* initialize the list so that we can do list_empty checks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 	INIT_LIST_HEAD(&req->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 	mask = vfs_poll(req->file, &apt.pt) & req->events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 	spin_lock_irq(&ctx->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 	if (likely(apt.queued)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 		bool on_queue = poll_iocb_lock_wq(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 		if (!on_queue || req->work_scheduled) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 			 * aio_poll_wake() already either scheduled the async
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 			 * completion work, or completed the request inline.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 			if (apt.error) /* unsupported case: multiple queues */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 				cancel = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 			apt.error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 			mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 		if (mask || apt.error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 			/* Steal to complete synchronously. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 			list_del_init(&req->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 		} else if (cancel) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 			/* Cancel if possible (may be too late though). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 			WRITE_ONCE(req->cancelled, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) 		} else if (on_queue) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 			 * Actually waiting for an event, so add the request to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) 			 * active_reqs so that it can be cancelled if needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 			list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 			aiocb->ki_cancel = aio_poll_cancel;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 		if (on_queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 			poll_iocb_unlock_wq(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 	if (mask) { /* no async, we'd stolen it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 		aiocb->ki_res.res = mangle_poll(mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 		apt.error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 	spin_unlock_irq(&ctx->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) 	if (mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 		iocb_put(aiocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 	return apt.error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) }
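/*
 * Userspace sketch (illustrative only, using the raw syscall ABI and
 * struct iocb from <linux/aio_abi.h>; "ctx" and "fd" are assumed to be an
 * initialised aio context and an open descriptor): for IOCB_CMD_POLL the
 * requested event mask travels in aio_buf, and the resulting mask comes
 * back in the io_event's res field.
 *
 *	struct iocb cb = { 0 }, *cbs[1] = { &cb };
 *
 *	cb.aio_fildes = fd;
 *	cb.aio_lio_opcode = IOCB_CMD_POLL;
 *	cb.aio_buf = POLLIN;
 *	syscall(__NR_io_submit, ctx, 1, cbs);
 */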
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 			   struct iocb __user *user_iocb, struct aio_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 			   bool compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 	req->ki_filp = fget(iocb->aio_fildes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 	if (unlikely(!req->ki_filp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 		struct eventfd_ctx *eventfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, grab a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 		 * reference to the eventfd context now.  The file descriptor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 		 * must be an eventfd() fd, and will be signaled for each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 		 * completed event using the eventfd_signal() function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 		eventfd = eventfd_ctx_fdget(iocb->aio_resfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 		if (IS_ERR(eventfd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 			return PTR_ERR(eventfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 		req->ki_eventfd = eventfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 	}
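	/*
	 * Userspace sketch (illustrative only): completion notification
	 * through an eventfd is requested per iocb, roughly as
	 *
	 *	int efd = eventfd(0, 0);
	 *
	 *	cb.aio_flags |= IOCB_FLAG_RESFD;
	 *	cb.aio_resfd = efd;
	 *
	 * after which every completed event also adds one to the eventfd
	 * counter via eventfd_signal().
	 */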
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 	if (unlikely(put_user(KIOCB_KEY, &user_iocb->aio_key))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 		pr_debug("EFAULT: aio_key\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) 	req->ki_res.obj = (u64)(unsigned long)user_iocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) 	req->ki_res.data = iocb->aio_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) 	req->ki_res.res = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) 	req->ki_res.res2 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 	switch (iocb->aio_lio_opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 	case IOCB_CMD_PREAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 		return aio_read(&req->rw, iocb, false, compat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 	case IOCB_CMD_PWRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 		return aio_write(&req->rw, iocb, false, compat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 	case IOCB_CMD_PREADV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 		return aio_read(&req->rw, iocb, true, compat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 	case IOCB_CMD_PWRITEV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 		return aio_write(&req->rw, iocb, true, compat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 	case IOCB_CMD_FSYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 		return aio_fsync(&req->fsync, iocb, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 	case IOCB_CMD_FDSYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 		return aio_fsync(&req->fsync, iocb, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 	case IOCB_CMD_POLL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 		return aio_poll(req, iocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 		pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 			 bool compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 	struct aio_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 	struct iocb iocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 	if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) 	/* enforce forwards compatibility on users */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 	if (unlikely(iocb.aio_reserved2)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 		pr_debug("EINVAL: reserved field set\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 	/* prevent overflows */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) 	if (unlikely(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 	    (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 	    (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 	    ((ssize_t)iocb.aio_nbytes < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	   )) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 		pr_debug("EINVAL: overflow check\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 	req = aio_get_req(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 	if (unlikely(!req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 	err = __io_submit_one(ctx, &iocb, user_iocb, req, compat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 	/* Done with the synchronous reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 	iocb_put(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 	 * If err is 0, we have either done aio_complete() ourselves or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 	 * arranged for that to be done asynchronously.  Anything non-zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 	 * means that we need to destroy req ourselves.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 	if (unlikely(err)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 		iocb_destroy(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 		put_reqs_available(ctx, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) /* sys_io_submit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017)  *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018)  *	the number of iocbs queued.  May return -EINVAL if the aio_context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019)  *	specified by ctx_id is invalid, if nr is < 0, if the iocb at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020)  *	*iocbpp[0] is not properly initialized, or if the operation specified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021)  *	is invalid for the file descriptor in the iocb.  May fail with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022)  *	-EFAULT if any of the data structures point to invalid data.  May
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023)  *	fail with -EBADF if the file descriptor specified in the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024)  *	iocb is invalid.  May fail with -EAGAIN if insufficient resources
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025)  *	are available to queue any iocbs.  Will return 0 if nr is 0.  Will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026)  *	fail with -ENOSYS if not implemented.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027)  */
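/*
 * A userspace sketch (illustrative only; most programs go through the
 * libaio wrappers rather than the raw syscalls).  "fd" and "buf" are
 * assumed to be an open file descriptor and a suitably sized buffer, and
 * struct iocb comes from <linux/aio_abi.h>:
 *
 *	aio_context_t ctx = 0;
 *	struct iocb cb = { 0 }, *cbs[1] = { &cb };
 *
 *	syscall(__NR_io_setup, 128, &ctx);
 *	cb.aio_fildes = fd;
 *	cb.aio_lio_opcode = IOCB_CMD_PREAD;
 *	cb.aio_buf = (__u64)(unsigned long)buf;
 *	cb.aio_nbytes = 4096;
 *	syscall(__NR_io_submit, ctx, 1, cbs);
 */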
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 		struct iocb __user * __user *, iocbpp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 	struct kioctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 	long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 	int i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 	struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) 	if (unlikely(nr < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 	ctx = lookup_ioctx(ctx_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 	if (unlikely(!ctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 		pr_debug("EINVAL: invalid context id\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 	if (nr > ctx->nr_events)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 		nr = ctx->nr_events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 	if (nr > AIO_PLUG_THRESHOLD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 		blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 	for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 		struct iocb __user *user_iocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 		if (unlikely(get_user(user_iocb, iocbpp + i))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 			ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 		ret = io_submit_one(ctx, user_iocb, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 	if (nr > AIO_PLUG_THRESHOLD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 		blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 	percpu_ref_put(&ctx->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) 	return i ? i : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) 		       int, nr, compat_uptr_t __user *, iocbpp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 	struct kioctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 	int i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 	if (unlikely(nr < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 	ctx = lookup_ioctx(ctx_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 	if (unlikely(!ctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 		pr_debug("EINVAL: invalid context id\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 	if (nr > ctx->nr_events)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 		nr = ctx->nr_events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 	if (nr > AIO_PLUG_THRESHOLD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 		blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 	for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 		compat_uptr_t user_iocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 		if (unlikely(get_user(user_iocb, iocbpp + i))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) 			ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 		ret = io_submit_one(ctx, compat_ptr(user_iocb), true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) 	if (nr > AIO_PLUG_THRESHOLD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) 		blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 	percpu_ref_put(&ctx->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) 	return i ? i : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) /* sys_io_cancel:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113)  *	Attempts to cancel an iocb previously passed to io_submit().  In
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114)  *	this implementation the result argument is not written to: the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115)  *	completion event is always delivered through the ring buffer, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116)  *	a successfully initiated cancellation returns -EINPROGRESS.  May
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117)  *	fail with -EFAULT if the iocb pointed to is invalid.  May fail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118)  *	with -EINVAL if the aio_context specified by ctx_id is invalid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119)  *	or if no matching in-flight iocb is found.  Will fail with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120)  *	-ENOSYS if not implemented.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121)  */
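/*
 * Userspace sketch (illustrative only): for a request that supports
 * cancellation, such as a still-pending IOCB_CMD_POLL iocb, cancelling
 * looks roughly like
 *
 *	struct io_event unused;
 *
 *	int r = syscall(__NR_io_cancel, ctx, &cb, &unused);
 *
 * where r is -1 with errno set to EINPROGRESS once the cancellation has
 * been queued, and the completion event still arrives via io_getevents().
 */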
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 		struct io_event __user *, result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 	struct kioctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 	struct aio_kiocb *kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 	int ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 	u32 key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 	u64 obj = (u64)(unsigned long)iocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 	if (unlikely(get_user(key, &iocb->aio_key)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 	if (unlikely(key != KIOCB_KEY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) 	ctx = lookup_ioctx(ctx_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) 	if (unlikely(!ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 	spin_lock_irq(&ctx->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 	/* TODO: use a hash or array, this sucks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 		if (kiocb->ki_res.obj == obj) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 			ret = kiocb->ki_cancel(&kiocb->rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 			list_del_init(&kiocb->ki_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 	spin_unlock_irq(&ctx->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 	if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 		 * The result argument is no longer used - the io_event is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 		 * always delivered via the ring buffer. -EINPROGRESS indicates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 		 * cancellation is in progress.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) 		ret = -EINPROGRESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) 	percpu_ref_put(&ctx->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) static long do_io_getevents(aio_context_t ctx_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) 		long min_nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) 		long nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) 		struct io_event __user *events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 		struct timespec64 *ts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) 	ktime_t until = ts ? timespec64_to_ktime(*ts) : KTIME_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 	struct kioctx *ioctx = lookup_ioctx(ctx_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) 	long ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) 	if (likely(ioctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 		if (likely(min_nr <= nr && min_nr >= 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 			ret = read_events(ioctx, min_nr, nr, events, until);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 		percpu_ref_put(&ioctx->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) /* io_getevents:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185)  *	Attempts to read at least min_nr events and up to nr events from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186)  *	the completion queue for the aio_context specified by ctx_id. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187)  *	it succeeds, the number of read events is returned. May fail with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188)  *	-EINVAL if ctx_id is invalid, or if min_nr, nr, or the timeout is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189)  *	out of range.  May fail with -EFAULT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190)  *	if any of the memory specified is invalid.  May return 0 or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191)  *	< min_nr if the timeout specified by timeout has elapsed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192)  *	before sufficient events are available, where timeout == NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193)  *	specifies an infinite timeout. Note that the timeout pointed to by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194)  *	timeout is relative.  Will fail with -ENOSYS if not implemented.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195)  */
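/*
 * Continuing the submission sketch above (illustrative only, assuming a
 * 64-bit userspace so that struct timespec matches __kernel_timespec):
 * reaping one completion with a one-second relative timeout looks roughly
 * like
 *
 *	struct io_event ev[1];
 *	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *
 *	int got = syscall(__NR_io_getevents, ctx, 1, 1, ev, &ts);
 *
 * where ev[0].res carries the byte count or negative error of the
 * completed iocb and ev[0].data echoes the aio_data value set at submit
 * time.
 */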
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) #ifdef CONFIG_64BIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 		long, min_nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 		long, nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 		struct io_event __user *, events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 		struct __kernel_timespec __user *, timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 	struct timespec64	ts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) 	int			ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) 	if (timeout && unlikely(get_timespec64(&ts, timeout)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) 	if (!ret && signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) 		ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) struct __aio_sigset {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) 	const sigset_t __user	*sigmask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) 	size_t		sigsetsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) };
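/*
 * A minimal sketch of how userspace is expected to fill this wrapper for
 * io_pgetevents() (illustrative only).  Note that sigsetsize must match
 * the kernel's sigset size (_NSIG / 8, i.e. 8 bytes on arm64), not the
 * much larger glibc sigset_t, or set_user_sigmask() rejects it with
 * -EINVAL:
 *
 *	sigset_t mask;
 *	struct __aio_sigset s = { &mask, 8 };
 *
 *	sigemptyset(&mask);
 *	sigaddset(&mask, SIGINT);
 *	syscall(__NR_io_pgetevents, ctx, 1, 1, ev, &ts, &s);
 */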
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) SYSCALL_DEFINE6(io_pgetevents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) 		aio_context_t, ctx_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) 		long, min_nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) 		long, nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) 		struct io_event __user *, events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) 		struct __kernel_timespec __user *, timeout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) 		const struct __aio_sigset __user *, usig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) 	struct __aio_sigset	ksig = { NULL, };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) 	struct timespec64	ts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 	bool interrupted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 	if (timeout && unlikely(get_timespec64(&ts, timeout)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) 	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 	ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 	interrupted = signal_pending(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 	restore_saved_sigmask_unless(interrupted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 	if (interrupted && !ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 		ret = -ERESTARTNOHAND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) SYSCALL_DEFINE6(io_pgetevents_time32,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 		aio_context_t, ctx_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 		long, min_nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 		long, nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 		struct io_event __user *, events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 		struct old_timespec32 __user *, timeout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 		const struct __aio_sigset __user *, usig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 	struct __aio_sigset	ksig = { NULL, };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) 	struct timespec64	ts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) 	bool interrupted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 	if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 	ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 	interrupted = signal_pending(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 	restore_saved_sigmask_unless(interrupted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 	if (interrupted && !ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 		ret = -ERESTARTNOHAND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) #if defined(CONFIG_COMPAT_32BIT_TIME)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) 		__s32, min_nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) 		__s32, nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) 		struct io_event __user *, events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) 		struct old_timespec32 __user *, timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 	struct timespec64 t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 	if (timeout && get_old_timespec32(&t, timeout))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 	if (!ret && signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 		ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) struct __compat_aio_sigset {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 	compat_uptr_t		sigmask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 	compat_size_t		sigsetsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) #if defined(CONFIG_COMPAT_32BIT_TIME)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) COMPAT_SYSCALL_DEFINE6(io_pgetevents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) 		compat_aio_context_t, ctx_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 		compat_long_t, min_nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 		compat_long_t, nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 		struct io_event __user *, events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 		struct old_timespec32 __user *, timeout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 		const struct __compat_aio_sigset __user *, usig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 	struct __compat_aio_sigset ksig = { 0, };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 	struct timespec64 t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 	bool interrupted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 	if (timeout && get_old_timespec32(&t, timeout))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) 	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) 	ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) 	interrupted = signal_pending(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 	restore_saved_sigmask_unless(interrupted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) 	if (interrupted && !ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) 		ret = -ERESTARTNOHAND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 		compat_aio_context_t, ctx_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) 		compat_long_t, min_nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) 		compat_long_t, nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 		struct io_event __user *, events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 		struct __kernel_timespec __user *, timeout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 		const struct __compat_aio_sigset __user *, usig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 	struct __compat_aio_sigset ksig = { 0, };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 	struct timespec64 t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 	bool interrupted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) 	if (timeout && get_timespec64(&t, timeout))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 	ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 	interrupted = signal_pending(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 	restore_saved_sigmask_unless(interrupted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 	if (interrupted && !ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) 		ret = -ERESTARTNOHAND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) #endif
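
For reference, here is a minimal userspace sketch of the calling convention these handlers serve, assuming a 64-bit (non-compat) process, a libc whose <sys/syscall.h> already defines SYS_io_pgetevents, and an AIO context previously created with io_setup(2). The names wait_for_events and struct aio_sigset are illustrative only, and error handling is omitted:

#include <signal.h>
#include <stddef.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>

/* Userspace mirror of the kernel's sigmask argument (struct __aio_sigset). */
struct aio_sigset {
	const sigset_t	*sigmask;
	size_t		sigsetsize;
};

static long wait_for_events(aio_context_t ctx, struct io_event *events,
			    long nr, struct timespec *timeout,
			    const sigset_t *mask)
{
	/*
	 * The kernel checks sigsetsize against its own sigset_t, which covers
	 * 64 signals (8 bytes) on arm64 and x86-64, not against glibc's much
	 * larger sigset_t.
	 */
	struct aio_sigset sig = { mask, 8 };

	/* Wait for at least one completion, or until timeout/signal. */
	return syscall(SYS_io_pgetevents, ctx, 1L, nr, events, timeout,
		       mask ? &sig : NULL);
}
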