Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  * fs/direct-io.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  * Copyright (C) 2002, Linus Torvalds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  * O_DIRECT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9)  * 04Jul2002	Andrew Morton
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10)  *		Initial version
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11)  * 11Sep2002	janetinc@us.ibm.com
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12)  * 		added readv/writev support.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13)  * 29Oct2002	Andrew Morton
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14)  *		rewrote bio_add_page() support.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15)  * 30Oct2002	pbadari@us.ibm.com
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16)  *		added support for non-aligned IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17)  * 06Nov2002	pbadari@us.ibm.com
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18)  *		added asynchronous IO support.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19)  * 21Jul2003	nathans@sgi.com
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20)  *		added IO completion notifier.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) #include <linux/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) #include <linux/fscrypt.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) #include <linux/highmem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32) #include <linux/task_io_accounting_ops.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33) #include <linux/bio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34) #include <linux/wait.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35) #include <linux/err.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36) #include <linux/blkdev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37) #include <linux/buffer_head.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38) #include <linux/rwsem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39) #include <linux/uio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40) #include <linux/atomic.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41) #include <linux/prefetch.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) #include "internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46)  * How many user pages to map in one call to get_user_pages().  This determines
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47)  * the size of a structure in the slab cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49) #define DIO_PAGES	64
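
/*
 * Illustrative sizing, assuming a 64-bit build with 4 KiB pages (values
 * chosen for illustration, not asserted by this file): the pages[DIO_PAGES]
 * array in struct dio below occupies 64 * sizeof(struct page *) = 512 bytes,
 * and one refill batch covers up to 64 * 4 KiB = 256 KiB of user memory.
 */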
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52)  * Flags for dio_complete()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54) #define DIO_COMPLETE_ASYNC		0x01	/* This is async IO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55) #define DIO_COMPLETE_INVALIDATE		0x02	/* Can invalidate pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58)  * This code generally works in units of "dio_blocks".  A dio_block is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59)  * somewhere between the hard sector size and the filesystem block size.  It
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60)  * is determined on a per-invocation basis.   When talking to the filesystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61)  * we need to convert dio_blocks to fs_blocks by scaling the dio_block quantity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62)  * down by dio->blkfactor.  Similarly, fs-blocksize quantities are converted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63)  * to dio_block quantities by shifting left by blkfactor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65)  * If blkfactor is zero then the user's request was aligned to the filesystem's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66)  * blocksize.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67)  */
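
/*
 * Worked example, assuming 4096-byte filesystem blocks and 512-byte
 * dio_blocks (numbers chosen for illustration only): blkfactor = 12 - 9 = 3,
 * and the conversions above become
 *
 *	fs_block  = dio_block >> 3;	// dio_block 24 maps to fs_block 3
 *	dio_block = fs_block << 3;	// fs_block 3 starts at dio_block 24
 *
 * When blkfactor == 0 the two units coincide and no scaling is needed.
 */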
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69) /* dio_state only used in the submission path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) struct dio_submit {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) 	struct bio *bio;		/* bio under assembly */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) 	unsigned blkbits;		/* doesn't change */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) 	unsigned blkfactor;		/* When we're using an alignment which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) 					   is finer than the filesystem's soft
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) 					   blocksize, this specifies how much
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) 					   finer.  blkfactor=2 means 1/4-block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) 					   alignment.  Does not change */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) 	unsigned start_zero_done;	/* flag: sub-blocksize zeroing has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) 					   been performed at the start of a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) 					   write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) 	int pages_in_io;		/* approximate total IO pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) 	sector_t block_in_file;		/* Current offset into the underlying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) 					   file in dio_block units. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) 	unsigned blocks_available;	/* At block_in_file.  changes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) 	int reap_counter;		/* rate limit reaping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) 	sector_t final_block_in_request;/* doesn't change */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) 	int boundary;			/* prev block is at a boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) 	get_block_t *get_block;		/* block mapping function */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) 	dio_submit_t *submit_io;	/* IO submission function */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) 	loff_t logical_offset_in_bio;	/* current first logical block in bio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) 	sector_t final_block_in_bio;	/* current final block in bio + 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) 	sector_t next_block_for_io;	/* next block to be put under IO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) 					   in dio_blocks units */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) 	 * Deferred addition of a page to the dio.  These variables are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) 	 * private to dio_send_cur_page(), submit_page_section() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) 	 * dio_bio_add_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) 	struct page *cur_page;		/* The page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 	unsigned cur_page_offset;	/* Offset into it, in bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) 	unsigned cur_page_len;		/* Nr of bytes at cur_page_offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 	sector_t cur_page_block;	/* Where it starts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 	loff_t cur_page_fs_offset;	/* Offset in file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 	struct iov_iter *iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) 	 * Page queue.  These variables belong to dio_refill_pages() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 	 * dio_get_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) 	unsigned head;			/* next page to process */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) 	unsigned tail;			/* last valid page + 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) 	size_t from, to;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) /* dio_state communicated between submission path and end_io */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) struct dio {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 	int flags;			/* doesn't change */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 	int op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) 	int op_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 	blk_qc_t bio_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 	struct gendisk *bio_disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) 	struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) 	loff_t i_size;			/* i_size when submitted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) 	dio_iodone_t *end_io;		/* IO completion function */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) 	void *private;			/* copy from map_bh.b_private */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) 	/* BIO completion state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 	spinlock_t bio_lock;		/* protects BIO fields below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) 	int page_errors;		/* errno from get_user_pages() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) 	int is_async;			/* is IO async ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) 	bool defer_completion;		/* defer AIO completion to workqueue? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) 	bool should_dirty;		/* if pages should be dirtied */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) 	int io_error;			/* IO error in completion path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) 	unsigned long refcount;		/* direct_io_worker() and bios */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) 	struct bio *bio_list;		/* singly linked via bi_private */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) 	struct task_struct *waiter;	/* waiting task (NULL if none) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 	/* AIO related stuff */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 	struct kiocb *iocb;		/* kiocb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) 	ssize_t result;                 /* IO result */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) 	 * pages[] (and any fields placed after it) are not zeroed out at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 	 * allocation time.  Don't add new fields after pages[] unless you
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) 	 * wish that they not be zeroed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 	 */
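	/*
	 * Note on why sharing storage in the union below is safe (an
	 * inference from how the fields are used, not a statement from the
	 * original comments): pages[] is only consumed while bios are being
	 * built and submitted, while complete_work is only initialized in
	 * dio_bio_end_aio() after the last bio has completed, so the two are
	 * never live at the same time.
	 */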
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 	union {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 		struct page *pages[DIO_PAGES];	/* page buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 		struct work_struct complete_work;/* deferred AIO completion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) } ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) static struct kmem_cache *dio_cache __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160)  * How many pages are in the queue?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) static inline unsigned dio_pages_present(struct dio_submit *sdio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) 	return sdio->tail - sdio->head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168)  * Go grab and pin some userspace pages.   Typically we'll get 64 at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) 	ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) 	ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) 				&sdio->from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) 	if (ret < 0 && sdio->blocks_available && (dio->op == REQ_OP_WRITE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) 		struct page *page = ZERO_PAGE(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 		 * A memory fault, but the filesystem has some outstanding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) 		 * mapped blocks.  We need to use those blocks up to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 		 * leaking stale data in the file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 		if (dio->page_errors == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) 			dio->page_errors = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 		get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 		dio->pages[0] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) 		sdio->head = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 		sdio->tail = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 		sdio->from = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 		sdio->to = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) 	if (ret >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) 		iov_iter_advance(sdio->iter, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) 		ret += sdio->from;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) 		sdio->head = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) 		sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 		sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) 	return ret;	
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) }
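
/*
 * Worked example for the bookkeeping above, assuming 4 KiB pages (numbers
 * are illustrative only): if iov_iter_get_pages() pins two pages and returns
 * 6144 bytes starting at sdio->from = 1024, then after "ret += sdio->from"
 * we have ret = 7168, so sdio->tail = 2 (two pages queued) and
 * sdio->to = ((7168 - 1) & 4095) + 1 = 3072, i.e. the valid data in the
 * last page ends at byte offset 3072.
 */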
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207)  * Get another userspace page.  Returns an ERR_PTR on error.  Pages are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208)  * buffered inside the dio so that we can call get_user_pages() against a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209)  * decent number of pages, less frequently.  This also provides nicer use of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210)  * the L1 cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) static inline struct page *dio_get_page(struct dio *dio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 					struct dio_submit *sdio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 	if (dio_pages_present(sdio) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 		ret = dio_refill_pages(dio, sdio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 			return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) 		BUG_ON(dio_pages_present(sdio) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 	return dio->pages[sdio->head];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227)  * dio_complete() - called when all DIO BIO I/O has been completed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229)  * This drops i_dio_count, lets interested parties know that a DIO operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230)  * has completed, and calculates the resulting return code for the operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232)  * It lets the filesystem know if it registered an interest earlier via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233)  * get_block.  Pass the private field of the map buffer_head so that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234)  * filesystems can use it to hold additional state between get_block calls and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235)  * dio_complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 	loff_t offset = dio->iocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 	ssize_t transferred = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 	 * AIO submission can race with bio completion to get here while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 	 * expecting to have the last io completed by bio completion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 	 * In that case -EIOCBQUEUED is in fact not an error we want
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 	 * to preserve through this call.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 	if (ret == -EIOCBQUEUED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) 	if (dio->result) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 		transferred = dio->result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 		/* Check for short read case */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 		if ((dio->op == REQ_OP_READ) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 		    ((offset + transferred) > dio->i_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 			transferred = dio->i_size - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 		/* ignore EFAULT if some IO has been done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 		if (unlikely(ret == -EFAULT) && transferred)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 			ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 	if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 		ret = dio->page_errors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 	if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) 		ret = dio->io_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 	if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 		ret = transferred;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 	if (dio->end_io) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 		// XXX: ki_pos??
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 		err = dio->end_io(dio->iocb, offset, ret, dio->private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 			ret = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 	 * Try again to invalidate clean pages which might have been cached by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 	 * non-direct readahead, or faulted in by get_user_pages() if the source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	 * of the write was an mmap'ed region of the file we're writing.  Either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 	 * this invalidation fails, tough, the write still worked...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 	 * And this page cache invalidation has to be after dio->end_io(), as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 	 * some filesystems convert unwritten extents to real allocations in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 	 * end_io() when necessary, otherwise a racing buffer read would cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 	 * zeros from unwritten extents.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 	if (flags & DIO_COMPLETE_INVALIDATE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 	    ret > 0 && dio->op == REQ_OP_WRITE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 	    dio->inode->i_mapping->nrpages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 		err = invalidate_inode_pages2_range(dio->inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 					offset >> PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 					(offset + ret - 1) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 			dio_warn_stale_pagecache(dio->iocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 	inode_dio_end(dio->inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 	if (flags & DIO_COMPLETE_ASYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 		 * generic_write_sync expects ki_pos to have been updated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 		 * already, but the submission path only does this for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 		 * synchronous I/O.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 		dio->iocb->ki_pos += transferred;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 		if (ret > 0 && dio->op == REQ_OP_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 			ret = generic_write_sync(dio->iocb, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 		dio->iocb->ki_complete(dio->iocb, ret, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 	kmem_cache_free(dio_cache, dio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) static void dio_aio_complete_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 	struct dio *dio = container_of(work, struct dio, complete_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 	dio_complete(dio, 0, DIO_COMPLETE_ASYNC | DIO_COMPLETE_INVALIDATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329)  * Asynchronous IO callback. 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) static void dio_bio_end_aio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 	struct dio *dio = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 	unsigned long remaining;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 	bool defer_completion = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 	/* cleanup the bio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 	dio_bio_complete(dio, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 	spin_lock_irqsave(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 	remaining = --dio->refcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 	if (remaining == 1 && dio->waiter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 		wake_up_process(dio->waiter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 	spin_unlock_irqrestore(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 	if (remaining == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 		 * Defer completion when defer_completion is set or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 		 * when the inode has pages mapped and this is AIO write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 		 * We need to invalidate those pages because there is a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 		 * chance they contain stale data in the case buffered IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 		 * went in between AIO submission and completion into the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 		 * same region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 		if (dio->result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 			defer_completion = dio->defer_completion ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 					   (dio->op == REQ_OP_WRITE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 					    dio->inode->i_mapping->nrpages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 		if (defer_completion) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 			INIT_WORK(&dio->complete_work, dio_aio_complete_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 			queue_work(dio->inode->i_sb->s_dio_done_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 				   &dio->complete_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 			dio_complete(dio, 0, DIO_COMPLETE_ASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371)  * The BIO completion handler simply queues the BIO up for the process-context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372)  * handler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374)  * During I/O bi_private points at the dio.  After I/O, bi_private is used to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375)  * implement a singly-linked list of completed BIOs, at dio->bio_list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) static void dio_bio_end_io(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 	struct dio *dio = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 	spin_lock_irqsave(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 	bio->bi_private = dio->bio_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) 	dio->bio_list = bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 	if (--dio->refcount == 1 && dio->waiter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) 		wake_up_process(dio->waiter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) 	spin_unlock_irqrestore(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 	      struct block_device *bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 	      sector_t first_sector, int nr_vecs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 	struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 	struct inode *inode = dio->inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 	 * bio_alloc() is guaranteed to return a bio when allowed to sleep and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 	 * we request a valid number of vectors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 	bio = bio_alloc(GFP_KERNEL, nr_vecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 	fscrypt_set_bio_crypt_ctx(bio, inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 				  sdio->cur_page_fs_offset >> inode->i_blkbits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 				  GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 	bio_set_dev(bio, bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 	bio->bi_iter.bi_sector = first_sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 	bio_set_op_attrs(bio, dio->op, dio->op_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 	if (dio->is_async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 		bio->bi_end_io = dio_bio_end_aio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 		bio->bi_end_io = dio_bio_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 	bio->bi_write_hint = dio->iocb->ki_hint;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 	sdio->bio = bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 	sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422)  * In the AIO read case we speculatively dirty the pages before starting IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423)  * During IO completion, any of these pages which happen to have been written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424)  * back will be redirtied by bio_check_pages_dirty().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426)  * bios hold a dio reference between submit_bio and ->end_io.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 	struct bio *bio = sdio->bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 	bio->bi_private = dio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 	spin_lock_irqsave(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 	dio->refcount++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) 	spin_unlock_irqrestore(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) 	if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 		bio_set_pages_dirty(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 	dio->bio_disk = bio->bi_disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 	if (sdio->submit_io) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 		sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 		dio->bio_cookie = BLK_QC_T_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 		dio->bio_cookie = submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 	sdio->bio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) 	sdio->boundary = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) 	sdio->logical_offset_in_bio = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456)  * Release any resources in case of a failure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 	while (sdio->head < sdio->tail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) 		put_page(dio->pages[sdio->head++]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465)  * Wait for the next BIO to complete.  Remove it and return it.  NULL is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466)  * returned once all BIOs have been completed.  This must only be called once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467)  * all bios have been issued so that dio->refcount can only decrease.  This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468)  * requires that the caller hold a reference on the dio.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) static struct bio *dio_await_one(struct dio *dio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 	struct bio *bio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 	spin_lock_irqsave(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 	 * Wait as long as the list is empty and there are bios in flight.  bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 	 * completion drops the count, maybe adds to the list, and wakes while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 	 * holding the bio_lock so we don't need set_current_state()'s barrier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) 	 * and can call it after testing our condition.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 	while (dio->refcount > 1 && dio->bio_list == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 		__set_current_state(TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 		dio->waiter = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 		spin_unlock_irqrestore(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 		if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 		    !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 			blk_io_schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 		/* wake up sets us TASK_RUNNING */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 		spin_lock_irqsave(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 		dio->waiter = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 	if (dio->bio_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 		bio = dio->bio_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 		dio->bio_list = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 	spin_unlock_irqrestore(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 	return bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503)  * Process one completed BIO.  No locks are held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 	blk_status_t err = bio->bi_status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 	bool should_dirty = dio->op == REQ_OP_READ && dio->should_dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 	if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 		if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 			dio->io_error = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 			dio->io_error = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) 	if (dio->is_async && should_dirty) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 		bio_check_pages_dirty(bio);	/* transfers ownership */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 		bio_release_pages(bio, should_dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 		bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527)  * Wait on and process all in-flight BIOs.  This must only be called once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528)  * all bios have been issued so that the refcount can only decrease.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529)  * This just waits for all bios to make it through dio_bio_complete.  IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530)  * errors are propagated through dio->io_error and should be propagated via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531)  * dio_complete().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) static void dio_await_completion(struct dio *dio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 	struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 		bio = dio_await_one(dio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 		if (bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 			dio_bio_complete(dio, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 	} while (bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544)  * A really large O_DIRECT read or write can generate a lot of BIOs.  So
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545)  * to keep the memory consumption sane we periodically reap any completed BIOs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546)  * during the BIO generation phase.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548)  * This also helps to limit the peak amount of pinned userspace memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 	if (sdio->reap_counter++ >= 64) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 		while (dio->bio_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 			unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 			struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 			int ret2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 			spin_lock_irqsave(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 			bio = dio->bio_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 			dio->bio_list = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) 			spin_unlock_irqrestore(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 			ret2 = blk_status_to_errno(dio_bio_complete(dio, bio));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 			if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 				ret = ret2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 		sdio->reap_counter = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574)  * Create a workqueue for deferred direct IO completions. We allocate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575)  * workqueue when it's first needed. This avoids creating a workqueue for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576)  * filesystems that don't need it and also allows us to create the workqueue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577)  * late enough so that we can include s_id in the name of the workqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) int sb_init_dio_done_wq(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 	struct workqueue_struct *old;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 	struct workqueue_struct *wq = alloc_workqueue("dio/%s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 						      WQ_MEM_RECLAIM, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 						      sb->s_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 	if (!wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 	 * This has to be atomic as more DIOs can race to create the workqueue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 	old = cmpxchg(&sb->s_dio_done_wq, NULL, wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 	/* Someone created workqueue before us? Free ours... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 	if (old)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 		destroy_workqueue(wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) static int dio_set_defer_completion(struct dio *dio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 	struct super_block *sb = dio->inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 	if (dio->defer_completion)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 	dio->defer_completion = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 	if (!sb->s_dio_done_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 		return sb_init_dio_done_wq(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610)  * Call into the fs to map some more disk blocks.  We record the current number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611)  * of available blocks at sdio->blocks_available.  These are in units of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612)  * fs blocksize, i_blocksize(inode).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614)  * The fs is allowed to map lots of blocks at once.  If it wants to do that,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615)  * it uses the passed inode-relative block number as the file offset, as usual.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617)  * get_block() is passed the number of i_blkbits-sized blocks which direct_io
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618)  * has remaining to do.  The fs should not map more than this number of blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620)  * If the fs has mapped a lot of blocks, it should populate bh->b_size to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621)  * indicate how much contiguous disk space has been made available at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622)  * bh->b_blocknr.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624)  * If *any* of the mapped blocks are new, then the fs must set buffer_new().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625)  * This isn't very efficient...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627)  * In the case of filesystem holes: the fs may return an arbitrarily-large
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628)  * hole by returning an appropriate value in b_size and by clearing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629)  * buffer_mapped().  However the direct-io code will only process holes one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630)  * block at a time - it will repeatedly call get_block() as it walks the hole.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 			   struct buffer_head *map_bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 	sector_t fs_startblk;	/* Into file, in filesystem-sized blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 	sector_t fs_endblk;	/* Into file, in filesystem-sized blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 	unsigned long fs_count;	/* Number of filesystem-sized blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 	int create;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 	unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 	loff_t i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 	 * If there was a memory error and we've overwritten all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 	 * mapped blocks then we can now return that memory error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 	ret = dio->page_errors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 	if (ret == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 		BUG_ON(sdio->block_in_file >= sdio->final_block_in_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 		fs_startblk = sdio->block_in_file >> sdio->blkfactor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 		fs_endblk = (sdio->final_block_in_request - 1) >>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 					sdio->blkfactor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 		fs_count = fs_endblk - fs_startblk + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 		map_bh->b_state = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 		map_bh->b_size = fs_count << i_blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 		 * For writes that could fill holes inside i_size on a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 		 * DIO_SKIP_HOLES filesystem we forbid block creations: only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 		 * overwrites are permitted. We will return early to the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 		 * once we see an unmapped buffer head returned, and the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 		 * will fall back to buffered I/O.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 		 * Otherwise the decision is left to the get_blocks method,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 		 * which may decide to handle it or also return an unmapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 		 * buffer head.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 		create = dio->op == REQ_OP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 		if (dio->flags & DIO_SKIP_HOLES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 			i_size = i_size_read(dio->inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 			if (i_size && fs_startblk <= (i_size - 1) >> i_blkbits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 				create = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 		ret = (*sdio->get_block)(dio->inode, fs_startblk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 						map_bh, create);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 		/* Store for completion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 		dio->private = map_bh->b_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 		if (ret == 0 && buffer_defer_completion(map_bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 			ret = dio_set_defer_completion(dio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) }
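
/*
 * Minimal sketch of the get_block() contract described above, for a
 * hypothetical filesystem (helper names such as example_lookup_extent() and
 * example_alloc_blocks() are placeholders, not real APIs):
 *
 *	static int example_get_block(struct inode *inode, sector_t iblock,
 *				     struct buffer_head *bh, int create)
 *	{
 *		// on entry bh->b_size holds the maximum span to map, in bytes
 *		unsigned long nr = bh->b_size >> inode->i_blkbits;
 *		sector_t phys = example_lookup_extent(inode, iblock, &nr);
 *
 *		if (!phys) {
 *			if (!create)
 *				return 0;	// left unmapped: a hole
 *			phys = example_alloc_blocks(inode, iblock, &nr);
 *			if (!phys)
 *				return -ENOSPC;
 *			set_buffer_new(bh);	// at least one new block
 *		}
 *		map_bh(bh, inode->i_sb, phys);	// sets mapped, b_bdev, b_blocknr
 *		bh->b_size = nr << inode->i_blkbits;	// contiguous span mapped
 *		return 0;
 *	}
 */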
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689)  * There is no bio.  Make one now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 		sector_t start_sector, struct buffer_head *map_bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 	sector_t sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 	int ret, nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 	ret = dio_bio_reap(dio, sdio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 	sector = start_sector << (sdio->blkbits - 9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) 	nr_pages = min(sdio->pages_in_io, BIO_MAX_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 	BUG_ON(nr_pages <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 	dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 	sdio->boundary = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710)  * Attempt to put the current chunk of 'cur_page' into the current BIO.  If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711)  * that was successful then update final_block_in_bio and take a ref against
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712)  * the just-added page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714)  * Return zero on success.  Non-zero means the caller needs to start a new BIO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) static inline int dio_bio_add_page(struct dio_submit *sdio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 	ret = bio_add_page(sdio->bio, sdio->cur_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 			sdio->cur_page_len, sdio->cur_page_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 	if (ret == sdio->cur_page_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 		 * Decrement count only, if we are done with this page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 		if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 			sdio->pages_in_io--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 		get_page(sdio->cur_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 		sdio->final_block_in_bio = sdio->cur_page_block +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 			(sdio->cur_page_len >> sdio->blkbits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 		ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 		
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739)  * Put cur_page under IO.  The section of cur_page which is described by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740)  * cur_page_offset,cur_page_len is put into a BIO.  The section of cur_page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741)  * starts on-disk at cur_page_block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743)  * We take a ref against the page here (on behalf of its presence in the bio).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745)  * The caller of this function is responsible for removing cur_page from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746)  * dio, and for dropping the refcount which came from that presence.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 		struct buffer_head *map_bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 	if (sdio->bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 		loff_t cur_offset = sdio->cur_page_fs_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 		loff_t bio_next_offset = sdio->logical_offset_in_bio +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 			sdio->bio->bi_iter.bi_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 		 * See whether this new request is contiguous with the old.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 		 * Btrfs cannot handle having logically non-contiguous requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 		 * submitted.  For example if you have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 		 * Logical:  [0-4095][HOLE][8192-12287]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 		 * Physical: [0-4095]      [4096-8191]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 		 * We cannot submit those pages together as one BIO.  So if our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 		 * current logical offset in the file does not equal what would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 		 * be the next logical offset in the bio, submit the bio we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 		 * have.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 		 * When fscrypt inline encryption is used, data unit number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 		 * (DUN) contiguity is also required.  Normally that's implied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 		 * by logical contiguity.  However, certain IV generation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 		 * methods (e.g. IV_INO_LBLK_32) don't guarantee it.  So, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 		 * must explicitly check fscrypt_mergeable_bio() too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 		if (sdio->final_block_in_bio != sdio->cur_page_block ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 		    cur_offset != bio_next_offset ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 		    !fscrypt_mergeable_bio(sdio->bio, dio->inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 					   cur_offset >> dio->inode->i_blkbits))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 			dio_bio_submit(dio, sdio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	if (sdio->bio == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 		ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	if (dio_bio_add_page(sdio) != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 		dio_bio_submit(dio, sdio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 		ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 		if (ret == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 			ret = dio_bio_add_page(sdio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 			BUG_ON(ret != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804)  * An autonomous function to put a chunk of a page under deferred IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806)  * The caller doesn't actually know (or care) whether this piece of page is in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807)  * a BIO, or is under IO or whatever.  We just take care of all possible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808)  * situations here.  The separation between the logic of do_direct_IO() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809)  * that of submit_page_section() is important for clarity.  Please don't break it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811)  * The chunk of page starts on-disk at blocknr.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813)  * We perform deferred IO, by recording the last-submitted page inside our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814)  * private part of the dio structure.  If possible, we just expand the IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815)  * across that page here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817)  * If that doesn't work out then we put the old page into the bio and add this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818)  * page to the dio instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 		    unsigned offset, unsigned len, sector_t blocknr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 		    struct buffer_head *map_bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	int boundary = sdio->boundary;	/* dio_send_cur_page may clear it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	if (dio->op == REQ_OP_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 		 * Read accounting is performed in submit_bio()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 		task_io_account_write(len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 	 * Can we just grow the current page's presence in the dio?
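	 * (That is, the new chunk lives in the same page, starts at the byte
	 * where the queued chunk ends, and its disk blocks follow straight on
	 * from the queued chunk's blocks.)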
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 	if (sdio->cur_page == page &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 	    sdio->cur_page_offset + sdio->cur_page_len == offset &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 	    sdio->cur_page_block +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 	    (sdio->cur_page_len >> sdio->blkbits) == blocknr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 		sdio->cur_page_len += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 	 * If there's a deferred page already there then send it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	if (sdio->cur_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 		ret = dio_send_cur_page(dio, sdio, map_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 		put_page(sdio->cur_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 		sdio->cur_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	get_page(page);		/* It is in dio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 	sdio->cur_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 	sdio->cur_page_offset = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	sdio->cur_page_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 	sdio->cur_page_block = blocknr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	 * If boundary then we want to schedule the IO now to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	 * avoid metadata seeks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	if (boundary) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 		ret = dio_send_cur_page(dio, sdio, map_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 		if (sdio->bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 			dio_bio_submit(dio, sdio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 		put_page(sdio->cur_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 		sdio->cur_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879)  * If we are not writing the entire block and get_block() allocated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880)  * the block for us, we need to fill-in the unused portion of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881)  * block with zeros. This happens only if the user buffer, file offset or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882)  * IO length is not a multiple of the filesystem block size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884)  * `end' is zero if we're doing the start of the IO, 1 at the end of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885)  * IO.
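 *
 * For example (illustrative numbers): with 512-byte dio blocks inside a
 * 4096-byte fs block (blkfactor == 3, i.e. 8 dio blocks per fs block), an
 * IO starting at dio block 10 sits 10 & 7 == 2 sub-blocks into its fs
 * block, so a freshly allocated block has its first 2 << blkbits == 1024
 * bytes zeroed at the start of the IO (end == 0).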
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 		int end, struct buffer_head *map_bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	unsigned dio_blocks_per_fs_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 	unsigned this_chunk_blocks;	/* In dio_blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	unsigned this_chunk_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 	sdio->start_zero_done = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 	if (!sdio->blkfactor || !buffer_new(map_bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	dio_blocks_per_fs_block = 1 << sdio->blkfactor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	this_chunk_blocks = sdio->block_in_file & (dio_blocks_per_fs_block - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	if (!this_chunk_blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	 * We need to zero out part of an fs block.  It is either at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	 * beginning or the end of the fs block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	if (end) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 		this_chunk_blocks = dio_blocks_per_fs_block - this_chunk_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	this_chunk_bytes = this_chunk_blocks << sdio->blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	page = ZERO_PAGE(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 				sdio->next_block_for_io, map_bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	sdio->next_block_for_io += this_chunk_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923)  * Walk the user pages, and the file, mapping blocks to disk and generating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924)  * a sequence of (page,offset,len,block) mappings.  These mappings are injected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925)  * into submit_page_section(), which takes care of the next stage of submission
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927)  * Direct IO against a blockdev is different from direct IO against a file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928)  * because we can happily perform page-sized but 512-byte aligned IOs.  It is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929)  * important that blockdev IO be able to have fine alignment and large sizes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931)  * So what we do is to permit the ->get_block function to populate bh.b_size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932)  * with the size of IO which is permitted at this offset and this i_blkbits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934)  * For best results, the blockdev should be set up with 512-byte i_blkbits and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935)  * it should set b_size to PAGE_SIZE or more inside get_block().  This gives
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936)  * fine alignment but still allows this function to work in PAGE_SIZE units.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 			struct buffer_head *map_bh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 	const unsigned blkbits = sdio->blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 	const unsigned i_blkbits = blkbits + sdio->blkfactor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 	while (sdio->block_in_file < sdio->final_block_in_request) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 		struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 		size_t from, to;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 		page = dio_get_page(dio, sdio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 		if (IS_ERR(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 			ret = PTR_ERR(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 		}
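		/*
		 * Only the first page of the request may start at a non-zero
		 * offset, and only the last page may end short of PAGE_SIZE.
		 */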
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 		from = sdio->head ? 0 : sdio->from;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 		to = (sdio->head == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 		sdio->head++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 		while (from < to) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 			unsigned this_chunk_bytes;	/* # of bytes mapped */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 			unsigned this_chunk_blocks;	/* # of blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 			unsigned u;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 			if (sdio->blocks_available == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 				 * Need to go and map some more disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 				unsigned long blkmask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 				unsigned long dio_remainder;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 				ret = get_more_blocks(dio, sdio, map_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 				if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 					put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 					goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 				if (!buffer_mapped(map_bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 					goto do_holes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 				sdio->blocks_available =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 						map_bh->b_size >> blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 				sdio->next_block_for_io =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 					map_bh->b_blocknr << sdio->blkfactor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 				if (buffer_new(map_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 					clean_bdev_aliases(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 						map_bh->b_bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 						map_bh->b_blocknr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 						map_bh->b_size >> i_blkbits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 				if (!sdio->blkfactor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 					goto do_holes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 				blkmask = (1 << sdio->blkfactor) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 				dio_remainder = (sdio->block_in_file & blkmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 				 * If we are at the start of IO and that IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 				 * starts partway into a fs-block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 				 * dio_remainder will be non-zero.  If the IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 				 * is a read then we can simply advance the IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 				 * cursor to the first block which is to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 				 * read.  But if the IO is a write and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 				 * block was newly allocated we cannot do that;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 				 * the start of the fs block must be zeroed out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 				 * on-disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 				if (!buffer_new(map_bh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 					sdio->next_block_for_io += dio_remainder;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 				sdio->blocks_available -= dio_remainder;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) do_holes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 			/* Handle holes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 			if (!buffer_mapped(map_bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 				loff_t i_size_aligned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 				/* AKPM: eargh, -ENOTBLK is a hack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 				if (dio->op == REQ_OP_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 					put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 					return -ENOTBLK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 				 * Be sure to account for a partial block as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 				 * last block in the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 				i_size_aligned = ALIGN(i_size_read(dio->inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 							1 << blkbits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 				if (sdio->block_in_file >=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 						i_size_aligned >> blkbits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 					/* We hit eof */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 					put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 					goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 				zero_user(page, from, 1 << blkbits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 				sdio->block_in_file++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 				from += 1 << blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 				dio->result += 1 << blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 				goto next_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 			 * If we're performing IO with an alignment finer than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 			 * the underlying fs block size, go check whether we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 			 * must zero out the start of this block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 			if (unlikely(sdio->blkfactor && !sdio->start_zero_done))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 				dio_zero_block(dio, sdio, 0, map_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 			 * Work out, in this_chunk_blocks, how much disk we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 			 * can add to this page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 			this_chunk_blocks = sdio->blocks_available;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 			u = (to - from) >> blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 			if (this_chunk_blocks > u)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 				this_chunk_blocks = u;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 			u = sdio->final_block_in_request - sdio->block_in_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 			if (this_chunk_blocks > u)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 				this_chunk_blocks = u;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 			this_chunk_bytes = this_chunk_blocks << blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 			BUG_ON(this_chunk_bytes == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 
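			/*
			 * buffer_boundary() is get_block()'s hint that the
			 * next block is discontiguous (e.g. an indirect block
			 * follows), so submit_page_section() will push the
			 * deferred page out early to avoid a metadata seek.
			 */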
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 			if (this_chunk_blocks == sdio->blocks_available)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 				sdio->boundary = buffer_boundary(map_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 			ret = submit_page_section(dio, sdio, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 						  from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 						  this_chunk_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 						  sdio->next_block_for_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 						  map_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 			if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 				put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 			sdio->next_block_for_io += this_chunk_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 			sdio->block_in_file += this_chunk_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 			from += this_chunk_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 			dio->result += this_chunk_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 			sdio->blocks_available -= this_chunk_blocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) next_block:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 			BUG_ON(sdio->block_in_file > sdio->final_block_in_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 			if (sdio->block_in_file == sdio->final_block_in_request)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 		/* Drop the ref which was taken in get_user_pages() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 		put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) static inline int drop_refcount(struct dio *dio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 	int ret2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	 * Sync will always be dropping the final ref and completing the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)  * operation.  AIO can do so if it was a broken operation described above or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	 * in fact if all the bios race to complete before we get here.  In
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	 * that case dio_complete() translates the EIOCBQUEUED into the proper
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 	 * return code that the caller will hand to ->complete().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 	 * This is managed by the bio_lock instead of being an atomic_t so that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 	 * completion paths can drop their ref and use the remaining count to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 	 * decide to wake the submission path atomically.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 	spin_lock_irqsave(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 	ret2 = --dio->refcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	spin_unlock_irqrestore(&dio->bio_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	return ret2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)  * This is a library function for use by filesystem drivers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117)  * The locking rules are governed by the flags parameter:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)  *  - if the flags value contains DIO_LOCKING we use a fancy locking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119)  *    scheme for dumb filesystems.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120)  *    For writes this function is called under i_mutex and returns with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)  *    i_mutex held; for reads, i_mutex is not held on entry, but it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)  *    taken and dropped again before returning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)  *  - if the flags value does NOT contain DIO_LOCKING we don't use any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)  *    internal locking but rather rely on the filesystem to synchronize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125)  *    direct I/O reads/writes versus each other and truncate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)  * To help with locking against truncate we increment the i_dio_count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128)  * counter before starting direct I/O, and decrement it once we are done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129)  * Truncate can wait for it to reach zero to provide exclusion.  It is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)  * expected that the filesystem provides exclusion between new direct I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)  * and truncates.  For DIO_LOCKING filesystems this is done by i_mutex,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)  * but other filesystems need to take care of this on their own.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)  * NOTE: if you pass "sdio" to anything by pointer make sure that function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)  * is always inlined. Otherwise gcc is unable to split the structure into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)  * individual fields and will generate much worse code. This is important
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137)  * for the whole file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) static inline ssize_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 		      struct block_device *bdev, struct iov_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 		      get_block_t get_block, dio_iodone_t end_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 		      dio_submit_t submit_io, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	unsigned blkbits = i_blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	unsigned blocksize_mask = (1 << blkbits) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	ssize_t retval = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	const size_t count = iov_iter_count(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 	loff_t offset = iocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	const loff_t end = offset + count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 	struct dio *dio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 	struct dio_submit sdio = { 0, };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 	struct buffer_head map_bh = { 0, };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	unsigned long align = offset | iov_iter_alignment(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 	 * Avoid references to bdev if not absolutely needed to give
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 	 * the early prefetch in the caller enough time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	/* watch out for a 0 len io from a tricksy fs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 	if (iov_iter_rw(iter) == READ && !count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	if (!dio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	 * Believe it or not, zeroing out the page array caused a .5%
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	 * performance regression in a database benchmark.  So, we take
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	 * care to only zero out what's needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	memset(dio, 0, offsetof(struct dio, pages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	dio->flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 		/* will be released by direct_io_worker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 		inode_lock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	/* Once we sampled i_size check for reads beyond EOF */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	dio->i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 		retval = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 		goto fail_dio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 	if (align & blocksize_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		if (bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 			blkbits = blksize_bits(bdev_logical_block_size(bdev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 		blocksize_mask = (1 << blkbits) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 		if (align & blocksize_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 			goto fail_dio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 		struct address_space *mapping = iocb->ki_filp->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 		retval = filemap_write_and_wait_range(mapping, offset, end - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 		if (retval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 			goto fail_dio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	 * For file-extending writes, updating i_size before data writeouts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 	 * complete can expose uninitialized blocks in dumb filesystems.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	 * In that case we need to wait for I/O completion even if asked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	 * for an asynchronous write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 	if (is_sync_kiocb(iocb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 		dio->is_async = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	else if (iov_iter_rw(iter) == WRITE && end > i_size_read(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 		dio->is_async = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 		dio->is_async = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	dio->inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 	if (iov_iter_rw(iter) == WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 		dio->op = REQ_OP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 		dio->op_flags = REQ_SYNC | REQ_IDLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 		if (iocb->ki_flags & IOCB_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 			dio->op_flags |= REQ_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 		dio->op = REQ_OP_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	if (iocb->ki_flags & IOCB_HIPRI)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 		dio->op_flags |= REQ_HIPRI;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	 * so that we can call ->fsync.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	if (dio->is_async && iov_iter_rw(iter) == WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 		retval = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 		if (iocb->ki_flags & IOCB_DSYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 			retval = dio_set_defer_completion(dio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 		else if (!dio->inode->i_sb->s_dio_done_wq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 			 * In case of AIO write racing with buffered read we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 			 * need to defer completion. We can't decide this now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 			 * however the workqueue needs to be initialized here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 			retval = sb_init_dio_done_wq(dio->inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 		if (retval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 			goto fail_dio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 	 * Will be decremented at I/O completion time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 	inode_dio_begin(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	retval = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 	sdio.blkbits = blkbits;
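	/*
	 * blkfactor is the log2 ratio between the fs block size and the dio
	 * block size; it is non-zero only when blkbits was reduced above to
	 * the device's logical block size for a sub-fs-block-aligned IO.
	 */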
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 	sdio.blkfactor = i_blkbits - blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 	sdio.block_in_file = offset >> blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	sdio.get_block = get_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	dio->end_io = end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	sdio.submit_io = submit_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	sdio.final_block_in_bio = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	sdio.next_block_for_io = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 	dio->iocb = iocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 	spin_lock_init(&dio->bio_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 	dio->refcount = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	sdio.iter = iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 	sdio.final_block_in_request = end >> blkbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	 * In case of non-aligned buffers, we may need 2 more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	 * pages since we need to zero out the first and last block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	if (unlikely(sdio.blkfactor))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 		sdio.pages_in_io = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	sdio.pages_in_io += iov_iter_npages(iter, INT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 	blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	retval = do_direct_IO(dio, &sdio, &map_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	if (retval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 		dio_cleanup(dio, &sdio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 	if (retval == -ENOTBLK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 		 * The remaining part of the request will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 		 * handled by buffered I/O when we return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 		retval = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 	 * There may be some unwritten disk at the end of a part-written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 	 * fs-block-sized block.  Go zero that now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 	dio_zero_block(dio, &sdio, 1, &map_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 	if (sdio.cur_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 		ssize_t ret2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 		ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 		if (retval == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 			retval = ret2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 		put_page(sdio.cur_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 		sdio.cur_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 	if (sdio.bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 		dio_bio_submit(dio, &sdio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 	blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 	 * It is possible that we return a short IO due to end of file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 	 * In that case, we need to release all the pages we got hold of.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 	dio_cleanup(dio, &sdio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 	 * All block lookups have been performed. For READ requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 	 * we can let i_mutex go now that it's achieved its purpose
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 	 * of protecting us from looking up uninitialized blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 		inode_unlock(dio->inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	 * The only time we want to leave bios in flight is when a successful
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 	 * partial aio read or full aio write has been set up.  In that case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 	 * bio completion will call aio_complete.  The only time it's safe to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 	 * call aio_complete is when we return -EIOCBQUEUED, so we key on that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	 * This had *better* be the only place that raises -EIOCBQUEUED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 	BUG_ON(retval == -EIOCBQUEUED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	if (dio->is_async && retval == 0 && dio->result &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 	    (iov_iter_rw(iter) == READ || dio->result == count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 		retval = -EIOCBQUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 		dio_await_completion(dio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 	if (drop_refcount(dio) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 		retval = dio_complete(dio, retval, DIO_COMPLETE_INVALIDATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 		BUG_ON(retval != -EIOCBQUEUED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) fail_dio:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 		inode_unlock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	kmem_cache_free(dio_cache, dio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 			     struct block_device *bdev, struct iov_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 			     get_block_t get_block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 			     dio_iodone_t end_io, dio_submit_t submit_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 			     int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 	 * The block device state is needed in the end to finally
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	 * submit everything.  Since it's likely to be cache cold,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 	 * prefetch it here as the first thing to hide some of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	 * latency.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	 * Attempt to prefetch the pieces we likely need later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 	prefetch(&bdev->bd_disk->part_tbl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 	prefetch(bdev->bd_disk->queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 	prefetch((char *)bdev->bd_disk->queue + SMP_CACHE_BYTES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 	return do_blockdev_direct_IO(iocb, inode, bdev, iter, get_block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 				     end_io, submit_io, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) EXPORT_SYMBOL_NS(__blockdev_direct_IO, ANDROID_GKI_VFS_EXPORT_ONLY);
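
/*
 * Typical use (a sketch, not part of this file): a filesystem's ->direct_IO
 * address_space operation forwards to this function, usually through the
 * blockdev_direct_IO() helper in <linux/fs.h>, passing its own get_block
 * callback:
 *
 *	static ssize_t myfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 *	{
 *		struct inode *inode = file_inode(iocb->ki_filp);
 *
 *		return blockdev_direct_IO(iocb, inode, iter, myfs_get_block);
 *	}
 *
 * myfs_direct_IO() and myfs_get_block() are hypothetical names used only
 * for illustration.
 */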
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) static __init int dio_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 	dio_cache = KMEM_CACHE(dio, SLAB_PANIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) module_init(dio_init)