Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/readahead.c - address_space-level file readahead.
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 09Apr2002	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/export.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/file.h>
#include <linux/mm_inline.h>
#include <linux/blk-cgroup.h>
#include <linux/fadvise.h>
#include <linux/sched/mm.h>
#include <trace/hooks/mm.h>

#include "internal.h"

/*
 * Initialise a struct file's readahead state.  Assumes that the caller has
 * memset *ra to zero.
 */
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
	ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);

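/*
 * Illustrative note (not part of the original file): the VFS open path is
 * what normally calls this, roughly along the lines of
 *
 *	memset(&filp->f_ra, 0, sizeof(filp->f_ra));
 *	file_ra_state_init(&filp->f_ra, filp->f_mapping);
 *
 * so each open file starts with a readahead window sized from the backing
 * device's ra_pages.  The snippet is a sketch of the contract described in
 * the comment above, not a verbatim quote of do_dentry_open().
 */
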
/*
 * see if a page needs releasing upon read_cache_pages() failure
 * - the caller of read_cache_pages() may have set PG_private or PG_fscache
 *   before calling, such as the NFS fs marking pages that are cached locally
 *   on disk, thus we need to give the fs a chance to clean up in the event of
 *   an error
 */
static void read_cache_pages_invalidate_page(struct address_space *mapping,
					     struct page *page)
{
	if (page_has_private(page)) {
		if (!trylock_page(page))
			BUG();
		page->mapping = mapping;
		do_invalidatepage(page, 0, PAGE_SIZE);
		page->mapping = NULL;
		unlock_page(page);
	}
	put_page(page);
}

/*
 * release a list of pages, invalidating them first if need be
 */
static void read_cache_pages_invalidate_pages(struct address_space *mapping,
					      struct list_head *pages)
{
	struct page *victim;

	while (!list_empty(pages)) {
		victim = lru_to_page(pages);
		list_del(&victim->lru);
		read_cache_pages_invalidate_page(mapping, victim);
	}
}

/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages.  These
 *   pages have their ->index populated and are otherwise uninitialised.
 * @filler: callback routine for filling a single page.
 * @data: private data for the callback routine.
 *
 * Hides the details of the LRU cache etc from the filesystems.
 *
 * Returns: %0 on success, error returned by @filler otherwise
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
			int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = lru_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping, page->index,
				readahead_gfp_mask(mapping))) {
			read_cache_pages_invalidate_page(mapping, page);
			continue;
		}
		put_page(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			read_cache_pages_invalidate_pages(mapping, pages);
			break;
		}
		task_io_account_read(PAGE_SIZE);
	}
	return ret;
}

EXPORT_SYMBOL(read_cache_pages);

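/*
 * Illustrative sketch (not part of the original file): a filesystem that
 * still uses this helper would hand its page list to read_cache_pages()
 * together with a filler callback, roughly:
 *
 *	static int my_fill_page(void *data, struct page *page)
 *	{
 *		struct my_read_ctx *ctx = data;
 *
 *		return my_start_read(ctx, page);
 *	}
 *
 *	err = read_cache_pages(mapping, pages, my_fill_page, ctx);
 *
 * my_fill_page, my_start_read and my_read_ctx are hypothetical names; the
 * point is only that each page is added to the page cache here before being
 * handed to @filler, and that a non-zero return from @filler aborts the
 * loop and releases the remaining pages.
 */
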
gfp_t readahead_gfp_mask(struct address_space *x)
{
	gfp_t mask = mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;

	trace_android_rvh_set_readahead_gfp_mask(&mask);
	return mask;
}
EXPORT_SYMBOL_GPL(readahead_gfp_mask);

static void read_pages(struct readahead_control *rac, struct list_head *pages,
		bool skip_page)
{
	const struct address_space_operations *aops = rac->mapping->a_ops;
	struct page *page;
	struct blk_plug plug;

	if (!readahead_count(rac))
		goto out;

	blk_start_plug(&plug);

	if (aops->readahead) {
		aops->readahead(rac);
		/* Clean up the remaining pages */
		while ((page = readahead_page(rac))) {
			unlock_page(page);
			put_page(page);
		}
	} else if (aops->readpages) {
		aops->readpages(rac->file, rac->mapping, pages,
				readahead_count(rac));
		/* Clean up the remaining pages */
		put_pages_list(pages);
		rac->_index += rac->_nr_pages;
		rac->_nr_pages = 0;
	} else {
		while ((page = readahead_page(rac))) {
			aops->readpage(rac->file, page);
			put_page(page);
		}
	}

	blk_finish_plug(&plug);

	BUG_ON(!list_empty(pages));
	BUG_ON(readahead_count(rac));

out:
	if (skip_page)
		rac->_index++;
}

/**
 * page_cache_ra_unbounded - Start unchecked readahead.
 * @ractl: Readahead control.
 * @nr_to_read: The number of pages to read.
 * @lookahead_size: Where to start the next readahead.
 *
 * This function is for filesystems to call when they want to start
 * readahead beyond a file's stated i_size.  This is almost certainly
 * not the function you want to call.  Use page_cache_async_readahead()
 * or page_cache_sync_readahead() instead.
 *
 * Context: File is referenced by caller.  Mutexes may be held by caller.
 * May sleep, but will not reenter filesystem to reclaim memory.
 */
void page_cache_ra_unbounded(struct readahead_control *ractl,
		unsigned long nr_to_read, unsigned long lookahead_size)
{
	struct address_space *mapping = ractl->mapping;
	unsigned long index = readahead_index(ractl);
	LIST_HEAD(page_pool);
	gfp_t gfp_mask = readahead_gfp_mask(mapping);
	unsigned long i;

	/*
	 * Partway through the readahead operation, we will have added
	 * locked pages to the page cache, but will not yet have submitted
	 * them for I/O.  Adding another page may need to allocate memory,
	 * which can trigger memory reclaim.  Telling the VM we're in
	 * the middle of a filesystem operation will cause it to not
	 * touch file-backed pages, preventing a deadlock.  Most (all?)
	 * filesystems already specify __GFP_NOFS in their mapping's
	 * gfp_mask, but let's be explicit here.
	 */
	unsigned int nofs = memalloc_nofs_save();

	/*
	 * Preallocate as many pages as we will need.
	 */
	for (i = 0; i < nr_to_read; i++) {
		struct page *page = xa_load(&mapping->i_pages, index + i);

		BUG_ON(index + i != ractl->_index + ractl->_nr_pages);

		if (page && !xa_is_value(page)) {
			/*
			 * Page already present?  Kick off the current batch
			 * of contiguous pages before continuing with the
			 * next batch.  This page may be the one we would
			 * have intended to mark as Readahead, but we don't
			 * have a stable reference to this page, and it's
			 * not worth getting one just for that.
			 */
			read_pages(ractl, &page_pool, true);
			continue;
		}

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			break;
		if (mapping->a_ops->readpages) {
			page->index = index + i;
			list_add(&page->lru, &page_pool);
		} else if (add_to_page_cache_lru(page, mapping, index + i,
					gfp_mask) < 0) {
			put_page(page);
			read_pages(ractl, &page_pool, true);
			continue;
		}
		if (i == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		ractl->_nr_pages++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	read_pages(ractl, &page_pool, false);
	memalloc_nofs_restore(nofs);
}
EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);

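/*
 * Illustrative note (not part of the original file): the @lookahead_size
 * argument controls where the PG_readahead marker lands.  With, say,
 * nr_to_read = 32 and lookahead_size = 8, the loop above sets PG_readahead
 * on the page at index + 24 (i == nr_to_read - lookahead_size), so the next
 * asynchronous readahead fires while 8 pages of the current window are
 * still unread.  A filesystem that really needs to read past i_size would
 * build its own control, e.g.
 *
 *	DEFINE_READAHEAD(ractl, file, mapping, index);
 *	page_cache_ra_unbounded(&ractl, nr_pages, lookahead);
 *
 * which is a sketch of the calling convention, not a quote from any
 * particular filesystem.
 */
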
/*
 * do_page_cache_ra() actually reads a chunk of disk.  It allocates
 * the pages first, then submits them for I/O. This avoids the very bad
 * behaviour which would occur if page allocations are causing VM writeback.
 * We really don't want to intermingle reads and writes like that.
 */
void do_page_cache_ra(struct readahead_control *ractl,
		unsigned long nr_to_read, unsigned long lookahead_size)
{
	struct inode *inode = ractl->mapping->host;
	unsigned long index = readahead_index(ractl);
	loff_t isize = i_size_read(inode);
	pgoff_t end_index;	/* The last page we want to read */

	if (isize == 0)
		return;

	end_index = (isize - 1) >> PAGE_SHIFT;
	if (index > end_index)
		return;
	/* Don't read past the page containing the last byte of the file */
	if (nr_to_read > end_index - index)
		nr_to_read = end_index - index + 1;

	page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size);
}

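/*
 * Worked example (illustrative, not part of the original file): with 4KiB
 * pages, a 10000-byte file has end_index = (10000 - 1) >> PAGE_SHIFT = 2.
 * A request starting at index 1 for 8 pages is clamped to
 * end_index - index + 1 = 2 pages, so only pages 1 and 2 are read and
 * nothing is allocated past the page holding the last byte of the file.
 */
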
/*
 * Chunk the readahead into 2 megabyte units, so that we don't pin too much
 * memory at once.
 */
void force_page_cache_ra(struct readahead_control *ractl,
		struct file_ra_state *ra, unsigned long nr_to_read)
{
	struct address_space *mapping = ractl->mapping;
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	unsigned long max_pages, index;

	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages &&
			!mapping->a_ops->readahead))
		return;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	index = readahead_index(ractl);
	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
	nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
	while (nr_to_read) {
		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;

		if (this_chunk > nr_to_read)
			this_chunk = nr_to_read;
		ractl->_index = index;
		do_page_cache_ra(ractl, this_chunk, 0);

		index += this_chunk;
		nr_to_read -= this_chunk;
	}
}

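/*
 * Worked example (illustrative, not part of the original file): with 4KiB
 * pages, this_chunk is 512 pages (2MiB).  Assuming max_pages allows it, a
 * forced readahead of 1280 pages (5MiB) is therefore issued as three calls
 * to do_page_cache_ra() of 512, 512 and 256 pages, each with a zero
 * lookahead_size so no PG_readahead marker is planted.
 */
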
/*
 * Set the initial window size: round the request up to the next power of 2,
 * then scale it up (x 4 for small requests, x 2 for medium ones, capped at
 * the maximum for large ones).  For a 128k (32 page) max ra this gives
 * roughly 1-8 pages = 32k initial, > 8 pages = 128k initial.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long newsize = roundup_pow_of_two(size);

	if (newsize <= max / 32)
		newsize = newsize * 4;
	else if (newsize <= max / 4)
		newsize = newsize * 2;
	else
		newsize = max;

	return newsize;
}

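/*
 * Worked example (illustrative, not part of the original file): with
 * max = 32 pages (128k), a 4-page request rounds up to 4 and lands in the
 * middle bucket (4 <= 32 / 4), so the initial window is 8 pages; a 16-page
 * request exceeds max / 4 and starts straight at the 32-page maximum.
 */
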
/*
 *  Get the previous window size, ramp it up, and
 *  return it as the new window size.
 */
static unsigned long get_next_ra_size(struct file_ra_state *ra,
				      unsigned long max)
{
	unsigned long cur = ra->size;

	if (cur < max / 16)
		return 4 * cur;
	if (cur <= max / 2)
		return 2 * cur;
	return max;
}

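/*
 * Worked example (illustrative, not part of the original file): with
 * max = 128 pages, successive windows grow 4 -> 16 -> 32 -> 64 -> 128 and
 * then stay pinned at 128: small windows are quadrupled (cur < max / 16),
 * mid-sized ones doubled (cur <= max / 2), and anything larger is clamped
 * to max.
 */
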
/*
 * On-demand readahead design.
 *
 * The fields in struct file_ra_state represent the most-recently-executed
 * readahead attempt:
 *
 *                        |<----- async_size ---------|
 *     |------------------- size -------------------->|
 *     |==================#===========================|
 *     ^start             ^page marked with PG_readahead
 *
 * To overlap application thinking time and disk I/O time, we do
 * `readahead pipelining': do not wait until the application has consumed all
 * readahead pages and stalled on the missing page at readahead_index;
 * instead, submit an asynchronous readahead I/O as soon as there are
 * only async_size pages left in the readahead window. Normally async_size
 * will be equal to size, for maximum pipelining.
 *
 * In interleaved sequential reads, concurrent streams on the same fd can
 * invalidate each other's readahead state. So we flag the new readahead
 * page at (start+size-async_size) with PG_readahead, and use it as a
 * readahead indicator. The flag won't be set on already cached pages, to
 * avoid the readahead-for-nothing fuss and save pointless page cache lookups.
 *
 * prev_pos tracks the last visited byte in the _previous_ read request.
 * It should be maintained by the caller, and will be used for detecting
 * small random reads. Note that the readahead algorithm checks loosely
 * for sequential patterns. Hence interleaved reads might be served as
 * sequential ones.
 *
 * There is a special case: if the first page which the application tries to
 * read happens to be the first page of the file, it is assumed that a linear
 * read is about to happen and the window is immediately set to the initial
 * size based on the I/O request size and max_readahead.
 *
 * The code ramps up the readahead size aggressively at first, but slows down
 * as it approaches max_readahead.
 */

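/*
 * Illustrative trace (not part of the original file, numbers assume
 * max_pages = 32): a sequential reader issues its first 4-page read at
 * index 0, so ondemand_readahead() takes the initial_readahead path with
 * start = 0, size = get_init_ra_size(4, 32) = 8 and async_size = 8 - 4 = 4;
 * PG_readahead is planted at start + size - async_size = 4.  When the
 * reader reaches page 4, the async path sees the expected index, slides the
 * window to start = 8, size = get_next_ra_size() = 16, async_size = 16, and
 * marks page 8.  Each later hit on the marked page doubles or quadruples
 * the window (up to max_pages) while I/O for the next window is already in
 * flight, which is the pipelining described above.
 */
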
/*
 * Count contiguously cached pages from @index-1 to @index-@max,
 * this count is a conservative estimation of
 * 	- length of the sequential read sequence, or
 * 	- thrashing threshold in memory tight systems
 */
static pgoff_t count_history_pages(struct address_space *mapping,
				   pgoff_t index, unsigned long max)
{
	pgoff_t head;

	rcu_read_lock();
	head = page_cache_prev_miss(mapping, index - 1, max);
	rcu_read_unlock();

	return index - 1 - head;
}

/*
 * page cache context based read-ahead
 */
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t index,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, index, max);

	/*
	 * not enough history pages:
	 * it could be a random read
	 */
	if (size <= req_size)
		return 0;

	/*
	 * starts from beginning of file:
	 * it is a strong indication of long-run stream (or whole-file-read)
	 */
	if (size >= index)
		size *= 2;

	ra->start = index;
	ra->size = min(size + req_size, max);
	ra->async_size = 1;

	return 1;
}

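/*
 * Worked example (illustrative, not part of the original file): suppose a
 * read of req_size = 2 arrives at index = 100 without matching readahead
 * state, but pages 90-99 are already cached.  count_history_pages() reports
 * 10 contiguous history pages, which is more than req_size, so the access
 * is treated as part of a sequential stream: the new window becomes
 * start = 100, size = min(10 + 2, max) = 12 (assuming max allows it) and
 * async_size = 1, i.e. the very last page of the window carries the
 * PG_readahead marker.
 */
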
/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 */
static void ondemand_readahead(struct readahead_control *ractl,
		struct file_ra_state *ra, bool hit_readahead_marker,
		unsigned long req_size)
{
	struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
	unsigned long max_pages = ra->ra_pages;
	unsigned long add_pages;
	unsigned long index = readahead_index(ractl);
	pgoff_t prev_index;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	if (req_size > max_pages && bdi->io_pages > max_pages)
		max_pages = min(req_size, bdi->io_pages);

	trace_android_vh_ra_tuning_max_page(ractl, &max_pages);

	/*
	 * start of file
	 */
	if (!index)
		goto initial_readahead;

	/*
	 * It's the expected callback index, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if ((index == (ra->start + ra->size - ra->async_size) ||
	     index == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals the
	 * readahead size. Ramp it up and use it as the new readahead size.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = page_cache_next_miss(ractl->mapping, index + 1,
				max_pages);
		rcu_read_unlock();

		if (!start || start - index > max_pages)
			return;

		ra->start = start;
		ra->size = start - index;	/* old async_size */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * oversize read
	 */
	if (req_size > max_pages)
		goto initial_readahead;

	/*
	 * sequential cache miss
	 * trivial case: (index - prev_index) == 1
	 * unaligned reads: (index - prev_index) == 0
	 */
	prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
	if (index - prev_index <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache and look for the traces (cached history pages)
	 * that a sequential stream would leave behind.
	 */
	if (try_context_readahead(ractl->mapping, ra, index, req_size,
			max_pages))
		goto readit;

	/*
	 * standalone, small random read
	 * Read as is, and do not pollute the readahead state.
	 */
	do_page_cache_ra(ractl, req_size, 0);
	return;

initial_readahead:
	ra->start = index;
	ra->size = get_init_ra_size(req_size, max_pages);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * Will this read hit the readahead marker made by itself?
	 * If so, trigger the readahead marker hit now, and merge
	 * the resulting next readahead window into the current one.
	 * Take care of maximum IO pages as above.
	 */
	if (index == ra->start && ra->size == ra->async_size) {
		add_pages = get_next_ra_size(ra, max_pages);
		if (ra->size + add_pages <= max_pages) {
			ra->async_size = add_pages;
			ra->size += add_pages;
		} else {
			ra->size = max_pages;
			ra->async_size = max_pages >> 1;
		}
	}

	ractl->_index = ra->start;
	do_page_cache_ra(ractl, ra->size, ra->async_size);
}

void page_cache_sync_ra(struct readahead_control *ractl,
		struct file_ra_state *ra, unsigned long req_count)
{
	bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);

	/*
	 * Even if read-ahead is disabled, issue this request as read-ahead
	 * as we'll need it to satisfy the requested range. The forced
	 * read-ahead will do the right thing and limit the read to just the
	 * requested range, which we'll set to 1 page for this case.
	 */
	if (!ra->ra_pages || blk_cgroup_congested()) {
		if (!ractl->file)
			return;
		req_count = 1;
		do_forced_ra = true;
	}

	/* be dumb */
	if (do_forced_ra) {
		force_page_cache_ra(ractl, ra, req_count);
		return;
	}

	/* do read-ahead */
	ondemand_readahead(ractl, ra, false, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_sync_ra);

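/*
 * Illustrative sketch (not part of the original file): callers normally
 * reach these two entry points through the inline wrappers in
 * <linux/pagemap.h>, which build the readahead_control on the stack.  A
 * buffered-read path does roughly
 *
 *	page = find_get_page(mapping, index);
 *	if (!page)
 *		page_cache_sync_readahead(mapping, &file->f_ra, file,
 *					  index, req_count);
 *	else if (PageReadahead(page))
 *		page_cache_async_readahead(mapping, &file->f_ra, file,
 *					   page, index, req_count);
 *
 * i.e. a miss starts synchronous readahead and a hit on a PG_readahead
 * page kicks off the next asynchronous window.  This mirrors what the
 * generic buffered read path does; the snippet itself is only a sketch.
 */
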
void page_cache_async_ra(struct readahead_control *ractl,
		struct file_ra_state *ra, struct page *page,
		unsigned long req_count)
{
	/* no read-ahead */
	if (!ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim.
	 */
	if (PageWriteback(page))
		return;

	ClearPageReadahead(page);

	/*
	 * Defer asynchronous read-ahead on IO congestion.
	 */
	if (inode_read_congested(ractl->mapping->host))
		return;

	if (blk_cgroup_congested())
		return;

	/* do read-ahead */
	ondemand_readahead(ractl, ra, true, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_async_ra);

ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
	ssize_t ret;
	struct fd f;

	ret = -EBADF;
	f = fdget(fd);
	if (!f.file || !(f.file->f_mode & FMODE_READ))
		goto out;

	/*
	 * The readahead() syscall is intended to run only on files
	 * that can execute readahead. If readahead is not possible
	 * on this file, then we must return -EINVAL.
	 */
	ret = -EINVAL;
	if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
	    !S_ISREG(file_inode(f.file)->i_mode))
		goto out;

	ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
out:
	fdput(f);
	return ret;
}

SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	return ksys_readahead(fd, offset, count);
}
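/*
 * Illustrative note (not part of the original file): from userspace this
 * syscall is typically used to warm the page cache before a file is
 * actually read, e.g.
 *
 *	int fd = open("data.bin", O_RDONLY);
 *	struct stat st;
 *
 *	if (fd >= 0 && fstat(fd, &st) == 0)
 *		readahead(fd, 0, st.st_size);
 *
 * The call is advisory: as the code above shows, the request is turned
 * into vfs_fadvise(..., POSIX_FADV_WILLNEED), so readahead(2) and
 * posix_fadvise(POSIX_FADV_WILLNEED) end up on the same path.  "data.bin"
 * is just a placeholder name.
 */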