// SPDX-License-Identifier: GPL-2.0-only
/*
 *	linux/mm/filemap.c
 *
 * Copyright (C) 1994-1999 Linus Torvalds
 */

/*
 * This file handles the generic file mmap semantics used by
 * most "normal" filesystems (but you don't /have/ to use this:
 * the NFS filesystem used to do this differently, for example)
 */
#include <linux/export.h>
#include <linux/compiler.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/error-injection.h>
#include <linux/hash.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/cpuset.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/cleancache.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include <linux/delayacct.h>
#include <linux/psi.h>
#include <linux/ramfs.h>
#include <linux/page_idle.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/filemap.h>

#undef CREATE_TRACE_POINTS
#include <trace/hooks/mm.h>

/*
 * FIXME: remove all knowledge of the buffer layer from the core VM
 */
#include <linux/buffer_head.h> /* for try_to_free_buffers */

#include <asm/mman.h>

/*
 * Shared mappings implemented 30.11.1994. It's not fully working yet,
 * though.
 *
 * Shared mappings now work. 15.8.1995  Bruno.
 *
 * finished 'unifying' the page and buffer cache and SMP-threaded the
 * page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
 *
 * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
 */

/*
 * Lock ordering:
 *
 *  ->i_mmap_rwsem		(truncate_pagecache)
 *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
 *      ->swap_lock		(exclusive_swap_page, others)
 *        ->i_pages lock
 *
 *  ->i_mutex
 *    ->i_mmap_rwsem		(truncate->unmap_mapping_range)
 *
 *  ->mmap_lock
 *    ->i_mmap_rwsem
 *      ->page_table_lock or pte_lock	(various, mainly in memory.c)
 *        ->i_pages lock	(arch-dependent flush_dcache_mmap_lock)
 *
 *  ->mmap_lock
 *    ->lock_page		(access_process_vm)
 *
 *  ->i_mutex			(generic_perform_write)
 *    ->mmap_lock		(fault_in_pages_readable->do_page_fault)
 *
 *  bdi->wb.list_lock
 *    sb_lock			(fs/fs-writeback.c)
 *    ->i_pages lock		(__sync_single_inode)
 *
 *  ->i_mmap_rwsem
 *    ->anon_vma.lock		(vma_adjust)
 *
 *  ->anon_vma.lock
 *    ->page_table_lock or pte_lock	(anon_vma_prepare and various)
 *
 *  ->page_table_lock or pte_lock
 *    ->swap_lock		(try_to_unmap_one)
 *    ->private_lock		(try_to_unmap_one)
 *    ->i_pages lock		(try_to_unmap_one)
 *    ->pgdat->lru_lock		(follow_page->mark_page_accessed)
 *    ->pgdat->lru_lock		(check_pte_range->isolate_lru_page)
 *    ->private_lock		(page_remove_rmap->set_page_dirty)
 *    ->i_pages lock		(page_remove_rmap->set_page_dirty)
 *    bdi.wb->list_lock		(page_remove_rmap->set_page_dirty)
 *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
 *    ->memcg->move_lock	(page_remove_rmap->lock_page_memcg)
 *    bdi.wb->list_lock		(zap_pte_range->set_page_dirty)
 *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
 *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
 *
 * ->i_mmap_rwsem
 *   ->tasklist_lock		(memory_failure, collect_procs_ao)
 */
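
/*
 * Hedged illustration, not part of the ordering table above: the deletion
 * helpers below rely on the page lock being taken before the i_pages lock,
 * roughly:
 *
 *	lock_page(page);			// page lock first
 *	xa_lock_irq(&mapping->i_pages);		// i_pages lock nests inside
 *	// ...remove the page's slot, update accounting...
 *	xa_unlock_irq(&mapping->i_pages);
 *	unlock_page(page);
 *
 * delete_from_page_cache() follows exactly this nesting.
 */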

static void page_cache_delete(struct address_space *mapping,
				   struct page *page, void *shadow)
{
	XA_STATE(xas, &mapping->i_pages, page->index);
	unsigned int nr = 1;

	mapping_set_update(&xas, mapping);

	/* hugetlb pages are represented by a single entry in the xarray */
	if (!PageHuge(page)) {
		xas_set_order(&xas, page->index, compound_order(page));
		nr = compound_nr(page);
	}

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(nr != 1 && shadow, page);

	xas_store(&xas, shadow);
	xas_init_marks(&xas);

	page->mapping = NULL;
	/* Leave page->index set: truncation lookup relies upon it */

	if (shadow) {
		mapping->nrexceptional += nr;
		/*
		 * Make sure the nrexceptional update is committed before
		 * the nrpages update so that final truncate racing
		 * with reclaim does not see both counters 0 at the
		 * same time and miss a shadow entry.
		 */
		smp_wmb();
	}
	mapping->nrpages -= nr;
}
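
/*
 * Hedged illustration (not called anywhere): page_cache_delete() above
 * replaces the page's slot with a "shadow" value entry rather than simply
 * erasing it. The bare XArray pattern it builds on looks roughly like this;
 * the cookie value here is made up, the real shadow encoding comes from the
 * workingset code:
 *
 *	XA_STATE(xas, &mapping->i_pages, index);
 *
 *	xas_lock_irq(&xas);
 *	xas_store(&xas, xa_mk_value(cookie));	// value entry keeps the slot
 *	xas_init_marks(&xas);			// drop dirty/writeback tags
 *	xas_unlock_irq(&xas);
 */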

static void unaccount_page_cache_page(struct address_space *mapping,
				      struct page *page)
{
	int nr;

	/*
	 * if we're uptodate, flush out into the cleancache, otherwise
	 * invalidate any existing cleancache entries. We can't leave
	 * stale data around in the cleancache once our page is gone
	 */
	if (PageUptodate(page) && PageMappedToDisk(page))
		cleancache_put_page(page);
	else
		cleancache_invalidate_page(mapping, page);

	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(page_mapped(page), page);
	if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
		int mapcount;

		pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
			 current->comm, page_to_pfn(page));
		dump_page(page, "still mapped when deleted");
		dump_stack();
		add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);

		mapcount = page_mapcount(page);
		if (mapping_exiting(mapping) &&
		    page_count(page) >= mapcount + 2) {
			/*
			 * All vmas have already been torn down, so it's
			 * a good bet that actually the page is unmapped,
			 * and we'd prefer not to leak it: if we're wrong,
			 * some other bad page check should catch it later.
			 */
			page_mapcount_reset(page);
			page_ref_sub(page, mapcount);
		}
	}

	/* hugetlb pages do not participate in page cache accounting. */
	if (PageHuge(page))
		return;

	nr = thp_nr_pages(page);

	__mod_lruvec_page_state(page, NR_FILE_PAGES, -nr);
	if (PageSwapBacked(page)) {
		__mod_lruvec_page_state(page, NR_SHMEM, -nr);
		if (PageTransHuge(page))
			__dec_node_page_state(page, NR_SHMEM_THPS);
	} else if (PageTransHuge(page)) {
		__dec_node_page_state(page, NR_FILE_THPS);
		filemap_nr_thps_dec(mapping);
	}

	/*
	 * At this point page must be either written or cleaned by
	 * truncate. Dirty page here signals a bug and loss of
	 * unwritten data.
	 *
	 * This fixes dirty accounting after removing the page entirely
	 * but leaves PageDirty set: it has no effect for truncated
	 * page and anyway will be cleared before returning page into
	 * buddy allocator.
	 */
	if (WARN_ON_ONCE(PageDirty(page)))
		account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
}

/*
 * Delete a page from the page cache and free it. Caller has to make
 * sure the page is locked and that nobody else uses it - or that usage
 * is safe. The caller must hold the i_pages lock.
 */
void __delete_from_page_cache(struct page *page, void *shadow)
{
	struct address_space *mapping = page->mapping;

	trace_mm_filemap_delete_from_page_cache(page);

	unaccount_page_cache_page(mapping, page);
	page_cache_delete(mapping, page, shadow);
}

static void page_cache_free_page(struct address_space *mapping,
				struct page *page)
{
	void (*freepage)(struct page *);

	freepage = mapping->a_ops->freepage;
	if (freepage)
		freepage(page);

	if (PageTransHuge(page) && !PageHuge(page)) {
		page_ref_sub(page, thp_nr_pages(page));
		VM_BUG_ON_PAGE(page_count(page) <= 0, page);
	} else {
		put_page(page);
	}
}

/**
 * delete_from_page_cache - delete page from page cache
 * @page: the page which the kernel is trying to remove from page cache
 *
 * This must be called only on pages that have been verified to be in the page
 * cache and locked. It will never put the page into the free list; the caller
 * has a reference on the page.
 */
void delete_from_page_cache(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	BUG_ON(!PageLocked(page));
	xa_lock_irqsave(&mapping->i_pages, flags);
	__delete_from_page_cache(page, NULL);
	xa_unlock_irqrestore(&mapping->i_pages, flags);

	page_cache_free_page(mapping, page);
}
EXPORT_SYMBOL(delete_from_page_cache);
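
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * the caller must already hold a reference and take the page lock, as the
 * kernel-doc above requires.
 *
 *	static void example_drop_from_cache(struct page *page)
 *	{
 *		lock_page(page);
 *		if (page->mapping)		// still in the page cache?
 *			delete_from_page_cache(page);
 *		unlock_page(page);
 *		put_page(page);			// drop the caller's reference
 *	}
 */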

/*
 * page_cache_delete_batch - delete several pages from page cache
 * @mapping: the mapping to which pages belong
 * @pvec: pagevec with pages to delete
 *
 * The function walks over mapping->i_pages and removes pages passed in @pvec
 * from the mapping. The function expects @pvec to be sorted by page index
 * and is optimised for it to be dense.
 * It tolerates holes in @pvec (mapping entries at those indices are not
 * modified). The function expects only THP head pages to be present in the
 * @pvec.
 *
 * The function expects the i_pages lock to be held.
 */
static void page_cache_delete_batch(struct address_space *mapping,
			     struct pagevec *pvec)
{
	XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
	int total_pages = 0;
	int i = 0;
	struct page *page;

	mapping_set_update(&xas, mapping);
	xas_for_each(&xas, page, ULONG_MAX) {
		if (i >= pagevec_count(pvec))
			break;

		/* A swap/dax/shadow entry got inserted? Skip it. */
		if (xa_is_value(page))
			continue;
		/*
		 * A page got inserted in our range? Skip it. We have our
		 * pages locked so they are protected from being removed.
		 * If we see a page whose index is higher than ours, it
		 * means our page has been removed, which shouldn't be
		 * possible because we're holding the PageLock.
		 */
		if (page != pvec->pages[i]) {
			VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
					page);
			continue;
		}

		WARN_ON_ONCE(!PageLocked(page));

		if (page->index == xas.xa_index)
			page->mapping = NULL;
		/* Leave page->index set: truncation lookup relies on it */

		/*
		 * Move to the next page in the vector if this is a regular
		 * page or the index is of the last sub-page of this compound
		 * page.
		 */
		if (page->index + compound_nr(page) - 1 == xas.xa_index)
			i++;
		xas_store(&xas, NULL);
		total_pages++;
	}
	mapping->nrpages -= total_pages;
}

void delete_from_page_cache_batch(struct address_space *mapping,
				  struct pagevec *pvec)
{
	int i;
	unsigned long flags;

	if (!pagevec_count(pvec))
		return;

	xa_lock_irqsave(&mapping->i_pages, flags);
	for (i = 0; i < pagevec_count(pvec); i++) {
		trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);

		unaccount_page_cache_page(mapping, pvec->pages[i]);
	}
	page_cache_delete_batch(mapping, pvec);
	xa_unlock_irqrestore(&mapping->i_pages, flags);

	for (i = 0; i < pagevec_count(pvec); i++)
		page_cache_free_page(mapping, pvec->pages[i]);
}
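
/*
 * Hedged sketch of how a truncate-style caller might use the batch helper
 * (the collection and cleanup steps are only indicated, not spelled out):
 *
 *	struct pagevec pvec;
 *	int i;
 *
 *	pagevec_init(&pvec);
 *	// ...fill pvec with locked pages of this mapping, sorted by index...
 *	delete_from_page_cache_batch(mapping, &pvec);
 *	for (i = 0; i < pagevec_count(&pvec); i++)
 *		unlock_page(pvec.pages[i]);
 *	pagevec_release(&pvec);	// drops the references taken at lookup time
 */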

int filemap_check_errors(struct address_space *mapping)
{
	int ret = 0;
	/* Check for outstanding write errors */
	if (test_bit(AS_ENOSPC, &mapping->flags) &&
	    test_and_clear_bit(AS_ENOSPC, &mapping->flags))
		ret = -ENOSPC;
	if (test_bit(AS_EIO, &mapping->flags) &&
	    test_and_clear_bit(AS_EIO, &mapping->flags))
		ret = -EIO;
	return ret;
}
EXPORT_SYMBOL(filemap_check_errors);

static int filemap_check_and_keep_errors(struct address_space *mapping)
{
	/* Check for outstanding write errors */
	if (test_bit(AS_EIO, &mapping->flags))
		return -EIO;
	if (test_bit(AS_ENOSPC, &mapping->flags))
		return -ENOSPC;
	return 0;
}

/**
 * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
 * @mapping: address space structure to write
 * @start: offset in bytes where the range starts
 * @end: offset in bytes where the range ends (inclusive)
 * @sync_mode: enable synchronous operation
 *
 * Start writeback against all of a mapping's dirty pages that lie
 * within the byte offsets <start, end> inclusive.
 *
 * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
 * opposed to a regular memory cleansing writeback. The difference between
 * these two operations is that if a dirty page/buffer is encountered, it must
 * be waited upon, and not just skipped over.
 *
 * Return: %0 on success, negative error code otherwise.
 */
int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
				loff_t end, int sync_mode)
{
	int ret;
	struct writeback_control wbc = {
		.sync_mode = sync_mode,
		.nr_to_write = LONG_MAX,
		.range_start = start,
		.range_end = end,
	};

	if (!mapping_can_writeback(mapping) ||
	    !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		return 0;

	wbc_attach_fdatawrite_inode(&wbc, mapping->host);
	ret = do_writepages(mapping, &wbc);
	wbc_detach_inode(&wbc);
	return ret;
}

static inline int __filemap_fdatawrite(struct address_space *mapping,
	int sync_mode)
{
	return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
}

int filemap_fdatawrite(struct address_space *mapping)
{
	return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite);

int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
			     loff_t end)
{
	return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite_range);
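
/*
 * Illustrative only: a caller that wants to push dirty pages for a byte
 * range to disk without waiting for completion might do (pos/count are
 * hypothetical local variables):
 *
 *	int err = filemap_fdatawrite_range(file->f_mapping, pos,
 *					   pos + count - 1);
 *	if (err)
 *		return err;
 *	// writeback has been started; pair with filemap_fdatawait_range()
 *	// later if completion must be guaranteed.
 */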

/**
 * filemap_flush - mostly a non-blocking flush
 * @mapping: target address_space
 *
 * This is a mostly non-blocking flush. Not suitable for data-integrity
 * purposes - I/O may not be started against all dirty pages.
 *
 * Return: %0 on success, negative error code otherwise.
 */
int filemap_flush(struct address_space *mapping)
{
	return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
}
EXPORT_SYMBOL(filemap_flush);

/**
 * filemap_range_has_page - check if a page exists in range.
 * @mapping: address space within which to check
 * @start_byte: offset in bytes where the range starts
 * @end_byte: offset in bytes where the range ends (inclusive)
 *
 * Find at least one page in the range supplied, usually used to check if
 * direct writing in this range will trigger a writeback.
 *
 * Return: %true if at least one page exists in the specified range,
 * %false otherwise.
 */
bool filemap_range_has_page(struct address_space *mapping,
			   loff_t start_byte, loff_t end_byte)
{
	struct page *page;
	XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
	pgoff_t max = end_byte >> PAGE_SHIFT;

	if (end_byte < start_byte)
		return false;

	rcu_read_lock();
	for (;;) {
		page = xas_find(&xas, max);
		if (xas_retry(&xas, page))
			continue;
		/* Shadow entries don't count */
		if (xa_is_value(page))
			continue;
		/*
		 * We don't need to try to pin this page; we're about to
		 * release the RCU lock anyway. It is enough to know that
		 * there was a page here recently.
		 */
		break;
	}
	rcu_read_unlock();

	return page != NULL;
}
EXPORT_SYMBOL(filemap_range_has_page);
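
/*
 * Hedged example of the typical use mentioned above: before a direct I/O
 * write, decide whether cached pages overlap the range (pos/count are
 * hypothetical locals):
 *
 *	if (filemap_range_has_page(mapping, pos, pos + count - 1)) {
 *		// cached pages overlap: write them back (and/or invalidate)
 *		// before issuing the direct write, or fall back to buffered.
 *	}
 */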

static void __filemap_fdatawait_range(struct address_space *mapping,
				     loff_t start_byte, loff_t end_byte)
{
	pgoff_t index = start_byte >> PAGE_SHIFT;
	pgoff_t end = end_byte >> PAGE_SHIFT;
	struct pagevec pvec;
	int nr_pages;

	if (end_byte < start_byte)
		return;

	pagevec_init(&pvec);
	while (index <= end) {
		unsigned i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
				end, PAGECACHE_TAG_WRITEBACK);
		if (!nr_pages)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			wait_on_page_writeback(page);
			ClearPageError(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}

/**
 * filemap_fdatawait_range - wait for writeback to complete
 * @mapping: address space structure to wait for
 * @start_byte: offset in bytes where the range starts
 * @end_byte: offset in bytes where the range ends (inclusive)
 *
 * Walk the list of under-writeback pages of the given address space
 * in the given range and wait for all of them. Check error status of
 * the address space and return it.
 *
 * Since the error status of the address space is cleared by this function,
 * callers are responsible for checking the return value and handling and/or
 * reporting the error.
 *
 * Return: error status of the address space.
 */
int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
			    loff_t end_byte)
{
	__filemap_fdatawait_range(mapping, start_byte, end_byte);
	return filemap_check_errors(mapping);
}
EXPORT_SYMBOL(filemap_fdatawait_range);

/**
 * filemap_fdatawait_range_keep_errors - wait for writeback to complete
 * @mapping: address space structure to wait for
 * @start_byte: offset in bytes where the range starts
 * @end_byte: offset in bytes where the range ends (inclusive)
 *
 * Walk the list of under-writeback pages of the given address space in the
 * given range and wait for all of them. Unlike filemap_fdatawait_range(),
 * this function does not clear error status of the address space.
 *
 * Use this function if callers don't handle errors themselves. Expected
 * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
 * fsfreeze(8)
 *
 * Return: error status of the address space.
 */
int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
		loff_t start_byte, loff_t end_byte)
{
	__filemap_fdatawait_range(mapping, start_byte, end_byte);
	return filemap_check_and_keep_errors(mapping);
}
EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors);

/**
 * file_fdatawait_range - wait for writeback to complete
 * @file: file pointing to address space structure to wait for
 * @start_byte: offset in bytes where the range starts
 * @end_byte: offset in bytes where the range ends (inclusive)
 *
 * Walk the list of under-writeback pages of the address space that file
 * refers to, in the given range and wait for all of them. Check error
 * status of the address space vs. the file->f_wb_err cursor and return it.
 *
 * Since the error status of the file is advanced by this function,
 * callers are responsible for checking the return value and handling and/or
 * reporting the error.
 *
 * Return: error status of the address space vs. the file->f_wb_err cursor.
 */
int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte)
{
	struct address_space *mapping = file->f_mapping;

	__filemap_fdatawait_range(mapping, start_byte, end_byte);
	return file_check_and_advance_wb_err(file);
}
EXPORT_SYMBOL(file_fdatawait_range);

/**
 * filemap_fdatawait_keep_errors - wait for writeback without clearing errors
 * @mapping: address space structure to wait for
 *
 * Walk the list of under-writeback pages of the given address space
 * and wait for all of them. Unlike filemap_fdatawait(), this function
 * does not clear error status of the address space.
 *
 * Use this function if callers don't handle errors themselves. Expected
 * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
 * fsfreeze(8)
 *
 * Return: error status of the address space.
 */
int filemap_fdatawait_keep_errors(struct address_space *mapping)
{
	__filemap_fdatawait_range(mapping, 0, LLONG_MAX);
	return filemap_check_and_keep_errors(mapping);
}
EXPORT_SYMBOL(filemap_fdatawait_keep_errors);

/* Returns true if writeback might be needed or already in progress. */
static bool mapping_needs_writeback(struct address_space *mapping)
{
	if (dax_mapping(mapping))
		return mapping->nrexceptional;

	return mapping->nrpages;
}

/**
 * filemap_write_and_wait_range - write out & wait on a file range
 * @mapping: the address_space for the pages
 * @lstart: offset in bytes where the range starts
 * @lend: offset in bytes where the range ends (inclusive)
 *
 * Write out and wait upon file offsets lstart->lend, inclusive.
 *
 * Note that @lend is inclusive (describes the last byte to be written) so
 * that this function can be used to write to the very end-of-file (end = -1).
 *
 * Return: error status of the address space.
 */
int filemap_write_and_wait_range(struct address_space *mapping,
				 loff_t lstart, loff_t lend)
{
	int err = 0;

	if (mapping_needs_writeback(mapping)) {
		err = __filemap_fdatawrite_range(mapping, lstart, lend,
						 WB_SYNC_ALL);
		/*
		 * Even if the above returned an error, the pages may have
		 * been partially written (e.g. -ENOSPC), so wait for
		 * writeback to finish. -EIO is a special case, though: it
		 * may indicate that something far worse (e.g. a bug)
		 * happened, so we avoid waiting in that case.
		 */
		if (err != -EIO) {
			int err2 = filemap_fdatawait_range(mapping,
							   lstart, lend);
			if (!err)
				err = err2;
		} else {
			/* Clear any previously stored errors */
			filemap_check_errors(mapping);
		}
	} else {
		err = filemap_check_errors(mapping);
	}
	return err;
}
EXPORT_SYMBOL(filemap_write_and_wait_range);
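
/*
 * Illustrative sketch: flushing a whole file synchronously before reading
 * it via some out-of-band path (inode is a hypothetical local):
 *
 *	loff_t isize = i_size_read(inode);
 *	int err = 0;
 *
 *	if (isize)
 *		err = filemap_write_and_wait_range(inode->i_mapping,
 *						   0, isize - 1);
 *	// err now carries any writeback error recorded on the mapping.
 */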

void __filemap_set_wb_err(struct address_space *mapping, int err)
{
	errseq_t eseq = errseq_set(&mapping->wb_err, err);

	trace_filemap_set_wb_err(mapping, eseq);
}
EXPORT_SYMBOL(__filemap_set_wb_err);

/**
 * file_check_and_advance_wb_err - report wb error (if any) that was previously
 *				   recorded and advance wb_err to the current one
 * @file: struct file on which the error is being reported
 *
 * When userland calls fsync (or something like nfsd does the equivalent), we
 * want to report any writeback errors that occurred since the last fsync (or
 * since the file was opened if there haven't been any).
 *
 * Grab the wb_err from the mapping. If it matches what we have in the file,
 * then just quickly return 0. The file is all caught up.
 *
 * If it doesn't match, then take the mapping value, set the "seen" flag in
 * it and try to swap it into place. If it works, or another task beat us
 * to it with the new value, then update the f_wb_err and return the error
 * portion. The error at this point must be reported via proper channels
 * (a'la fsync, or NFS COMMIT operation, etc.).
 *
 * While we handle mapping->wb_err with atomic operations, the f_wb_err
 * value is protected by the f_lock since we must ensure that it reflects
 * the latest value swapped in for this file descriptor.
 *
 * Return: %0 on success, negative error code otherwise.
 */
int file_check_and_advance_wb_err(struct file *file)
{
	int err = 0;
	errseq_t old = READ_ONCE(file->f_wb_err);
	struct address_space *mapping = file->f_mapping;

	/* Locklessly handle the common case where nothing has changed */
	if (errseq_check(&mapping->wb_err, old)) {
		/* Something changed, must use slow path */
		spin_lock(&file->f_lock);
		old = file->f_wb_err;
		err = errseq_check_and_advance(&mapping->wb_err,
						&file->f_wb_err);
		trace_file_check_and_advance_wb_err(file, old);
		spin_unlock(&file->f_lock);
	}

	/*
	 * We're mostly using this function as a drop-in replacement for
	 * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect
	 * that the legacy code would have had on these flags.
	 */
	clear_bit(AS_EIO, &mapping->flags);
	clear_bit(AS_ENOSPC, &mapping->flags);
	return err;
}
EXPORT_SYMBOL(file_check_and_advance_wb_err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) * file_write_and_wait_range - write out & wait on a file range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) * @file: file pointing to address_space with pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) * @lstart: offset in bytes where the range starts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) * @lend: offset in bytes where the range ends (inclusive)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) * Write out and wait upon file offsets lstart->lend, inclusive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) * Note that @lend is inclusive (describes the last byte to be written) so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) * that this function can be used to write to the very end-of-file (end = -1).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) * After writing out and waiting on the data, we check and advance the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) * f_wb_err cursor to the latest value, and return any errors detected there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) * Return: %0 on success, negative error code otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) int err = 0, err2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) if (mapping_needs_writeback(mapping)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) err = __filemap_fdatawrite_range(mapping, lstart, lend,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) WB_SYNC_ALL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) /* See comment of filemap_write_and_wait() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) if (err != -EIO)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) __filemap_fdatawait_range(mapping, lstart, lend);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) err2 = file_check_and_advance_wb_err(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) err = err2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) EXPORT_SYMBOL(file_write_and_wait_range);
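
/*
 * Illustrative sketch (not built): a minimal ->fsync() for a hypothetical
 * filesystem whose metadata is kept consistent elsewhere could be built
 * directly on top of this helper:
 *
 *	static int example_fsync(struct file *file, loff_t start, loff_t end,
 *				 int datasync)
 *	{
 *		// flush + wait on the byte range, then fold in any error
 *		// recorded against this struct file's f_wb_err cursor
 *		return file_write_and_wait_range(file, start, end);
 *	}
 *
 * Real implementations usually also flush a journal or device cache here.
 */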
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) * replace_page_cache_page - replace a pagecache page with a new one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) * @old: page to be replaced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) * @new: page to replace with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) * @gfp_mask: allocation mode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) * This function replaces a page in the pagecache with a new one. On
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) * success it acquires the pagecache reference for the new page and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) * drops it for the old page. Both the old and new pages must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) * locked. This function does not add the new page to the LRU, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) * caller must do that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) * The remove + add is atomic. This function cannot fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) * Return: %0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) struct address_space *mapping = old->mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) void (*freepage)(struct page *) = mapping->a_ops->freepage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) pgoff_t offset = old->index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) XA_STATE(xas, &mapping->i_pages, offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) VM_BUG_ON_PAGE(!PageLocked(old), old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) VM_BUG_ON_PAGE(!PageLocked(new), new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) VM_BUG_ON_PAGE(new->mapping, new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) get_page(new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) new->mapping = mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) new->index = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) mem_cgroup_migrate(old, new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) xas_lock_irqsave(&xas, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) xas_store(&xas, new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) old->mapping = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) /* hugetlb pages do not participate in page cache accounting. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) if (!PageHuge(old))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) __dec_lruvec_page_state(old, NR_FILE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) if (!PageHuge(new))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) __inc_lruvec_page_state(new, NR_FILE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) if (PageSwapBacked(old))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) __dec_lruvec_page_state(old, NR_SHMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) if (PageSwapBacked(new))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) __inc_lruvec_page_state(new, NR_SHMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) xas_unlock_irqrestore(&xas, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) if (freepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) freepage(old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) put_page(old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) EXPORT_SYMBOL_GPL(replace_page_cache_page);
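
/*
 * Illustrative sketch (not built): the expected calling pattern, roughly.
 * Both pages are locked by the caller, and since this function leaves the
 * LRU alone, the caller adds the replacement page itself:
 *
 *	// old is an up-to-date pagecache page, new is freshly allocated;
 *	// both are locked and the caller holds a reference on each.
 *	replace_page_cache_page(old, new, GFP_KERNEL);	// cannot fail
 *	lru_cache_add(new);				// caller's job
 *	unlock_page(old);
 *	put_page(old);					// drop caller's ref
 *	// 'new' stays locked until the caller has filled it in
 */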
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) noinline int __add_to_page_cache_locked(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) pgoff_t offset, gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) void **shadowp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) XA_STATE(xas, &mapping->i_pages, offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) int huge = PageHuge(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) bool charged = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) VM_BUG_ON_PAGE(PageSwapBacked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) mapping_set_update(&xas, mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) page->mapping = mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) page->index = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) if (!huge) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) error = mem_cgroup_charge(page, current->mm, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) charged = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) gfp &= GFP_RECLAIM_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) unsigned int order = xa_get_order(xas.xa, xas.xa_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) void *entry, *old = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) if (order > thp_order(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) order, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) xas_lock_irq(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) xas_for_each_conflict(&xas, entry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) old = entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) if (!xa_is_value(entry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) xas_set_err(&xas, -EEXIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) if (old) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) if (shadowp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) *shadowp = old;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) /* entry may have been split before we acquired lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) order = xa_get_order(xas.xa, xas.xa_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) if (order > thp_order(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) xas_split(&xas, old, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) xas_reset(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) xas_store(&xas, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) if (xas_error(&xas))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) if (old)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) mapping->nrexceptional--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) mapping->nrpages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) /* hugetlb pages do not participate in page cache accounting */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) if (!huge)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) __inc_lruvec_page_state(page, NR_FILE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) xas_unlock_irq(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) } while (xas_nomem(&xas, gfp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) if (xas_error(&xas)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) error = xas_error(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) if (charged)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) mem_cgroup_uncharge(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) trace_mm_filemap_add_to_page_cache(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) page->mapping = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) /* Leave page->index set: truncation relies upon it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) * add_to_page_cache_locked - add a locked page to the pagecache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) * @page: page to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) * @mapping: the page's address_space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) * @offset: page index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) * @gfp_mask: page allocation mode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) * This function is used to add a page to the pagecache. The page must be locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) * This function does not add the page to the LRU. The caller must do that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) * Return: %0 on success, negative error code otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) pgoff_t offset, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) return __add_to_page_cache_locked(page, mapping, offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) gfp_mask, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) EXPORT_SYMBOL(add_to_page_cache_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) pgoff_t offset, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) void *shadow = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) __SetPageLocked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) ret = __add_to_page_cache_locked(page, mapping, offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) gfp_mask, &shadow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) __ClearPageLocked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) * The page might have been evicted from cache only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) * recently, in which case it should be activated like
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) * any other repeatedly accessed page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) * The exception is pages getting rewritten; evicting other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) * data from the working set, only to cache data that will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) * get overwritten with something else, is a waste of memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) WARN_ON_ONCE(PageActive(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) if (!(gfp_mask & __GFP_WRITE) && shadow)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) workingset_refault(page, shadow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) lru_cache_add(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
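
/*
 * Illustrative sketch (not built): a typical way a read path might populate
 * the cache with this helper.  -EEXIST just means another task inserted a
 * page at that index first, so callers normally retry the lookup (names
 * below are hypothetical):
 *
 *	static struct page *example_grab_page(struct address_space *mapping,
 *					      pgoff_t index, gfp_t gfp)
 *	{
 *		struct page *page = __page_cache_alloc(gfp);
 *		int err;
 *
 *		if (!page)
 *			return ERR_PTR(-ENOMEM);
 *		err = add_to_page_cache_lru(page, mapping, index, gfp);
 *		if (err) {
 *			put_page(page);		// drop the allocation ref
 *			return ERR_PTR(err);	// -EEXIST: lost the race
 *		}
 *		return page;			// returned locked
 *	}
 */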
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) struct page *__page_cache_alloc(gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) int n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) if (cpuset_do_page_mem_spread()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) unsigned int cpuset_mems_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) cpuset_mems_cookie = read_mems_allowed_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) n = cpuset_mem_spread_node();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) page = __alloc_pages_node(n, gfp, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) } while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) return alloc_pages(gfp, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) EXPORT_SYMBOL(__page_cache_alloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) * In order to wait for pages to become available there must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) * waitqueues associated with pages. Rather than one waitqueue per
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) * page, we use a hash table of waitqueues: all waiters for pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) * that hash to the same bucket share one queue, a waker wakes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) * everyone on that queue, and each woken context re-checks that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) * the page it was waiting for really did become available. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) * saves space at the cost of "thundering herd" phenomena during
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) * rare hash collisions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) #define PAGE_WAIT_TABLE_BITS 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) #define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) static wait_queue_head_t *page_waitqueue(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) }
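
/*
 * Note (editorial): with PAGE_WAIT_TABLE_BITS == 8 there are only 256
 * bucket heads, so unrelated pages can hash to the same wait_queue_head_t:
 *
 *	// entirely possible for two distinct pages:
 *	page_waitqueue(page_a) == page_waitqueue(page_b);
 *
 * That is why each wait entry's wake function (wake_page_function() below)
 * re-checks the page and bit via wake_page_match() before treating a
 * wakeup on the shared queue as its own.
 */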
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) void __init pagecache_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) init_waitqueue_head(&page_wait_table[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) page_writeback_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) * The page wait code treats the "wait->flags" somewhat unusually, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * we have multiple different kinds of waits, not just the usual "exclusive"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) * one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * We have:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * (a) no special bits set:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) * We're just waiting for the bit to be released, and when a waker
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) * calls the wakeup function, we set WQ_FLAG_WOKEN and wake it up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) * and remove it from the wait queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) * Simple and straightforward.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) * (b) WQ_FLAG_EXCLUSIVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) * The waiter is waiting to get the lock, and only one waiter should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) * be woken up to avoid any thundering herd behavior. We'll set the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) * WQ_FLAG_WOKEN bit, wake it up, and remove it from the wait queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) * This is the traditional exclusive wait.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) * (c) WQ_FLAG_EXCLUSIVE | WQ_FLAG_CUSTOM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) * The waiter is waiting to get the bit, and additionally wants the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) * lock to be transferred to it for fair lock behavior. If the lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) * cannot be taken, we stop walking the wait queue without waking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * the waiter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) * This is the "fair lock handoff" case, and in addition to setting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) * WQ_FLAG_WOKEN, we set WQ_FLAG_DONE to let the waiter easily see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) * that it now has the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) unsigned int flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) struct wait_page_key *key = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) struct wait_page_queue *wait_page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) = container_of(wait, struct wait_page_queue, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) if (!wake_page_match(wait_page, key))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) * If it's a lock handoff wait, we try to get the bit for it, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) * stop walking (and do not wake the waiter) if we can't.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) flags = wait->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) if (flags & WQ_FLAG_EXCLUSIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) if (test_bit(key->bit_nr, &key->page->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) if (flags & WQ_FLAG_CUSTOM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) if (test_and_set_bit(key->bit_nr, &key->page->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) flags |= WQ_FLAG_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) * We are holding the wait-queue lock, but the waiter that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) * is waiting for this will be checking the flags without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) * any locking.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) * So update the flags atomically, and wake up the waiter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) * afterwards to avoid any races. This store-release pairs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) * with the load-acquire in wait_on_page_bit_common().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) wake_up_state(wait->private, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) * Ok, we have successfully done what we're waiting for,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * and we can unconditionally remove the wait entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) * Note that this pairs with the "finish_wait()" in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) * waiter, and has to be the absolute last thing we do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) * After this list_del_init(&wait->entry) the wait entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) * might be de-allocated and the process might even have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) * exited.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) list_del_init_careful(&wait->entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) return (flags & WQ_FLAG_EXCLUSIVE) != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) static void wake_up_page_bit(struct page *page, int bit_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) wait_queue_head_t *q = page_waitqueue(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) struct wait_page_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) wait_queue_entry_t bookmark;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) key.page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) key.bit_nr = bit_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) key.page_match = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) bookmark.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) bookmark.private = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) bookmark.func = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) INIT_LIST_HEAD(&bookmark.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) spin_lock_irqsave(&q->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) while (bookmark.flags & WQ_FLAG_BOOKMARK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) * Take a breather from holding the lock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) * allow the waiters that finish their wakeup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) * asynchronously to acquire the lock and remove
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) * themselves from the wait queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) spin_unlock_irqrestore(&q->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) cpu_relax();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) spin_lock_irqsave(&q->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) * It is possible for other pages to have collided on the waitqueue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) * hash, so in that case check for a page match: that prevents a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) * long-term waiter for another page from keeping PG_waiters set here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) * It is still possible to miss a case here, when we woke page waiters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) * and removed them from the waitqueue, but there are still other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) * page waiters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) if (!waitqueue_active(q) || !key.page_match) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) ClearPageWaiters(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) * It's possible to miss clearing Waiters here, when we woke
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) * our page waiters, but the hashed waitqueue has waiters for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) * other pages on it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) * That's okay, it's a rare case. The next waker will clear it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) spin_unlock_irqrestore(&q->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) static void wake_up_page(struct page *page, int bit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) if (!PageWaiters(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) wake_up_page_bit(page, bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) * A choice of three behaviors for wait_on_page_bit_common():
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) enum behavior {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) EXCLUSIVE, /* Hold ref to page and take the bit when woken, like
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) * __lock_page() waiting on then setting PG_locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) SHARED, /* Hold ref to page and check the bit when woken, like
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) * wait_on_page_writeback() waiting on PG_writeback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) DROP, /* Drop ref to page before wait, no check when woken,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) * like put_and_wait_on_page_locked() on PG_locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) * Attempt to check (or get) the page bit, and mark us done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) * if successful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) struct wait_queue_entry *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) if (wait->flags & WQ_FLAG_EXCLUSIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) if (test_and_set_bit(bit_nr, &page->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) } else if (test_bit(bit_nr, &page->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) /* How many times do we accept lock stealing from under a waiter? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) int sysctl_page_lock_unfairness = 5;
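
/*
 * Note (editorial): this is expected to be tunable as vm.page_lock_unfairness
 * via sysctl.  With the default of 5, an EXCLUSIVE waiter puts up with the
 * lock being stolen from under it five times; the sixth time through the
 * retry loop it also sets WQ_FLAG_CUSTOM, asking the waker for the fair
 * handoff described in case (c) above.
 */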
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) static inline __sched int wait_on_page_bit_common(wait_queue_head_t *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) struct page *page, int bit_nr, int state, enum behavior behavior)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) int unfairness = sysctl_page_lock_unfairness;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) struct wait_page_queue wait_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) wait_queue_entry_t *wait = &wait_page.wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) bool thrashing = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) bool delayacct = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) unsigned long pflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) if (bit_nr == PG_locked &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) !PageUptodate(page) && PageWorkingset(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) if (!PageSwapBacked(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) delayacct_thrashing_start();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) delayacct = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) psi_memstall_enter(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) thrashing = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) init_wait(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) wait->func = wake_page_function;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) wait_page.page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) wait_page.bit_nr = bit_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) wait->flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) if (behavior == EXCLUSIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) wait->flags = WQ_FLAG_EXCLUSIVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) if (--unfairness < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) wait->flags |= WQ_FLAG_CUSTOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) * Do one last check whether we can get the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) * page bit synchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) * Do the SetPageWaiters() marking before that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) * to let any waker we _just_ missed know they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) * need to wake us up (otherwise they'll never
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) * even go to the slow case that looks at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) * page queue), and add ourselves to the wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) * queue if we need to sleep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) * This part needs to be done under the queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) * lock to avoid races.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) spin_lock_irq(&q->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) SetPageWaiters(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) if (!trylock_page_bit_common(page, bit_nr, wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) __add_wait_queue_entry_tail(q, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) spin_unlock_irq(&q->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) * From now on, all the logic will be based on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) * the WQ_FLAG_WOKEN and WQ_FLAG_DONE flags, to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) * see whether the page bit testing has already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) * been done by the wake function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) * We can drop our reference to the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) if (behavior == DROP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) * Note that until the "finish_wait()", or until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) * we see the WQ_FLAG_WOKEN flag, we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) * be very careful with the 'wait->flags', because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) * we may race with a waker that sets them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) unsigned int flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) set_current_state(state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) /* Loop until we've been woken or interrupted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) flags = smp_load_acquire(&wait->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) if (!(flags & WQ_FLAG_WOKEN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) if (signal_pending_state(state, current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) io_schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) /* If we were non-exclusive, we're done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) if (behavior != EXCLUSIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) /* If the waker got the lock for us, we're done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) if (flags & WQ_FLAG_DONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) * Otherwise, if we're getting the lock, we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) * try to get it ourselves.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) * And if that fails, we'll have to retry this all.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) if (unlikely(test_and_set_bit(bit_nr, &page->flags)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) wait->flags |= WQ_FLAG_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) * If a signal happened, this 'finish_wait()' may remove the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) * waiter from the wait-queues, but the PageWaiters bit will remain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) * set. That's ok. The next wakeup will take care of it, and trying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) * to do it here would be difficult and prone to races.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) finish_wait(q, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) if (thrashing) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) if (delayacct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) delayacct_thrashing_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) psi_memstall_leave(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) * NOTE! The wait->flags weren't stable until we've done the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) * 'finish_wait()', and we could have exited the loop above due
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) * to a signal, and had a wakeup event happen after the signal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) * test but before the 'finish_wait()'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) * So only after the finish_wait() can we reliably determine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) * if we got woken up or not, so we can now figure out the final
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) * return value based on that state without races.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) * Also note that WQ_FLAG_WOKEN is sufficient for a non-exclusive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) * waiter, but an exclusive one requires WQ_FLAG_DONE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) if (behavior == EXCLUSIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) return wait->flags & WQ_FLAG_DONE ? 0 : -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) __sched void wait_on_page_bit(struct page *page, int bit_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) wait_queue_head_t *q = page_waitqueue(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) EXPORT_SYMBOL(wait_on_page_bit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) __sched int wait_on_page_bit_killable(struct page *page, int bit_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) wait_queue_head_t *q = page_waitqueue(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) EXPORT_SYMBOL(wait_on_page_bit_killable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) static int __wait_on_page_locked_async(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) struct wait_page_queue *wait, bool set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) struct wait_queue_head *q = page_waitqueue(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) wait->page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) wait->bit_nr = PG_locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) spin_lock_irq(&q->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) __add_wait_queue_entry_tail(q, &wait->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) SetPageWaiters(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) if (set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) ret = !trylock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) ret = PageLocked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) * If we were successful now, we know we're still on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) * waitqueue as we're still under the lock. This means it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) * safe to remove and return success, we know the callback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) * isn't going to trigger.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) __remove_wait_queue(q, &wait->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) ret = -EIOCBQUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) spin_unlock_irq(&q->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) static int wait_on_page_locked_async(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) struct wait_page_queue *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) if (!PageLocked(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) return __wait_on_page_locked_async(compound_head(page), wait, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) * @page: The page to wait for.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) * The caller should hold a reference on @page. They expect the page to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) * become unlocked relatively soon, but do not wish to hold up migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * (for example) by holding the reference while waiting for the page to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) * come unlocked. After this function returns, the caller should not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) * dereference @page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) void put_and_wait_on_page_locked(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) wait_queue_head_t *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) page = compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) q = page_waitqueue(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, DROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) }
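
/*
 * Illustrative sketch (not built): the migration-style calling pattern.
 * The caller takes its own reference first; this helper then drops that
 * reference before sleeping so the wait itself does not pin the page:
 *
 *	get_page(page);
 *	// ... decide we must wait for whoever holds PG_locked ...
 *	put_and_wait_on_page_locked(page);	// consumes our reference
 *	// 'page' must not be dereferenced past this point
 */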
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) * @page: Page defining the wait queue of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) * @waiter: Waiter to add to the queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) * Add an arbitrary @waiter to the wait queue for the nominated @page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) wait_queue_head_t *q = page_waitqueue(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) spin_lock_irqsave(&q->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) __add_wait_queue_entry_tail(q, waiter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) SetPageWaiters(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) spin_unlock_irqrestore(&q->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) EXPORT_SYMBOL_GPL(add_page_wait_queue);
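
/*
 * Illustrative sketch (not built): a caller can park a custom wait entry
 * with its own wake function on a page's (hashed) queue, e.g. to get a
 * callback when the page's waiters are woken (names are hypothetical):
 *
 *	static int example_wake(struct wait_queue_entry *wq, unsigned mode,
 *				int sync, void *key)
 *	{
 *		// 'key' is a struct wait_page_key; filter on page/bit here
 *		return 0;	// keep walking the wait queue
 *	}
 *
 *	init_waitqueue_func_entry(&my_waiter, example_wake);
 *	add_page_wait_queue(page, &my_waiter);
 */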
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) #ifndef clear_bit_unlock_is_negative_byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) * PG_waiters is the high bit in the same byte as PG_locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) * On x86 (and on many other architectures), we can clear PG_locked and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) * test the sign bit at the same time. But if the architecture does
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) * not support that special operation, we just do this all by hand
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) * instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) * The read of PG_waiters has to be after (or concurrently with) PG_locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) * being cleared, but a memory barrier should be unnecessary since it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) * in the same byte as PG_locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) clear_bit_unlock(nr, mem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) /* smp_mb__after_atomic(); */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) return test_bit(PG_waiters, mem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) * unlock_page - unlock a locked page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) * @page: the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) * Unlocks the page and wakes up sleepers in wait_on_page_locked().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) * Also wakes sleepers in wait_on_page_writeback() because the wakeup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) * mechanism between PageLocked pages and PageWriteback pages is shared.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) * Note that this depends on PG_waiters being the sign bit in the byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) * clear the PG_locked bit and test PG_waiters at the same time fairly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) * portably (architectures that do LL/SC can test any bit, while x86 can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) * test the sign bit).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) void unlock_page(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) BUILD_BUG_ON(PG_waiters != 7);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) page = compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) wake_up_page_bit(page, PG_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) EXPORT_SYMBOL(unlock_page);
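
/*
 * Illustrative sketch (not built): the canonical pairing.  Whoever owns
 * PG_locked (from lock_page(), trylock_page(), or a helper that returns
 * the page locked) issues the matching unlock_page():
 *
 *	lock_page(page);
 *	// ... inspect or update the page while it cannot be truncated ...
 *	unlock_page(page);	// wakes any waiters queued on PG_locked
 */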
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) * end_page_writeback - end writeback against a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) * @page: the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) void end_page_writeback(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) * TestClearPageReclaim could be used here but it is an atomic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) * operation and overkill in this particular case. Failing to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) * shuffle a page marked for immediate reclaim is too mild to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) * justify taking an atomic operation penalty at the end of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) * every page writeback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) if (PageReclaim(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) ClearPageReclaim(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) rotate_reclaimable_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) * Writeback does not hold a page reference of its own, relying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) * on truncation to wait for the clearing of PG_writeback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) * But here we must make sure that the page is not freed and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) * reused before the wake_up_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) if (!test_clear_page_writeback(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) smp_mb__after_atomic();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) wake_up_page(page, PG_writeback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) EXPORT_SYMBOL(end_page_writeback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) * After completing I/O on a page, call this routine to update the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) * flags appropriately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) void page_endio(struct page *page, bool is_write, int err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) if (!is_write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) if (!err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) ClearPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) SetPageError(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) struct address_space *mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) SetPageError(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) mapping = page_mapping(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) if (mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) mapping_set_error(mapping, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) end_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) EXPORT_SYMBOL_GPL(page_endio);
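
/*
 * Illustrative sketch, not part of filemap.c: how a simple block-backed
 * read completion might funnel a bio's pages through page_endio().
 * example_read_end_io() and the bio that reaches it are hypothetical;
 * page_endio(), bio_for_each_segment_all(), blk_status_to_errno() and
 * bio_put() are existing kernel APIs (declared via <linux/bio.h>).
 */
static void example_read_end_io(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;
	int err = blk_status_to_errno(bio->bi_status);

	/*
	 * is_write == false: page_endio() sets PageUptodate on success,
	 * sets PageError on failure, and unlocks the page either way.
	 */
	bio_for_each_segment_all(bvec, bio, iter_all)
		page_endio(bvec->bv_page, false, err);
	bio_put(bio);
}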
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) * __lock_page - get a lock on the page, assuming we need to sleep to get it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) * @__page: the page to lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) __sched void __lock_page(struct page *__page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) struct page *page = compound_head(__page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) wait_queue_head_t *q = page_waitqueue(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) EXCLUSIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) EXPORT_SYMBOL(__lock_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) __sched int __lock_page_killable(struct page *__page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) struct page *page = compound_head(__page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) wait_queue_head_t *q = page_waitqueue(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) EXCLUSIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) EXPORT_SYMBOL_GPL(__lock_page_killable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) __sched int __lock_page_async(struct page *page, struct wait_page_queue *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) return __wait_on_page_locked_async(page, wait, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) * Return values:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) * 1 - page is locked; mmap_lock is still held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) * 0 - page is not locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) * mmap_lock has been released (mmap_read_unlock()), unless flags had both
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) * which case mmap_lock is still held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) * with the page locked and the mmap_lock unperturbed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) __sched int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) if (fault_flag_allow_retry_first(flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) * CAUTION! In this case, mmap_lock is not released
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) * even though we return 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) if (flags & FAULT_FLAG_RETRY_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) if (flags & FAULT_FLAG_KILLABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) wait_on_page_locked_killable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) wait_on_page_locked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) if (flags & FAULT_FLAG_KILLABLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) ret = __lock_page_killable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) __lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) }
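
/*
 * Illustrative sketch, not part of filemap.c: the shape of a fault-path
 * caller consuming the return contract above through the
 * lock_page_or_retry() wrapper from <linux/pagemap.h>.
 * example_fault_lock() is hypothetical; the page is assumed to carry a
 * reference taken by the caller.
 */
static vm_fault_t example_fault_lock(struct vm_fault *vmf, struct page *page)
{
	if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
		/*
		 * Returned 0: the page is not locked, and mmap_lock has
		 * been dropped for us unless FAULT_FLAG_RETRY_NOWAIT was
		 * also set.
		 */
		put_page(page);
		return VM_FAULT_RETRY;
	}
	/* Returned 1: the page is locked and mmap_lock is still held. */
	unlock_page(page);
	put_page(page);
	return 0;
}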
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) * page_cache_next_miss() - Find the next gap in the page cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) * @mapping: Mapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) * @index: Index.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) * @max_scan: Maximum range to search.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) * Search the range [index, min(index + max_scan - 1, ULONG_MAX)] for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) * gap with the lowest index.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) * This function may be called under the rcu_read_lock. However, this will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) * not atomically search a snapshot of the cache at a single point in time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) * For example, if a gap is created at index 5, then subsequently a gap is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) * created at index 10, page_cache_next_miss() covering both indices may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) * return 10 if called under the rcu_read_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) * Return: The index of the gap if found, otherwise an index outside the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) * range specified (in which case 'return - index >= max_scan' will be true).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) * In the rare case of index wrap-around, 0 will be returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) pgoff_t page_cache_next_miss(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) pgoff_t index, unsigned long max_scan)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) XA_STATE(xas, &mapping->i_pages, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) while (max_scan--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) void *entry = xas_next(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) if (!entry || xa_is_value(entry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) if (xas.xa_index == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) return xas.xa_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) EXPORT_SYMBOL(page_cache_next_miss);
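
/*
 * Illustrative sketch, not part of filemap.c: checking whether a small
 * window of the cache is fully populated, relying only on the return
 * contract documented above.  example_window_is_full() is a hypothetical
 * helper; page_cache_prev_miss() below can be used the same way in the
 * other direction.
 */
static bool example_window_is_full(struct address_space *mapping,
				   pgoff_t index, unsigned long nr)
{
	pgoff_t gap = page_cache_next_miss(mapping, index, nr);

	/* A result outside [index, index + nr) means no gap was found. */
	return gap - index >= nr;
}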
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) * page_cache_prev_miss() - Find the previous gap in the page cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) * @mapping: Mapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) * @index: Index.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) * @max_scan: Maximum range to search.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) * Search the range [max(index - max_scan + 1, 0), index] for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) * gap with the highest index.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) * This function may be called under the rcu_read_lock. However, this will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) * not atomically search a snapshot of the cache at a single point in time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) * For example, if a gap is created at index 10, then subsequently a gap is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) * created at index 5, page_cache_prev_miss() covering both indices may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) * return 5 if called under the rcu_read_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) * Return: The index of the gap if found, otherwise an index outside the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) * range specified (in which case 'index - return >= max_scan' will be true).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) * In the rare case of wrap-around, ULONG_MAX will be returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) pgoff_t page_cache_prev_miss(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) pgoff_t index, unsigned long max_scan)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) XA_STATE(xas, &mapping->i_pages, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) while (max_scan--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) void *entry = xas_prev(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) if (!entry || xa_is_value(entry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) if (xas.xa_index == ULONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) return xas.xa_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) EXPORT_SYMBOL(page_cache_prev_miss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) * find_get_entry - find and get a page cache entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) * @mapping: the address_space to search
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) * @index: The page cache index.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) * Looks up the page cache slot at @mapping & @index. If there is a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) * page cache page, the head page is returned with an increased refcount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) * If the slot holds a shadow entry of a previously evicted page, or a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) * swap entry from shmem/tmpfs, it is returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) * Return: The head page or shadow entry, %NULL if nothing is found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) struct page *find_get_entry(struct address_space *mapping, pgoff_t index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) XA_STATE(xas, &mapping->i_pages, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) xas_reset(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) page = xas_load(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) if (xas_retry(&xas, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) * A shadow entry of a recently evicted page, or a swap entry from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) * shmem/tmpfs. Return it without attempting to raise page count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) if (!page || xa_is_value(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) if (!page_cache_get_speculative(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) * Has the page moved or been split?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) * This is part of the lockless pagecache protocol. See
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) * include/linux/pagemap.h for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) if (unlikely(page != xas_reload(&xas))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) }
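
/*
 * Illustrative sketch, not part of filemap.c: a caller that only cares
 * about real pages must filter out value entries itself, since
 * find_get_entry() also hands back shadow/swap entries, which carry no
 * reference.  example_get_page_only() is a hypothetical helper.
 */
static struct page *example_get_page_only(struct address_space *mapping,
					  pgoff_t index)
{
	struct page *entry = find_get_entry(mapping, index);

	if (xa_is_value(entry))
		return NULL;	/* shadow or swap entry, no refcount taken */
	return entry;		/* NULL, or a head page with an extra ref */
}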
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) * find_lock_entry - Locate and lock a page cache entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) * @mapping: The address_space to search.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) * @index: The page cache index.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) * Looks up the page at @mapping & @index. If there is a page in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) * cache, the head page is returned locked and with an increased refcount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) * If the slot holds a shadow entry of a previously evicted page, or a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) * swap entry from shmem/tmpfs, it is returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) * Context: May sleep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) * Return: The head page or shadow entry, %NULL if nothing is found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) struct page *find_lock_entry(struct address_space *mapping, pgoff_t index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) page = find_get_entry(mapping, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) if (page && !xa_is_value(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) /* Has the page been truncated? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) if (unlikely(page->mapping != mapping)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) VM_BUG_ON_PAGE(!thp_contains(page, index), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) }
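
/*
 * Illustrative sketch, not part of filemap.c: the unlock/put discipline a
 * caller of find_lock_entry() owes when it gets a real page back.
 * example_touch_locked() is a hypothetical helper.
 */
static void example_touch_locked(struct address_space *mapping, pgoff_t index)
{
	struct page *page = find_lock_entry(mapping, index);

	if (!page || xa_is_value(page))
		return;		/* nothing cached, or an unlocked value entry */

	/* The head page is locked here and holds an extra reference. */
	unlock_page(page);
	put_page(page);
}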
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) * pagecache_get_page - Find and get a reference to a page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) * @mapping: The address_space to search.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) * @index: The page index.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) * @fgp_flags: %FGP flags modify how the page is returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) * @gfp_mask: Memory allocation flags to use if %FGP_CREAT is specified.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) * Looks up the page cache entry at @mapping & @index.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) * @fgp_flags can be zero or more of these flags:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) * * %FGP_ACCESSED - The page will be marked accessed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) * * %FGP_LOCK - The page is returned locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) * * %FGP_HEAD - If the page is present and a THP, return the head page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) * rather than the exact page specified by the index.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) * * %FGP_CREAT - If no page is present then a new page is allocated using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) * @gfp_mask and added to the page cache and the VM's LRU list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) * The page is returned locked and with an increased refcount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) * page is already in cache. If the page was allocated, unlock it before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) * returning so the caller can do the same dance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) * * %FGP_WRITE - The page will be written to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) * * %FGP_NOFS - __GFP_FS will be cleared in the gfp mask.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) * * %FGP_NOWAIT - Don't block on the page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) * if the %GFP flags specified for %FGP_CREAT are atomic.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) * If there is a page cache page, it is returned with an increased refcount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) * Return: The found page or %NULL otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) int fgp_flags, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) page = find_get_entry(mapping, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) if (xa_is_value(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) trace_android_vh_pagecache_get_page(mapping, index, fgp_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) gfp_mask, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) goto no_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) if (fgp_flags & FGP_LOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) if (fgp_flags & FGP_NOWAIT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) if (!trylock_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) /* Has the page been truncated? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) if (unlikely(page->mapping != mapping)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) VM_BUG_ON_PAGE(!thp_contains(page, index), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) if (fgp_flags & FGP_ACCESSED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) mark_page_accessed(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) else if (fgp_flags & FGP_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) /* Clear idle flag for buffer write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) if (page_is_idle(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) clear_page_idle(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) if (!(fgp_flags & FGP_HEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) page = find_subpage(page, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) no_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) if (!page && (fgp_flags & FGP_CREAT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) gfp_mask |= __GFP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) if (fgp_flags & FGP_NOFS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) gfp_mask &= ~__GFP_FS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) page = __page_cache_alloc(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) fgp_flags |= FGP_LOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) /* Init accessed so we avoid an atomic mark_page_accessed() later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) if (fgp_flags & FGP_ACCESSED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) __SetPageReferenced(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) err = add_to_page_cache_lru(page, mapping, index, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) if (unlikely(err)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) if (err == -EEXIST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) * add_to_page_cache_lru locks the page, and for mmap we expect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) * an unlocked page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) if (page && (fgp_flags & FGP_FOR_MMAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) EXPORT_SYMBOL(pagecache_get_page);
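
/*
 * Illustrative sketch, not part of filemap.c: a find-or-create lookup
 * built from the FGP flags documented above.  example_get_or_create() is
 * hypothetical; combining FGP_LOCK | FGP_ACCESSED | FGP_CREAT this way is
 * what the find_or_create_page() wrapper in <linux/pagemap.h> does.
 */
static struct page *example_get_or_create(struct address_space *mapping,
					  pgoff_t index)
{
	/* Returns a locked, referenced page, allocating one if needed. */
	return pagecache_get_page(mapping, index,
				  FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
				  mapping_gfp_mask(mapping));
}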
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) * find_get_entries - gang pagecache lookup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) * @mapping: The address_space to search
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) * @start: The starting page cache index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) * @nr_entries: The maximum number of entries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) * @entries: Where the resulting entries are placed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) * @indices: The cache indices corresponding to the entries in @entries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) * find_get_entries() will search for and return a group of up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) * @nr_entries entries in the mapping. The entries are placed at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) * @entries. find_get_entries() takes a reference against any actual
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) * pages it returns.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) * The search returns a group of mapping-contiguous page cache entries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) * with ascending indexes. There may be holes in the indices due to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) * not-present pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) * Any shadow entries of evicted pages, or swap entries from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) * shmem/tmpfs, are included in the returned array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) * If it finds a Transparent Huge Page, head or tail, find_get_entries()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) * stops at that page: the caller is likely to have a better way to handle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) * the compound page as a whole, and then skip its extent, than repeatedly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) * calling find_get_entries() to return all its tails.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) * Return: the number of pages and shadow entries which were found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) unsigned find_get_entries(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) pgoff_t start, unsigned int nr_entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) struct page **entries, pgoff_t *indices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) XA_STATE(xas, &mapping->i_pages, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) unsigned int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) if (!nr_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) xas_for_each(&xas, page, ULONG_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) if (xas_retry(&xas, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) * A shadow entry of a recently evicted page, a swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) * entry from shmem/tmpfs or a DAX entry. Return it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) * without attempting to raise page count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) if (xa_is_value(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) goto export;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) if (!page_cache_get_speculative(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) /* Has the page moved or been split? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) if (unlikely(page != xas_reload(&xas)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) goto put_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) * Terminate early on finding a THP, to allow the caller to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) * handle it all at once; but continue if this is hugetlbfs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) if (PageTransHuge(page) && !PageHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) page = find_subpage(page, xas.xa_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) nr_entries = ret + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) export:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) indices[ret] = xas.xa_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) entries[ret] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) if (++ret == nr_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) put_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) xas_reset(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) }
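
/*
 * Illustrative sketch, not part of filemap.c: a truncate-style consumer
 * draining a mapping in PAGEVEC_SIZE batches.  example_drain() is
 * hypothetical; note that references are dropped only for real pages,
 * never for the value entries that find_get_entries() may also return.
 */
static void example_drain(struct address_space *mapping)
{
	struct page *pages[PAGEVEC_SIZE];
	pgoff_t indices[PAGEVEC_SIZE];
	pgoff_t index = 0;
	unsigned int i, nr;

	while ((nr = find_get_entries(mapping, index, PAGEVEC_SIZE,
				      pages, indices))) {
		for (i = 0; i < nr; i++) {
			index = indices[i];
			if (xa_is_value(pages[i]))
				continue;	/* shadow/swap entry */
			/* ... operate on the page here ... */
			put_page(pages[i]);
		}
		index++;
	}
}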
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) * find_get_pages_range - gang pagecache lookup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) * @mapping: The address_space to search
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) * @start: The starting page index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) * @end: The final page index (inclusive)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) * @nr_pages: The maximum number of pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) * @pages: Where the resulting pages are placed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) * find_get_pages_range() will search for and return a group of up to @nr_pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) * pages in the mapping starting at index @start and up to index @end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) * (inclusive). The pages are placed at @pages. find_get_pages_range() takes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) * a reference against the returned pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) * The search returns a group of mapping-contiguous pages with ascending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) * indexes. There may be holes in the indices due to not-present pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) * We also update @start to index the next page for the traversal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) * Return: the number of pages which were found. If this number is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) * smaller than @nr_pages, the end of the specified range has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) * reached.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) pgoff_t end, unsigned int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) XA_STATE(xas, &mapping->i_pages, *start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) unsigned ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) if (unlikely(!nr_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) xas_for_each(&xas, page, end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) if (xas_retry(&xas, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) /* Skip over shadow, swap and DAX entries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) if (xa_is_value(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) if (!page_cache_get_speculative(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) /* Has the page moved or been split? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) if (unlikely(page != xas_reload(&xas)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) goto put_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) pages[ret] = find_subpage(page, xas.xa_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) if (++ret == nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) *start = xas.xa_index + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) put_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) xas_reset(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) * We come here when there is no page beyond @end. We take care to not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) * overflow the index @start as it confuses some of the callers. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) * breaks the iteration when there is a page at index -1 but that is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) * already broken anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) if (end == (pgoff_t)-1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) *start = (pgoff_t)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) *start = end + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) }
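
/*
 * Illustrative sketch, not part of filemap.c: walking a range in batches
 * and letting find_get_pages_range() advance @start between calls.
 * example_count_cached() is a hypothetical helper that merely counts and
 * releases the pages it finds.
 */
static unsigned long example_count_cached(struct address_space *mapping,
					  pgoff_t start, pgoff_t end)
{
	struct page *pages[PAGEVEC_SIZE];
	unsigned long count = 0;
	unsigned int i, nr;

	while ((nr = find_get_pages_range(mapping, &start, end,
					  PAGEVEC_SIZE, pages))) {
		count += nr;
		for (i = 0; i < nr; i++)
			put_page(pages[i]);	/* drop the lookup references */
	}
	return count;
}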
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) * find_get_pages_contig - gang contiguous pagecache lookup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) * @mapping: The address_space to search
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) * @index: The starting page index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) * @nr_pages: The maximum number of pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) * @pages: Where the resulting pages are placed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) * find_get_pages_contig() works exactly like find_get_pages(), except
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) * that the returned pages are guaranteed to be contiguous.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) * Return: the number of pages which were found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) unsigned int nr_pages, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) XA_STATE(xas, &mapping->i_pages, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) unsigned int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) if (unlikely(!nr_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) for (page = xas_load(&xas); page; page = xas_next(&xas)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) if (xas_retry(&xas, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) * If the entry has been swapped out, we can stop looking.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) * No current caller is looking for DAX entries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) if (xa_is_value(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) if (!page_cache_get_speculative(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) /* Has the page moved or been split? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) if (unlikely(page != xas_reload(&xas)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) goto put_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) pages[ret] = find_subpage(page, xas.xa_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) if (++ret == nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) put_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) xas_reset(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) EXPORT_SYMBOL(find_get_pages_contig);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) * find_get_pages_range_tag - find and return pages in the given range matching @tag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) * @mapping: the address_space to search
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) * @index: the starting page index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) * @end: The final page index (inclusive)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) * @tag: the tag index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) * @nr_pages: the maximum number of pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) * @pages: where the resulting pages are placed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) * Like find_get_pages, except we only return pages which are tagged with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) * @tag. We update @index to index the next page for the traversal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) * Return: the number of pages which were found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) XA_STATE(xas, &mapping->i_pages, *index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) unsigned ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) if (unlikely(!nr_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) xas_for_each_marked(&xas, page, end, tag) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) if (xas_retry(&xas, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) * Shadow entries should never be tagged, but this iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) * is lockless so there is a window for page reclaim to evict
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) * a page we saw tagged. Skip over it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) if (xa_is_value(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) if (!page_cache_get_speculative(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) /* Has the page moved or been split? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) if (unlikely(page != xas_reload(&xas)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) goto put_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) pages[ret] = find_subpage(page, xas.xa_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) if (++ret == nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) *index = xas.xa_index + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) put_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) xas_reset(&xas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) * We come here when we got to @end. We take care to not overflow the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) * index @index as it confuses some of the callers. This breaks the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) * iteration when there is a page at index -1 but that is already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) * broken anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) if (end == (pgoff_t)-1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) *index = (pgoff_t)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) *index = end + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) EXPORT_SYMBOL(find_get_pages_range_tag);
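
/*
 * Illustrative sketch, not part of filemap.c: the write_cache_pages()-style
 * pattern of batching dirty pages by tag.  example_walk_dirty() is
 * hypothetical; PAGECACHE_TAG_DIRTY is the real mark used for dirty pages.
 */
static void example_walk_dirty(struct address_space *mapping,
			       pgoff_t start, pgoff_t end)
{
	struct page *pages[PAGEVEC_SIZE];
	unsigned int i, nr;

	while ((nr = find_get_pages_range_tag(mapping, &start, end,
					      PAGECACHE_TAG_DIRTY,
					      PAGEVEC_SIZE, pages))) {
		for (i = 0; i < nr; i++) {
			/* ... lock, revalidate and write back the page ... */
			put_page(pages[i]);
		}
	}
}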
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) * CD/DVDs are error prone. When a medium error occurs, the driver may fail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) * a _large_ part of the i/o request. Imagine the worst scenario:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) * ---R__________________________________________B__________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) * ^ reading here ^ bad block (assume 4k)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) * read(R) => miss => readahead(R...B) => media error => frustrating retries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) * => failing the whole request => read(R) => read(R+1) =>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) * readahead(R+1...B+1) => bang => read(R+2) => read(R+3) =>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) * readahead(R+3...B+2) => bang => read(R+3) => read(R+4) =>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) * readahead(R+4...B+3) => bang => read(R+4) => read(R+5) => ......
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) * It is going insane. Fix it by quickly scaling down the readahead size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) static void shrink_readahead_size_eio(struct file_ra_state *ra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) ra->ra_pages /= 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) }
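
/*
 * Worked example of the scaling above (the starting size is an assumption
 * for illustration): from a 32-page readahead window, successive media
 * errors shrink it 32 -> 8 -> 2 -> 0, so after a few failed reads
 * readahead is effectively disabled for this open file.
 */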
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) * generic_file_buffered_read - generic file read routine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) * @iocb: the iocb to read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) * @iter: data destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) * @written: number of bytes already copied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) * This is a generic file read routine, and uses the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) * mapping->a_ops->readpage() function for the actual low-level stuff.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) * This is really ugly. But the gotos actually try to clarify some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) * of the logic when it comes to error handling etc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) * Return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) * * total number of bytes copied, including those that were already @written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) * * negative error code if nothing was copied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) ssize_t generic_file_buffered_read(struct kiocb *iocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) struct iov_iter *iter, ssize_t written)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) struct file *filp = iocb->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) struct address_space *mapping = filp->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) struct file_ra_state *ra = &filp->f_ra;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) loff_t *ppos = &iocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) pgoff_t index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) pgoff_t last_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) pgoff_t prev_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) unsigned long offset; /* offset into pagecache page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) unsigned int prev_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) int error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) index = *ppos >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) prev_index = ra->prev_pos >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) prev_offset = ra->prev_pos & (PAGE_SIZE-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) offset = *ppos & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) * If we've already successfully copied some data, then we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) * can no longer safely return -EIOCBQUEUED. Hence mark
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) * an async read NOWAIT at that point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) if (written && (iocb->ki_flags & IOCB_WAITQ))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) iocb->ki_flags |= IOCB_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) pgoff_t end_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) loff_t isize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) unsigned long nr, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) find_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) if (fatal_signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) error = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) page = find_get_page(mapping, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) if (iocb->ki_flags & IOCB_NOIO)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) goto would_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) page_cache_sync_readahead(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) ra, filp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) index, last_index - index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) page = find_get_page(mapping, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) if (unlikely(page == NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) goto no_cached_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) if (PageReadahead(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) if (iocb->ki_flags & IOCB_NOIO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) page_cache_async_readahead(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) ra, filp, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) index, last_index - index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) * See comment in do_read_cache_page on why
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) * wait_on_page_locked is used to avoid unnecessary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) * serialisations and why it's safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) if (iocb->ki_flags & IOCB_WAITQ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) if (written) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) error = wait_on_page_locked_async(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) iocb->ki_waitq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) if (iocb->ki_flags & IOCB_NOWAIT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) goto would_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) error = wait_on_page_locked_killable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) if (unlikely(error))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) goto readpage_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) if (PageUptodate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) goto page_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) if (inode->i_blkbits == PAGE_SHIFT ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) !mapping->a_ops->is_partially_uptodate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) goto page_not_up_to_date;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) /* pipes can't handle partially uptodate pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) if (unlikely(iov_iter_is_pipe(iter)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) goto page_not_up_to_date;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) if (!trylock_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) goto page_not_up_to_date;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) /* Did it get truncated before we got the lock? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) if (!page->mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) goto page_not_up_to_date_locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) if (!mapping->a_ops->is_partially_uptodate(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) offset, iter->count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) goto page_not_up_to_date_locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) page_ok:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) * i_size must be checked after we know the page is Uptodate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) * Checking i_size after the Uptodate check allows us to calculate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) * the correct value for "nr", which means the zero-filled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) * part of the page is not copied back to userspace (unless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) * another truncate extends the file - this is desired though).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) isize = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) end_index = (isize - 1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) if (unlikely(!isize || index > end_index)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) /* nr is the maximum number of bytes to copy from this page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) nr = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) if (index == end_index) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) nr = ((isize - 1) & ~PAGE_MASK) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) if (nr <= offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) nr = nr - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) /* If users can be writing to this page using arbitrary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) * virtual addresses, take care of potential aliasing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) * before reading the page on the kernel side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) if (mapping_writably_mapped(mapping))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) * When a sequential read accesses a page several times,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) * only mark it as accessed the first time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) if (prev_index != index || offset != prev_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) mark_page_accessed(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) prev_index = index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) * Ok, we have the page, and it's up-to-date, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) * now we can copy it to user space...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) ret = copy_page_to_iter(page, offset, nr, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) offset += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) index += offset >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) offset &= ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) prev_offset = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) written += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) if (!iov_iter_count(iter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) if (ret < nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) error = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) page_not_up_to_date:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) /* Get exclusive access to the page ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) if (iocb->ki_flags & IOCB_WAITQ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) if (written) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) error = lock_page_async(page, iocb->ki_waitq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) error = lock_page_killable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) if (unlikely(error))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) goto readpage_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) page_not_up_to_date_locked:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) /* Did it get truncated before we got the lock? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) if (!page->mapping) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) /* Did somebody else fill it already? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) if (PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) goto page_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) readpage:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) goto would_block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) * A previous I/O error may have been due to temporary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) * failures, e.g. multipath errors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) * PG_error will be set again if readpage fails.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) ClearPageError(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) /* Start the actual read. The read will unlock the page. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) error = mapping->a_ops->readpage(filp, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) if (unlikely(error)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) if (error == AOP_TRUNCATED_PAGE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) goto find_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) goto readpage_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) if (iocb->ki_flags & IOCB_WAITQ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) if (written) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) error = lock_page_async(page, iocb->ki_waitq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) error = lock_page_killable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) if (unlikely(error))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) goto readpage_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) if (page->mapping == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) * invalidate_mapping_pages got it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) goto find_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) shrink_readahead_size_eio(ra);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) error = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) goto readpage_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) goto page_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) readpage_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) /* UHHUH! A synchronous read error occurred. Report it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) no_cached_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) * Ok, it wasn't cached, so we need to create a new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) * page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) page = page_cache_alloc(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) error = add_to_page_cache_lru(page, mapping, index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) mapping_gfp_constraint(mapping, GFP_KERNEL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) if (error == -EEXIST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) goto find_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) goto readpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) would_block:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) error = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) ra->prev_pos = prev_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) ra->prev_pos <<= PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) ra->prev_pos |= prev_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) *ppos = ((loff_t)index << PAGE_SHIFT) + offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) file_accessed(filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) return written ? written : error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) EXPORT_SYMBOL_GPL(generic_file_buffered_read);
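
/*
 * Editor's illustrative sketch, not part of the original file: the read
 * loop above tracks its position as a (page index, offset within page)
 * pair and only converts back to a byte position when updating *ppos and
 * ra->prev_pos. The hypothetical helper below spells out that conversion;
 * e.g. index 3 and offset 100 with 4KiB pages give 3 * 4096 + 100 = 12388.
 */
static inline loff_t example_pos_from_index(pgoff_t index, unsigned int offset)
{
	return ((loff_t)index << PAGE_SHIFT) + offset;
}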
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) * generic_file_read_iter - generic filesystem read routine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) * @iocb: kernel I/O control block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) * @iter: destination for the data read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) * This is the "read_iter()" routine for all filesystems
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) * that can use the page cache directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) * be returned when no data can be read without waiting for I/O requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) * to complete; it doesn't prevent readahead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) * requests shall be made for the read or for readahead. When no data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) * can be read, -EAGAIN shall be returned. When readahead would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) * triggered, a partial, possibly empty read shall be returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) * Return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) * * number of bytes copied, even for partial reads
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) * * negative error code (or 0 if IOCB_NOIO) if nothing was read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) ssize_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) size_t count = iov_iter_count(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) ssize_t retval = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) if (!count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) goto out; /* skip atime */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) if (iocb->ki_flags & IOCB_DIRECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) struct file *file = iocb->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) loff_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) if (iocb->ki_flags & IOCB_NOWAIT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) if (filemap_range_has_page(mapping, iocb->ki_pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) iocb->ki_pos + count - 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) retval = filemap_write_and_wait_range(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) iocb->ki_pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) iocb->ki_pos + count - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) if (retval < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) file_accessed(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) retval = mapping->a_ops->direct_IO(iocb, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) if (retval >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) iocb->ki_pos += retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) count -= retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) iov_iter_revert(iter, count - iov_iter_count(iter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) * Btrfs can have a short DIO read if we encounter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) * compressed extents, so if there was an error, or if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) * we've already read everything we wanted to, or if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) * there was a short read because we hit EOF, go ahead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) * and return. Otherwise fall through to buffered I/O for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) * the rest of the read. Buffered reads will not work for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) * DAX files, so don't bother trying.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) if (retval < 0 || !count || iocb->ki_pos >= size ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) IS_DAX(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) retval = generic_file_buffered_read(iocb, iter, retval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) EXPORT_SYMBOL(generic_file_read_iter);
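
/*
 * Editor's illustrative sketch, not part of the original file: a minimal
 * file_operations table that a simple page-cache-backed filesystem might
 * use to route reads through generic_file_read_iter(). The name
 * example_page_cache_fops is hypothetical; generic_file_llseek and
 * generic_file_mmap are the usual generic helpers declared in <linux/fs.h>.
 */
static const struct file_operations example_page_cache_fops __maybe_unused = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.mmap		= generic_file_mmap,
};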
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) #ifdef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) #define MMAP_LOTSAMISS (100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) * @vmf - the vm_fault for this fault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) * @page - the page to lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) * @fpin - the pointer to the file we may pin (or is already pinned).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) * This works similarly to lock_page_or_retry() in that it can drop the mmap_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) * It differs in that it returns 1 with the page locked, or 0 if it could not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) * lock the page. If we did have to drop the mmap_lock then fpin will point to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) * the pinned file and needs to be fput()'ed at a later point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) struct file **fpin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) if (trylock_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) * NOTE! This will make us return with VM_FAULT_RETRY, but with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) * the mmap_lock still held. That's how FAULT_FLAG_RETRY_NOWAIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) * is supposed to work. We have way too many special cases..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) *fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) if (vmf->flags & FAULT_FLAG_KILLABLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) if (__lock_page_killable(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) * We didn't have the right flags to drop the mmap_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) * but all fault handlers only check for fatal signals
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) * if we return VM_FAULT_RETRY, so we need to drop the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) * mmap_lock here and return 0 if we don't have an fpin.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) if (*fpin == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) mmap_read_unlock(vmf->vma->vm_mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) __lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) * Synchronous readahead happens when we don't even find a page in the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) * cache at all. We don't want to perform IO under the mmap_lock, so if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) * have to drop it we return the file that was pinned in order to do that IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) * If we didn't pin a file then we return NULL. The file that is returned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) * needs to be fput()'ed when we're done with it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) struct file *file = vmf->vma->vm_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) struct file_ra_state *ra = &file->f_ra;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) struct file *fpin = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) unsigned int mmap_miss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) /* If we don't want any read-ahead, don't bother */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) if (vmf->vma->vm_flags & VM_RAND_READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) return fpin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) if (!ra->ra_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) return fpin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) if (vmf->vma->vm_flags & VM_SEQ_READ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) fpin = maybe_unlock_mmap_for_io(vmf, fpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) page_cache_sync_ra(&ractl, ra, ra->ra_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) return fpin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) /* Avoid banging the cache line if not needed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) mmap_miss = READ_ONCE(ra->mmap_miss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) if (mmap_miss < MMAP_LOTSAMISS * 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) WRITE_ONCE(ra->mmap_miss, ++mmap_miss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) * Do we miss much more than hit in this file? If so,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) * stop bothering with read-ahead. It will only hurt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) if (mmap_miss > MMAP_LOTSAMISS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) return fpin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) * mmap read-around
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) fpin = maybe_unlock_mmap_for_io(vmf, fpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) ra->size = ra->ra_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) ra->async_size = ra->ra_pages / 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) ractl._index = ra->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) do_page_cache_ra(&ractl, ra->size, ra->async_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) return fpin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) }
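
/*
 * Editor's illustrative sketch, not part of the original file: the mmap
 * read-around window set up above is centred on the faulting page. The
 * hypothetical helper below repeats the same arithmetic; with
 * ra_pages == 32 and a fault at pgoff 100 it returns 84, giving a 32-page
 * window whose last 8 pages form the async tail.
 */
static inline pgoff_t example_readaround_start(pgoff_t pgoff,
					       unsigned int ra_pages)
{
	return max_t(long, 0, pgoff - ra_pages / 2);
}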
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) * Asynchronous readahead happens when we find the page with PG_readahead set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) * so we want to possibly extend the readahead further. We return the file that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) * was pinned if we have to drop the mmap_lock in order to do IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) struct file *file = vmf->vma->vm_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) struct file_ra_state *ra = &file->f_ra;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) struct file *fpin = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) unsigned int mmap_miss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) pgoff_t offset = vmf->pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) /* If we don't want any read-ahead, don't bother */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) return fpin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) mmap_miss = READ_ONCE(ra->mmap_miss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) if (mmap_miss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) WRITE_ONCE(ra->mmap_miss, --mmap_miss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) if (PageReadahead(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) fpin = maybe_unlock_mmap_for_io(vmf, fpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) page_cache_async_readahead(mapping, ra, file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) page, offset, ra->ra_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) return fpin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) * filemap_fault - read in file data for page fault handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) * @vmf: struct vm_fault containing details of the fault
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) * filemap_fault() is invoked via the vma operations vector for a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) * mapped memory region to read in file data during a page fault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) * The goto's are kind of ugly, but this streamlines the normal case of having
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) * it in the page cache, and handles the special cases reasonably without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) * having a lot of duplicated code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) * vma->vm_mm->mmap_lock must be held on entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) * may have been dropped before doing I/O or by lock_page_maybe_drop_mmap().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) * If our return value does not have VM_FAULT_RETRY set, the mmap_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) * has not been released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) * Return: bitwise-OR of %VM_FAULT_ codes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) vm_fault_t filemap_fault(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) struct file *file = vmf->vma->vm_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) struct file *fpin = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) struct file_ra_state *ra = &file->f_ra;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) pgoff_t offset = vmf->pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) pgoff_t max_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) vm_fault_t ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) bool retry = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) if (unlikely(offset >= max_off))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) return VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) trace_android_vh_filemap_fault_get_page(vmf, &page, &retry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) if (unlikely(retry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) goto out_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) if (unlikely(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) goto page_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) * Do we have something in the page cache already?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) page = find_get_page(mapping, offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) * We found the page, so try async readahead before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) * waiting for the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) fpin = do_async_mmap_readahead(vmf, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) } else if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) /* No page in the page cache at all */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) count_vm_event(PGMAJFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) ret = VM_FAULT_MAJOR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) fpin = do_sync_mmap_readahead(vmf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) retry_find:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) page = pagecache_get_page(mapping, offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) FGP_CREAT|FGP_FOR_MMAP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) vmf->gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) if (fpin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) goto out_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) return VM_FAULT_OOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) goto out_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) /* Did it get truncated? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) if (unlikely(compound_head(page)->mapping != mapping)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) goto retry_find;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) * We have a locked page in the page cache; now we need to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) * that it's up-to-date. If not, it is going to be due to an error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) if (unlikely(!PageUptodate(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) goto page_not_uptodate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) * We've made it this far and we had to drop our mmap_lock, so now is the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) * time to return to the upper layer and have it re-find the vma and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) * redo the fault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) if (fpin) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) goto out_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) page_ok:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) * Found the page and have a reference on it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) * We must recheck i_size under page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) if (unlikely(offset >= max_off)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) return VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) vmf->page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) return ret | VM_FAULT_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) page_not_uptodate:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) * Umm, take care of errors if the page isn't up-to-date.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) * Try to re-read it _once_. We do this synchronously,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) * because there really aren't any performance issues here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) * and we need to check for errors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) ClearPageError(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) fpin = maybe_unlock_mmap_for_io(vmf, fpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) error = mapping->a_ops->readpage(file, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) if (!error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) wait_on_page_locked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) if (!PageUptodate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) error = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) if (fpin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) goto out_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) if (!error || error == AOP_TRUNCATED_PAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) goto retry_find;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) shrink_readahead_size_eio(ra);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) return VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) out_retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) * We dropped the mmap_lock, so we need to return to the fault handler to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) * re-find the vma and come back and find our hopefully still populated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) * page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) trace_android_vh_filemap_fault_cache_page(vmf, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) if (fpin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) fput(fpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) return ret | VM_FAULT_RETRY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) EXPORT_SYMBOL(filemap_fault);
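
/*
 * Editor's illustrative sketch, not part of the original file: filesystems
 * normally reach filemap_fault() through generic_file_vm_ops below, but one
 * that needs extra serialisation around faults can wrap it in its own
 * ->fault handler. The name example_fs_fault is hypothetical.
 */
static vm_fault_t __maybe_unused example_fs_fault(struct vm_fault *vmf)
{
	/* A real filesystem would take its own fault-serialising lock here. */
	return filemap_fault(vmf);
}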
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) struct mm_struct *mm = vmf->vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) /* Huge page is mapped? No need to proceed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) if (pmd_trans_huge(*vmf->pmd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) if (pmd_none(*vmf->pmd) && PageTransHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) vm_fault_t ret = do_set_pmd(vmf, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) /* The page is mapped successfully, reference consumed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) if (pmd_none(*vmf->pmd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) vmf->ptl = pmd_lock(mm, vmf->pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) if (likely(pmd_none(*vmf->pmd))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) mm_inc_nr_ptes(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) pmd_populate(mm, vmf->pmd, vmf->prealloc_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) vmf->prealloc_pte = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) spin_unlock(vmf->ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) /* See comment in handle_pte_fault() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) if (pmd_devmap_trans_unstable(vmf->pmd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) static struct page *next_uptodate_page(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) struct xa_state *xas, pgoff_t end_pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) unsigned long max_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) if (xas_retry(xas, page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) if (xa_is_value(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) if (PageLocked(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) if (!page_cache_get_speculative(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) /* Has the page moved or been split? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) if (unlikely(page != xas_reload(xas)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) if (!PageUptodate(page) || PageReadahead(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) if (PageHWPoison(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) if (!trylock_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) if (page->mapping != mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) if (!PageUptodate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) if (xas->xa_index >= max_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) } while ((page = xas_next_entry(xas, end_pgoff)) != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) static inline struct page *first_map_page(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) struct xa_state *xas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) pgoff_t end_pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) return next_uptodate_page(xas_find(xas, end_pgoff),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) mapping, xas, end_pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) static inline struct page *next_map_page(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) struct xa_state *xas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) pgoff_t end_pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) return next_uptodate_page(xas_next_entry(xas, end_pgoff),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) mapping, xas, end_pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) bool filemap_allow_speculation(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) EXPORT_SYMBOL_GPL(filemap_allow_speculation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) vm_fault_t filemap_map_pages(struct vm_fault *vmf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) pgoff_t start_pgoff, pgoff_t end_pgoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) struct vm_area_struct *vma = vmf->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) struct file *file = vma->vm_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) pgoff_t last_pgoff = start_pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) XA_STATE(xas, &mapping->i_pages, start_pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) struct page *head, *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) vm_fault_t ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) head = first_map_page(mapping, &xas, end_pgoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) if (filemap_map_pmd(vmf, head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) if (pmd_none(*vmf->pmd) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) vmf->flags & FAULT_FLAG_SPECULATIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) ret = VM_FAULT_RETRY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) ret = VM_FAULT_NOPAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) if (!pte_map_lock_addr(vmf, addr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) unlock_page(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) put_page(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) ret = VM_FAULT_RETRY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) page = find_subpage(head, xas.xa_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) if (PageHWPoison(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) if (mmap_miss > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) mmap_miss--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) vmf->pte += xas.xa_index - last_pgoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) last_pgoff = xas.xa_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) if (!pte_none(*vmf->pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) /* We're about to handle the fault */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) if (vmf->address == addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) ret = VM_FAULT_NOPAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) do_set_pte(vmf, page, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) /* no need to invalidate: a not-present page won't be cached */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) update_mmu_cache(vma, addr, vmf->pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) unlock_page(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) unlock_page(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) put_page(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) } while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) pte_unmap_unlock(vmf->pte, vmf->ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) EXPORT_SYMBOL(filemap_map_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) struct page *page = vmf->page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) struct inode *inode = file_inode(vmf->vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) vm_fault_t ret = VM_FAULT_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) sb_start_pagefault(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) file_update_time(vmf->vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) if (page->mapping != inode->i_mapping) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) ret = VM_FAULT_NOPAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) * We mark the page dirty already here so that when freeze is in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) * progress, we are guaranteed that writeback during freezing will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) * see the dirty page and writeprotect it again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) wait_for_stable_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) sb_end_pagefault(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) const struct vm_operations_struct generic_file_vm_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) .fault = filemap_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) .map_pages = filemap_map_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) .page_mkwrite = filemap_page_mkwrite,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) .allow_speculation = filemap_allow_speculation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) /* This is used for a general mmap of a disk file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) if (!mapping->a_ops->readpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) return -ENOEXEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) file_accessed(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) vma->vm_ops = &generic_file_vm_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) * This is for filesystems which do not implement ->writepage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) return generic_file_mmap(file, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) return VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) return -ENOSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) return -ENOSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) #endif /* CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) EXPORT_SYMBOL(filemap_page_mkwrite);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) EXPORT_SYMBOL(generic_file_mmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) EXPORT_SYMBOL(generic_file_readonly_mmap);
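
/*
 * Illustrative sketch, not part of the original file: the typical way a
 * filesystem wires the generic mmap helper into its file_operations.  The
 * "examplefs" name is made up; the other entries are the usual generic VFS
 * helpers.
 *
 *	static const struct file_operations examplefs_file_operations = {
 *		.llseek		= generic_file_llseek,
 *		.read_iter	= generic_file_read_iter,
 *		.write_iter	= generic_file_write_iter,
 *		.mmap		= generic_file_mmap,
 *		.splice_read	= generic_file_splice_read,
 *	};
 */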
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) static struct page *wait_on_page_read(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) if (!IS_ERR(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) wait_on_page_locked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) page = ERR_PTR(-EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) static struct page *do_read_cache_page(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) pgoff_t index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) int (*filler)(void *, struct page *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) void *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) page = find_get_page(mapping, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) page = __page_cache_alloc(gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) err = add_to_page_cache_lru(page, mapping, index, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) if (unlikely(err)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) if (err == -EEXIST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) /* Presumably ENOMEM for xarray node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) return ERR_PTR(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) filler:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) if (filler)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) err = filler(data, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) err = mapping->a_ops->readpage(data, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) return ERR_PTR(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) page = wait_on_page_read(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) if (IS_ERR(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) if (PageUptodate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) 	 * Page is not up to date and may be locked due to one of the following cases:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) * case a: Page is being filled and the page lock is held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) * case b: Read/write error clearing the page uptodate status
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) * case c: Truncation in progress (page locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) * case d: Reclaim in progress
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) * Case a, the page will be up to date when the page is unlocked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) * There is no need to serialise on the page lock here as the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) * is pinned so the lock gives no additional protection. Even if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) * page is truncated, the data is still valid if PageUptodate as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) 	 * it's a read vs truncate race.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) * Case b, the page will not be up to date
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) * Case c, the page may be truncated but in itself, the data may still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) * be valid after IO completes as it's a read vs truncate race. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) * operation must restart if the page is not uptodate on unlock but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) * otherwise serialising on page lock to stabilise the mapping gives
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) * no additional guarantees to the caller as the page lock is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) * released before return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) * Case d, similar to truncation. If reclaim holds the page lock, it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) * will be a race with remove_mapping that determines if the mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) * is valid on unlock but otherwise the data is valid and there is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) * no need to serialise with page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) * As the page lock gives no additional guarantee, we optimistically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) * wait on the page to be unlocked and check if it's up to date and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) * use the page if it is. Otherwise, the page lock is required to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) * distinguish between the different cases. The motivation is that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) * avoid spurious serialisations and wakeups when multiple processes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) * wait on the same page for IO to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) wait_on_page_locked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) if (PageUptodate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) /* Distinguish between all the cases under the safety of the lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) /* Case c or d, restart the operation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) if (!page->mapping) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) /* Someone else locked and filled the page in a very small window */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) if (PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) * A previous I/O error may have been due to temporary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) * failures.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) 	 * Clear the page error before the actual read; PG_error will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) 	 * be set again if the read fails.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) ClearPageError(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) goto filler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) mark_page_accessed(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) * read_cache_page - read into page cache, fill it if needed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) * @mapping: the page's address_space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) * @index: the page index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) * @filler: function to perform the read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) * @data: first arg to filler(data, page) function, often left as NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) * Read into the page cache. If a page already exists, and PageUptodate() is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) * not set, try to fill the page and wait for it to become unlocked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) * If the page does not get brought uptodate, return -EIO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) * Return: up to date page on success, ERR_PTR() on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) struct page *read_cache_page(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) pgoff_t index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) int (*filler)(void *, struct page *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) return do_read_cache_page(mapping, index, filler, data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) mapping_gfp_mask(mapping));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) EXPORT_SYMBOL(read_cache_page);
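
/*
 * Illustrative sketch, not part of the original file: pulling a single page
 * of a file through the page cache with the default ->readpage filler.  The
 * variable names are hypothetical.  With a NULL @filler, @data is handed
 * straight to ->readpage() as the struct file pointer.  The page comes back
 * uptodate with an elevated refcount, so the caller must put_page() it.
 *
 *	struct page *page;
 *
 *	page = read_cache_page(file->f_mapping, index, NULL, file);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	... access the data, e.g. via kmap(page)/kunmap(page) ...
 *	put_page(page);
 */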
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) * read_cache_page_gfp - read into page cache, using specified page allocation flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) * @mapping: the page's address_space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) * @index: the page index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) * @gfp: the page allocator flags to use if allocating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) * This is the same as "read_mapping_page(mapping, index, NULL)", but with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) * any new page allocations done using the specified allocation flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) * If the page does not get brought uptodate, return -EIO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) * Return: up to date page on success, ERR_PTR() on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) struct page *read_cache_page_gfp(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) pgoff_t index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) return do_read_cache_page(mapping, index, NULL, NULL, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) EXPORT_SYMBOL(read_cache_page_gfp);
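
/*
 * Illustrative sketch, not part of the original file: as above, but
 * constraining any new page allocation to avoid __GFP_FS, e.g. when the
 * caller already holds filesystem locks.  The mapping/index names are
 * hypothetical.
 *
 *	page = read_cache_page_gfp(mapping, index,
 *				   mapping_gfp_constraint(mapping, ~__GFP_FS));
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 */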
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) int pagecache_write_begin(struct file *file, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) loff_t pos, unsigned len, unsigned flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) struct page **pagep, void **fsdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) const struct address_space_operations *aops = mapping->a_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) return aops->write_begin(file, mapping, pos, len, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) pagep, fsdata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) EXPORT_SYMBOL(pagecache_write_begin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) int pagecache_write_end(struct file *file, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) loff_t pos, unsigned len, unsigned copied,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) struct page *page, void *fsdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) const struct address_space_operations *aops = mapping->a_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) EXPORT_SYMBOL(pagecache_write_end);
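
/*
 * Illustrative sketch, not part of the original file: the usual pairing of
 * the two wrappers above when an in-kernel caller copies @len bytes from
 * @buf into the page cache at @pos, assuming the range does not cross a
 * page boundary.  All names are hypothetical; a real caller would also
 * flush_dcache_page() between the copy and pagecache_write_end().
 *
 *	struct page *page;
 *	void *fsdata;
 *	int ret;
 *
 *	ret = pagecache_write_begin(file, mapping, pos, len, 0,
 *				    &page, &fsdata);
 *	if (ret < 0)
 *		return ret;
 *	memcpy(kmap(page) + offset_in_page(pos), buf, len);
 *	kunmap(page);
 *	ret = pagecache_write_end(file, mapping, pos, len, len,
 *				  page, fsdata);
 */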
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) * Warn about a page cache invalidation failure during a direct I/O write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) void dio_warn_stale_pagecache(struct file *filp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) char pathname[128];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) struct inode *inode = file_inode(filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) char *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) errseq_set(&inode->i_mapping->wb_err, -EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) if (__ratelimit(&_rs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) path = file_path(filp, pathname, sizeof(pathname));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) if (IS_ERR(path))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) path = "(unknown)";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) current->comm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) ssize_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) struct file *file = iocb->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) loff_t pos = iocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) ssize_t written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) size_t write_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) pgoff_t end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) write_len = iov_iter_count(from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) end = (pos + write_len - 1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) if (iocb->ki_flags & IOCB_NOWAIT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) 		/* If there are pages to write back, bail out with -EAGAIN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) if (filemap_range_has_page(inode->i_mapping, pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) pos + write_len - 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) written = filemap_write_and_wait_range(mapping, pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) pos + write_len - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) if (written)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) * After a write we want buffered reads to be sure to go to disk to get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) 	 * the new data. We invalidate clean cached pages from the region we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) * about to write. We do this *before* the write so that we can return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) * without clobbering -EIOCBQUEUED from ->direct_IO().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) written = invalidate_inode_pages2_range(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) pos >> PAGE_SHIFT, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) 	 * If a page cannot be invalidated, return 0 to fall back
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) * to buffered write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) if (written) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) if (written == -EBUSY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) written = mapping->a_ops->direct_IO(iocb, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) * Finally, try again to invalidate clean pages which might have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) * cached by non-direct readahead, or faulted in by get_user_pages()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) * if the source of the write was an mmap'ed region of the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) * we're writing. Either one is a pretty crazy thing to do,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) * so we don't support it 100%. If this invalidation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) * fails, tough, the write still worked...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) * Most of the time we do not need this since dio_complete() will do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) * the invalidation for us. However there are some file systems that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) * do not end up with dio_complete() being called, so let's not break
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) * them by removing it completely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) 	 * A notable example is blkdev_direct_IO().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) * Skip invalidation for async writes or if mapping has no pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) if (written > 0 && mapping->nrpages &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) dio_warn_stale_pagecache(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) if (written > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) pos += written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) write_len -= written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) i_size_write(inode, pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) mark_inode_dirty(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) iocb->ki_pos = pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) iov_iter_revert(from, write_len - iov_iter_count(from));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) return written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) EXPORT_SYMBOL(generic_file_direct_write);
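
/*
 * Illustrative sketch, not part of the original file: how a caller is meant
 * to consume the return value above.  A short or refused direct write leaves
 * the remaining data in the iov_iter, and the caller falls back to buffered
 * writes; the in-tree version of this pattern is __generic_file_write_iter()
 * below.
 *
 *	written = generic_file_direct_write(iocb, from);
 *	if (written < 0 || !iov_iter_count(from))
 *		return written;
 *	status = generic_perform_write(file, from, iocb->ki_pos);
 *	... write back and invalidate the buffered range ...
 */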
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) * Find or create a page at the given pagecache position. Return the locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) * page. This function is specifically for buffered writes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) struct page *grab_cache_page_write_begin(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) pgoff_t index, unsigned flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) if (flags & AOP_FLAG_NOFS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) fgp_flags |= FGP_NOFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) page = pagecache_get_page(mapping, index, fgp_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) mapping_gfp_mask(mapping));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) wait_for_stable_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) EXPORT_SYMBOL(grab_cache_page_write_begin);
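
/*
 * Illustrative sketch, not part of the original file: a minimal ->write_begin
 * built on grab_cache_page_write_begin(), roughly in the style of
 * simple_write_begin().  "examplefs_write_begin" is a made-up name, and the
 * partial-page zeroing a real implementation needs is omitted.
 *
 *	static int examplefs_write_begin(struct file *file,
 *			struct address_space *mapping, loff_t pos,
 *			unsigned len, unsigned flags,
 *			struct page **pagep, void **fsdata)
 *	{
 *		struct page *page;
 *
 *		page = grab_cache_page_write_begin(mapping,
 *						   pos >> PAGE_SHIFT, flags);
 *		if (!page)
 *			return -ENOMEM;
 *		*pagep = page;
 *		return 0;
 *	}
 */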
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) ssize_t generic_perform_write(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) struct iov_iter *i, loff_t pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) const struct address_space_operations *a_ops = mapping->a_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) long status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) ssize_t written = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) unsigned int flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) unsigned long offset; /* Offset into pagecache page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) unsigned long bytes; /* Bytes to write to page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) size_t copied; /* Bytes copied from user */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) void *fsdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) offset = (pos & (PAGE_SIZE - 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) bytes = min_t(unsigned long, PAGE_SIZE - offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) iov_iter_count(i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) * Bring in the user page that we will copy from _first_.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) * Otherwise there's a nasty deadlock on copying from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) * same page as we're writing to, without it being marked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) * up-to-date.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) * Not only is this an optimisation, but it is also required
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) * to check that the address is actually valid, when atomic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) * usercopies are used, below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) status = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) if (fatal_signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) status = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) status = a_ops->write_begin(file, mapping, pos, bytes, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) &page, &fsdata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) if (unlikely(status < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) if (mapping_writably_mapped(mapping))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) status = a_ops->write_end(file, mapping, pos, bytes, copied,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) page, fsdata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) if (unlikely(status < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) copied = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) iov_iter_advance(i, copied);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) if (unlikely(copied == 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) * If we were unable to copy any data at all, we must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) * fall back to a single segment length write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) 			 * If we didn't fall back here, we could livelock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) * because not all segments in the iov can be copied at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) * once without a pagefault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) bytes = min_t(unsigned long, PAGE_SIZE - offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) iov_iter_single_seg_count(i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) pos += copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) written += copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) balance_dirty_pages_ratelimited(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) } while (iov_iter_count(i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) return written ? written : status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) EXPORT_SYMBOL(generic_perform_write);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) * __generic_file_write_iter - write data to a file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) * @iocb: IO state structure (file, offset, etc.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) * @from: iov_iter with data to write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) * This function does all the work needed for actually writing data to a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) * file. It does all basic checks, removes SUID from the file, updates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) * modification times and calls proper subroutines depending on whether we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) * do direct IO or a standard buffered write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) * It expects i_mutex to be grabbed unless we work on a block device or similar
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) * object which does not need locking at all.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) * This function does *not* take care of syncing data in case of O_SYNC write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) * A caller has to handle it. This is mainly due to the fact that we want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) * avoid syncing under i_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) * Return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) * * number of bytes written, even for truncated writes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) * * negative error code if no data has been written at all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) struct file *file = iocb->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) 	struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) ssize_t written = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) ssize_t err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) ssize_t status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) /* We can write back this queue in page reclaim */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) current->backing_dev_info = inode_to_bdi(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) err = file_remove_privs(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) err = file_update_time(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) if (iocb->ki_flags & IOCB_DIRECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) loff_t pos, endbyte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) written = generic_file_direct_write(iocb, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) * If the write stopped short of completing, fall back to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) * buffered writes. Some filesystems do this for writes to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) * holes, for example. For DAX files, a buffered write will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) * not succeed (even if it did, DAX does not handle dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) * page-cache pages correctly).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) status = generic_perform_write(file, from, pos = iocb->ki_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) * If generic_perform_write() returned a synchronous error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) * then we want to return the number of bytes which were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) * direct-written, or the error code if that was zero. Note
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) * that this differs from normal direct-io semantics, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) * will return -EFOO even if some bytes were written.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) if (unlikely(status < 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) err = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) * We need to ensure that the page cache pages are written to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) * disk and invalidated to preserve the expected O_DIRECT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) * semantics.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) endbyte = pos + status - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) err = filemap_write_and_wait_range(mapping, pos, endbyte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) if (err == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) iocb->ki_pos = endbyte + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) written += status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) invalidate_mapping_pages(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) pos >> PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) endbyte >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) * We don't know how much we wrote, so just return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) * the number of bytes which were direct-written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) written = generic_perform_write(file, from, iocb->ki_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) if (likely(written > 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) iocb->ki_pos += written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) current->backing_dev_info = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) return written ? written : err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) EXPORT_SYMBOL(__generic_file_write_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) * generic_file_write_iter - write data to a file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) * @iocb: IO state structure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) * @from: iov_iter with data to write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) * This is a wrapper around __generic_file_write_iter() to be used by most
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613)  * filesystems. It takes care of syncing the file in the case of an O_SYNC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614)  * file and acquires i_mutex as needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) * Return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616)  * * negative error code if no data has been written at all or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) * vfs_fsync_range() failed for a synchronous write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) * * number of bytes written, even for truncated writes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) struct file *file = iocb->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) struct inode *inode = file->f_mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) inode_lock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) ret = generic_write_checks(iocb, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) ret = __generic_file_write_iter(iocb, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) inode_unlock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) ret = generic_write_sync(iocb, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) EXPORT_SYMBOL(generic_file_write_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) * try_to_release_page() - release old fs-specific metadata on a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) * @page: the page which the kernel is trying to free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) * @gfp_mask: memory allocation flags (and I/O mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644)  * The address_space is asked to try to release any data it holds against
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645)  * the page (presumably at page->private).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) * This may also be called if PG_fscache is set on a page, indicating that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) * page is known to the local caching routines.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) * The @gfp_mask argument specifies whether I/O may be performed to release
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) * Return: %1 if the release was successful, otherwise return zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) int try_to_release_page(struct page *page, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) struct address_space * const mapping = page->mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) BUG_ON(!PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) if (PageWriteback(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) if (mapping && mapping->a_ops->releasepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) return mapping->a_ops->releasepage(page, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) return try_to_free_buffers(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) EXPORT_SYMBOL(try_to_release_page);
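
/*
 * Illustrative sketch, not part of the original file: the calling convention
 * as used by reclaim and invalidation paths.  The page must be locked and
 * not under writeback; a zero return means the private data could not be
 * dropped and the page cannot be freed yet ("keep_locked" stands in for the
 * caller's hypothetical error path).
 *
 *	if (page_has_private(page) &&
 *	    !try_to_release_page(page, GFP_KERNEL))
 *		goto keep_locked;
 */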