/*
 * mm/rmap.c - physical to virtual reverse mappings
 *
 * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
 * Released under the General Public License (GPL).
 *
 * Simple, low overhead reverse mapping scheme.
 * Please try to keep this thing as modular as possible.
 *
 * Provides methods for unmapping each kind of mapped page:
 * the anon methods track anonymous pages, and
 * the file methods track pages belonging to an inode.
 *
 * Original design by Rik van Riel <riel@conectiva.com.br> 2001
 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
 * Contributions by Hugh Dickins 2003, 2004
 */

/*
 * Lock ordering in mm:
 *
 * inode->i_mutex (while writing or truncating, not reading or faulting)
 * mm->mmap_lock
 * page->flags PG_locked (lock_page) * (see hugetlbfs below)
 * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
 * mapping->i_mmap_rwsem
 * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
 * anon_vma->rwsem
 * mm->page_table_lock or pte_lock
 * pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
 * swap_lock (in swap_duplicate, swap_info_get)
 * mmlist_lock (in mmput, drain_mmlist and others)
 * mapping->private_lock (in __set_page_dirty_buffers)
 * mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
 * i_pages lock (widely used)
 * inode->i_lock (in set_page_dirty's __mark_inode_dirty)
 * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
 * sb_lock (within inode_lock in fs/fs-writeback.c)
 * i_pages lock (widely used, in set_page_dirty,
 *               in arch-dependent flush_dcache_mmap_lock,
 *               within bdi.wb->list_lock in __sync_single_inode)
 *
 * anon_vma->rwsem, mapping->i_mutex (memory_failure, collect_procs_anon)
 *   ->tasklist_lock
 *     pte map lock
 *
 * * hugetlbfs PageHuge() pages take locks in this order:
 *     mapping->i_mmap_rwsem
 *     hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
 *     page->flags PG_locked (lock_page)
 */

#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/backing-dev.h>
#include <linux/page_idle.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>

#include <asm/tlbflush.h>

#include <trace/events/tlb.h>

#include <trace/hooks/mm.h>

#include "internal.h"

static struct kmem_cache *anon_vma_cachep;
static struct kmem_cache *anon_vma_chain_cachep;

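/*
 * Allocate a new anon_vma, initialised with a refcount of 1 (held by the
 * first vma), degree 1 and itself as root and parent. anon_vma_fork()
 * rewires root and parent when the anon_vma is created for a child process.
 */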
static inline struct anon_vma *anon_vma_alloc(void)
{
        struct anon_vma *anon_vma;

        anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
        if (anon_vma) {
                atomic_set(&anon_vma->refcount, 1);
                anon_vma->degree = 1;   /* Reference for first vma */
                anon_vma->parent = anon_vma;
                /*
                 * Initialise the anon_vma root to point to itself. If called
                 * from fork, the root will be reset to the parent's anon_vma.
                 */
                anon_vma->root = anon_vma;
        }

        return anon_vma;
}

static inline void anon_vma_free(struct anon_vma *anon_vma)
{
        VM_BUG_ON(atomic_read(&anon_vma->refcount));

        /*
         * Synchronize against page_lock_anon_vma_read() such that
         * we can safely hold the lock without the anon_vma getting
         * freed.
         *
         * Relies on the full mb implied by the atomic_dec_and_test() from
         * put_anon_vma() against the acquire barrier implied by
         * down_read_trylock() from page_lock_anon_vma_read(). This orders:
         *
         * page_lock_anon_vma_read()    VS      put_anon_vma()
         *   down_read_trylock()                  atomic_dec_and_test()
         *   LOCK                                 MB
         *   atomic_read()                        rwsem_is_locked()
         *
         * LOCK should suffice since the actual taking of the lock must
         * happen _before_ what follows.
         */
        might_sleep();
        if (rwsem_is_locked(&anon_vma->root->rwsem)) {
                anon_vma_lock_write(anon_vma);
                anon_vma_unlock_write(anon_vma);
        }

        kmem_cache_free(anon_vma_cachep, anon_vma);
}

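/*
 * An anon_vma_chain is the link object that ties one vma to one anon_vma.
 * These two helpers just allocate and free chain objects from their
 * dedicated slab cache.
 */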
static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
{
        return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
}

static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
{
        kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
}

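/*
 * Hook an anon_vma_chain up on both sides: onto the vma's anon_vma_chain
 * list and into the anon_vma's interval tree, so rmap can later find every
 * vma that may map pages belonging to this anon_vma. The caller holds the
 * anon_vma root lock for write.
 */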
static void anon_vma_chain_link(struct vm_area_struct *vma,
                                struct anon_vma_chain *avc,
                                struct anon_vma *anon_vma)
{
        avc->vma = vma;
        avc->anon_vma = anon_vma;
        list_add(&avc->same_vma, &vma->anon_vma_chain);
        anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
}

/**
 * __anon_vma_prepare - attach an anon_vma to a memory region
 * @vma: the memory region in question
 *
 * This makes sure the memory mapping described by 'vma' has
 * an 'anon_vma' attached to it, so that we can associate the
 * anonymous pages mapped into it with that anon_vma.
 *
 * The common case will be that we already have one, which
 * is handled inline by anon_vma_prepare(). But if not, we
 * either need to find an adjacent mapping whose anon_vma we
 * can reuse (very common when the only reason for splitting
 * a vma has been mprotect()), or we allocate a new one.
 *
 * Anon-vma allocations are very subtle, because we may have
 * optimistically looked up an anon_vma in page_lock_anon_vma_read()
 * and that may actually touch the spinlock even in the newly
 * allocated vma (it depends on RCU to make sure that the
 * anon_vma isn't actually destroyed).
 *
 * As a result, we need to do proper anon_vma locking even
 * for the new allocation. At the same time, we do not want
 * to do any locking for the common case of already having
 * an anon_vma.
 *
 * This must be called with the mmap_lock held for reading.
 */
int __anon_vma_prepare(struct vm_area_struct *vma)
{
        struct mm_struct *mm = vma->vm_mm;
        struct anon_vma *anon_vma, *allocated;
        struct anon_vma_chain *avc;

        might_sleep();

        avc = anon_vma_chain_alloc(GFP_KERNEL);
        if (!avc)
                goto out_enomem;

        anon_vma = find_mergeable_anon_vma(vma);
        allocated = NULL;
        if (!anon_vma) {
                anon_vma = anon_vma_alloc();
                if (unlikely(!anon_vma))
                        goto out_enomem_free_avc;
                allocated = anon_vma;
        }

        anon_vma_lock_write(anon_vma);
        /* page_table_lock to protect against threads */
        spin_lock(&mm->page_table_lock);
        if (likely(!vma->anon_vma)) {
                vma->anon_vma = anon_vma;
                anon_vma_chain_link(vma, avc, anon_vma);
                /* vma reference or self-parent link for new root */
                anon_vma->degree++;
                allocated = NULL;
                avc = NULL;
        }
        spin_unlock(&mm->page_table_lock);
        anon_vma_unlock_write(anon_vma);

        if (unlikely(allocated))
                put_anon_vma(allocated);
        if (unlikely(avc))
                anon_vma_chain_free(avc);

        return 0;

out_enomem_free_avc:
        anon_vma_chain_free(avc);
out_enomem:
        return -ENOMEM;
}
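
/*
 * Callers normally go through the anon_vma_prepare() wrapper (see
 * include/linux/rmap.h), which only takes this slow path when vma->anon_vma
 * is still NULL. A typical call site in a fault handler, before installing
 * the first anonymous PTE, looks roughly like (sketch):
 *
 *      if (unlikely(anon_vma_prepare(vma)))
 *              return VM_FAULT_OOM;
 */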

/*
 * This is a useful helper function for locking the anon_vma root as
 * we traverse the vma->anon_vma_chain, looping over anon_vma's that
 * have the same vma.
 *
 * Such anon_vma's should have the same root, so you'd expect to see
 * just a single lock acquisition for the whole traversal.
 */
static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
{
        struct anon_vma *new_root = anon_vma->root;
        if (new_root != root) {
                if (WARN_ON_ONCE(root))
                        up_write(&root->rwsem);
                root = new_root;
                down_write(&root->rwsem);
        }
        return root;
}

static inline void unlock_anon_vma_root(struct anon_vma *root)
{
        if (root)
                up_write(&root->rwsem);
}

/*
 * Attach the anon_vmas from src to dst.
 * Returns 0 on success, -ENOMEM on failure.
 *
 * anon_vma_clone() is called by __vma_split(), __split_vma(), copy_vma() and
 * anon_vma_fork(). The first three want an exact copy of src, while the last
 * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
 * endless growth of anon_vma. Since dst->anon_vma is set to NULL before the
 * call, we can identify this case by checking (!dst->anon_vma &&
 * src->anon_vma).
 *
 * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
 * and reuse an existing anon_vma which has no vmas and only one child
 * anon_vma. This prevents degradation of the anon_vma hierarchy into an
 * endless linear chain in the case of a constantly forking task. On the other
 * hand, an anon_vma with more than one child isn't reused even if there is no
 * live vma, so the rmap walker has a good chance of avoiding a scan of the
 * whole hierarchy when it searches for where a page is mapped.
 */
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
        struct anon_vma_chain *avc, *pavc;
        struct anon_vma *root = NULL;

        list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
                struct anon_vma *anon_vma;

                avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
                if (unlikely(!avc)) {
                        unlock_anon_vma_root(root);
                        root = NULL;
                        avc = anon_vma_chain_alloc(GFP_KERNEL);
                        if (!avc)
                                goto enomem_failure;
                }
                anon_vma = pavc->anon_vma;
                root = lock_anon_vma_root(root, anon_vma);
                anon_vma_chain_link(dst, avc, anon_vma);

                /*
                 * Reuse an existing anon_vma if its degree is lower than two,
                 * which means it has no vma and only one anon_vma child.
                 *
                 * Do not choose the parent anon_vma, otherwise the first child
                 * will always reuse it. The root anon_vma is never reused:
                 * it has a self-parent reference and at least one child.
                 */
                if (!dst->anon_vma && src->anon_vma &&
                    anon_vma != src->anon_vma && anon_vma->degree < 2)
                        dst->anon_vma = anon_vma;
        }
        if (dst->anon_vma)
                dst->anon_vma->degree++;
        unlock_anon_vma_root(root);
        return 0;

enomem_failure:
        /*
         * dst->anon_vma is dropped here otherwise its degree can be incorrectly
         * decremented in unlink_anon_vmas().
         * We can safely do this because callers of anon_vma_clone() don't care
         * about dst->anon_vma if anon_vma_clone() failed.
         */
        dst->anon_vma = NULL;
        unlink_anon_vmas(dst);
        return -ENOMEM;
}

/*
 * Attach vma to its own anon_vma, as well as to the anon_vmas that
 * the corresponding VMA in the parent process is attached to.
 * Returns 0 on success, non-zero on failure.
 */
int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
{
        struct anon_vma_chain *avc;
        struct anon_vma *anon_vma;
        int error;

        /* Don't bother if the parent process has no anon_vma here. */
        if (!pvma->anon_vma)
                return 0;

        /* Drop inherited anon_vma, we'll reuse existing or allocate new. */
        vma->anon_vma = NULL;

        /*
         * First, attach the new VMA to the parent VMA's anon_vmas,
         * so rmap can find non-COWed pages in child processes.
         */
        error = anon_vma_clone(vma, pvma);
        if (error)
                return error;

        /* An existing anon_vma has been reused, all done then. */
        if (vma->anon_vma)
                return 0;

        /* Then add our own anon_vma. */
        anon_vma = anon_vma_alloc();
        if (!anon_vma)
                goto out_error;
        avc = anon_vma_chain_alloc(GFP_KERNEL);
        if (!avc)
                goto out_error_free_anon_vma;

        /*
         * The root anon_vma's rwsem is the lock actually used when we
         * lock any of the anon_vmas in this anon_vma tree.
         */
        anon_vma->root = pvma->anon_vma->root;
        anon_vma->parent = pvma->anon_vma;
        /*
         * With refcounts, an anon_vma can stay around longer than the
         * process it belongs to. The root anon_vma needs to be pinned until
         * this anon_vma is freed, because the lock lives in the root.
         */
        get_anon_vma(anon_vma->root);
        /* Mark this anon_vma as the one where our new (COWed) pages go. */
        vma->anon_vma = anon_vma;
        anon_vma_lock_write(anon_vma);
        anon_vma_chain_link(vma, avc, anon_vma);
        anon_vma->parent->degree++;
        anon_vma_unlock_write(anon_vma);

        return 0;

out_error_free_anon_vma:
        put_anon_vma(anon_vma);
out_error:
        unlink_anon_vmas(vma);
        return -ENOMEM;
}

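/*
 * Tear down every anon_vma_chain attached to the vma and drop the anon_vma
 * references they pin. Empty anon_vmas are only unlinked under the root lock
 * here and freed in a second pass afterwards, since __put_anon_vma() itself
 * needs to take the root rwsem.
 */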
void unlink_anon_vmas(struct vm_area_struct *vma)
{
        struct anon_vma_chain *avc, *next;
        struct anon_vma *root = NULL;

        /*
         * Unlink each anon_vma chained to the VMA. This list is ordered
         * from newest to oldest, ensuring the root anon_vma gets freed last.
         */
        list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
                struct anon_vma *anon_vma = avc->anon_vma;

                root = lock_anon_vma_root(root, anon_vma);
                anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);

                /*
                 * Leave empty anon_vmas on the list - we'll need
                 * to free them outside the lock.
                 */
                if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
                        anon_vma->parent->degree--;
                        continue;
                }

                list_del(&avc->same_vma);
                anon_vma_chain_free(avc);
        }
        if (vma->anon_vma)
                vma->anon_vma->degree--;
        unlock_anon_vma_root(root);

        /*
         * Iterate the list once more, it now only contains empty and unlinked
         * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
         * needing to write-acquire the anon_vma->root->rwsem.
         */
        list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
                struct anon_vma *anon_vma = avc->anon_vma;

                VM_WARN_ON(anon_vma->degree);
                put_anon_vma(anon_vma);

                list_del(&avc->same_vma);
                anon_vma_chain_free(avc);
        }
}

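/*
 * Slab constructor: runs when the backing memory for an anon_vma object is
 * first set up, not on every allocation. With SLAB_TYPESAFE_BY_RCU the object
 * may be recycled while RCU readers still hold a pointer to it, so the rwsem
 * and refcount must always remain in a usable state.
 */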
static void anon_vma_ctor(void *data)
{
        struct anon_vma *anon_vma = data;

        init_rwsem(&anon_vma->rwsem);
        atomic_set(&anon_vma->refcount, 0);
        anon_vma->rb_root = RB_ROOT_CACHED;
}

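/* Create the anon_vma and anon_vma_chain slab caches at boot. */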
void __init anon_vma_init(void)
{
        anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
                        0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
                        anon_vma_ctor);
        anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
                        SLAB_PANIC|SLAB_ACCOUNT);
}

/*
 * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
 *
 * Since there is no serialization whatsoever against page_remove_rmap()
 * the best this function can do is return a locked anon_vma that might
 * have been relevant to this page.
 *
 * The page might have been remapped to a different anon_vma or the anon_vma
 * returned may already be freed (and even reused).
 *
 * In case it was remapped to a different anon_vma, the new anon_vma will be a
 * child of the old anon_vma, and the anon_vma lifetime rules will therefore
 * ensure that any anon_vma obtained from the page will still be valid for as
 * long as we observe page_mapped() [ hence all those page_mapped() tests ].
 *
 * All users of this function must be very careful when walking the anon_vma
 * chain and verify that the page in question is indeed mapped in it
 * [ something equivalent to page_mapped_in_vma() ].
 *
 * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
 * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
 * if there is a mapcount, we can dereference the anon_vma after observing
 * those.
 */
struct anon_vma *page_get_anon_vma(struct page *page)
{
        struct anon_vma *anon_vma = NULL;
        unsigned long anon_mapping;

        rcu_read_lock();
        anon_mapping = (unsigned long)READ_ONCE(page->mapping);
        if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
                goto out;
        if (!page_mapped(page))
                goto out;

        anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
        if (!atomic_inc_not_zero(&anon_vma->refcount)) {
                anon_vma = NULL;
                goto out;
        }

        /*
         * If this page is still mapped, then its anon_vma cannot have been
         * freed. But if it has been unmapped, we have no security against the
         * anon_vma structure being freed and reused (for another anon_vma:
         * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
         * above cannot corrupt).
         */
        if (!page_mapped(page)) {
                rcu_read_unlock();
                put_anon_vma(anon_vma);
                return NULL;
        }
out:
        rcu_read_unlock();

        return anon_vma;
}

/*
 * Similar to page_get_anon_vma() except it locks the anon_vma.
 *
 * It's a little more complex as it tries to keep the fast path to a single
 * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
 * reference like with page_get_anon_vma() and then block on the rwsem.
 */
struct anon_vma *page_lock_anon_vma_read(struct page *page)
{
        struct anon_vma *anon_vma = NULL;
        struct anon_vma *root_anon_vma;
        unsigned long anon_mapping;

        rcu_read_lock();
        anon_mapping = (unsigned long)READ_ONCE(page->mapping);
        if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
                goto out;
        if (!page_mapped(page))
                goto out;

        anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
        root_anon_vma = READ_ONCE(anon_vma->root);
        if (down_read_trylock(&root_anon_vma->rwsem)) {
                /*
                 * If the page is still mapped, then this anon_vma is still
                 * its anon_vma, and holding the rwsem ensures that it will
                 * not go away, see anon_vma_free().
                 */
                if (!page_mapped(page)) {
                        up_read(&root_anon_vma->rwsem);
                        anon_vma = NULL;
                }
                goto out;
        }

        /* trylock failed, we got to sleep */
        if (!atomic_inc_not_zero(&anon_vma->refcount)) {
                anon_vma = NULL;
                goto out;
        }

        if (!page_mapped(page)) {
                rcu_read_unlock();
                put_anon_vma(anon_vma);
                return NULL;
        }

        /* we pinned the anon_vma, it's safe to sleep */
        rcu_read_unlock();
        anon_vma_lock_read(anon_vma);

        if (atomic_dec_and_test(&anon_vma->refcount)) {
                /*
                 * Oops, we held the last refcount, release the lock
                 * and bail -- can't simply use put_anon_vma() because
                 * we'll deadlock on the anon_vma_lock_write() recursion.
                 */
                anon_vma_unlock_read(anon_vma);
                __put_anon_vma(anon_vma);
                anon_vma = NULL;
        }

        return anon_vma;

out:
        rcu_read_unlock();
        return anon_vma;
}

void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
{
        anon_vma_unlock_read(anon_vma);
}

#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
/*
 * Flush TLB entries for recently unmapped pages from remote CPUs. If a PTE
 * was dirty when it was unmapped, it is important that it is flushed before
 * any IO is initiated on the page, to prevent lost writes. Similarly, it must
 * be flushed before freeing to prevent data leakage.
 */
void try_to_unmap_flush(void)
{
        struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

        if (!tlb_ubc->flush_required)
                return;

        arch_tlbbatch_flush(&tlb_ubc->arch);
        tlb_ubc->flush_required = false;
        tlb_ubc->writable = false;
}

/* Flush iff there are potentially writable TLB entries that can race with IO */
void try_to_unmap_flush_dirty(void)
{
        struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

        if (tlb_ubc->writable)
                try_to_unmap_flush();
}

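/*
 * Queue the mm for a deferred TLB flush in the current task's batch instead
 * of sending an IPI per unmapped PTE. The mm is also flagged so that a later
 * page-table operation can call flush_tlb_batched_pending() if the batch has
 * not been flushed yet.
 */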
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
        struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

        arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
        tlb_ubc->flush_required = true;

        /*
         * Ensure compiler does not re-order the setting of tlb_flush_batched
         * before the PTE is cleared.
         */
        barrier();
        mm->tlb_flush_batched = true;

        /*
         * If the PTE was dirty then it's best to assume it's writable. The
         * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
         * before the page is queued for IO.
         */
        if (writable)
                tlb_ubc->writable = true;
}

/*
 * Returns true if the TLB flush should be deferred to the end of a batch of
 * unmap operations to reduce IPIs.
 */
static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
{
        bool should_defer = false;

        if (!(flags & TTU_BATCH_FLUSH))
                return false;

        /* If remote CPUs need to be flushed then defer the batched flush */
        if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
                should_defer = true;
        put_cpu();

        return should_defer;
}

/*
 * Reclaim unmaps pages under the PTL but does not flush the TLB prior to
 * releasing the PTL if TLB flushes are batched. It's possible for a parallel
 * operation such as mprotect or munmap to race between reclaim unmapping
 * the page and flushing the page. If this race occurs, it potentially allows
 * access to data via a stale TLB entry. Tracking all mm's that have TLB
 * batching in flight would be expensive during reclaim, so instead track
 * whether TLB batching occurred in the past and if so then do a flush here
 * if required. This will cost one additional flush per reclaim cycle, paid
 * by the first operation at risk such as mprotect and munmap.
 *
 * This must be called under the PTL so that an access to tlb_flush_batched
 * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
 * via the PTL.
 */
void flush_tlb_batched_pending(struct mm_struct *mm)
{
        if (data_race(mm->tlb_flush_batched)) {
                flush_tlb_mm(mm);

                /*
                 * Do not allow the compiler to re-order the clearing of
                 * tlb_flush_batched before the tlb is flushed.
                 */
                barrier();
                mm->tlb_flush_batched = false;
        }
}
#else
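/*
 * Without CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH there is no per-task TLB
 * batch: nothing to queue, and flushes are never deferred.
 */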
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
}

static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
{
        return false;
}
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */

/*
 * At what user virtual address is page expected in vma?
 * Caller should check the page is actually part of the vma.
 */
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
        if (PageAnon(page)) {
                struct anon_vma *page__anon_vma = page_anon_vma(page);
                /*
                 * Note: swapoff's unuse_vma() is more efficient with this
                 * check, and needs it to match anon_vma when KSM is active.
                 */
                if (!vma->anon_vma || !page__anon_vma ||
                    vma->anon_vma->root != page__anon_vma->root)
                        return -EFAULT;
        } else if (!vma->vm_file) {
                return -EFAULT;
        } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
                return -EFAULT;
        }

        return vma_address(page, vma);
}

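/*
 * Walk the page tables of @mm down to the pmd covering @address. Returns
 * NULL if any level is missing or if the pmd maps a transparent huge page,
 * so callers only ever get a pmd that points to a pte table.
 */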
pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
{
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd = NULL;
        pmd_t pmde;

        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
                goto out;

        p4d = p4d_offset(pgd, address);
        if (!p4d_present(*p4d))
                goto out;

        pud = pud_offset(p4d, address);
        if (!pud_present(*pud))
                goto out;

        pmd = pmd_offset(pud, address);
        /*
         * Some THP functions use the sequence pmdp_huge_clear_flush(),
         * set_pmd_at() without holding anon_vma lock for write. So when
         * looking for a genuine pmde (in which to find pte), test present
         * and !THP together.
         */
        pmde = *pmd;
        barrier();
        if (!pmd_present(pmde) || pmd_trans_huge(pmde))
                pmd = NULL;
out:
        return pmd;
}

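/*
 * Argument bundle handed to page_referenced_one() by the rmap walk:
 * mapcount is the number of mappings still to visit, referenced and vm_flags
 * accumulate the result, and memcg, when set, is used to skip vmas belonging
 * to other cgroups.
 */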
struct page_referenced_arg {
        int mapcount;
        int referenced;
        unsigned long vm_flags;
        struct mem_cgroup *memcg;
};
/*
 * arg: a struct page_referenced_arg is passed in and updated with the result.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) unsigned long address, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) struct page_referenced_arg *pra = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) struct page_vma_mapped_walk pvmw = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) .page = page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) .vma = vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) .address = address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) int referenced = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) while (page_vma_mapped_walk(&pvmw)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) address = pvmw.address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) if (vma->vm_flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) pra->vm_flags |= VM_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) return false; /* To break the loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) if (pvmw.pte) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) if (ptep_clear_flush_young_notify(vma, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) pvmw.pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) * Don't treat a reference through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) * a sequentially read mapping as such.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) * If the page has been used in another mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) * we will catch it; if this other mapping is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) * already gone, the unmap path will have set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) * PG_referenced or activated the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) if (likely(!(vma->vm_flags & VM_SEQ_READ)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) referenced++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) if (pmdp_clear_flush_young_notify(vma, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) pvmw.pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) referenced++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) /* unexpected pmd-mapped page? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) WARN_ON_ONCE(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) pra->mapcount--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) if (referenced)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) clear_page_idle(page);
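/*
 * Idle page tracking may have cleared the pte/pmd young bits earlier and
 * stashed that fact in the page's "young" flag; fold it back into the
 * referenced count here so the access is not lost.
 */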
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) if (test_and_clear_page_young(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) referenced++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) if (referenced) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) pra->referenced++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) pra->vm_flags |= vma->vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) trace_android_vh_page_referenced_one_end(vma, page, referenced);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) if (!pra->mapcount)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) return false; /* To break the loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) struct page_referenced_arg *pra = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) struct mem_cgroup *memcg = pra->memcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) if (!mm_match_cgroup(vma->vm_mm, memcg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) * page_referenced - test if the page was referenced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) * @page: the page to test
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) * @is_locked: caller holds lock on the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) * @memcg: target memory cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) * @vm_flags: collects the vma->vm_flags of all VMAs that actually referenced the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) * Quick test_and_clear_referenced for all mappings to a page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) * returns the number of ptes which referenced the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) int page_referenced(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) int is_locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) struct mem_cgroup *memcg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) unsigned long *vm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) int we_locked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) struct page_referenced_arg pra = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) .mapcount = total_mapcount(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) .memcg = memcg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) struct rmap_walk_control rwc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) .rmap_one = page_referenced_one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) .arg = (void *)&pra,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) .anon_lock = page_lock_anon_vma_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) *vm_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) if (!pra.mapcount)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) if (!page_rmapping(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872)
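/*
 * For file and KSM pages the rmap walk below needs the page lock to keep
 * page->mapping stable.  If it cannot be taken without sleeping, report
 * the page as referenced so reclaim leaves it alone this time.
 */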
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) we_locked = trylock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) if (!we_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) * If we are reclaiming on behalf of a cgroup, skip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) * counting references that come from VMAs belonging
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) * to other cgroups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) if (memcg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) rwc.invalid_vma = invalid_page_referenced_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) rmap_walk(page, &rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) *vm_flags = pra.vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) if (we_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) return pra.referenced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) }
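/*
 * Illustrative sketch of a hypothetical caller of page_referenced() above
 * (for example a reclaim path); the page is locked, memcg may be NULL to
 * count references from every cgroup, and keep_page_active() is a made-up
 * helper:
 *
 *	unsigned long vm_flags;
 *
 *	if (page_referenced(page, 1, memcg, &vm_flags) &&
 *	    !(vm_flags & VM_LOCKED))
 *		keep_page_active(page);
 */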
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) unsigned long address, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) struct page_vma_mapped_walk pvmw = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) .page = page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) .vma = vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) .address = address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) .flags = PVMW_SYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) struct mmu_notifier_range range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) int *cleaned = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) * We have to assume the worst case, i.e. pmd, for invalidation. Note that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) * the page cannot be freed from within this function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) 0, vma, vma->vm_mm, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) vma_address_end(page, vma));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) mmu_notifier_invalidate_range_start(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
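/*
 * Walk every place the page is mapped in this vma and write-protect +
 * clean the entry, so the next write faults and re-dirties the page.
 * Both the pte-mapped and the pmd-mapped (THP) cases are handled.
 */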
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) while (page_vma_mapped_walk(&pvmw)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) address = pvmw.address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) if (pvmw.pte) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) pte_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) pte_t *pte = pvmw.pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) if (!pte_dirty(*pte) && !pte_write(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) flush_cache_page(vma, address, pte_pfn(*pte));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) entry = ptep_clear_flush(vma, address, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) entry = pte_wrprotect(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) entry = pte_mkclean(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) set_pte_at(vma->vm_mm, address, pte, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) #ifdef CONFIG_TRANSPARENT_HUGEPAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) pmd_t *pmd = pvmw.pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) pmd_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) flush_cache_page(vma, address, page_to_pfn(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) entry = pmdp_invalidate(vma, address, pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) entry = pmd_wrprotect(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) entry = pmd_mkclean(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) set_pmd_at(vma->vm_mm, address, pmd, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) /* unexpected pmd-mapped page? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) WARN_ON_ONCE(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) * No need to call mmu_notifier_invalidate_range() as we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) * downgrading page table protection not changing it to point
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) * to a new page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * See Documentation/vm/mmu_notifier.rst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) (*cleaned)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) mmu_notifier_invalidate_range_end(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) if (vma->vm_flags & VM_SHARED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) int page_mkclean(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) int cleaned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) struct address_space *mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) struct rmap_walk_control rwc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) .arg = (void *)&cleaned,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) .rmap_one = page_mkclean_one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) .invalid_vma = invalid_mkclean_vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) BUG_ON(!PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) if (!page_mapped(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) mapping = page_mapping(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) if (!mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) rmap_walk(page, &rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) return cleaned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) EXPORT_SYMBOL_GPL(page_mkclean);
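/*
 * Illustrative sketch of a hypothetical writeback-side caller of
 * page_mkclean() above: a pte that was still writable or dirty means the
 * latest data has not been written out yet, so the page is re-dirtied:
 *
 *	if (page_mkclean(page))
 *		set_page_dirty(page);
 */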
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) * page_move_anon_rmap - move a page to our anon_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) * @page: the page to move to our anon_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * @vma: the vma the page belongs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) * When a page belongs exclusively to one process after a COW event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) * that page can be moved into the anon_vma that belongs to just that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) * process, so the rmap code will not search the parent or sibling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) * processes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) struct anon_vma *anon_vma = vma->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)
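/* The mapping/anon_vma pointer of a THP lives in its head page. */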
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) page = compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) VM_BUG_ON_VMA(!anon_vma, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) * simultaneously, so a concurrent reader (e.g. page_referenced()'s
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * PageAnon()) will not see one without the other.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) * __page_set_anon_rmap - set up new anonymous rmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) * @page: Page or Hugepage to add to rmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) * @vma: VM area to add page to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) * @address: User virtual address of the mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) * @exclusive: the page is exclusively owned by the current process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) static void __page_set_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) struct vm_area_struct *vma, unsigned long address, int exclusive)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) struct anon_vma *anon_vma = vma->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) BUG_ON(!anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) if (PageAnon(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) * If the page isn't exclusively mapped into this vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) * we must use the _oldest_ possible anon_vma for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) * page mapping!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) if (!exclusive)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) anon_vma = anon_vma->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056)
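/*
 * Store the anon_vma pointer in page->mapping with the low
 * PAGE_MAPPING_ANON bit set (this is what PageAnon() tests), and record
 * the page's offset within the vma, in PAGE_SIZE units, in page->index.
 */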
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) page->mapping = (struct address_space *) anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) page->index = linear_page_index(vma, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) * __page_check_anon_rmap - sanity check anonymous rmap addition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) * @page: the page to add the mapping to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) * @vma: the vm area in which the mapping is added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) * @address: the user virtual address mapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) static void __page_check_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) struct vm_area_struct *vma, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) * The page's anon-rmap details (mapping and index) are guaranteed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) * be set up correctly at this point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) * We have exclusion against page_add_anon_rmap because the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) * always holds the page locked, except if called from page_dup_rmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) * in which case the page is already known to be set up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) * We have exclusion against page_add_new_anon_rmap because those pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) * are initially only visible via the pagetables, and the pte is locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) * over the call to page_add_new_anon_rmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) * page_add_anon_rmap - add pte mapping to an anonymous page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) * @page: the page to add the mapping to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) * @vma: the vm area in which the mapping is added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) * @address: the user virtual address mapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * @compound: charge the page as compound or small page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) * The caller needs to hold the pte lock, and the page must be locked in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) * the anon_vma case: to serialize mapping/index checking after setting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) * and to ensure that PageAnon is not being upgraded racily to PageKsm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) * (but PageKsm is never downgraded to PageAnon).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) void page_add_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) struct vm_area_struct *vma, unsigned long address, bool compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) * Special version of the above for do_swap_page, which often runs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) * into pages that are exclusively owned by the current process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) * Everybody else should continue to use page_add_anon_rmap above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) void do_page_add_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) struct vm_area_struct *vma, unsigned long address, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) bool compound = flags & RMAP_COMPOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) bool first;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) if (unlikely(PageKsm(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) lock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) if (compound) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) atomic_t *mapcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) VM_BUG_ON_PAGE(!PageTransHuge(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) mapcount = compound_mapcount_ptr(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) first = atomic_inc_and_test(mapcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) first = atomic_inc_and_test(&page->_mapcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)
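/*
 * _mapcount and compound_mapcount start at -1, so the inc-and-test above
 * is true only for the very first mapping; only then do the counters
 * below need updating.
 */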
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) if (first) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) int nr = compound ? thp_nr_pages(page) : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) * these counters are not modified in interrupt context, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) * the pte lock (a spinlock) is held, which implies preemption
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) * is disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) if (compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) __inc_lruvec_page_state(page, NR_ANON_THPS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) if (unlikely(PageKsm(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) unlock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) /* address might be in next vma when migration races vma_adjust */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) if (first)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) __page_set_anon_rmap(page, vma, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) flags & RMAP_EXCLUSIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) __page_check_anon_rmap(page, vma, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) * __page_add_new_anon_rmap - add pte mapping to a new anonymous page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) * @page: the page to add the mapping to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) * @vma: the vm area in which the mapping is added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) * @address: the user virtual address mapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) * @compound: charge the page as compound or small page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) * Same as page_add_anon_rmap but must only be called on *new* pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) * This means the inc-and-test can be bypassed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) * Page does not have to be locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) void __page_add_new_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) struct vm_area_struct *vma, unsigned long address, bool compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) int nr = compound ? thp_nr_pages(page) : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
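/*
 * A brand new anonymous page is not yet visible to any other context, so
 * the non-atomic __SetPageSwapBacked() and the plain atomic_set()s below
 * are safe without further synchronization.
 */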
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) __SetPageSwapBacked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) if (compound) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) VM_BUG_ON_PAGE(!PageTransHuge(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) /* increment count (starts at -1) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) atomic_set(compound_mapcount_ptr(page), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) if (hpage_pincount_available(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) atomic_set(compound_pincount_ptr(page), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) __inc_lruvec_page_state(page, NR_ANON_THPS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) /* Anon THP always mapped first with PMD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) VM_BUG_ON_PAGE(PageTransCompound(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) /* increment count (starts at -1) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) atomic_set(&page->_mapcount, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) __page_set_anon_rmap(page, vma, address, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) * page_add_file_rmap - add pte mapping to a file page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) * @page: the page to add the mapping to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) * @compound: charge the page as compound or small page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) * The caller needs to hold the pte lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) void page_add_file_rmap(struct page *page, bool compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) int i, nr = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) lock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) if (compound && PageTransHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) if (atomic_inc_and_test(&page[i]._mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) nr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) if (PageSwapBacked(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) __inc_node_page_state(page, NR_FILE_PMDMAPPED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) } else {
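/*
 * A pte mapping of a subpage of a compound file/shmem page: flag the
 * compound head PageDoubleMap so the per-subpage _mapcounts are
 * accounted in addition to the compound mapcount.
 */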
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) if (PageTransCompound(page) && page_mapping(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) VM_WARN_ON_ONCE(!PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) SetPageDoubleMap(compound_head(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) if (PageMlocked(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) clear_page_mlock(compound_head(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) if (!atomic_inc_and_test(&page->_mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) unlock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) static void page_remove_file_rmap(struct page *page, bool compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) int i, nr = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) VM_BUG_ON_PAGE(compound && !PageHead(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) /* Hugepages are not counted in NR_FILE_MAPPED for now. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) if (unlikely(PageHuge(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) /* hugetlb pages are always mapped with pmds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) atomic_dec(compound_mapcount_ptr(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) /* page still mapped by someone else? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) if (compound && PageTransHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) if (atomic_add_negative(-1, &page[i]._mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) nr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) if (PageSwapBacked(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) __dec_node_page_state(page, NR_FILE_PMDMAPPED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) if (!atomic_add_negative(-1, &page->_mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) * these counters are not modified in interrupt context, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) * the pte lock (a spinlock) is held, which implies preemption is disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) if (unlikely(PageMlocked(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) clear_page_mlock(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) static void page_remove_anon_compound_rmap(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) int i, nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) /* Hugepages are not counted in NR_ANON_PAGES for now. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) if (unlikely(PageHuge(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) __dec_lruvec_page_state(page, NR_ANON_THPS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) if (TestClearPageDoubleMap(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) * Subpages can be mapped with PTEs too. Check how many of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) * them are still mapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) if (atomic_add_negative(-1, &page[i]._mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) nr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) * Queue the page for deferred split if at least one small
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) * page of the compound page is unmapped, but at least one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) * small page is still mapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) if (nr && nr < thp_nr_pages(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) deferred_split_huge_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) } else {
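/*
 * The THP was never pte-mapped (no DoubleMap), so dropping its PMD
 * mapping unmapped every subpage at once.
 */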
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) nr = thp_nr_pages(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) if (unlikely(PageMlocked(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) clear_page_mlock(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) if (nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) * page_remove_rmap - take down pte mapping from a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) * @page: page to remove mapping from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) * @compound: uncharge the page as compound or small page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) * The caller needs to hold the pte lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) void page_remove_rmap(struct page *page, bool compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) {
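/*
 * lock_page_memcg() keeps the page's memcg binding stable while the
 * mapped-page counters are updated below.
 */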
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) lock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) if (!PageAnon(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) page_remove_file_rmap(page, compound);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) if (compound) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) page_remove_anon_compound_rmap(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) /* page still mapped by someone else? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) if (!atomic_add_negative(-1, &page->_mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) * these counters are not modified in interrupt context, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) * the pte lock (a spinlock) is held, which implies preemption is disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) __dec_lruvec_page_state(page, NR_ANON_MAPPED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) if (unlikely(PageMlocked(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) clear_page_mlock(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) if (PageTransCompound(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) deferred_split_huge_page(compound_head(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) * It would be tidy to reset the PageAnon mapping here,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) * but that might overwrite a racing page_add_anon_rmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) * which increments mapcount after us but sets mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) * before us: so leave the reset to free_unref_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) * and remember that it's only reliable while mapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) * Leaving it set also helps swapoff to reinstate ptes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) * faster for those pages still in swapcache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) unlock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) * @arg: the enum ttu_flags value is passed in via this argument
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) unsigned long address, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) struct page_vma_mapped_walk pvmw = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) .page = page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) .vma = vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) .address = address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) pte_t pteval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) struct page *subpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) bool ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) struct mmu_notifier_range range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) enum ttu_flags flags = (enum ttu_flags)(long)arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) * When racing against e.g. zap_pte_range() on another cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) * in between its ptep_get_and_clear_full() and page_remove_rmap(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) * try_to_unmap() may return false when it is about to become true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) * if page table locking is skipped: use TTU_SYNC to wait for that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) if (flags & TTU_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) pvmw.flags = PVMW_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) /* munlock has nothing to gain from examining un-locked vmas */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) is_zone_device_page(page) && !is_device_private_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403)
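/*
 * If asked to, split a huge PMD mapping of the page in this vma first,
 * so that it can be unmapped one pte at a time below (with
 * TTU_SPLIT_FREEZE the split already leaves migration entries behind).
 */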
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) if (flags & TTU_SPLIT_HUGE_PMD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) split_huge_pmd_address(vma, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) flags & TTU_SPLIT_FREEZE, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) * For THP, we have to assume the worst case, i.e. pmd, for invalidation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) * For hugetlb, it could be much worse if we need to do pud
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) * invalidation in the case of pmd sharing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) * Note that the page cannot be freed from within this function, as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) * caller of try_to_unmap() must hold a reference on the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) range.end = PageKsm(page) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) address + PAGE_SIZE : vma_address_end(page, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) address, range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) if (PageHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) * If sharing is possible, start and end will be adjusted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) * accordingly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) adjust_range_if_pmd_sharing_possible(vma, &range.start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) &range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) mmu_notifier_invalidate_range_start(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) while (page_vma_mapped_walk(&pvmw)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) /* PMD-mapped THP migration entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) if (!pvmw.pte && (flags & TTU_MIGRATION)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) set_pmd_migration_entry(&pvmw, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) * If the page is mlock()d, we cannot swap it out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) * If it's recently referenced (perhaps page_referenced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) * skipped over this mm) then we should reactivate it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) if (!(flags & TTU_IGNORE_MLOCK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) if (vma->vm_flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) /* PTE-mapped THP are never mlocked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) if (!PageTransCompound(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * Holding pte lock, we do *not* need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * mmap_lock here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) mlock_vma_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) if (flags & TTU_MUNLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) /* Unexpected PMD-mapped THP? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) VM_BUG_ON_PAGE(!pvmw.pte, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467)
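/*
 * subpage is the exact small page mapped by this pte: for a THP the walk
 * visits individual subpages, and the swap/migration entries installed
 * below must encode that precise page.
 */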
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) address = pvmw.address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) if (PageHuge(page) && !PageAnon(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) * To call huge_pmd_unshare, i_mmap_rwsem must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) * held in write mode. Caller needs to explicitly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) * do this outside rmap routines.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) * huge_pmd_unshare unmapped an entire PMD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) * page. There is no way of knowing exactly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) * which PMDs may be cached for this mm, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) * we must flush them all. start/end were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) * already adjusted above to cover this range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) flush_cache_range(vma, range.start, range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) flush_tlb_range(vma, range.start, range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) mmu_notifier_invalidate_range(mm, range.start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) * The ref count of the PMD page was dropped,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) * which is part of the way map counting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) * is done for shared PMDs. Return 'true'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) * here. When there is no other sharing,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) * huge_pmd_unshare returns false and we will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) * unmap the actual page and drop map count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) * to zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) if (IS_ENABLED(CONFIG_MIGRATION) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) (flags & TTU_MIGRATION) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) is_zone_device_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) swp_entry_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) pte_t swp_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) * Store the pfn of the page in a special migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) * pte. do_swap_page() will wait until the migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) * pte is removed and then restart fault handling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) entry = make_migration_entry(page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) swp_pte = swp_entry_to_pte(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) * pteval maps a zone device page and is therefore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) * a swap pte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) if (pte_swp_soft_dirty(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) swp_pte = pte_swp_mksoft_dirty(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) if (pte_swp_uffd_wp(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) swp_pte = pte_swp_mkuffd_wp(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) * No need to invalidate here: it will be synchronized
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) * against the special swap migration pte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) * The assignment to subpage above was computed from a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) * swap PTE which results in an invalid pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) * Since only PAGE_SIZE pages can currently be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) * migrated, just set it to page. This will need to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) * changed when hugepage migrations to device private
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) * memory are supported.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) subpage = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) /* Nuke the page table entry. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) if (should_defer_flush(mm, flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) * We clear the PTE but do not flush so potentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) * a remote CPU could still be writing to the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) * If the entry was previously clean then the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) * architecture must guarantee that a clear->dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) * transition on a cached TLB entry is written through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) * and traps if the PTE is unmapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) pteval = ptep_get_and_clear(mm, address, pvmw.pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) pteval = ptep_clear_flush(vma, address, pvmw.pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) /* Move the dirty bit to the page. Now the pte is gone. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) if (pte_dirty(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) /* Update high watermark before we lower rss */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) update_hiwater_rss(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) if (PageHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) hugetlb_count_sub(compound_nr(page), mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) set_huge_swap_pte_at(mm, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) pvmw.pte, pteval,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) vma_mmu_pagesize(vma));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) dec_mm_counter(mm, mm_counter(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) set_pte_at(mm, address, pvmw.pte, pteval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) * The guest indicated that the page content is of no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) * interest anymore. Simply discard the pte; vmscan
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) * will take care of the rest.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) * A future reference will then fault in a new zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) * page. When userfaultfd is active, we must not drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) * this page though, as its main user (postcopy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) * migration) will not expect userfaults on already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) * copied pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) dec_mm_counter(mm, mm_counter(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) /* We have to invalidate as we cleared the pte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) mmu_notifier_invalidate_range(mm, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) address + PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) } else if (IS_ENABLED(CONFIG_MIGRATION) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) swp_entry_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) pte_t swp_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) if (arch_unmap_one(mm, vma, address, pteval) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) set_pte_at(mm, address, pvmw.pte, pteval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) * Store the pfn of the page in a special migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) * pte. do_swap_page() will wait until the migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) * pte is removed and then restart fault handling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) entry = make_migration_entry(subpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) pte_write(pteval));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) swp_pte = swp_entry_to_pte(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) if (pte_soft_dirty(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) swp_pte = pte_swp_mksoft_dirty(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) if (pte_uffd_wp(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) swp_pte = pte_swp_mkuffd_wp(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) set_pte_at(mm, address, pvmw.pte, swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) * No need to invalidate here: secondary MMUs will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) * synchronize against the special swap migration pte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) } else if (PageAnon(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) swp_entry_t entry = { .val = page_private(subpage) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) pte_t swp_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) * Store the swap location in the pte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) * See handle_pte_fault() ...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) WARN_ON_ONCE(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) /* We have to invalidate as we cleared the pte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) mmu_notifier_invalidate_range(mm, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) address + PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) /* MADV_FREE page check */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) if (!PageSwapBacked(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) if (!PageDirty(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) /* Invalidate as we cleared the pte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) mmu_notifier_invalidate_range(mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) address, address + PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) dec_mm_counter(mm, MM_ANONPAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) * If the page was redirtied, it cannot be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) * discarded. Remap the page to page table.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) set_pte_at(mm, address, pvmw.pte, pteval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) SetPageSwapBacked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) if (swap_duplicate(entry) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) set_pte_at(mm, address, pvmw.pte, pteval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) if (arch_unmap_one(mm, vma, address, pteval) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) set_pte_at(mm, address, pvmw.pte, pteval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) }
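/*
 * Make sure this mm is on init_mm.mmlist so that swapoff can
 * later find the swap entries installed here and unuse them.
 */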
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) if (list_empty(&mm->mmlist)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) spin_lock(&mmlist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) if (list_empty(&mm->mmlist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) list_add(&mm->mmlist, &init_mm.mmlist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) spin_unlock(&mmlist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) dec_mm_counter(mm, MM_ANONPAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) inc_mm_counter(mm, MM_SWAPENTS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) swp_pte = swp_entry_to_pte(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) if (pte_soft_dirty(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) swp_pte = pte_swp_mksoft_dirty(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) if (pte_uffd_wp(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) swp_pte = pte_swp_mkuffd_wp(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) set_pte_at(mm, address, pvmw.pte, swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) /* Invalidate as we cleared the pte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) mmu_notifier_invalidate_range(mm, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) address + PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) * This is a locked file-backed page, thus it cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) * be removed from the page cache and replaced by a new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) * page before mmu_notifier_invalidate_range_end(), so no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) * concurrent thread can update its page table to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) * point at a new page while a device is still using this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) * page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) * See Documentation/vm/mmu_notifier.rst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) dec_mm_counter(mm, mm_counter_file(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) discard:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) * No need to call mmu_notifier_invalidate_range(): it has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) * done above for all cases requiring it to happen under page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) * table lock before mmu_notifier_invalidate_range_end()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) * See Documentation/vm/mmu_notifier.rst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) page_remove_rmap(subpage, PageHuge(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) mmu_notifier_invalidate_range_end(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) trace_android_vh_try_to_unmap_one(vma, page, address, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723)
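/*
 * rmap_walk_control.invalid_vma callback used for migration: skip the
 * temporary stack VMA set up during exec(), see the comment in
 * try_to_unmap() below.
 */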
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) return vma_is_temporary_stack(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728)
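/*
 * rmap_walk_control.done callback: the rmap walk can stop early once no
 * page table entry maps the page any more.
 */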
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) static int page_not_mapped(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) return !page_mapped(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) * try_to_unmap - try to remove all page table mappings to a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) * @page: the page to get unmapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) * @flags: action and flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) * Tries to remove all the page table entries which are mapping this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) * page, used in the pageout path. Caller must hold the page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) * If unmap is successful, return true. Otherwise, false.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) bool try_to_unmap(struct page *page, enum ttu_flags flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) struct rmap_walk_control rwc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) .rmap_one = try_to_unmap_one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) .arg = (void *)flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) .done = page_not_mapped,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) .anon_lock = page_lock_anon_vma_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) * During exec, a temporary VMA is set up and later moved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) * The VMA is moved under the anon_vma lock but not the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) * page tables leading to a race where migration cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) * find the migration ptes. Rather than increasing the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) * locking requirements of exec(), migration skips
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) * temporary VMAs until after exec() completes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) && !PageKsm(page) && PageAnon(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) rwc.invalid_vma = invalid_migration_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) if (flags & TTU_RMAP_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) rmap_walk_locked(page, &rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) rmap_walk(page, &rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) * When racing against e.g. zap_pte_range() on another cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) * in between its ptep_get_and_clear_full() and page_remove_rmap(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) * try_to_unmap() may return false when it is about to become true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) * if page table locking is skipped: use TTU_SYNC to wait for that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) return !page_mapcount(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) }
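
/*
 * Illustrative sketch only (kept under #if 0, not compiled): one plausible
 * way a reclaim-style caller could use try_to_unmap(). The function name is
 * made up and the flag choice is an assumption, not taken from a real caller.
 */
#if 0
static bool example_unmap_for_reclaim(struct page *page)
{
	/* try_to_unmap() requires the page lock, as documented above. */
	VM_BUG_ON_PAGE(!PageLocked(page), page);

	/* Defer the TLB flush where the architecture allows batching. */
	return try_to_unmap(page, TTU_BATCH_FLUSH);
}
#endif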
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) * try_to_munlock - try to munlock a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) * @page: the page to be munlocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) * Called from munlock code. Checks all of the VMAs mapping the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) * to make sure nobody else has this page mlocked. The page will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) * returned with PG_mlocked cleared if no other VMAs have it mlocked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) void try_to_munlock(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) struct rmap_walk_control rwc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) .rmap_one = try_to_unmap_one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) .arg = (void *)TTU_MUNLOCK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) .done = page_not_mapped,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) .anon_lock = page_lock_anon_vma_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) rmap_walk(page, &rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) }
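
/*
 * Illustrative sketch only (kept under #if 0, not compiled): the calling
 * context expected by try_to_munlock(), mirroring the VM_BUG_ONs above.
 * The function name is made up for illustration.
 */
#if 0
static void example_munlock_check(struct page *page)
{
	/* The page must be locked and already isolated from the LRU. */
	VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);

	/* Clears PG_mlocked unless some other VMA still has it mlocked. */
	try_to_munlock(page);
}
#endif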
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803)
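/*
 * Called when the last reference to @anon_vma has been dropped: free it and
 * release the reference it held on its root, freeing the root as well if
 * that was the root's final reference.
 */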
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) void __put_anon_vma(struct anon_vma *anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) struct anon_vma *root = anon_vma->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) anon_vma_free(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) if (root != anon_vma && atomic_dec_and_test(&root->refcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) anon_vma_free(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) static struct anon_vma *rmap_walk_anon_lock(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) struct rmap_walk_control *rwc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) struct anon_vma *anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) if (rwc->anon_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) return rwc->anon_lock(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) * because that depends on page_mapped(); but not all its usages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) * are holding mmap_lock. Users without mmap_lock are required to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) * take a reference count to prevent the anon_vma from disappearing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) anon_vma = page_anon_vma(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) if (!anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) anon_vma_lock_read(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) return anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) * rmap_walk_anon - do something to an anonymous page using the object-based
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) * rmap method
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) * @page: the page to be handled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) * @rwc: control variable according to each walk type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) * Find all the mappings of a page using the mapping pointer and the vma chains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) * contained in the anon_vma struct it points to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) * where the page was found will be held for write. So, we won't recheck
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) * vm_flags for that VMA. That should be OK, because that vma shouldn't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) * LOCKED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) bool locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) struct anon_vma *anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) pgoff_t pgoff_start, pgoff_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) struct anon_vma_chain *avc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) if (locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) anon_vma = page_anon_vma(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) /* Did the anon_vma disappear under us? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) VM_BUG_ON_PAGE(!anon_vma, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) anon_vma = rmap_walk_anon_lock(page, rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) if (!anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) pgoff_start = page_to_pgoff(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) pgoff_start, pgoff_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) struct vm_area_struct *vma = avc->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) unsigned long address = vma_address(page, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) VM_BUG_ON_VMA(address == -EFAULT, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) if (!rwc->rmap_one(page, vma, address, rwc->arg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) if (rwc->done && rwc->done(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) if (!locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) anon_vma_unlock_read(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) * rmap_walk_file - do something to a file page using the object-based rmap method
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) * @page: the page to be handled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) * @rwc: control variable according to each walk type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) * Find all the mappings of a page using the mapping pointer and the vma chains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) * contained in the address_space struct it points to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) * where the page was found will be held for write. So, we won't recheck
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) * vm_flags for that VMA. That should be OK, because that vma shouldn't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) * LOCKED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) bool locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) struct address_space *mapping = page_mapping(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) pgoff_t pgoff_start, pgoff_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) * The page lock not only makes sure that page->mapping cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) * suddenly be NULLified by truncation, it makes sure that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) * structure at mapping cannot be freed and reused yet,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) * so we can safely take mapping->i_mmap_rwsem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) if (!mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) pgoff_start = page_to_pgoff(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) if (!locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) i_mmap_lock_read(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) vma_interval_tree_foreach(vma, &mapping->i_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) pgoff_start, pgoff_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) unsigned long address = vma_address(page, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) VM_BUG_ON_VMA(address == -EFAULT, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) if (!rwc->rmap_one(page, vma, address, rwc->arg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) if (rwc->done && rwc->done(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) if (!locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) i_mmap_unlock_read(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944)
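/*
 * rmap_walk - walk the reverse mappings of a page, calling rwc->rmap_one
 * for each VMA that may map it. Dispatches to the KSM, anonymous or file
 * variant depending on how the page is mapped.
 */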
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) void rmap_walk(struct page *page, struct rmap_walk_control *rwc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) if (unlikely(PageKsm(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) rmap_walk_ksm(page, rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) else if (PageAnon(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) rmap_walk_anon(page, rwc, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) rmap_walk_file(page, rwc, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) /* Like rmap_walk, but caller holds relevant rmap lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) /* no ksm support for now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) VM_BUG_ON_PAGE(PageKsm(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) if (PageAnon(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) rmap_walk_anon(page, rwc, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) rmap_walk_file(page, rwc, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) }
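
/*
 * Illustrative sketch only (kept under #if 0, not compiled): a minimal
 * custom walk built on rmap_walk_control, counting the VMAs that map a
 * page. Both function names are made up for illustration.
 */
#if 0
static bool example_count_one(struct page *page, struct vm_area_struct *vma,
			      unsigned long address, void *arg)
{
	(*(int *)arg)++;
	return true;	/* keep walking */
}

static int example_count_mappings(struct page *page)
{
	int nr = 0;
	struct rmap_walk_control rwc = {
		.rmap_one = example_count_one,
		.arg = &nr,
	};

	/* The caller is expected to hold the page lock. */
	rmap_walk(page, &rwc);
	return nr;
}
#endif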
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) #ifdef CONFIG_HUGETLB_PAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) * The following two functions are for anonymous (private mapped) hugepages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) * Unlike common anonymous pages, anonymous hugepages have no accounting code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) * and no lru code, because we handle hugepages differently from common pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) void hugepage_add_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) struct vm_area_struct *vma, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) struct anon_vma *anon_vma = vma->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) int first;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) BUG_ON(!PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) BUG_ON(!anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) /* address might be in next vma when migration races vma_adjust */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) first = atomic_inc_and_test(compound_mapcount_ptr(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) if (first)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) __page_set_anon_rmap(page, vma, address, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985)
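/*
 * Used for a freshly allocated anonymous hugepage: initialise the compound
 * mapcount (and pin count, where tracked) and set up the anon rmap with
 * exclusive == 1, since the new page cannot be mapped anywhere else yet.
 */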
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) void hugepage_add_new_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) struct vm_area_struct *vma, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) BUG_ON(address < vma->vm_start || address >= vma->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) atomic_set(compound_mapcount_ptr(page), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) if (hpage_pincount_available(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) atomic_set(compound_pincount_ptr(page), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) __page_set_anon_rmap(page, vma, address, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) #endif /* CONFIG_HUGETLB_PAGE */