Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2)  * mm/rmap.c - physical to virtual reverse mappings
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  * Released under the General Public License (GPL).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  * Simple, low overhead reverse mapping scheme.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8)  * Please try to keep this thing as modular as possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10)  * Provides methods for unmapping each kind of mapped page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11)  * the anon methods track anonymous pages, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12)  * the file methods track pages belonging to an inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14)  * Original design by Rik van Riel <riel@conectiva.com.br> 2001
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15)  * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16)  * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17)  * Contributions by Hugh Dickins 2003, 2004
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21)  * Lock ordering in mm:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23)  * inode->i_mutex	(while writing or truncating, not reading or faulting)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24)  *   mm->mmap_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25)  *     page->flags PG_locked (lock_page)   * (see hugetlbfs below)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26)  *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27)  *         mapping->i_mmap_rwsem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28)  *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29)  *           anon_vma->rwsem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30)  *             mm->page_table_lock or pte_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31)  *               pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32)  *               swap_lock (in swap_duplicate, swap_info_get)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33)  *                 mmlist_lock (in mmput, drain_mmlist and others)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34)  *                 mapping->private_lock (in __set_page_dirty_buffers)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35)  *                   mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36)  *                     i_pages lock (widely used)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37)  *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38)  *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39)  *                   sb_lock (within inode_lock in fs/fs-writeback.c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40)  *                   i_pages lock (widely used, in set_page_dirty,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41)  *                             in arch-dependent flush_dcache_mmap_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42)  *                             within bdi.wb->list_lock in __sync_single_inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44)  * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45)  *   ->tasklist_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46)  *     pte map lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48)  * * hugetlbfs PageHuge() pages take locks in this order:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49)  *         mapping->i_mmap_rwsem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50)  *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51)  *             page->flags PG_locked (lock_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52)  */
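
/*
 * A condensed sketch of how part of this ordering shows up in
 * __anon_vma_prepare() below: the caller already holds mm->mmap_lock,
 * the anon_vma rwsem is taken next and the page_table_lock last.
 *
 *	mmap_read_lock(mm);			(caller, e.g. the fault path)
 *	  anon_vma_lock_write(anon_vma);	(anon_vma->root->rwsem)
 *	    spin_lock(&mm->page_table_lock);
 *	    spin_unlock(&mm->page_table_lock);
 *	  anon_vma_unlock_write(anon_vma);
 *	mmap_read_unlock(mm);
 */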
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55) #include <linux/sched/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56) #include <linux/sched/task.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58) #include <linux/swap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59) #include <linux/swapops.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62) #include <linux/ksm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63) #include <linux/rmap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64) #include <linux/rcupdate.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65) #include <linux/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) #include <linux/memcontrol.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) #include <linux/mmu_notifier.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) #include <linux/migrate.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69) #include <linux/hugetlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) #include <linux/huge_mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) #include <linux/backing-dev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) #include <linux/page_idle.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) #include <linux/memremap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) #include <linux/userfaultfd_k.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) #include <asm/tlbflush.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) #include <trace/events/tlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) #include <trace/hooks/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) #include "internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) static struct kmem_cache *anon_vma_cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) static struct kmem_cache *anon_vma_chain_cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) static inline struct anon_vma *anon_vma_alloc(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) 	struct anon_vma *anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) 	anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) 	if (anon_vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) 		atomic_set(&anon_vma->refcount, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) 		anon_vma->degree = 1;	/* Reference for first vma */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) 		anon_vma->parent = anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) 		 * Initialise the anon_vma root to point to itself. If called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) 		 * from fork, the root will be reset to the parent's anon_vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) 		anon_vma->root = anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 	return anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) static inline void anon_vma_free(struct anon_vma *anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 	VM_BUG_ON(atomic_read(&anon_vma->refcount));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 	 * Synchronize against page_lock_anon_vma_read() such that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) 	 * we can safely hold the lock without the anon_vma getting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) 	 * freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) 	 * Relies on the full mb implied by the atomic_dec_and_test() from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 	 * put_anon_vma() against the acquire barrier implied by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) 	 * down_read_trylock() from page_lock_anon_vma_read(). This orders:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) 	 * page_lock_anon_vma_read()	VS	put_anon_vma()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 	 *   down_read_trylock()		  atomic_dec_and_test()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 	 *   LOCK				  MB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) 	 *   atomic_read()			  rwsem_is_locked()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 	 * LOCK should suffice since the actual taking of the lock must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) 	 * happen _before_ what follows.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) 	might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) 	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) 		anon_vma_lock_write(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) 		anon_vma_unlock_write(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) 	kmem_cache_free(anon_vma_cachep, anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) 	return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) static void anon_vma_chain_link(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) 				struct anon_vma_chain *avc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 				struct anon_vma *anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 	avc->vma = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 	avc->anon_vma = anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 	list_add(&avc->same_vma, &vma->anon_vma_chain);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 	anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) }
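
/*
 * An anon_vma_chain is the join entry of a many-to-many mapping: each
 * avc sits on the vma side (vma->anon_vma_chain, a plain list threaded
 * through avc->same_vma) and on the anon_vma side (an interval tree
 * keyed by the vma's pgoff range).  Roughly, the two directions are
 * walked as:
 *
 *	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
 *		... avc->anon_vma ...		(vma -> its anon_vmas)
 *
 *	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
 *				       pgoff_start, pgoff_end)
 *		... avc->vma ...		(anon_vma -> mapping vmas)
 */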
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157)  * __anon_vma_prepare - attach an anon_vma to a memory region
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158)  * @vma: the memory region in question
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160)  * This makes sure the memory mapping described by 'vma' has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161)  * an 'anon_vma' attached to it, so that we can associate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162)  * anonymous pages mapped into it with that anon_vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164)  * The common case will be that we already have one, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165)  * is handled inline by anon_vma_prepare(). But if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166)  * not we either need to find an adjacent mapping that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167)  * can re-use the anon_vma from (very common when the only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168)  * reason for splitting a vma has been mprotect()), or we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169)  * allocate a new one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171)  * Anon-vma allocations are very subtle, because we may have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172)  * optimistically looked up an anon_vma in page_lock_anon_vma_read()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173)  * and that may actually touch the spinlock even in the newly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174)  * allocated vma (it depends on RCU to make sure that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175)  * anon_vma isn't actually destroyed).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177)  * As a result, we need to do proper anon_vma locking even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178)  * for the new allocation. At the same time, we do not want
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179)  * to do any locking for the common case of already having
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180)  * an anon_vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182)  * This must be called with the mmap_lock held for reading.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) int __anon_vma_prepare(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 	struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 	struct anon_vma *anon_vma, *allocated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) 	struct anon_vma_chain *avc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 	might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 	avc = anon_vma_chain_alloc(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) 	if (!avc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) 		goto out_enomem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) 	anon_vma = find_mergeable_anon_vma(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) 	allocated = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) 	if (!anon_vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) 		anon_vma = anon_vma_alloc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 		if (unlikely(!anon_vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) 			goto out_enomem_free_avc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) 		allocated = anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 	anon_vma_lock_write(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 	/* page_table_lock to protect against threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 	spin_lock(&mm->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 	if (likely(!vma->anon_vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) 		vma->anon_vma = anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 		anon_vma_chain_link(vma, avc, anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 		/* vma reference or self-parent link for new root */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 		anon_vma->degree++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 		allocated = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 		avc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 	spin_unlock(&mm->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 	anon_vma_unlock_write(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) 	if (unlikely(allocated))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 		put_anon_vma(allocated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) 	if (unlikely(avc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) 		anon_vma_chain_free(avc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226)  out_enomem_free_avc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) 	anon_vma_chain_free(avc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228)  out_enomem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) 	return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) }
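
/*
 * The "handled inline" fast path mentioned above lives in <linux/rmap.h>
 * and looks roughly like this:
 *
 *	static inline int anon_vma_prepare(struct vm_area_struct *vma)
 *	{
 *		if (likely(vma->anon_vma))
 *			return 0;
 *		return __anon_vma_prepare(vma);
 *	}
 *
 * A typical caller is the anonymous fault path, which (condensed) does:
 *
 *	if (unlikely(anon_vma_prepare(vma)))
 *		return VM_FAULT_OOM;
 *	...
 *	page_add_new_anon_rmap(page, vma, vmf->address, false);
 *
 * i.e. the anon_vma must be in place before the first anonymous page is
 * added to the vma's rmap.
 */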
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233)  * This is a useful helper function for locking the anon_vma root as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234)  * we traverse the vma->anon_vma_chain, looping over anon_vma's that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235)  * have the same vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237)  * Such anon_vma's should have the same root, so you'd expect to see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238)  * just a single mutex_lock for the whole traversal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 	struct anon_vma *new_root = anon_vma->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	if (new_root != root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 		if (WARN_ON_ONCE(root))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 			up_write(&root->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 		root = new_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 		down_write(&root->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 	return root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) static inline void unlock_anon_vma_root(struct anon_vma *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 	if (root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 		up_write(&root->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) }
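
/*
 * The two helpers above are meant for loops of the form used by
 * anon_vma_clone() and unlink_anon_vmas() below, roughly:
 *
 *	struct anon_vma *root = NULL;
 *
 *	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) {
 *		root = lock_anon_vma_root(root, avc->anon_vma);
 *		... work on avc->anon_vma under the root rwsem ...
 *	}
 *	unlock_anon_vma_root(root);
 *
 * As long as every anon_vma on the chain shares one root, the rwsem is
 * taken exactly once for the whole traversal.
 */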
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259)  * Attach the anon_vmas from src to dst.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260)  * Returns 0 on success, -ENOMEM on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262)  * anon_vma_clone() is called by __vma_adjust(), __split_vma(), copy_vma() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263)  * anon_vma_fork(). The first three want an exact copy of src, while the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264)  * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265)  * endless growth of anon_vma. Since dst->anon_vma is set to NULL before call,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266)  * we can identify this case by checking (!dst->anon_vma && src->anon_vma).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268)  * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269)  * and reuse an existing anon_vma which has no vmas and only one child
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270)  * anon_vma. This prevents degradation of the anon_vma hierarchy into an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271)  * endless linear chain in the case of a constantly forking task. On the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272)  * other hand, an anon_vma with more than one child is not reused even if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273)  * there is no live vma, so the rmap walker has a good chance of avoiding a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274)  * scan of the whole hierarchy when it searches for where a page is mapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 	struct anon_vma_chain *avc, *pavc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 	struct anon_vma *root = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 		struct anon_vma *anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 		if (unlikely(!avc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 			unlock_anon_vma_root(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 			root = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 			avc = anon_vma_chain_alloc(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 			if (!avc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 				goto enomem_failure;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 		anon_vma = pavc->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 		root = lock_anon_vma_root(root, anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 		anon_vma_chain_link(dst, avc, anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 		 * Reuse an existing anon_vma if its degree is lower than two,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 		 * which means it has no vma and only one anon_vma child.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 		 * Do not choose the parent anon_vma, otherwise the first child
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 		 * will always reuse it. The root anon_vma is never reused:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 		 * it has a self-parent reference and at least one child.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 		if (!dst->anon_vma && src->anon_vma &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 		    anon_vma != src->anon_vma && anon_vma->degree < 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 			dst->anon_vma = anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 	if (dst->anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 		dst->anon_vma->degree++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 	unlock_anon_vma_root(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313)  enomem_failure:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 	 * dst->anon_vma is dropped here otherwise its degree can be incorrectly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 	 * decremented in unlink_anon_vmas().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) 	 * We can safely do this because callers of anon_vma_clone() don't care
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 	 * about dst->anon_vma if anon_vma_clone() failed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 	dst->anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 	unlink_anon_vmas(dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 	return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) }
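
/*
 * Rough illustration of the reuse rule above: a task that forks a child,
 * which forks a grandchild, and so on, would otherwise add one anon_vma
 * level per generation,
 *
 *	root <- child1 <- child2 <- child3 <- ...	(unbounded growth)
 *
 * With the degree check, once an intermediate process has exited and its
 * anon_vma is left with no vma and at most one child (degree < 2), that
 * anon_vma is reused by the next fork instead of adding another level,
 * so the hierarchy stays shallow and rmap walks stay cheap.
 */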
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326)  * Attach vma to its own anon_vma, as well as to the anon_vmas that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327)  * the corresponding VMA in the parent process is attached to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328)  * Returns 0 on success, non-zero on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) 	struct anon_vma_chain *avc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 	struct anon_vma *anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 	int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 	/* Don't bother if the parent process has no anon_vma here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 	if (!pvma->anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	/* Drop inherited anon_vma, we'll reuse existing or allocate new. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 	vma->anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 	 * First, attach the new VMA to the parent VMA's anon_vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 	 * so rmap can find non-COWed pages in child processes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 	error = anon_vma_clone(vma, pvma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 	if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 		return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 	/* An existing anon_vma has been reused, all done then. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 	if (vma->anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 	/* Then add our own anon_vma. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 	anon_vma = anon_vma_alloc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 	if (!anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 		goto out_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 	avc = anon_vma_chain_alloc(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 	if (!avc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 		goto out_error_free_anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 	 * The root anon_vma's spinlock is the lock actually used when we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 	 * lock any of the anon_vmas in this anon_vma tree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 	anon_vma->root = pvma->anon_vma->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 	anon_vma->parent = pvma->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 	 * With refcounts, an anon_vma can stay around longer than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 	 * process it belongs to. The root anon_vma needs to be pinned until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 	 * this anon_vma is freed, because the lock lives in the root.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) 	get_anon_vma(anon_vma->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 	/* Mark this anon_vma as the one where our new (COWed) pages go. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 	vma->anon_vma = anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 	anon_vma_lock_write(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 	anon_vma_chain_link(vma, avc, anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 	anon_vma->parent->degree++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 	anon_vma_unlock_write(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384)  out_error_free_anon_vma:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 	put_anon_vma(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386)  out_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) 	unlink_anon_vmas(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) 	return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) }
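
/*
 * anon_vma_fork() is invoked when the parent's vmas are duplicated;
 * condensed, dup_mmap() in kernel/fork.c does roughly:
 *
 *	tmp = vm_area_dup(mpnt);
 *	...
 *	if (anon_vma_fork(tmp, mpnt))
 *		goto fail_nomem_anon_vma_fork;
 *
 * so each child vma ends up linked both to its own fresh anon_vma (for
 * new COWed pages) and, via anon_vma_clone(), to all of the parent's
 * anon_vmas (for pages still shared with the parent).
 */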
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) void unlink_anon_vmas(struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 	struct anon_vma_chain *avc, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 	struct anon_vma *root = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 	 * Unlink each anon_vma chained to the VMA.  This list is ordered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) 	 * from newest to oldest, ensuring the root anon_vma gets freed last.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 		struct anon_vma *anon_vma = avc->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 		root = lock_anon_vma_root(root, anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 		anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 		 * Leave empty anon_vmas on the list - we'll need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 		 * to free them outside the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 		if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 			anon_vma->parent->degree--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 		list_del(&avc->same_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 		anon_vma_chain_free(avc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 	if (vma->anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 		vma->anon_vma->degree--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 	unlock_anon_vma_root(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 	 * Iterate the list once more, it now only contains empty and unlinked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 	 * anon_vmas, and destroy them. This could not be done earlier because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 	 * __put_anon_vma() needs to write-acquire the anon_vma->root->rwsem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 		struct anon_vma *anon_vma = avc->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 		VM_WARN_ON(anon_vma->degree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 		put_anon_vma(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 		list_del(&avc->same_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) 		anon_vma_chain_free(avc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) static void anon_vma_ctor(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 	struct anon_vma *anon_vma = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 	init_rwsem(&anon_vma->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 	atomic_set(&anon_vma->refcount, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 	anon_vma->rb_root = RB_ROOT_CACHED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) void __init anon_vma_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 			0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) 			anon_vma_ctor);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) 	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) 			SLAB_PANIC|SLAB_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457)  * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459)  * Since there is no serialization whatsoever against page_remove_rmap()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460)  * the best this function can do is return a locked anon_vma that might
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461)  * have been relevant to this page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463)  * The page might have been remapped to a different anon_vma or the anon_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464)  * returned may already be freed (and even reused).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466)  * In case it was remapped to a different anon_vma, the new anon_vma will be a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467)  * child of the old anon_vma, and the anon_vma lifetime rules will therefore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468)  * ensure that any anon_vma obtained from the page will still be valid for as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469)  * long as we observe page_mapped() [ hence all those page_mapped() tests ].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471)  * All users of this function must be very careful when walking the anon_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472)  * chain and verify that the page in question is indeed mapped in it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473)  * [ something equivalent to page_mapped_in_vma() ].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475)  * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476)  * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477)  * if there is a mapcount, we can dereference the anon_vma after observing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478)  * those.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) struct anon_vma *page_get_anon_vma(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 	struct anon_vma *anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 	unsigned long anon_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 	if (!page_mapped(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 		anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 	 * If this page is still mapped, then its anon_vma cannot have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 	 * freed.  But if it has been unmapped, we have no security against the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 	 * anon_vma structure being freed and reused (for another anon_vma:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 	 * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 	 * above cannot corrupt).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 	if (!page_mapped(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 		put_anon_vma(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 	return anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) }
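
/*
 * Typical usage: callers that need the anon_vma to survive a sleeping
 * operation (THP split, page migration) take a reference here and drop
 * it with put_anon_vma() when done, roughly:
 *
 *	anon_vma = page_get_anon_vma(page);
 *	if (!anon_vma)
 *		goto out;	(page was unmapped under us)
 *	anon_vma_lock_write(anon_vma);
 *	... split / migrate work ...
 *	anon_vma_unlock_write(anon_vma);
 *	put_anon_vma(anon_vma);
 */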
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517)  * Similar to page_get_anon_vma() except it locks the anon_vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519)  * It's a little more complex as it tries to keep the fast path to a single
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520)  * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521)  * reference like with page_get_anon_vma() and then block on the mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) struct anon_vma *page_lock_anon_vma_read(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 	struct anon_vma *anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) 	struct anon_vma *root_anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 	unsigned long anon_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 	if (!page_mapped(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 	root_anon_vma = READ_ONCE(anon_vma->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 	if (down_read_trylock(&root_anon_vma->rwsem)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 		 * If the page is still mapped, then this anon_vma is still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 		 * its anon_vma, and holding the mutex ensures that it will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 		 * not go away, see anon_vma_free().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 		if (!page_mapped(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 			up_read(&root_anon_vma->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 			anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 	/* trylock failed, we got to sleep */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) 	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 		anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 	if (!page_mapped(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 		put_anon_vma(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) 	/* we pinned the anon_vma, it's safe to sleep */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 	anon_vma_lock_read(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 	if (atomic_dec_and_test(&anon_vma->refcount)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 		 * Oops, we held the last refcount, release the lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 		 * and bail -- can't simply use put_anon_vma() because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 		 * we'll deadlock on the anon_vma_lock_write() recursion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 		anon_vma_unlock_read(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 		__put_anon_vma(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 		anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 	return anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 	return anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 	anon_vma_unlock_read(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) }
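
/*
 * The lock/unlock pair above brackets the anon rmap walk; condensed from
 * rmap_walk_anon(), the pattern is roughly:
 *
 *	anon_vma = page_lock_anon_vma_read(page);
 *	if (!anon_vma)
 *		return;
 *	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
 *				       pgoff_start, pgoff_end) {
 *		struct vm_area_struct *vma = avc->vma;
 *		... rwc->rmap_one(page, vma, address, rwc->arg) ...
 *	}
 *	page_unlock_anon_vma_read(anon_vma);
 */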
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592)  * Flush TLB entries for recently unmapped pages from remote CPUs. It is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593)  * important if a PTE was dirty when it was unmapped that it's flushed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594)  * before any IO is initiated on the page to prevent lost writes. Similarly,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595)  * it must be flushed before freeing to prevent data leakage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) void try_to_unmap_flush(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 	if (!tlb_ubc->flush_required)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 	arch_tlbbatch_flush(&tlb_ubc->arch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 	tlb_ubc->flush_required = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 	tlb_ubc->writable = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) /* Flush iff there are potentially writable TLB entries that can race with IO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) void try_to_unmap_flush_dirty(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 	if (tlb_ubc->writable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 		try_to_unmap_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 	arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 	tlb_ubc->flush_required = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 	 * Ensure compiler does not re-order the setting of tlb_flush_batched
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 	 * before the PTE is cleared.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 	barrier();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 	mm->tlb_flush_batched = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 	 * If the PTE was dirty then it's best to assume it's writable. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 	 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 	 * before the page is queued for IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 	if (writable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 		tlb_ubc->writable = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) }
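
/*
 * The batching flow during reclaim, condensed: while try_to_unmap() runs
 * with TTU_BATCH_FLUSH, each cleared PTE only calls
 * set_tlb_ubc_flush_pending() instead of flushing right away, and the
 * reclaim path then does roughly:
 *
 *	try_to_unmap(page, flags | TTU_BATCH_FLUSH);
 *	...
 *	try_to_unmap_flush_dirty();	(before starting writeback)
 *	...
 *	try_to_unmap_flush();		(before the pages are freed)
 *
 * so one IPI covers a whole batch of unmapped pages.
 */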
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642)  * Returns true if the TLB flush should be deferred to the end of a batch of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643)  * unmap operations to reduce IPIs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 	bool should_defer = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 	if (!(flags & TTU_BATCH_FLUSH))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 	/* If remote CPUs need to be flushed then defer the flush by batching it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 		should_defer = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 	put_cpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 	return should_defer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661)  * Reclaim unmaps pages under the PTL but does not flush the TLB prior to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662)  * releasing the PTL if TLB flushes are batched. It's possible for a parallel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663)  * operation such as mprotect or munmap to race between reclaim unmapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664)  * the page and flushing the page. If this race occurs, it potentially allows
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665)  * access to data via a stale TLB entry. Tracking all mm's that have TLB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666)  * batching in flight would be expensive during reclaim so instead track
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667)  * whether TLB batching occurred in the past and if so then do a flush here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668)  * if required. This will cost one additional flush per reclaim cycle paid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669)  * by the first operation at risk such as mprotect and munmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671)  * This must be called under the PTL so that an access to tlb_flush_batched
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672)  * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673)  * via the PTL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) void flush_tlb_batched_pending(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 	if (data_race(mm->tlb_flush_batched)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 		flush_tlb_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 		 * Do not allow the compiler to re-order the clearing of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 		 * tlb_flush_batched before the tlb is flushed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 		barrier();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 		mm->tlb_flush_batched = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) }
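
/*
 * Callers that modify PTEs in parallel with reclaim invoke this right
 * after taking the pte lock; zap_pte_range(), for example, roughly does:
 *
 *	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *	flush_tlb_batched_pending(mm);
 *	... clear/modify ptes ...
 *
 * which closes the window described above in which a stale TLB entry
 * could outlive reclaim's deferred flush.
 */
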
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700)  * At what user virtual address is page expected in vma?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701)  * Caller should check the page is actually part of the vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 	if (PageAnon(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 		struct anon_vma *page__anon_vma = page_anon_vma(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 		 * Note: swapoff's unuse_vma() is more efficient with this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 		 * check, and needs it to match anon_vma when KSM is active.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 		if (!vma->anon_vma || !page__anon_vma ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 		    vma->anon_vma->root != page__anon_vma->root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 	} else if (!vma->vm_file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 	} else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 	return vma_address(page, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) }
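
/*
 * The address itself comes from vma_address() (mm/internal.h), which for
 * a page that belongs to the vma boils down to the usual linear-mapping
 * arithmetic, roughly:
 *
 *	pgoff   = page_to_pgoff(page);
 *	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 *
 * The checks above only establish that the page plausibly belongs to
 * this vma (same anon_vma root for anon pages, same address_space for
 * file pages) before that arithmetic is trusted.
 */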
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 	pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 	p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 	pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 	pmd_t *pmd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 	pmd_t pmde;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 	pgd = pgd_offset(mm, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 	if (!pgd_present(*pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	p4d = p4d_offset(pgd, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 	if (!p4d_present(*p4d))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 	pud = pud_offset(p4d, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 	if (!pud_present(*pud))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 	pmd = pmd_offset(pud, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	 * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	 * without holding anon_vma lock for write.  So when looking for a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	 * genuine pmde (in which to find pte), test present and !THP together.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 	pmde = *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 	barrier();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	if (!pmd_present(pmde) || pmd_trans_huge(pmde))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 		pmd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	return pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) }
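
A hedged follow-up sketch (editor's addition; `mm`, `address` and the locals are assumed caller context): a caller that needs the pte typically maps and locks it under the pmd returned here, with mmap_lock or the relevant rmap lock already held:

	spinlock_t *ptl;
	pte_t *pte;
	pmd_t *pmd = mm_find_pmd(mm, address);

	if (!pmd)
		return;		/* nothing is mapped at this address */
	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
	/* ... inspect or modify *pte under the pte lock ... */
	pte_unmap_unlock(pte, ptl);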
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) struct page_referenced_arg {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 	int mapcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	int referenced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	unsigned long vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	struct mem_cgroup *memcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764)  * arg: page_referenced_arg will be passed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 			unsigned long address, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 	struct page_referenced_arg *pra = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 	struct page_vma_mapped_walk pvmw = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 		.page = page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 		.vma = vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 		.address = address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 	int referenced = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 	while (page_vma_mapped_walk(&pvmw)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 		address = pvmw.address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 		if (vma->vm_flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 			page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 			pra->vm_flags |= VM_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 			return false; /* To break the loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 		if (pvmw.pte) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 			if (ptep_clear_flush_young_notify(vma, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 						pvmw.pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 				 * Don't treat a reference through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 				 * a sequentially read mapping as such.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 				 * If the page has been used in another mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 				 * we will catch it; if this other mapping is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 				 * already gone, the unmap path will have set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 				 * PG_referenced or activated the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 				if (likely(!(vma->vm_flags & VM_SEQ_READ)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 					referenced++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 			if (pmdp_clear_flush_young_notify(vma, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 						pvmw.pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 				referenced++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 			/* unexpected pmd-mapped page? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 			WARN_ON_ONCE(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 		pra->mapcount--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 	if (referenced)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 		clear_page_idle(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 	if (test_and_clear_page_young(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 		referenced++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	if (referenced) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 		pra->referenced++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 		pra->vm_flags |= vma->vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 	trace_android_vh_page_referenced_one_end(vma, page, referenced);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	if (!pra->mapcount)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 		return false; /* To break the loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 	struct page_referenced_arg *pra = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 	struct mem_cgroup *memcg = pra->memcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 	if (!mm_match_cgroup(vma->vm_mm, memcg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841)  * page_referenced - test if the page was referenced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842)  * @page: the page to test
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843)  * @is_locked: caller holds lock on the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844)  * @memcg: target memory cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845)  * @vm_flags: collect the vma->vm_flags of the mappings which referenced the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847)  * Quick test_and_clear_referenced for all mappings to a page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848)  * returns the number of ptes which referenced the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) int page_referenced(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 		    int is_locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 		    struct mem_cgroup *memcg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		    unsigned long *vm_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	int we_locked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 	struct page_referenced_arg pra = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 		.mapcount = total_mapcount(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 		.memcg = memcg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	struct rmap_walk_control rwc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 		.rmap_one = page_referenced_one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 		.arg = (void *)&pra,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 		.anon_lock = page_lock_anon_vma_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	*vm_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	if (!pra.mapcount)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	if (!page_rmapping(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 	if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 		we_locked = trylock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 		if (!we_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 			return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	 * If we are reclaiming on behalf of a cgroup, skip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	 * counting on behalf of references from different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	 * cgroups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	if (memcg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		rwc.invalid_vma = invalid_page_referenced_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	rmap_walk(page, &rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	*vm_flags = pra.vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 	if (we_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 		unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 	return pra.referenced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) }
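
For illustration only (editor's sketch; `keep` is a hypothetical local and the caller is assumed to hold a reference on the page): a reclaim-style caller combines the returned count with the collected vm_flags along these lines:

	unsigned long vm_flags;
	int referenced;

	referenced = page_referenced(page, PageLocked(page), NULL, &vm_flags);
	if (vm_flags & VM_LOCKED)
		keep = true;	/* an mlocked vma still maps the page */
	else if (referenced)
		keep = true;	/* recently referenced: prefer to keep it */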
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 			    unsigned long address, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	struct page_vma_mapped_walk pvmw = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 		.page = page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 		.vma = vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 		.address = address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 		.flags = PVMW_SYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	struct mmu_notifier_range range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	int *cleaned = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	 * We have to assume the worst case, i.e. pmd, for invalidation. Note
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	 * that the page cannot be freed from this function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 				0, vma, vma->vm_mm, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 				vma_address_end(page, vma));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 	mmu_notifier_invalidate_range_start(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	while (page_vma_mapped_walk(&pvmw)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 		int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 		address = pvmw.address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 		if (pvmw.pte) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 			pte_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 			pte_t *pte = pvmw.pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 			if (!pte_dirty(*pte) && !pte_write(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 			flush_cache_page(vma, address, pte_pfn(*pte));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 			entry = ptep_clear_flush(vma, address, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 			entry = pte_wrprotect(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 			entry = pte_mkclean(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 			set_pte_at(vma->vm_mm, address, pte, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 			ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) #ifdef CONFIG_TRANSPARENT_HUGEPAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 			pmd_t *pmd = pvmw.pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 			pmd_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 			if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 			flush_cache_page(vma, address, page_to_pfn(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 			entry = pmdp_invalidate(vma, address, pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 			entry = pmd_wrprotect(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 			entry = pmd_mkclean(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 			set_pmd_at(vma->vm_mm, address, pmd, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 			ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 			/* unexpected pmd-mapped page? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 			WARN_ON_ONCE(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 		 * No need to call mmu_notifier_invalidate_range() as we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 		 * downgrading page table protection, not changing it to point
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 		 * to a new page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 		 * See Documentation/vm/mmu_notifier.rst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 			(*cleaned)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 	mmu_notifier_invalidate_range_end(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 	if (vma->vm_flags & VM_SHARED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) int page_mkclean(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	int cleaned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	struct address_space *mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 	struct rmap_walk_control rwc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 		.arg = (void *)&cleaned,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 		.rmap_one = page_mkclean_one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 		.invalid_vma = invalid_mkclean_vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	BUG_ON(!PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	if (!page_mapped(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 	mapping = page_mapping(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 	if (!mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	rmap_walk(page, &rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	return cleaned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) EXPORT_SYMBOL_GPL(page_mkclean);
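
A hedged usage sketch (editor's addition, not taken from any specific caller): since the BUG_ON above requires the page lock, a writeback-style caller locks the page first and re-dirties it if any pte had to be cleaned:

	lock_page(page);
	if (page_mkclean(page))
		set_page_dirty(page);	/* at least one pte was dirty or writable */
	unlock_page(page);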
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)  * page_move_anon_rmap - move a page to our anon_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)  * @page:	the page to move to our anon_vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007)  * @vma:	the vma the page belongs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009)  * When a page belongs exclusively to one process after a COW event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010)  * that page can be moved into the anon_vma that belongs to just that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)  * process, so the rmap code will not search the parent or sibling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)  * processes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	struct anon_vma *anon_vma = vma->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 	page = compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	VM_BUG_ON_VMA(!anon_vma, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 	 * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	 * simultaneously, so a concurrent reader (e.g. page_referenced()'s
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 	 * PageAnon()) will not see one without the other.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 	WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) }
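
Editor's sketch of the reuse pattern described above (simplified; a real COW write-fault path also checks the page's reference count before deciding it owns the page exclusively):

	/* page lock held; mapcount/refcount checks abbreviated */
	if (PageAnon(page) && !PageKsm(page) && page_mapcount(page) == 1)
		page_move_anon_rmap(page, vma);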
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033)  * __page_set_anon_rmap - set up new anonymous rmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034)  * @page:	Page or Hugepage to add to rmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035)  * @vma:	VM area to add page to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)  * @address:	the user virtual address of the mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037)  * @exclusive:	the page is exclusively owned by the current process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) static void __page_set_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	struct vm_area_struct *vma, unsigned long address, int exclusive)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 	struct anon_vma *anon_vma = vma->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	BUG_ON(!anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	if (PageAnon(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	 * If the page isn't exclusively mapped into this vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	 * we must use the _oldest_ possible anon_vma for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 	 * page mapping!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	if (!exclusive)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 		anon_vma = anon_vma->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 	page->mapping = (struct address_space *) anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	page->index = linear_page_index(vma, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)  * __page_check_anon_rmap - sanity check anonymous rmap addition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)  * @page:	the page to add the mapping to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065)  * @vma:	the vm area in which the mapping is added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)  * @address:	the user virtual address mapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) static void __page_check_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	struct vm_area_struct *vma, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 	 * The page's anon-rmap details (mapping and index) are guaranteed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 	 * be set up correctly at this point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	 * We have exclusion against page_add_anon_rmap because the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	 * always holds the page locked, except if called from page_dup_rmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	 * in which case the page is already known to be set up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	 * We have exclusion against page_add_new_anon_rmap because those pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 	 * are initially only visible via the pagetables, and the pte is locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 	 * over the call to page_add_new_anon_rmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 	VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 		       page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089)  * page_add_anon_rmap - add pte mapping to an anonymous page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)  * @page:	the page to add the mapping to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)  * @vma:	the vm area in which the mapping is added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092)  * @address:	the user virtual address mapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093)  * @compound:	charge the page as compound or small page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)  * The caller needs to hold the pte lock, and the page must be locked in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096)  * the anon_vma case: to serialize mapping,index checking after setting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097)  * and to ensure that PageAnon is not being upgraded racily to PageKsm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098)  * (but PageKsm is never downgraded to PageAnon).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) void page_add_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	struct vm_area_struct *vma, unsigned long address, bool compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 	do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107)  * Special version of the above for do_swap_page, which often runs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108)  * into pages that are exclusively owned by the current process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109)  * Everybody else should continue to use page_add_anon_rmap above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) void do_page_add_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	struct vm_area_struct *vma, unsigned long address, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	bool compound = flags & RMAP_COMPOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 	bool first;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	if (unlikely(PageKsm(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 		lock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 		VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	if (compound) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 		atomic_t *mapcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 		VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 		mapcount = compound_mapcount_ptr(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 		first = atomic_inc_and_test(mapcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 		first = atomic_inc_and_test(&page->_mapcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	if (first) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 		int nr = compound ? thp_nr_pages(page) : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 		 * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 		 * these counters are not modified in interrupt context, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 		 * pte lock (a spinlock) is held, which implies preemption
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 		 * disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 		if (compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 			__inc_lruvec_page_state(page, NR_ANON_THPS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 		__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	if (unlikely(PageKsm(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 		unlock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 	/* address might be in next vma when migration races vma_adjust */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	if (first)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 		__page_set_anon_rmap(page, vma, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 				flags & RMAP_EXCLUSIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 		__page_check_anon_rmap(page, vma, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)  * __page_add_new_anon_rmap - add pte mapping to a new anonymous page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160)  * @page:	the page to add the mapping to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)  * @vma:	the vm area in which the mapping is added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)  * @address:	the user virtual address mapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)  * @compound:	charge the page as compound or small page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)  * Same as page_add_anon_rmap but must only be called on *new* pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166)  * This means the inc-and-test can be bypassed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167)  * Page does not have to be locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) void __page_add_new_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	struct vm_area_struct *vma, unsigned long address, bool compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	int nr = compound ? thp_nr_pages(page) : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	__SetPageSwapBacked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	if (compound) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 		/* increment count (starts at -1) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 		atomic_set(compound_mapcount_ptr(page), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 		if (hpage_pincount_available(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 			atomic_set(compound_pincount_ptr(page), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 		__inc_lruvec_page_state(page, NR_ANON_THPS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 		/* Anon THP always mapped first with PMD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 		VM_BUG_ON_PAGE(PageTransCompound(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 		/* increment count (starts at -1) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 		atomic_set(&page->_mapcount, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 	__page_set_anon_rmap(page, vma, address, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) }
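
Editor's sketch of a typical anonymous-fault call site (simplified; mm counters, cgroup charging and the actual pte value are omitted): the new page is added to the rmap and the LRU while the pte lock is held, before the pte becomes visible:

	/* pte lock held; page is newly allocated and mapped nowhere else */
	__page_add_new_anon_rmap(page, vma, address, false);
	lru_cache_add_inactive_or_unevictable(page, vma);
	/* ... the caller then installs the pte with set_pte_at() ... */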
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194)  * page_add_file_rmap - add pte mapping to a file page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195)  * @page: the page to add the mapping to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)  * @compound: charge the page as compound or small page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)  * The caller needs to hold the pte lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) void page_add_file_rmap(struct page *page, bool compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	int i, nr = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	lock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	if (compound && PageTransHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 		for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 			if (atomic_inc_and_test(&page[i]._mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 				nr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 		if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 		if (PageSwapBacked(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 			__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 			__inc_node_page_state(page, NR_FILE_PMDMAPPED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 		if (PageTransCompound(page) && page_mapping(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 			VM_WARN_ON_ONCE(!PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 			SetPageDoubleMap(compound_head(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 			if (PageMlocked(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 				clear_page_mlock(compound_head(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 		if (!atomic_inc_and_test(&page->_mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	unlock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) }
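
Editor's sketch of the usual file-fault call site (simplified; mm counters, arch cache flushes and pte protection details are omitted, and `pte` is an assumed pte_t * from the fault path): the rmap is added under the pte lock just before the pte is installed:

	page_add_file_rmap(page, false);
	set_pte_at(vma->vm_mm, address, pte, mk_pte(page, vma->vm_page_prot));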
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) static void page_remove_file_rmap(struct page *page, bool compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	int i, nr = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	if (unlikely(PageHuge(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 		/* hugetlb pages are always mapped with pmds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 		atomic_dec(compound_mapcount_ptr(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	/* page still mapped by someone else? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	if (compound && PageTransHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 		for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 			if (atomic_add_negative(-1, &page[i]._mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 				nr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 		if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 		if (PageSwapBacked(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 			__dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 			__dec_node_page_state(page, NR_FILE_PMDMAPPED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 		if (!atomic_add_negative(-1, &page->_mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	 * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	 * these counters are not modified in interrupt context, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 	 * pte lock (a spinlock) is held, which implies preemption disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 	__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 	if (unlikely(PageMlocked(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 		clear_page_mlock(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) static void page_remove_anon_compound_rmap(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	int i, nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	/* Hugepages are not counted in NR_ANON_PAGES for now. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	if (unlikely(PageHuge(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	__dec_lruvec_page_state(page, NR_ANON_THPS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	if (TestClearPageDoubleMap(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 		 * Subpages can be mapped with PTEs too. Check how many of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 		 * them are still mapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 		for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 			if (atomic_add_negative(-1, &page[i]._mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 				nr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 		 * Queue the page for deferred split if at least one small
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 		 * page of the compound page is unmapped, but at least one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 		 * small page is still mapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 		if (nr && nr < thp_nr_pages(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 			deferred_split_huge_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 		nr = thp_nr_pages(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 	if (unlikely(PageMlocked(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 		clear_page_mlock(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 	if (nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 		__mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319)  * page_remove_rmap - take down pte mapping from a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)  * @page:	page to remove mapping from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)  * @compound:	uncharge the page as compound or small page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323)  * The caller needs to hold the pte lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) void page_remove_rmap(struct page *page, bool compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 	lock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	if (!PageAnon(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 		page_remove_file_rmap(page, compound);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 	if (compound) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 		page_remove_anon_compound_rmap(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 	/* page still mapped by someone else? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	if (!atomic_add_negative(-1, &page->_mapcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	 * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	 * these counters are not modified in interrupt context, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 	 * pte lock (a spinlock) is held, which implies preemption disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 	__dec_lruvec_page_state(page, NR_ANON_MAPPED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 	if (unlikely(PageMlocked(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 		clear_page_mlock(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 	if (PageTransCompound(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 		deferred_split_huge_page(compound_head(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	 * It would be tidy to reset the PageAnon mapping here,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	 * but that might overwrite a racing page_add_anon_rmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 	 * which increments mapcount after us but sets mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 	 * before us: so leave the reset to free_unref_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 	 * and remember that it's only reliable while mapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 	 * Leaving it set also helps swapoff to reinstate ptes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 	 * faster for those pages still in swapcache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 	unlock_page_memcg(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) }
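
Editor's sketch of the teardown ordering (hypothetical locals; TLB flushing and batching are omitted): the pte is cleared under the pte lock, the rmap is dropped, and only then is the page reference released:

	pte_t pteval = ptep_get_and_clear(mm, address, pte);	/* pte lock held */

	if (pte_dirty(pteval))
		set_page_dirty(page);
	page_remove_rmap(page, false);
	put_page(page);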
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370)  * @arg: enum ttu_flags will be passed to this argument
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 		     unsigned long address, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 	struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 	struct page_vma_mapped_walk pvmw = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 		.page = page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 		.vma = vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 		.address = address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 	pte_t pteval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 	struct page *subpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 	bool ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 	struct mmu_notifier_range range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 	 * When racing against e.g. zap_pte_range() on another cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 	 * in between its ptep_get_and_clear_full() and page_remove_rmap(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 	 * try_to_unmap() may return false when it is about to become true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 	 * if page table locking is skipped: use TTU_SYNC to wait for that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 	if (flags & TTU_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 		pvmw.flags = PVMW_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 	/* munlock has nothing to gain from examining un-locked vmas */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 	if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 	    is_zone_device_page(page) && !is_device_private_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 	if (flags & TTU_SPLIT_HUGE_PMD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 		split_huge_pmd_address(vma, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 				flags & TTU_SPLIT_FREEZE, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 	 * For THP, we have to assume the worst case, i.e. pmd, for invalidation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 	 * For hugetlb, it could be much worse if we need to do pud
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 	 * invalidation in the case of pmd sharing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	 * Note that the page cannot be freed in this function, as the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 	 * of try_to_unmap() must hold a reference on the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 	range.end = PageKsm(page) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 			address + PAGE_SIZE : vma_address_end(page, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 				address, range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 	if (PageHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 		 * If sharing is possible, start and end will be adjusted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 		 * accordingly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 		adjust_range_if_pmd_sharing_possible(vma, &range.start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 						     &range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 	mmu_notifier_invalidate_range_start(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 	while (page_vma_mapped_walk(&pvmw)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 		/* PMD-mapped THP migration entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 		if (!pvmw.pte && (flags & TTU_MIGRATION)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 			VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 			set_pmd_migration_entry(&pvmw, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 		 * If the page is mlock()d, we cannot swap it out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 		 * If it's recently referenced (perhaps page_referenced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 		 * skipped over this mm) then we should reactivate it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 		if (!(flags & TTU_IGNORE_MLOCK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 			if (vma->vm_flags & VM_LOCKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 				/* PTE-mapped THP are never mlocked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 				if (!PageTransCompound(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 					/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 					 * Holding pte lock, we do *not* need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 					 * mmap_lock here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 					 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 					mlock_vma_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 				ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 				page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 			if (flags & TTU_MUNLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 		/* Unexpected PMD-mapped THP? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 		VM_BUG_ON_PAGE(!pvmw.pte, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 		address = pvmw.address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 		if (PageHuge(page) && !PageAnon(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 			 * To call huge_pmd_unshare, i_mmap_rwsem must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 			 * held in write mode.  Caller needs to explicitly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 			 * do this outside rmap routines.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 			VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 			if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 				 * huge_pmd_unshare unmapped an entire PMD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 				 * page.  There is no way of knowing exactly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 				 * which PMDs may be cached for this mm, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 				 * we must flush them all.  start/end were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 				 * already adjusted above to cover this range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 				flush_cache_range(vma, range.start, range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 				flush_tlb_range(vma, range.start, range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 				mmu_notifier_invalidate_range(mm, range.start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 							      range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 				 * The ref count of the PMD page was dropped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 				 * which is part of the way map counting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 				 * is done for shared PMDs.  Return 'true'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 				 * here.  When there is no other sharing,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 				 * huge_pmd_unshare returns false and we will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 				 * unmap the actual page and drop map count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 				 * to zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 				page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 		if (IS_ENABLED(CONFIG_MIGRATION) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 		    (flags & TTU_MIGRATION) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 		    is_zone_device_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 			swp_entry_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 			pte_t swp_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 			pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 			 * Store the pfn of the page in a special migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 			 * pte. do_swap_page() will wait until the migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 			 * pte is removed and then restart fault handling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 			entry = make_migration_entry(page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 			swp_pte = swp_entry_to_pte(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 			 * pteval maps a zone device page and is therefore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 			 * a swap pte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 			if (pte_swp_soft_dirty(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 			if (pte_swp_uffd_wp(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 				swp_pte = pte_swp_mkuffd_wp(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 			 * No need to invalidate here, it will synchronize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 			 * against the special swap migration pte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 			 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 			 * The assignment to subpage above was computed from a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 			 * swap PTE which results in an invalid pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 			 * Since only PAGE_SIZE pages can currently be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 			 * migrated, just set it to page. This will need to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 			 * changed when hugepage migrations to device private
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 			 * memory are supported.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 			subpage = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 			goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 		/* Nuke the page table entry. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 		if (should_defer_flush(mm, flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 			 * We clear the PTE but do not flush so potentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 			 * a remote CPU could still be writing to the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 			 * If the entry was previously clean then the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 			 * architecture must guarantee that a clear->dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 			 * transition on a cached TLB entry is written through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 			 * and traps if the PTE is unmapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 			pteval = ptep_clear_flush(vma, address, pvmw.pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 		/* Move the dirty bit to the page. Now the pte is gone. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 		if (pte_dirty(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 			set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 		/* Update high watermark before we lower rss */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 		update_hiwater_rss(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 			if (PageHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 				hugetlb_count_sub(compound_nr(page), mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 				set_huge_swap_pte_at(mm, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 						     pvmw.pte, pteval,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 						     vma_mmu_pagesize(vma));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 				dec_mm_counter(mm, mm_counter(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 				set_pte_at(mm, address, pvmw.pte, pteval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 			 * The guest indicated that the page content is of no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 			 * interest anymore. Simply discard the pte, vmscan
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 			 * will take care of the rest.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 			 * A future reference will then fault in a new zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 			 * page. When userfaultfd is active, we must not drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 			 * this page though, as its main user (postcopy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 			 * migration) will not expect userfaults on already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 			 * copied pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 			dec_mm_counter(mm, mm_counter(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 			/* We have to invalidate as we cleared the pte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 			mmu_notifier_invalidate_range(mm, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 						      address + PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 		} else if (IS_ENABLED(CONFIG_MIGRATION) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 				(flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 			swp_entry_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 			pte_t swp_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 				set_pte_at(mm, address, pvmw.pte, pteval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 				ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 				page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 			 * Store the pfn of the page in a special migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 			 * pte. do_swap_page() will wait until the migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 			 * pte is removed and then restart fault handling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 			entry = make_migration_entry(subpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 					pte_write(pteval));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 			swp_pte = swp_entry_to_pte(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 			if (pte_soft_dirty(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 			if (pte_uffd_wp(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 				swp_pte = pte_swp_mkuffd_wp(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 			set_pte_at(mm, address, pvmw.pte, swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 			 * No need to invalidate here, it will synchronize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 			 * against the special swap migration pte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 		} else if (PageAnon(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 			swp_entry_t entry = { .val = page_private(subpage) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 			pte_t swp_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 			 * Store the swap location in the pte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 			 * See handle_pte_fault() ...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 			if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 				WARN_ON_ONCE(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 				ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 				/* We have to invalidate as we cleared the pte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 				mmu_notifier_invalidate_range(mm, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 							address + PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 				page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 			/* MADV_FREE page check */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 			if (!PageSwapBacked(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 				if (!PageDirty(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 					/* Invalidate as we cleared the pte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 					mmu_notifier_invalidate_range(mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 						address, address + PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 					dec_mm_counter(mm, MM_ANONPAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 					goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 				 * If the page was redirtied, it cannot be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 				 * discarded. Remap the page to page table.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 				set_pte_at(mm, address, pvmw.pte, pteval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 				SetPageSwapBacked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 				ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 				page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 			if (swap_duplicate(entry) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 				set_pte_at(mm, address, pvmw.pte, pteval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 				ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 				page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 				set_pte_at(mm, address, pvmw.pte, pteval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 				ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 				page_vma_mapped_walk_done(&pvmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 			if (list_empty(&mm->mmlist)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 				spin_lock(&mmlist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 				if (list_empty(&mm->mmlist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 					list_add(&mm->mmlist, &init_mm.mmlist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 				spin_unlock(&mmlist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 			dec_mm_counter(mm, MM_ANONPAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 			inc_mm_counter(mm, MM_SWAPENTS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 			swp_pte = swp_entry_to_pte(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 			if (pte_soft_dirty(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 			if (pte_uffd_wp(pteval))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 				swp_pte = pte_swp_mkuffd_wp(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 			set_pte_at(mm, address, pvmw.pte, swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 			/* Invalidate as we cleared the pte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 			mmu_notifier_invalidate_range(mm, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 						      address + PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 			 * This is a locked file-backed page, thus it cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 			 * be removed from the page cache and replaced by a new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 			 * page before mmu_notifier_invalidate_range_end, so no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 			 * concurrent thread can update its page table to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 			 * point at a new page while a device is still using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 			 * this page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 			 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 			 * See Documentation/vm/mmu_notifier.rst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 			dec_mm_counter(mm, mm_counter_file(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) discard:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 		 * No need to call mmu_notifier_invalidate_range(); it has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 		 * done above for all cases requiring it to happen under page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 		 * table lock before mmu_notifier_invalidate_range_end()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 		 * See Documentation/vm/mmu_notifier.rst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 		page_remove_rmap(subpage, PageHuge(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 		put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 	mmu_notifier_invalidate_range_end(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 	trace_android_vh_try_to_unmap_one(vma, page, address, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) }
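
/*
 * try_to_unmap_one() above picks one of several outcomes for each mapped PTE:
 * a hwpoison entry for poisoned pages, dropping a guest-discarded
 * (pte_unused) entry, a migration entry when TTU_MIGRATION/TTU_SPLIT_FREEZE
 * is set, a swap entry (or lazyfree discard) for anonymous pages, or a plain
 * rmap drop for file-backed pages.  Every swap-style entry is composed with
 * the same pattern, sketched here from the branches above:
 *
 *	swp_pte = swp_entry_to_pte(entry);
 *	if (pte_soft_dirty(pteval))
 *		swp_pte = pte_swp_mksoft_dirty(swp_pte);
 *	if (pte_uffd_wp(pteval))
 *		swp_pte = pte_swp_mkuffd_wp(swp_pte);
 *	set_pte_at(mm, address, pvmw.pte, swp_pte);
 *
 * so that soft-dirty and userfaultfd write-protect state survive the unmap.
 */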
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 	return vma_is_temporary_stack(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) static int page_not_mapped(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 	return !page_mapped(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) }
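
/*
 * invalid_migration_vma() and page_not_mapped() above are not called
 * directly; they are wired into struct rmap_walk_control as the .invalid_vma
 * and .done callbacks (see try_to_unmap() just below), letting the rmap walk
 * skip temporary exec stacks and stop as soon as the page has no mappings
 * left.
 */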
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735)  * try_to_unmap - try to remove all page table mappings to a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736)  * @page: the page to get unmapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737)  * @flags: action and flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739)  * Tries to remove all the page table entries which are mapping this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740)  * page, used in the pageout path.  Caller must hold the page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742)  * If unmap is successful, return true. Otherwise, false.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) bool try_to_unmap(struct page *page, enum ttu_flags flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 	struct rmap_walk_control rwc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 		.rmap_one = try_to_unmap_one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 		.arg = (void *)flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 		.done = page_not_mapped,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 		.anon_lock = page_lock_anon_vma_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	 * During exec, a temporary VMA is set up and later moved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 	 * The VMA is moved under the anon_vma lock but not the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 	 * page tables leading to a race where migration cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 	 * find the migration ptes. Rather than increasing the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 	 * locking requirements of exec(), migration skips
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 	 * temporary VMAs until after exec() completes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 	if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) 	    && !PageKsm(page) && PageAnon(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 		rwc.invalid_vma = invalid_migration_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 	if (flags & TTU_RMAP_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 		rmap_walk_locked(page, &rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 		rmap_walk(page, &rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 	 * When racing against e.g. zap_pte_range() on another cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 	 * in between its ptep_get_and_clear_full() and page_remove_rmap(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) 	 * try_to_unmap() may return false when it is about to become true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 	 * if page table locking is skipped: use TTU_SYNC to wait for that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 	return !page_mapcount(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) }
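
/*
 * Illustrative caller sketch (not code from this file): a pageout-style user
 * is expected to lock the page first and pick flags to suit its context,
 * for example
 *
 *	// hypothetical caller, names not taken from this kernel
 *	lock_page(page);
 *	if (page_mapped(page) && !try_to_unmap(page, TTU_BATCH_FLUSH))
 *		unmap_failed = true;
 *
 * TTU_BATCH_FLUSH lets should_defer_flush() batch TLB flushes, while
 * migration callers add TTU_MIGRATION and may pass TTU_RMAP_LOCKED when they
 * already hold the rmap lock.
 */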
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780)  * try_to_munlock - try to munlock a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781)  * @page: the page to be munlocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783)  * Called from munlock code.  Checks all of the VMAs mapping the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784)  * to make sure nobody else has this page mlocked. The page will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785)  * returned with PG_mlocked cleared if no other VMAs have it mlocked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) void try_to_munlock(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 	struct rmap_walk_control rwc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 		.rmap_one = try_to_unmap_one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 		.arg = (void *)TTU_MUNLOCK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 		.done = page_not_mapped,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 		.anon_lock = page_lock_anon_vma_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 	VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 	rmap_walk(page, &rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) }
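
/*
 * Note on TTU_MUNLOCK (handled near the top of try_to_unmap_one() earlier in
 * this file): with this flag the walk does not unmap anything, it only
 * re-mlocks the page if some mapping VMA still has VM_LOCKED set.  A caller
 * sketch, with hypothetical surrounding code:
 *
 *	// hypothetical munlock-path caller
 *	if (page_mapcount(page) > 1)
 *		try_to_munlock(page);
 *
 * i.e. the walk is only needed when the page might still be mapped into
 * another, possibly mlocked, VMA.
 */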
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) void __put_anon_vma(struct anon_vma *anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 	struct anon_vma *root = anon_vma->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 	anon_vma_free(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 	if (root != anon_vma && atomic_dec_and_test(&root->refcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 		anon_vma_free(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) static struct anon_vma *rmap_walk_anon_lock(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 					struct rmap_walk_control *rwc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 	struct anon_vma *anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 	if (rwc->anon_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 		return rwc->anon_lock(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 	 * because that depends on page_mapped(); but not all its usages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 	 * are holding mmap_lock. Users without mmap_lock are required to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 	 * take a reference count to prevent the anon_vma from disappearing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 	anon_vma = page_anon_vma(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 	if (!anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 	anon_vma_lock_read(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 	return anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836)  * rmap_walk_anon - do something to an anonymous page using the object-based
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837)  * rmap method
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838)  * @page: the page to be handled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839)  * @rwc: control variable according to each walk type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841)  * Find all the mappings of a page using the mapping pointer and the vma chains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842)  * contained in the anon_vma struct it points to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844)  * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845)  * where the page was found will be held for write.  So, we won't recheck
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846)  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)  * LOCKED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 		bool locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 	struct anon_vma *anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 	pgoff_t pgoff_start, pgoff_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 	struct anon_vma_chain *avc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 	if (locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 		anon_vma = page_anon_vma(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 		/* did the anon_vma disappear under us? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 		VM_BUG_ON_PAGE(!anon_vma, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 		anon_vma = rmap_walk_anon_lock(page, rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 	if (!anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 	pgoff_start = page_to_pgoff(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 	pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 			pgoff_start, pgoff_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 		struct vm_area_struct *vma = avc->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 		unsigned long address = vma_address(page, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 		VM_BUG_ON_VMA(address == -EFAULT, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 		cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 		if (!rwc->rmap_one(page, vma, address, rwc->arg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 		if (rwc->done && rwc->done(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 	if (!locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 		anon_vma_unlock_read(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890)  * rmap_walk_file - do something to a file-backed page using the object-based rmap method
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891)  * @page: the page to be handled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892)  * @rwc: control variable according to each walk type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894)  * Find all the mappings of a page using the mapping pointer and the vma chains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895)  * contained in the address_space struct it points to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897)  * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898)  * where the page was found will be held for write.  So, we won't recheck
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899)  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900)  * LOCKED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 		bool locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 	struct address_space *mapping = page_mapping(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 	pgoff_t pgoff_start, pgoff_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 	struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) 	 * The page lock not only makes sure that page->mapping cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 	 * suddenly be NULLified by truncation, it makes sure that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 	 * structure at mapping cannot be freed and reused yet,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 	 * so we can safely take mapping->i_mmap_rwsem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 	VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 	if (!mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 	pgoff_start = page_to_pgoff(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 	pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 	if (!locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 		i_mmap_lock_read(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 	vma_interval_tree_foreach(vma, &mapping->i_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 			pgoff_start, pgoff_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 		unsigned long address = vma_address(page, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 		VM_BUG_ON_VMA(address == -EFAULT, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 		cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 		if (!rwc->rmap_one(page, vma, address, rwc->arg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 			goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 		if (rwc->done && rwc->done(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 			goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 	if (!locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 		i_mmap_unlock_read(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) }
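
/*
 * In both walkers above the @locked argument means "the caller already holds
 * the relevant rmap lock" (the anon_vma rwsem or i_mmap_rwsem):
 * rmap_walk_locked() below passes true so the lock is neither taken nor
 * dropped here, while rmap_walk() passes false and the walker handles the
 * locking itself.
 */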
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) void rmap_walk(struct page *page, struct rmap_walk_control *rwc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 	if (unlikely(PageKsm(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 		rmap_walk_ksm(page, rwc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 	else if (PageAnon(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 		rmap_walk_anon(page, rwc, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 		rmap_walk_file(page, rwc, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) }
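
/*
 * Illustrative sketch (names here are placeholders, not from this file): any
 * rmap user builds a struct rmap_walk_control and hands it to rmap_walk(),
 * the same way try_to_unmap() does above:
 *
 *	struct rmap_walk_control rwc = {
 *		.rmap_one  = my_rmap_one,		// per-(page, vma) callback
 *		.arg       = &my_private_state,		// passed back to .rmap_one
 *		.done      = my_done,			// optional early-exit test
 *		.anon_lock = page_lock_anon_vma_read,	// optional custom locking
 *	};
 *	rmap_walk(page, &rwc);
 *
 * rmap_walk() then dispatches to the KSM, anonymous or file walker according
 * to the page type, as above.
 */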
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) /* Like rmap_walk, but caller holds relevant rmap lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 	/* no ksm support for now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 	VM_BUG_ON_PAGE(PageKsm(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 	if (PageAnon(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 		rmap_walk_anon(page, rwc, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 		rmap_walk_file(page, rwc, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) #ifdef CONFIG_HUGETLB_PAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968)  * The following two functions are for anonymous (private mapped) hugepages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969)  * Unlike common anonymous pages, anonymous hugepages have no accounting code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970)  * and no lru code, because we handle hugepages differently from common pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) void hugepage_add_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 			    struct vm_area_struct *vma, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 	struct anon_vma *anon_vma = vma->anon_vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 	int first;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 	BUG_ON(!PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) 	BUG_ON(!anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 	/* address might be in next vma when migration races vma_adjust */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 	first = atomic_inc_and_test(compound_mapcount_ptr(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 	if (first)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 		__page_set_anon_rmap(page, vma, address, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) void hugepage_add_new_anon_rmap(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 			struct vm_area_struct *vma, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	atomic_set(compound_mapcount_ptr(page), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 	if (hpage_pincount_available(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 		atomic_set(compound_pincount_ptr(page), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 	__page_set_anon_rmap(page, vma, address, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) }
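
/*
 * Both helpers above only touch the compound mapcount: hugetlb pages are
 * mapped and unmapped as a single unit, so there is no per-subpage _mapcount
 * or LRU bookkeeping here.  As the BUG_ON()s in hugepage_add_anon_rmap()
 * show, its callers must hold the page lock and provide a VMA with a valid
 * anon_vma when wiring up the reverse mapping.
 */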
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) #endif /* CONFIG_HUGETLB_PAGE */