Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0
/*
 * Memory Migration functionality - linux/mm/migrate.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter
 */

#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/pagewalk.h>
#include <linux/pfn_t.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/page_owner.h>
#include <linux/sched/mm.h>
#include <linux/ptrace.h>
#include <linux/oom.h>

#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>
#undef CREATE_TRACE_POINTS
#include <trace/hooks/mm.h>

#include "internal.h"

int isolate_movable_page(struct page *page, isolate_mode_t mode)
{
	struct address_space *mapping;

	/*
	 * Avoid burning cycles with pages that are still under __free_pages(),
	 * or just got freed under us.
	 *
	 * In case we 'win' a race for a movable page being freed under us and
	 * raise its refcount preventing __free_pages() from doing its job,
	 * the put_page() at the end of this block will take care of
	 * releasing this page, thus avoiding a nasty leakage.
	 */
	if (unlikely(!get_page_unless_zero(page)))
		goto out;

	/*
	 * Check PageMovable before taking the page lock, because the page's
	 * owner assumes nobody touches the PG_locked bit of a newly allocated
	 * page, so grabbing the lock unconditionally would break the owner's
	 * side of the protocol.
	 */
	if (unlikely(!__PageMovable(page)))
		goto out_putpage;
	/*
	 * As movable pages are not isolated from LRU lists, concurrent
	 * compaction threads can race against page migration functions
	 * as well as against a page being released.
	 *
	 * In order to avoid having an already isolated movable page
	 * being (wrongly) re-isolated while it is under migration,
	 * or to avoid attempting to isolate pages being released,
	 * let's be sure we have the page lock
	 * before proceeding with the movable page isolation steps.
	 */
	if (unlikely(!trylock_page(page)))
		goto out_putpage;

	if (!PageMovable(page) || PageIsolated(page))
		goto out_no_isolated;

	mapping = page_mapping(page);
	VM_BUG_ON_PAGE(!mapping, page);

	if (!mapping->a_ops->isolate_page(page, mode))
		goto out_no_isolated;

	/* Driver shouldn't use PG_isolated bit of page->flags */
	WARN_ON_ONCE(PageIsolated(page));
	SetPageIsolated(page);
	unlock_page(page);

	return 0;

out_no_isolated:
	unlock_page(page);
out_putpage:
	put_page(page);
out:
	return -EBUSY;
}
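
/*
 * Sketch of the driver side of the contract above (names hypothetical, not
 * part of this file): the owner of a non-LRU movable page supplies the
 * a_ops callbacks used here and marks the page with
 * __SetPageMovable(page, mapping) while holding the page lock, e.g.
 *
 *	static const struct address_space_operations my_movable_aops = {
 *		.isolate_page	= my_isolate_page,
 *		.migratepage	= my_migratepage,
 *		.putback_page	= my_putback_page,
 *	};
 *
 * That marking is what makes __PageMovable()/PageMovable() succeed here.
 * In-tree users in this kernel include zsmalloc and balloon compaction; see
 * Documentation/vm/page_migration.rst for the full protocol.
 */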

/* It should be called on a page which is PG_movable */
void putback_movable_page(struct page *page)
{
	struct address_space *mapping;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);

	mapping = page_mapping(page);
	mapping->a_ops->putback_page(page);
	ClearPageIsolated(page);
}

/*
 * Put previously isolated pages back onto the appropriate lists
 * from where they were once taken off for compaction/migration.
 *
 * This function shall be used whenever the isolated pageset has been
 * built from LRU, balloon or hugetlbfs pages. See isolate_migratepages_range()
 * and isolate_huge_page().
 */
void putback_movable_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;

	list_for_each_entry_safe(page, page2, l, lru) {
		if (unlikely(PageHuge(page))) {
			putback_active_hugepage(page);
			continue;
		}
		list_del(&page->lru);
		/*
		 * We isolated a non-LRU movable page, so here we can use
		 * __PageMovable because an LRU page's mapping cannot have
		 * PAGE_MAPPING_MOVABLE.
		 */
		if (unlikely(__PageMovable(page))) {
			VM_BUG_ON_PAGE(!PageIsolated(page), page);
			lock_page(page);
			if (PageMovable(page))
				putback_movable_page(page);
			else
				ClearPageIsolated(page);
			unlock_page(page);
			put_page(page);
		} else {
			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
					page_is_file_lru(page), -thp_nr_pages(page));
			putback_lru_page(page);
		}
	}
}
EXPORT_SYMBOL_GPL(putback_movable_pages);
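
/*
 * Side note on the accounting above: page_is_file_lru() returns 0 or 1, and
 * NR_ISOLATED_FILE immediately follows NR_ISOLATED_ANON in enum
 * node_stat_item, so "NR_ISOLATED_ANON + page_is_file_lru(page)" selects the
 * right counter to undo the bump made when the page was isolated (e.g. by
 * callers of isolate_lru_page()). thp_nr_pages() makes the adjustment cover
 * every subpage of a THP at once.
 */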

/*
 * Restore a potential migration pte to a working pte entry
 */
static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
				 unsigned long addr, void *old)
{
	struct page_vma_mapped_walk pvmw = {
		.page = old,
		.vma = vma,
		.address = addr,
		.flags = PVMW_SYNC | PVMW_MIGRATION,
	};
	struct page *new;
	pte_t pte;
	swp_entry_t entry;

	VM_BUG_ON_PAGE(PageTail(page), page);
	while (page_vma_mapped_walk(&pvmw)) {
		if (PageKsm(page))
			new = page;
		else
			new = page - pvmw.page->index +
				linear_page_index(vma, pvmw.address);

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		/* PMD-mapped THP migration entry */
		if (!pvmw.pte) {
			VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
			remove_migration_pmd(&pvmw, new);
			continue;
		}
#endif

		get_page(new);
		pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
		if (pte_swp_soft_dirty(*pvmw.pte))
			pte = pte_mksoft_dirty(pte);

		/*
		 * Recheck VMA as permissions can change since migration started
		 */
		entry = pte_to_swp_entry(*pvmw.pte);
		if (is_write_migration_entry(entry))
			pte = maybe_mkwrite(pte, vma->vm_flags);
		else if (pte_swp_uffd_wp(*pvmw.pte))
			pte = pte_mkuffd_wp(pte);

		if (unlikely(is_device_private_page(new))) {
			entry = make_device_private_entry(new, pte_write(pte));
			pte = swp_entry_to_pte(entry);
			if (pte_swp_soft_dirty(*pvmw.pte))
				pte = pte_swp_mksoft_dirty(pte);
			if (pte_swp_uffd_wp(*pvmw.pte))
				pte = pte_swp_mkuffd_wp(pte);
		}

#ifdef CONFIG_HUGETLB_PAGE
		if (PageHuge(new)) {
			pte = pte_mkhuge(pte);
			pte = arch_make_huge_pte(pte, vma, new, 0);
			set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
			if (PageAnon(new))
				hugepage_add_anon_rmap(new, vma, pvmw.address);
			else
				page_dup_rmap(new, true);
		} else
#endif
		{
			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);

			if (PageAnon(new))
				page_add_anon_rmap(new, vma, pvmw.address, false);
			else
				page_add_file_rmap(new, false);
		}
		if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
			mlock_vma_page(new);

		if (PageTransHuge(page) && PageMlocked(page))
			clear_page_mlock(page);

		/* No need to invalidate - it was non-present before */
		update_mmu_cache(vma, pvmw.address, pvmw.pte);
	}

	return true;
}

/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */
void remove_migration_ptes(struct page *old, struct page *new, bool locked)
{
	struct rmap_walk_control rwc = {
		.rmap_one = remove_migration_pte,
		.arg = old,
	};

	if (locked)
		rmap_walk_locked(new, &rwc);
	else
		rmap_walk(new, &rwc);
}
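
/*
 * Usage note (a sketch of the common call, not a new code path): @old is the
 * page the migration entries still point at and @new is the already-copied
 * destination page; the ordinary migration path in this file calls
 *
 *	remove_migration_ptes(page, newpage, false);
 *
 * Pass locked == true only when the caller already holds the relevant rmap
 * lock for @new, which is what rmap_walk_locked() expects.
 */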

/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 */
void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
				spinlock_t *ptl)
{
	pte_t pte;
	swp_entry_t entry;
	struct page *page;

	spin_lock(ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto out;

	entry = pte_to_swp_entry(pte);
	if (!is_migration_entry(entry))
		goto out;

	page = migration_entry_to_page(entry);
	page = compound_head(page);

	/*
	 * Once page cache replacement of page migration started, page_count
	 * is zero; but we must not call put_and_wait_on_page_locked() without
	 * a ref. Use get_page_unless_zero(), and just fault again if it fails.
	 */
	if (!get_page_unless_zero(page))
		goto out;
	pte_unmap_unlock(ptep, ptl);
	trace_android_vh_waiting_for_page_migration(page);
	put_and_wait_on_page_locked(page);
	return;
out:
	pte_unmap_unlock(ptep, ptl);
}
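
/*
 * Callers typically reach this from the fault path: when do_swap_page()
 * decodes a migration entry instead of a real swap entry it waits here and
 * then retries the fault once the migrating page is unlocked. A rough sketch
 * of that caller (from the generic fault path, for orientation only):
 *
 *	if (unlikely(non_swap_entry(entry))) {
 *		if (is_migration_entry(entry))
 *			migration_entry_wait(vma->vm_mm, vmf->pmd,
 *					     vmf->address);
 *		...
 *	}
 */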

void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
				unsigned long address)
{
	spinlock_t *ptl = pte_lockptr(mm, pmd);
	pte_t *ptep = pte_offset_map(pmd, address);
	__migration_entry_wait(mm, ptep, ptl);
}

void migration_entry_wait_huge(struct vm_area_struct *vma,
		struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
	__migration_entry_wait(mm, pte, ptl);
}

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *ptl;
	struct page *page;

	ptl = pmd_lock(mm, pmd);
	if (!is_pmd_migration_entry(*pmd))
		goto unlock;
	page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
	if (!get_page_unless_zero(page))
		goto unlock;
	spin_unlock(ptl);
	put_and_wait_on_page_locked(page);
	return;
unlock:
	spin_unlock(ptl);
}
#endif

static int expected_page_refs(struct address_space *mapping, struct page *page)
{
	int expected_count = 1;

	/*
	 * Device private pages have an extra refcount as they are
	 * ZONE_DEVICE pages.
	 */
	expected_count += is_device_private_page(page);
	if (mapping)
		expected_count += thp_nr_pages(page) + page_has_private(page);

	return expected_count;
}
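
/*
 * Worked example of the count above (a sketch matching the comment before
 * migrate_page_move_mapping() below): for an order-0 page cache page with
 * buffer heads attached this is 1 (the isolation reference held by the
 * migration caller) + 1 (the page cache reference, thp_nr_pages() == 1)
 * + 1 (page_has_private() for the buffers) = 3; an anonymous page outside
 * the swap cache has no mapping and yields 1.
 */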

/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
 * 2 for pages with a mapping
 * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
 */
int migrate_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page, int extra_count)
{
	XA_STATE(xas, &mapping->i_pages, page_index(page));
	struct zone *oldzone, *newzone;
	int dirty;
	int expected_count = expected_page_refs(mapping, page) + extra_count;
	int nr = thp_nr_pages(page);

	if (!mapping) {
		/* Anonymous page without mapping */
		if (page_count(page) != expected_count)
			return -EAGAIN;

		/* No turning back from here */
		newpage->index = page->index;
		newpage->mapping = page->mapping;
		if (PageSwapBacked(page))
			__SetPageSwapBacked(newpage);

		return MIGRATEPAGE_SUCCESS;
	}

	oldzone = page_zone(page);
	newzone = page_zone(newpage);

	xas_lock_irq(&xas);
	if (page_count(page) != expected_count || xas_load(&xas) != page) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	if (!page_ref_freeze(page, expected_count)) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	/*
	 * Now we know that no one else is looking at the page:
	 * no turning back from here.
	 */
	newpage->index = page->index;
	newpage->mapping = page->mapping;
	page_ref_add(newpage, nr); /* add cache reference */
	if (PageSwapBacked(page)) {
		__SetPageSwapBacked(newpage);
		if (PageSwapCache(page)) {
			SetPageSwapCache(newpage);
			set_page_private(newpage, page_private(page));
		}
	} else {
		VM_BUG_ON_PAGE(PageSwapCache(page), page);
	}

	/* Move dirty while page refs frozen and newpage not yet exposed */
	dirty = PageDirty(page);
	if (dirty) {
		ClearPageDirty(page);
		SetPageDirty(newpage);
	}

	xas_store(&xas, newpage);
	if (PageTransHuge(page)) {
		int i;

		for (i = 1; i < nr; i++) {
			xas_next(&xas);
			xas_store(&xas, newpage);
		}
	}

	/*
	 * Drop cache reference from old page by unfreezing
	 * to one less reference.
	 * We know this isn't the last reference.
	 */
	page_ref_unfreeze(page, expected_count - nr);

	xas_unlock(&xas);
	/* Leave irq disabled to prevent preemption while updating stats */

	/*
	 * If moved to a different zone then also account
	 * the page for that zone. Other VM counters will be
	 * taken care of when we establish references to the
	 * new page and drop references to the old page.
	 *
	 * Note that anonymous pages are accounted for
	 * via NR_FILE_PAGES and NR_ANON_MAPPED if they
	 * are mapped to swap space.
	 */
	if (newzone != oldzone) {
		struct lruvec *old_lruvec, *new_lruvec;
		struct mem_cgroup *memcg;

		memcg = page_memcg(page);
		old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat);
		new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat);

		__mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr);
		__mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr);
		if (PageSwapBacked(page) && !PageSwapCache(page)) {
			__mod_lruvec_state(old_lruvec, NR_SHMEM, -nr);
			__mod_lruvec_state(new_lruvec, NR_SHMEM, nr);
		}
		if (dirty && mapping_can_writeback(mapping)) {
			__mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr);
			__mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr);
			__mod_lruvec_state(new_lruvec, NR_FILE_DIRTY, nr);
			__mod_zone_page_state(newzone, NR_ZONE_WRITE_PENDING, nr);
		}
	}
	local_irq_enable();

	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page_move_mapping);
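
/*
 * Refcount sketch for the common case (order-0 page cache page, no private
 * data, extra_count == 0): expected_count is 2, so page_ref_freeze(page, 2)
 * only succeeds while the caller's isolation reference and the single page
 * cache reference are the last ones left. newpage then takes over the cache
 * reference via page_ref_add(newpage, nr), and page_ref_unfreeze(page,
 * expected_count - nr) leaves the old page holding just the caller's
 * reference, i.e. the cache reference has effectively moved to newpage.
 */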

/*
 * The expected number of remaining references is the same as that
 * of migrate_page_move_mapping().
 */
int migrate_huge_page_move_mapping(struct address_space *mapping,
				   struct page *newpage, struct page *page)
{
	XA_STATE(xas, &mapping->i_pages, page_index(page));
	int expected_count;

	xas_lock_irq(&xas);
	expected_count = 2 + page_has_private(page);
	if (page_count(page) != expected_count || xas_load(&xas) != page) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	if (!page_ref_freeze(page, expected_count)) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	newpage->index = page->index;
	newpage->mapping = page->mapping;

	get_page(newpage);

	xas_store(&xas, newpage);

	page_ref_unfreeze(page, expected_count - 1);

	xas_unlock_irq(&xas);

	return MIGRATEPAGE_SUCCESS;
}

/*
 * Gigantic pages are so large that we do not guarantee that page++ pointer
 * arithmetic will work across the entire page.  We need something more
 * specialized.
 */
static void __copy_gigantic_page(struct page *dst, struct page *src,
				int nr_pages)
{
	int i;
	struct page *dst_base = dst;
	struct page *src_base = src;

	for (i = 0; i < nr_pages; ) {
		cond_resched();
		copy_highpage(dst, src);

		i++;
		dst = mem_map_next(dst, dst_base, i);
		src = mem_map_next(src, src_base, i);
	}
}

static void copy_huge_page(struct page *dst, struct page *src)
{
	int i;
	int nr_pages;

	if (PageHuge(src)) {
		/* hugetlbfs page */
		struct hstate *h = page_hstate(src);
		nr_pages = pages_per_huge_page(h);

		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
			__copy_gigantic_page(dst, src, nr_pages);
			return;
		}
	} else {
		/* thp page */
		BUG_ON(!PageTransHuge(src));
		nr_pages = thp_nr_pages(src);
	}

	for (i = 0; i < nr_pages; i++) {
		cond_resched();
		copy_highpage(dst + i, src + i);
	}
}

/*
 * Copy the page to its new location
 */
void migrate_page_states(struct page *newpage, struct page *page)
{
	int cpupid;

	trace_android_vh_migrate_page_states(page, newpage);

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (TestClearPageActive(page)) {
		VM_BUG_ON_PAGE(PageUnevictable(page), page);
		SetPageActive(newpage);
	} else if (TestClearPageUnevictable(page))
		SetPageUnevictable(newpage);
	if (PageWorkingset(page))
		SetPageWorkingset(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	/* Move dirty on pages not done by migrate_page_move_mapping() */
	if (PageDirty(page))
		SetPageDirty(newpage);

	if (page_is_young(page))
		set_page_young(newpage);
	if (page_is_idle(page))
		set_page_idle(newpage);

	/*
	 * Copy NUMA information to the new page, to prevent over-eager
	 * future migrations of this same page.
	 */
	cpupid = page_cpupid_xchg_last(page, -1);
	page_cpupid_xchg_last(newpage, cpupid);

	ksm_migrate_page(newpage, page);
	/*
	 * Please do not reorder this without considering how mm/ksm.c's
	 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
	 */
	if (PageSwapCache(page))
		ClearPageSwapCache(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);

	/*
	 * PG_readahead shares the same bit with PG_reclaim.  The above
	 * end_page_writeback() may clear PG_readahead mistakenly, so set the
	 * bit after that.
	 */
	if (PageReadahead(page))
		SetPageReadahead(newpage);

	copy_page_owner(page, newpage);

	if (!PageHuge(page))
		mem_cgroup_migrate(page, newpage);
}
EXPORT_SYMBOL(migrate_page_states);

void migrate_page_copy(struct page *newpage, struct page *page)
{
	if (PageHuge(page) || PageTransHuge(page))
		copy_huge_page(newpage, page);
	else
		copy_highpage(newpage, page);

	migrate_page_states(newpage, page);
}
EXPORT_SYMBOL(migrate_page_copy);

/************************************************************
 *                    Migration functions
 ***********************************************************/

/*
 * Common logic to directly migrate a single LRU page suitable for
 * pages that do not use PagePrivate/PagePrivate2.
 *
 * Pages are locked upon entry and exit.
 */
int migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_move_mapping(mapping, newpage, page, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);
	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page);
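
/*
 * Note: filesystems whose pages carry no fs-private data commonly wire this
 * helper up directly in their address_space_operations
 * (".migratepage = migrate_page,"); mappings that provide no callback at all
 * go through fallback_migrate_page() instead, which ends up here for clean
 * pages. MIGRATE_SYNC_NO_COPY skips the data copy above because that mode is
 * meant for device memory migration, where the caller performs the copy
 * itself (possibly via a DMA engine) after the mapping has been switched.
 */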
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) #ifdef CONFIG_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) /* Returns true if all buffers are successfully locked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) static bool buffer_migrate_lock_buffers(struct buffer_head *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 							enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 	struct buffer_head *bh = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 	/* Simple case, sync compaction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 	if (mode != MIGRATE_ASYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 		do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 			lock_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 			bh = bh->b_this_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 		} while (bh != head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 	/* async case, we cannot block on lock_buffer so use trylock_buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 		if (!trylock_buffer(bh)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 			 * We failed to lock the buffer and cannot stall in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 			 * async migration. Release the taken locks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 			struct buffer_head *failed_bh = bh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 			bh = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 			while (bh != failed_bh) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 				unlock_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 				bh = bh->b_this_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 		bh = bh->b_this_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	} while (bh != head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) static int __buffer_migrate_page(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 		struct page *newpage, struct page *page, enum migrate_mode mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 		bool check_refs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 	struct buffer_head *bh, *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 	int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	int expected_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	if (!page_has_buffers(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 		return migrate_page(mapping, newpage, page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 	/* Check whether page does not have extra refs before we do more work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	expected_count = expected_page_refs(mapping, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 	if (page_count(page) != expected_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	head = page_buffers(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	if (!buffer_migrate_lock_buffers(head, mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	if (check_refs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 		bool busy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 		bool invalidated = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) recheck_buffers:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 		busy = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 		spin_lock(&mapping->private_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 		bh = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 		do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 			if (atomic_read(&bh->b_count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 				busy = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 			bh = bh->b_this_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 		} while (bh != head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 		if (busy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 			if (invalidated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 				rc = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 				goto unlock_buffers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 			spin_unlock(&mapping->private_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 			invalidate_bh_lrus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 			invalidated = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 			goto recheck_buffers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	rc = migrate_page_move_mapping(mapping, newpage, page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 	if (rc != MIGRATEPAGE_SUCCESS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 		goto unlock_buffers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 	attach_page_private(newpage, detach_page_private(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	bh = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 		set_bh_page(bh, newpage, bh_offset(bh));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 		bh = bh->b_this_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 	} while (bh != head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 	if (mode != MIGRATE_SYNC_NO_COPY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 		migrate_page_copy(newpage, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 		migrate_page_states(newpage, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 	rc = MIGRATEPAGE_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) unlock_buffers:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 	if (check_refs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 		spin_unlock(&mapping->private_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 	bh = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 		unlock_buffer(bh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 		bh = bh->b_this_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 	} while (bh != head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 	return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819)  * Migration function for pages with buffers. This function can only be used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820)  * if the underlying filesystem guarantees that no other references to "page"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821)  * exist. For example, attached buffer heads are accessed only under page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) int buffer_migrate_page(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 		struct page *newpage, struct page *page, enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	return __buffer_migrate_page(mapping, newpage, page, mode, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) EXPORT_SYMBOL(buffer_migrate_page);
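/*
 * Illustrative sketch (not part of the original source): a filesystem whose
 * buffer heads are only touched under the page lock could plug this helper
 * straight into its address_space_operations. "example_readpage",
 * "example_writepage" and "example_aops" are hypothetical names used only
 * for this example.
 */
#if 0
static const struct address_space_operations example_aops = {
	.readpage	= example_readpage,
	.writepage	= example_writepage,
	.migratepage	= buffer_migrate_page,
};
#endif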
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831)  * Same as above except that this variant is more careful and checks that there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832)  * are also no buffer head references. This function is the right one for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833)  * mappings where buffer heads are directly looked up and referenced (such as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834)  * block device mappings).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) int buffer_migrate_page_norefs(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 		struct page *newpage, struct page *page, enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 	return __buffer_migrate_page(mapping, newpage, page, mode, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) #endif
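/*
 * Illustrative note (assumption, not taken from this file): a mapping whose
 * buffer heads are looked up and referenced directly - block device mappings
 * being the typical case mentioned above - would hook up the careful variant
 * instead:
 *
 *	.migratepage	= buffer_migrate_page_norefs,
 *
 * so that migration backs off with -EAGAIN while any buffer head still holds
 * an elevated b_count.
 */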
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844)  * Writeback a page to clean the dirty state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) static int writeout(struct address_space *mapping, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	struct writeback_control wbc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 		.sync_mode = WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 		.nr_to_write = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 		.range_start = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 		.range_end = LLONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		.for_reclaim = 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	if (!mapping->a_ops->writepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 		/* No write method for the address space */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 	if (!clear_page_dirty_for_io(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 		/* Someone else already triggered a write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	 * A dirty page may imply that the underlying filesystem has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	 * the page on some queue. So the page must be clean for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	 * migration. Writeout may mean we lose the lock and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	 * page state is no longer what we checked for earlier.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	 * At this point we know that the migration attempt cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	 * be successful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 	remove_migration_ptes(page, page, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	rc = mapping->a_ops->writepage(page, &wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	if (rc != AOP_WRITEPAGE_ACTIVATE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 		/* unlocked. Relock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 		lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	return (rc < 0) ? -EIO : -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885)  * Default handling if a filesystem does not provide a migration function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) static int fallback_migrate_page(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	struct page *newpage, struct page *page, enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	if (PageDirty(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		/* Only writeback pages in full synchronous migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 		switch (mode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 		case MIGRATE_SYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 		case MIGRATE_SYNC_NO_COPY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 			return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 		return writeout(mapping, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	 * Buffers may be managed in a filesystem specific way.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	 * We must have no buffers or drop them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	if (page_has_private(page) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	    !try_to_release_page(page, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 		return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	return migrate_page(mapping, newpage, page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914)  * Move a page to a newly allocated page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915)  * The page is locked and all ptes have been successfully removed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917)  * The new page will have replaced the old page if this function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918)  * is successful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920)  * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921)  *   < 0 - error code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922)  *  MIGRATEPAGE_SUCCESS - success
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) static int move_to_new_page(struct page *newpage, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 				enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	struct address_space *mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 	int rc = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	bool is_lru = !__PageMovable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	mapping = page_mapping(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	if (likely(is_lru)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 		if (!mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 			rc = migrate_page(mapping, newpage, page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 		else if (mapping->a_ops->migratepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 			 * Most pages have a mapping and most filesystems
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 			 * provide a migratepage callback. Anonymous pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 			 * are part of swap space, which also has its own
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 			 * migratepage callback. This is the most common path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 			 * for page migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 			rc = mapping->a_ops->migratepage(mapping, newpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 							page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 			rc = fallback_migrate_page(mapping, newpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 							page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 		 * A non-LRU page could have been released after the isolation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 		 * step. In that case, we shouldn't try migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 		VM_BUG_ON_PAGE(!PageIsolated(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 		if (!PageMovable(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 			rc = MIGRATEPAGE_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 			ClearPageIsolated(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 		rc = mapping->a_ops->migratepage(mapping, newpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 						page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 		WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 			!PageIsolated(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 	 * When successful, old pagecache page->mapping must be cleared before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	 * page is freed; but stats require that PageAnon be left as PageAnon.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 	if (rc == MIGRATEPAGE_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 		if (__PageMovable(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 			VM_BUG_ON_PAGE(!PageIsolated(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 			 * We clear PG_movable under page_lock so that no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 			 * compactor can try to migrate this page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 			ClearPageIsolated(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 		 * Anonymous and movable page->mapping will be cleared by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 		 * free_pages_prepare(), so don't reset it here; keeping it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 		 * lets type checks such as PageAnon() keep working.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 		if (!PageMappingFlags(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 			page->mapping = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 		if (likely(!is_zone_device_page(newpage)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 			flush_dcache_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) }
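/*
 * Illustrative sketch (assumption, not part of this file): a driver that
 * wants its pages handled by the non-LRU branch above marks them movable
 * and provides isolate/migrate/putback callbacks in its aops. The names
 * "example_isolate", "example_migrate", "example_putback" and
 * "example_mapping" are hypothetical.
 */
#if 0
static const struct address_space_operations example_movable_aops = {
	.isolate_page	= example_isolate,
	.migratepage	= example_migrate,
	.putback_page	= example_putback,
};

/* Called with the page locked. */
static void example_mark_movable(struct page *page,
				 struct address_space *example_mapping)
{
	example_mapping->a_ops = &example_movable_aops;
	/* Sets PAGE_MAPPING_MOVABLE so __PageMovable() sees the page. */
	__SetPageMovable(page, example_mapping);
}
#endif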
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) static int __unmap_and_move(struct page *page, struct page *newpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 				int force, enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	int rc = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 	int page_was_mapped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 	struct anon_vma *anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	bool is_lru = !__PageMovable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 	if (!trylock_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 		if (!force || mode == MIGRATE_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 		 * It's not safe for direct compaction to call lock_page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 		 * For example, during page readahead pages are added locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 		 * to the LRU. Later, when the IO completes the pages are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 		 * marked uptodate and unlocked. However, the queueing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 		 * could be merging multiple pages for one bio (e.g.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 		 * mpage_readahead). If an allocation happens for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 		 * second or third page, the process can end up locking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 		 * the same page twice and deadlocking. Rather than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 		 * trying to be clever about what pages can be locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 		 * avoid the use of lock_page for direct compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 		 * altogether.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 		if (current->flags & PF_MEMALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 		lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 	if (PageWriteback(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 		 * Only in the case of a full synchronous migration is it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 		 * necessary to wait for PageWriteback. In the async case,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 		 * the retry loop is too short and in the sync-light case,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		 * the overhead of stalling is too much.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 		switch (mode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 		case MIGRATE_SYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 		case MIGRATE_SYNC_NO_COPY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 			rc = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 			goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 		if (!force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 			goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 		wait_on_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 	 * After try_to_unmap(), page->mapcount drops to 0 here. In that case we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	 * could not notice if the anon_vma were freed while we migrate the page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 	 * so page_get_anon_vma() delays freeing of the anon_vma until the end of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	 * migration. File cache pages are no problem because they are protected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 	 * by the page lock (and may use writepage() or lock_page()) during
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 	 * migration; so only anon pages need this care here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 	 * Only page_get_anon_vma() understands the subtleties of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 	 * getting a hold on an anon_vma from outside one of its mms.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	 * But if we cannot get anon_vma, then we won't need it anyway,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	 * because that implies that the anon page is no longer mapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	 * (and cannot be remapped so long as we hold the page lock).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 	if (PageAnon(page) && !PageKsm(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 		anon_vma = page_get_anon_vma(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	 * Block others from accessing the new page when we get around to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	 * establishing additional references. We are usually the only one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 	 * holding a reference to newpage at this point. We used to have a BUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 	 * here if trylock_page(newpage) fails, but would like to allow for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	 * cases where there might be a race with the previous use of newpage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	 * This is much like races on refcount of oldpage: just don't BUG().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	if (unlikely(!trylock_page(newpage)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 	if (unlikely(!is_lru)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 		rc = move_to_new_page(newpage, page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 		goto out_unlock_both;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 	 * Corner case handling:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 	 * 1. When a new swap-cache page is read in, it is added to the LRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 	 * and treated as swapcache but it has no rmap yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 	 * Calling try_to_unmap() against a page->mapping==NULL page will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	 * trigger a BUG.  So handle it here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	 * 2. An orphaned page (see truncate_complete_page) might have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	 * fs-private metadata. The page can be picked up due to memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	 * offlining.  Everywhere else except page reclaim, the page is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 	 * invisible to the vm, so the page cannot be migrated.  So try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	 * free the metadata, so the page can be freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	if (!page->mapping) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 		VM_BUG_ON_PAGE(PageAnon(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 		if (page_has_private(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 			try_to_free_buffers(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 			goto out_unlock_both;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 	} else if (page_mapped(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 		/* Establish migration ptes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 		VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 				page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 		try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 		page_was_mapped = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	if (!page_mapped(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 		rc = move_to_new_page(newpage, page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	if (page_was_mapped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 		remove_migration_ptes(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 			rc == MIGRATEPAGE_SUCCESS ? newpage : page, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) out_unlock_both:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	unlock_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	/* Drop an anon_vma reference if we took one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	if (anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 		put_anon_vma(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 	unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 	 * If migration was successful, drop our reference to the newpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	 * this will not free the page because the new page owner holds its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	 * own reference. Also, if it is an LRU page, add it back to the LRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 	 * list here. Use the old state of the isolated source page to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	 * determine whether we migrated an LRU page. newpage was already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	 * unlocked and possibly modified by its owner - don't rely on its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	 * state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	if (rc == MIGRATEPAGE_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 		if (unlikely(!is_lru))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 			put_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 			putback_lru_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146)  * Obtain the lock on the page, remove all ptes and migrate the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)  * to the newly allocated page in newpage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) static int unmap_and_move(new_page_t get_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 				   free_page_t put_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 				   unsigned long private, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 				   int force, enum migrate_mode mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 				   enum migrate_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	int rc = MIGRATEPAGE_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	struct page *newpage = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	if (!thp_migration_supported() && PageTransHuge(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	if (page_count(page) == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 		/* page was freed from under us. So we are done. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 		ClearPageActive(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 		ClearPageUnevictable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 		if (unlikely(__PageMovable(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 			lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 			if (!PageMovable(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 				ClearPageIsolated(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	newpage = get_new_page(page, private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	if (!newpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	rc = __unmap_and_move(page, newpage, force, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	if (rc == MIGRATEPAGE_SUCCESS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 		set_page_owner_migrate_reason(newpage, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	if (rc != -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 		 * A page that has been migrated has all references
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 		 * removed and will be freed. A page that has not been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 		 * migrated will have kept its references and be restored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 		list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 		 * Compaction can also migrate non-LRU pages, which are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 		 * not accounted in NR_ISOLATED_*. They can be recognized
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 		 * as __PageMovable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 		if (likely(!__PageMovable(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 					page_is_file_lru(page), -thp_nr_pages(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	 * If migration was successful, release the reference grabbed during
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	 * isolation. Otherwise, restore the page to the right list unless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	 * we want to retry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	if (rc == MIGRATEPAGE_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 		if (reason != MR_MEMORY_FAILURE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 			 * We release the page in page_handle_poison.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 		if (rc != -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 			if (likely(!__PageMovable(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 				putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 				goto put_new;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 			lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 			if (PageMovable(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 				putback_movable_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 				ClearPageIsolated(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) put_new:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 		if (put_new_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 			put_new_page(newpage, private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 			put_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238)  * Counterpart of unmap_and_move() for hugepage migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240)  * This function doesn't wait for the completion of hugepage I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)  * because there is no race between I/O and migration for hugepages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242)  * Note that currently hugepage I/O occurs only in direct I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)  * where no lock is held and PG_writeback is irrelevant,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)  * and the writeback status of all subpages is counted in the reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245)  * count of the head page (i.e. if all subpages of a 2MB hugepage are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246)  * under direct I/O, the reference count of the head page is 512 and a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247)  * bit more). This means that when we try to migrate a hugepage whose
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)  * subpages are doing direct I/O, some references remain after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)  * try_to_unmap() and hugepage migration fails without data corruption.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)  * There is also no race when direct I/O is issued on a page under migration,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252)  * because the pte is then replaced with a migration swap entry and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)  * direct I/O code will wait in the page fault for migration to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) static int unmap_and_move_huge_page(new_page_t get_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 				free_page_t put_new_page, unsigned long private,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 				struct page *hpage, int force,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 				enum migrate_mode mode, int reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 	int rc = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	int page_was_mapped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	struct page *new_hpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	struct anon_vma *anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	struct address_space *mapping = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 	 * Migratability of hugepages depends on the architecture and hugepage size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 	 * This check is necessary because some callers of hugepage migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 	 * like soft offline and memory hotremove don't walk through page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 	 * tables or check whether the hugepage is pmd-based or not before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 	 * kicking migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	if (!hugepage_migration_supported(page_hstate(hpage))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 		putback_active_hugepage(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 		return -ENOSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	new_hpage = get_new_page(hpage, private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	if (!new_hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	if (!trylock_page(hpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 		if (!force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 		switch (mode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 		case MIGRATE_SYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 		case MIGRATE_SYNC_NO_COPY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 		lock_page(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 	 * Check for pages which are in the process of being freed.  Without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	 * page_mapping() set, the hugetlbfs-specific move-page routine will not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 	 * be called and we could leak usage counts for subpools.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 	if (page_private(hpage) && !page_mapping(hpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 		rc = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 	if (PageAnon(hpage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 		anon_vma = page_get_anon_vma(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	if (unlikely(!trylock_page(new_hpage)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 		goto put_anon;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 	if (page_mapped(hpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 		bool mapping_locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 		enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 		if (!PageAnon(hpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 			 * In shared mappings, try_to_unmap could potentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 			 * call huge_pmd_unshare.  Because of this, take
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 			 * semaphore in write mode here and set TTU_RMAP_LOCKED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 			 * to let lower levels know we have taken the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 			mapping = hugetlb_page_mapping_lock_write(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 			if (unlikely(!mapping))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 				goto unlock_put_anon;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 			mapping_locked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 			ttu |= TTU_RMAP_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 		try_to_unmap(hpage, ttu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 		page_was_mapped = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 		if (mapping_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 			i_mmap_unlock_write(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	if (!page_mapped(hpage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 		rc = move_to_new_page(new_hpage, hpage, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	if (page_was_mapped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 		remove_migration_ptes(hpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 			rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) unlock_put_anon:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	unlock_page(new_hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) put_anon:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 	if (anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 		put_anon_vma(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	if (rc == MIGRATEPAGE_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 		move_hugetlb_state(hpage, new_hpage, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 		put_new_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	unlock_page(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 	if (rc != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 		putback_active_hugepage(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 	 * If migration was not successful and there's a freeing callback, use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	 * it.  Otherwise, put_page() will drop the reference grabbed during
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 	 * isolation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 	if (put_new_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 		put_new_page(new_hpage, private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 		putback_active_hugepage(new_hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 	return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376)  * migrate_pages - migrate the pages specified in a list, to the free pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377)  *		   supplied as the target for the page migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379)  * @from:		The list of pages to be migrated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380)  * @get_new_page:	The function used to allocate free pages to be used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381)  *			as the target of the page migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382)  * @put_new_page:	The function used to free target pages if migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383)  *			fails, or NULL if no special handling is necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384)  * @private:		Private data to be passed on to get_new_page()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385)  * @mode:		The migration mode that specifies the constraints for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)  *			page migration, if any.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387)  * @reason:		The reason for page migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389)  * The function returns after 10 attempts or when no pages are movable any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390)  * more because the list has become empty or no retryable pages remain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391)  * The caller should call putback_movable_pages() to return pages to the LRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392)  * or free list only if ret != 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394)  * Returns the number of pages that were not migrated, or an error code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) int migrate_pages(struct list_head *from, new_page_t get_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 		free_page_t put_new_page, unsigned long private,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 		enum migrate_mode mode, int reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 	int retry = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 	int thp_retry = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 	int nr_failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 	int nr_succeeded = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 	int nr_thp_succeeded = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 	int nr_thp_failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	int nr_thp_split = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	int pass = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 	bool is_thp = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 	struct page *page2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 	int swapwrite = current->flags & PF_SWAPWRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 	int rc, nr_subpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	trace_mm_migrate_pages_start(mode, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 	if (!swapwrite)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 		current->flags |= PF_SWAPWRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 	for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 		retry = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 		thp_retry = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 		list_for_each_entry_safe(page, page2, from, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 			 * THP statistics are based on the source huge page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 			 * Capture required information that might get lost
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 			 * during migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 			is_thp = PageTransHuge(page) && !PageHuge(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 			nr_subpages = thp_nr_pages(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 			if (PageHuge(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 				rc = unmap_and_move_huge_page(get_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 						put_new_page, private, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 						pass > 2, mode, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 				rc = unmap_and_move(get_new_page, put_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 						private, page, pass > 2, mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 						reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 			switch(rc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 			case -ENOMEM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 				 * THP migration might be unsupported or the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 				 * allocation could've failed, so we should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 				 * retry on the same page with the THP split
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 				 * into base pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 				 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 				 * Head page is retried immediately and tail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 				 * pages are added to the tail of the list so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 				 * we encounter them after the rest of the list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 				 * is processed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 				if (is_thp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 					lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 					rc = split_huge_page_to_list(page, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 					unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 					if (!rc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 						list_safe_reset_next(page, page2, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 						nr_thp_split++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 						goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 					}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 					nr_thp_failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 					nr_failed += nr_subpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 					goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 				nr_failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 			case -EAGAIN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 				if (is_thp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 					thp_retry++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 				retry++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 			case MIGRATEPAGE_SUCCESS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 				if (is_thp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 					nr_thp_succeeded++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 					nr_succeeded += nr_subpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 				nr_succeeded++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 			default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 				 * Permanent failure (-EBUSY, -ENOSYS, etc.):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 				 * unlike -EAGAIN case, the failed page is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 				 * removed from migration page list and not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 				 * retried in the next outer loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 				if (is_thp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 					nr_thp_failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 					nr_failed += nr_subpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 				nr_failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 	nr_failed += retry + thp_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	nr_thp_failed += thp_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 	rc = nr_failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 	count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 	count_vm_events(PGMIGRATE_FAIL, nr_failed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 	count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 	trace_mm_migrate_pages(nr_succeeded, nr_failed, nr_thp_succeeded,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 			       nr_thp_failed, nr_thp_split, mode, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 	if (!swapwrite)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 		current->flags &= ~PF_SWAPWRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) EXPORT_SYMBOL_GPL(migrate_pages);
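/*
 * Illustrative sketch (assumption, not part of this file): a minimal caller
 * with its own get_new_page/put_new_page pair. "example_alloc",
 * "example_free" and "example_migrate_list" are hypothetical; in-tree
 * callers typically pass alloc_migration_target() plus a
 * migration_target_control cookie instead, as do_move_pages_to_node()
 * below does.
 */
#if 0
static struct page *example_alloc(struct page *page, unsigned long private)
{
	int nid = (int)private;

	/* Allocate the target page on the requested node. */
	return alloc_pages_node(nid, GFP_HIGHUSER_MOVABLE, 0);
}

static void example_free(struct page *page, unsigned long private)
{
	__free_page(page);
}

static int example_migrate_list(struct list_head *pages, int nid)
{
	int ret;

	ret = migrate_pages(pages, example_alloc, example_free,
			    (unsigned long)nid, MIGRATE_SYNC, MR_SYSCALL);
	/* Per the comment above: put back whatever was not migrated. */
	if (ret)
		putback_movable_pages(pages);
	return ret;
}
#endif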
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) struct page *alloc_migration_target(struct page *page, unsigned long private)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 	struct migration_target_control *mtc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 	gfp_t gfp_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 	unsigned int order = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 	struct page *new_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 	int zidx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 	mtc = (struct migration_target_control *)private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 	gfp_mask = mtc->gfp_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 	nid = mtc->nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 	if (nid == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 		nid = page_to_nid(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 	if (PageHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 		struct hstate *h = page_hstate(compound_head(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 		gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 		return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 	if (PageTransHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 		 * clear __GFP_RECLAIM to make the migration callback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 		 * consistent with regular THP allocations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 		gfp_mask &= ~__GFP_RECLAIM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 		gfp_mask |= GFP_TRANSHUGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 		order = HPAGE_PMD_ORDER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 	zidx = zone_idx(page_zone(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 	if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 		gfp_mask |= __GFP_HIGHMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 	new_page = __alloc_pages_nodemask(gfp_mask, order, nid, mtc->nmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 	if (new_page && PageTransHuge(new_page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 		prep_transhuge_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 	return new_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) }
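
/*
 * Illustrative sketch (editor's note, not part of the original source):
 * a typical caller pairs alloc_migration_target() with migrate_pages()
 * by passing a struct migration_target_control through the opaque
 * 'private' argument, e.g. for a previously isolated 'pagelist' and a
 * target node 'nid' (both names are placeholders here):
 *
 *	struct migration_target_control mtc = {
 *		.nid = nid,
 *		.gfp_mask = GFP_HIGHUSER_MOVABLE,
 *	};
 *
 *	migrate_pages(&pagelist, alloc_migration_target, NULL,
 *		      (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
 *
 * See do_move_pages_to_node() below for the in-tree usage this mirrors.
 */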
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) static int store_status(int __user *status, int start, int value, int nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 	while (nr-- > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 		if (put_user(value, status + start))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 		start++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) static int do_move_pages_to_node(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 		struct list_head *pagelist, int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 	struct migration_target_control mtc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 		.nid = node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 		.gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 	err = migrate_pages(pagelist, alloc_migration_target, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 			(unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 		putback_movable_pages(pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596)  * Resolves the given address to a struct page, isolates it from the LRU and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597)  * adds it to the given pagelist.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598)  * Returns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599)  *     errno - if the page cannot be found/isolated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600)  *     0 - when it doesn't have to be migrated because it is already on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601)  *         target node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602)  *     1 - when it has been queued
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 		int node, struct list_head *pagelist, bool migrate_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 	struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 	unsigned int follflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 	err = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	vma = find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 	if (!vma || addr < vma->vm_start || !vma_migratable(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 	/* FOLL_DUMP to ignore special (like zero) pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 	follflags = FOLL_GET | FOLL_DUMP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 	page = follow_page(vma, addr, follflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 	err = PTR_ERR(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 	if (IS_ERR(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 	err = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 	if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 	err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 	if (page_to_nid(page) == node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 		goto out_putpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 	err = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 	if (page_mapcount(page) > 1 && !migrate_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 		goto out_putpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 	if (PageHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 		if (PageHead(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 			isolate_huge_page(page, pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 			err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 		struct page *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 		head = compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 		err = isolate_lru_page(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 			goto out_putpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 		err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 		list_add_tail(&head->lru, pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 		mod_node_page_state(page_pgdat(head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 			NR_ISOLATED_ANON + page_is_file_lru(head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 			thp_nr_pages(head));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) out_putpage:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 	 * Either remove the duplicate refcount from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 	 * isolate_lru_page() or drop the page ref if it was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 	 * not isolated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 	put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 	mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) static int move_pages_and_store_status(struct mm_struct *mm, int node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 		struct list_head *pagelist, int __user *status,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 		int start, int i, unsigned long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 	if (list_empty(pagelist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 	err = do_move_pages_to_node(mm, pagelist, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 	if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 		 * A positive err means the number of pages that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 		 * failed to migrate.  Since we are going to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 		 * abort and return the number of non-migrated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 		 * pages, we need to include the rest of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 		 * nr_pages that have not been attempted as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 		 * well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 		if (err > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 			err += nr_pages - i - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 		return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 	return store_status(status, start, node, i - start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696)  * Migrate an array of page addresses onto an array of nodes and fill
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697)  * in the corresponding array of status.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 			 unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 			 const void __user * __user *pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 			 const int __user *nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 			 int __user *status, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 	int current_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 	LIST_HEAD(pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 	int start, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 	int err = 0, err1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 	lru_cache_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 	for (i = start = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 		const void __user *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 		unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 		int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 		err = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 		if (get_user(p, pages + i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 			goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 		if (get_user(node, nodes + i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 			goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 		addr = (unsigned long)untagged_addr(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 		err = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 		if (node < 0 || node >= MAX_NUMNODES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 			goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 		if (!node_state(node, N_MEMORY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 			goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 		err = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 		if (!node_isset(node, task_nodes))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 			goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 		if (current_node == NUMA_NO_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 			current_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 			start = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 		} else if (node != current_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 			err = move_pages_and_store_status(mm, current_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 					&pagelist, status, start, i, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 			if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 			start = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 			current_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 		 * Errors in the page lookup or isolation are not fatal and we simply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 		 * report them via status
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 		err = add_page_for_migration(mm, addr, current_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 				&pagelist, flags & MPOL_MF_MOVE_ALL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 		if (err > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 			/* The page is successfully queued for migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 		 * If the page is already on the target node (!err), store the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 		 * node, otherwise, store the err.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) 		err = store_status(status, i, err ? : current_node, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 			goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 		err = move_pages_and_store_status(mm, current_node, &pagelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 				status, start, i, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) 		current_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) out_flush:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) 	/* Make sure we do not overwrite the existing error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 	err1 = move_pages_and_store_status(mm, current_node, &pagelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 				status, start, i, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 	if (err >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 		err = err1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 	lru_cache_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784)  * Determine the nodes of an array of pages and store them in an array of status.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 				const void __user **pages, int *status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 	unsigned long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 	mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 	for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 		unsigned long addr = (unsigned long)(*pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 		struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 		struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 		int err = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 		vma = find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 		if (!vma || addr < vma->vm_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 			goto set_status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 		/* FOLL_DUMP to ignore special (like zero) pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 		page = follow_page(vma, addr, FOLL_DUMP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 		err = PTR_ERR(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 		if (IS_ERR(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 			goto set_status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 		err = page ? page_to_nid(page) : -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) set_status:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 		*status = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 		pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) 		status++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 	mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822)  * Determine the nodes of a user array of pages and store them in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823)  * a user array of status.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 			 const void __user * __user *pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 			 int __user *status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) #define DO_PAGES_STAT_CHUNK_NR 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 	while (nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 		unsigned long chunk_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 		chunk_nr = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 			chunk_nr = DO_PAGES_STAT_CHUNK_NR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 		pages += chunk_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 		status += chunk_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 		nr_pages -= chunk_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 	return nr_pages ? -EFAULT : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 	struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 	struct mm_struct *mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 	 * There is no need to check if the current process has the right to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 	 * modify the specified process when they are the same.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	if (!pid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 		mmget(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 		*mem_nodes = cpuset_mems_allowed(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 		return current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 	/* Find the mm_struct */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 	task = find_task_by_vpid(pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 	if (!task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 		return ERR_PTR(-ESRCH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 	get_task_struct(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 	 * Check if this process has the right to modify the specified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 	 * process. Use the regular "ptrace_may_access()" checks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 		mm = ERR_PTR(-EPERM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 	mm = ERR_PTR(security_task_movememory(task));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 	if (IS_ERR(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 	*mem_nodes = cpuset_mems_allowed(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) 	mm = get_task_mm(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 	put_task_struct(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) 	if (!mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 		mm = ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 	return mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903)  * Move a list of pages in the address space of the currently executing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904)  * process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 			     const void __user * __user *pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 			     const int __user *nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 			     int __user *status, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 	struct mm_struct *mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 	nodemask_t task_nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 	/* Check flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 		return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 	mm = find_mm_struct(pid, &task_nodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 	if (IS_ERR(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 		return PTR_ERR(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 	if (nodes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 		err = do_pages_move(mm, task_nodes, nr_pages, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 				    nodes, status, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 		err = do_pages_stat(mm, nr_pages, pages, status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 	mmput(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 		const void __user * __user *, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 		const int __user *, nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 		int __user *, status, int, flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 	return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) }
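
/*
 * Userspace usage sketch (editor's note, not part of the original source):
 * this syscall is normally reached through libnuma's move_pages() wrapper
 * declared in <numaif.h>.  Moving one page backing 'buf' to node 0 and
 * then querying its placement might look like ('buf' is a placeholder):
 *
 *	void *pages[1] = { buf };
 *	int nodes[1] = { 0 };
 *	int status[1];
 *
 *	move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE);
 *	move_pages(0, 1, pages, NULL, status, 0);
 *
 * The second call, with nodes == NULL, only queries placement (see
 * do_pages_stat() above).  A pid of 0 refers to the calling process.
 * On return each status[i] holds the page's node id or a negative errno
 * (e.g. -ENOENT when the address is not mapped to a page).
 */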
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) 		       compat_uptr_t __user *, pages32,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 		       const int __user *, nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 		       int __user *, status,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 		       int, flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 	const void __user * __user *pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 	pages = compat_alloc_user_space(nr_pages * sizeof(void *));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 	for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 		compat_uptr_t p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 		if (get_user(p, pages32 + i) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 			put_user(compat_ptr(p), pages + i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 	return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) #endif /* CONFIG_COMPAT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) #ifdef CONFIG_NUMA_BALANCING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968)  * Returns true if this is a safe migration target node for misplaced NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969)  * pages. Currently it only checks the watermarks, which is a crude check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 				   unsigned long nr_migrate_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 	int z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 	for (z = pgdat->nr_zones - 1; z >= 0; z--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) 		struct zone *zone = pgdat->node_zones + z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) 		if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 		/* Avoid waking kswapd by allocating nr_migrate_pages pages. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 		if (!zone_watermark_ok(zone, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) 				       high_wmark_pages(zone) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 				       nr_migrate_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) 				       ZONE_MOVABLE, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) static struct page *alloc_misplaced_dst_page(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 					   unsigned long data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 	int nid = (int) data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 	struct page *newpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 	newpage = __alloc_pages_node(nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 					 (GFP_HIGHUSER_MOVABLE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 					  __GFP_THISNODE | __GFP_NOMEMALLOC |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 					  __GFP_NORETRY | __GFP_NOWARN) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 					 ~__GFP_RECLAIM, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 	return newpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 	int page_lru;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 	VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 	/* Avoid migrating to a node that is nearly full */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 	if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 	if (isolate_lru_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 	 * migrate_misplaced_transhuge_page() skips page migration's usual
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 	 * check on page_count(), so we must do it here, now that the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) 	 * has been isolated: a GUP pin, or any other pin, prevents migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) 	 * The expected page count is 3: 1 for page's mapcount and 1 for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 	 * caller's pin and 1 for the reference taken by isolate_lru_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) 	if (PageTransHuge(page) && page_count(page) != 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 		putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 	page_lru = page_is_file_lru(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 	mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 				thp_nr_pages(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 	 * Isolating the page has taken another reference, so the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 	 * caller's reference can be safely dropped without the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 	 * disappearing underneath us during migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 	put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) bool pmd_trans_migrating(pmd_t pmd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 	struct page *page = pmd_page(pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 	return PageLocked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053)  * Attempt to migrate a misplaced page to the specified destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054)  * node. Caller is expected to have an elevated reference count on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055)  * the page that will be dropped by this function before returning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) int migrate_misplaced_page(struct page *page, struct vm_fault *vmf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 			   int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 	pg_data_t *pgdat = NODE_DATA(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 	int isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 	int nr_remaining;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 	LIST_HEAD(migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) 	 * Don't migrate file pages that are mapped in multiple processes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) 	 * with execute permissions as they are probably shared libraries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) 	if (page_mapcount(page) != 1 && page_is_file_lru(page) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) 	    (vmf->vma_flags & VM_EXEC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	 * Also do not migrate dirty pages as not all filesystems can move
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 	 * dirty pages in MIGRATE_ASYNC mode which is a waste of cycles.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 	if (page_is_file_lru(page) && PageDirty(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 	isolated = numamigrate_isolate_page(pgdat, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 	if (!isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 	list_add(&page->lru, &migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 	nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 				     NULL, node, MIGRATE_ASYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 				     MR_NUMA_MISPLACED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 	if (nr_remaining) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 		if (!list_empty(&migratepages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 			list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 			dec_node_page_state(page, NR_ISOLATED_ANON +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 					page_is_file_lru(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 			putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 		isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) 		count_vm_numa_event(NUMA_PAGE_MIGRATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) 	BUG_ON(!list_empty(&migratepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 	return isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) 	put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) #endif /* CONFIG_NUMA_BALANCING */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)  * Migrates a THP to a given target node. page must be locked and is unlocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110)  * before returning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) int migrate_misplaced_transhuge_page(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 				struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 				pmd_t *pmd, pmd_t entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 				unsigned long address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) 				struct page *page, int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 	spinlock_t *ptl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 	pg_data_t *pgdat = NODE_DATA(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 	int isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 	struct page *new_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 	int page_lru = page_is_file_lru(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 	unsigned long start = address & HPAGE_PMD_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 	new_page = alloc_pages_node(node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 		(GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 		HPAGE_PMD_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 	if (!new_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 		goto out_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 	prep_transhuge_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 	isolated = numamigrate_isolate_page(pgdat, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 	if (!isolated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) 		put_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 		goto out_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 	/* Prepare a page as a migration target */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 	__SetPageLocked(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 	if (PageSwapBacked(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 		__SetPageSwapBacked(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 	/* anon mapping, we can simply copy page->mapping to the new page: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 	new_page->mapping = page->mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 	new_page->index = page->index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 	/* flush the cache before copying using the kernel virtual address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 	flush_cache_range(vma, start, start + HPAGE_PMD_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) 	migrate_page_copy(new_page, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 	WARN_ON(PageLRU(new_page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 	/* Recheck the target PMD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 	ptl = pmd_lock(mm, pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 	if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 		spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) 		/* Reverse changes made by migrate_page_copy() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) 		if (TestClearPageActive(new_page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) 			SetPageActive(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 		if (TestClearPageUnevictable(new_page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) 			SetPageUnevictable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) 		unlock_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) 		put_page(new_page);		/* Free it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) 		/* Retake the caller's reference and put the page back on the LRU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) 		get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) 		putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) 		mod_node_page_state(page_pgdat(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 	entry = mk_huge_pmd(new_page, vma->vm_page_prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) 	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 	 * Overwrite the old entry under pagetable lock and establish
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 	 * the new PTE. Any parallel GUP will either observe the old
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) 	 * page blocking on the page lock, block on the page table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 	 * lock or observe the new page. The SetPageUptodate on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) 	 * new page and page_add_new_anon_rmap guarantee the copy is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) 	 * visible before the pagetable update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) 	page_add_anon_rmap(new_page, vma, start, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) 	 * At this point the pmd is numa/protnone (i.e. non present) and the TLB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) 	 * has already been flushed globally.  So no TLB can be currently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) 	 * caching this non present pmd mapping.  There's no need to clear the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) 	 * pmd before doing set_pmd_at(), nor to flush the TLB after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) 	 * set_pmd_at().  Clearing the pmd here would introduce a race
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) 	 * condition against MADV_DONTNEED, because MADV_DONTNEED only holds the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 	 * mmap_lock for reading.  If the pmd is set to NULL at any given time,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) 	 * MADV_DONTNEED won't wait on the pmd lock and it'll skip clearing this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 	 * pmd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 	set_pmd_at(mm, start, pmd, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) 	update_mmu_cache_pmd(vma, address, &entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 	page_ref_unfreeze(page, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 	mlock_migrate_page(new_page, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 	page_remove_rmap(page, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) 	set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) 	spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) 	/* Take an "isolate" reference and put new page on the LRU. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) 	get_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 	putback_lru_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) 	unlock_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) 	unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) 	put_page(page);			/* Drop the rmap reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) 	put_page(page);			/* Drop the LRU isolation reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) 	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) 	mod_node_page_state(page_pgdat(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) 			NR_ISOLATED_ANON + page_lru,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) 			-HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) 	return isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) out_fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) 	ptl = pmd_lock(mm, pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) 	if (pmd_same(*pmd, entry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) 		entry = pmd_modify(entry, vma->vm_page_prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) 		set_pmd_at(mm, start, pmd, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) 		update_mmu_cache_pmd(vma, address, &entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) 	spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 	unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 	put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) #endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) #endif /* CONFIG_NUMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) #ifdef CONFIG_DEVICE_PRIVATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) static int migrate_vma_collect_hole(unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 				    unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 				    __always_unused int depth,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 				    struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 	struct migrate_vma *migrate = walk->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 	unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 	/* Only allow populating anonymous memory. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 	if (!vma_is_anonymous(walk->vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) 		for (addr = start; addr < end; addr += PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) 			migrate->src[migrate->npages] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) 			migrate->dst[migrate->npages] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 			migrate->npages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 	for (addr = start; addr < end; addr += PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 		migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 		migrate->dst[migrate->npages] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) 		migrate->npages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 		migrate->cpages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) static int migrate_vma_collect_skip(unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 				    unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 				    struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 	struct migrate_vma *migrate = walk->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 	unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 	for (addr = start; addr < end; addr += PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 		migrate->dst[migrate->npages] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 		migrate->src[migrate->npages++] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) static int migrate_vma_collect_pmd(pmd_t *pmdp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 				   unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 				   unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 				   struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 	struct migrate_vma *migrate = walk->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 	struct vm_area_struct *vma = walk->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 	struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 	unsigned long addr = start, unmapped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) 	spinlock_t *ptl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) 	pte_t *ptep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) 	if (pmd_none(*pmdp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) 		return migrate_vma_collect_hole(start, end, -1, walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 	if (pmd_trans_huge(*pmdp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 		struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 		ptl = pmd_lock(mm, pmdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 		if (unlikely(!pmd_trans_huge(*pmdp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 			spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 			goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 		page = pmd_page(*pmdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 		if (is_huge_zero_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 			spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 			split_huge_pmd(vma, pmdp, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 			if (pmd_trans_unstable(pmdp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 				return migrate_vma_collect_skip(start, end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 								walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 			int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 			get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) 			spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 			if (unlikely(!trylock_page(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) 				return migrate_vma_collect_skip(start, end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) 								walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 			ret = split_huge_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 			if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 				return migrate_vma_collect_skip(start, end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 								walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 			if (pmd_none(*pmdp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 				return migrate_vma_collect_hole(start, end, -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 								walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 	if (unlikely(pmd_bad(*pmdp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 		return migrate_vma_collect_skip(start, end, walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) 	arch_enter_lazy_mmu_mode();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) 	for (; addr < end; addr += PAGE_SIZE, ptep++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) 		unsigned long mpfn = 0, pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 		struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 		swp_entry_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 		pte_t pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 		pte = *ptep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) 		if (pte_none(pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) 			if (vma_is_anonymous(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) 				mpfn = MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) 				migrate->cpages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) 			goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 		if (!pte_present(pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) 			 * Only care about the special page table entries of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 			 * unaddressable device pages. Other special swap entries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 			 * are not migratable, and regular swapped pages are ignored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 			entry = pte_to_swp_entry(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 			if (!is_device_private_entry(entry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 				goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 			page = device_private_entry_to_page(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 			if (!(migrate->flags &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) 				MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 			    page->pgmap->owner != migrate->pgmap_owner)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 				goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 			mpfn = migrate_pfn(page_to_pfn(page)) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 					MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 			if (is_write_device_private_entry(entry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 				mpfn |= MIGRATE_PFN_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 			if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 				goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 			pfn = pte_pfn(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 			if (is_zero_pfn(pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 				mpfn = MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 				migrate->cpages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) 				goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 			page = vm_normal_page(migrate->vma, addr, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 			mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) 			mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 		/* FIXME support THP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) 		if (!page || !page->mapping || PageTransCompound(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) 			mpfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 			goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 		 * By getting a reference on the page we pin it and that blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 		 * any kind of migration. Side effect is that it "freezes" the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 		 * pte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) 		 * We drop this reference after isolating the page from the lru
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 		 * for non-device pages (device pages are not on the lru and thus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 		 * cannot be dropped from it).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) 		get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 		migrate->cpages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) 		 * Optimize for the common case where page is only mapped once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 		 * in one process. If we can lock the page, then we can safely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 		 * set up a special migration page table entry now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 		if (trylock_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 			pte_t swp_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 			mpfn |= MIGRATE_PFN_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 			ptep_get_and_clear(mm, addr, ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) 			/* Setup special migration page table entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 			entry = make_migration_entry(page, mpfn &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 						     MIGRATE_PFN_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 			swp_pte = swp_entry_to_pte(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 			if (pte_present(pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 				if (pte_soft_dirty(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 					swp_pte = pte_swp_mksoft_dirty(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 				if (pte_uffd_wp(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 					swp_pte = pte_swp_mkuffd_wp(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 				if (pte_swp_soft_dirty(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) 					swp_pte = pte_swp_mksoft_dirty(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 				if (pte_swp_uffd_wp(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) 					swp_pte = pte_swp_mkuffd_wp(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) 			set_pte_at(mm, addr, ptep, swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 			 * This is like regular unmap: we remove the rmap and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) 			 * drop page refcount. Page won't be freed, as we took
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 			 * a reference just above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) 			page_remove_rmap(page, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) 			if (pte_present(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) 				unmapped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) next:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 		migrate->dst[migrate->npages] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) 		migrate->src[migrate->npages++] = mpfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 	arch_leave_lazy_mmu_mode();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 	pte_unmap_unlock(ptep - 1, ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) 	/* Only flush the TLB if we actually modified any entries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) 	if (unmapped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) 		flush_tlb_range(walk->vma, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) static const struct mm_walk_ops migrate_vma_walk_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) 	.pmd_entry		= migrate_vma_collect_pmd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) 	.pte_hole		= migrate_vma_collect_hole,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473)  * migrate_vma_collect() - collect pages over a range of virtual addresses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474)  * @migrate: migrate struct containing all migration information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476)  * This will walk the CPU page table. For each virtual address backed by a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477)  * valid page, it updates the src array and takes a reference on the page, in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478)  * order to pin the page until we lock it and unmap it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) static void migrate_vma_collect(struct migrate_vma *migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) 	struct mmu_notifier_range range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) 	 * Note that the pgmap_owner is passed to the mmu notifier callback so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) 	 * that the registered device driver can skip invalidating device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) 	 * private page mappings that won't be migrated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) 	mmu_notifier_range_init_migrate(&range, 0, migrate->vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) 		migrate->vma->vm_mm, migrate->start, migrate->end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) 		migrate->pgmap_owner);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) 	mmu_notifier_invalidate_range_start(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) 	walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) 			&migrate_vma_walk_ops, migrate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) 	mmu_notifier_invalidate_range_end(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) 	migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) 
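/*
 * Editor's illustrative sketch (not part of the original file): how a device
 * driver can use the pgmap_owner noted in migrate_vma_collect() above to skip
 * invalidating its own mappings for a migration it started itself.  struct
 * my_driver and my_driver_flush_device_tlb() are hypothetical, and the
 * range->event / range->migrate_pgmap_owner fields are assumed to be the ones
 * mmu_notifier_range_init_migrate() fills in for this kernel generation.
 */
struct my_driver {
	struct mmu_notifier notifier;	/* registered with mmu_notifier_register() */
	void *pgmap_owner;		/* same value used in struct migrate_vma */
};

static int my_driver_invalidate_range_start(struct mmu_notifier *mn,
					    const struct mmu_notifier_range *range)
{
	struct my_driver *drv = container_of(mn, struct my_driver, notifier);

	/*
	 * A migration carrying our own pgmap_owner was started by this driver,
	 * which already takes care of the affected device mappings, so there
	 * is nothing to invalidate here.
	 */
	if (range->event == MMU_NOTIFY_MIGRATE &&
	    range->migrate_pgmap_owner == drv->pgmap_owner)
		return 0;

	my_driver_flush_device_tlb(drv, range->start, range->end);	/* hypothetical */
	return 0;
}
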
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502)  * migrate_vma_check_page() - check if page is pinned or not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503)  * @page: struct page to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505)  * Pinned pages cannot be migrated. This is the same test as in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506)  * migrate_page_move_mapping(), except that here we allow migration of a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507)  * ZONE_DEVICE page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) static bool migrate_vma_check_page(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) 	 * One extra ref because caller holds an extra reference, either from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) 	 * isolate_lru_page() for a regular page, or migrate_vma_collect() for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) 	 * a device page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) 	int extra = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) 	 * FIXME: support THP (transparent huge pages); they are a bit more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) 	 * complex to check than regular pages, because they can be mapped with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 	 * a pmd or with a pte (split pte mapping).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) 	if (PageCompound(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) 	/* Pages from ZONE_DEVICE have one extra reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) 	if (is_zone_device_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) 		 * Private pages can never be pinned as they have no valid pte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) 		 * and GUP will fail for them. Yet if there is a pending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) 		 * migration, a thread might try to wait on the pte migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) 		 * entry and bump the page reference count. Sadly there is no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) 		 * way to differentiate a regular pin from a migration wait.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) 		 * Hence, to keep two racing threads migrating back to the CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) 		 * from looping forever (one stopping the migration because the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) 		 * other waits on the pte migration entry), we always return true.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) 		 * FIXME: the proper fix is to rework migration_entry_wait() so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) 		 * it does not need to take a reference on the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) 		return is_device_private_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) 	/* For file-backed pages the page cache and buffers hold extra refs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) 	if (page_mapping(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) 		extra += 1 + page_has_private(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) 	if ((page_count(page) - extra) > page_mapcount(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555)  * migrate_vma_prepare() - lock pages and isolate them from the lru
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556)  * @migrate: migrate struct containing all migration information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558)  * This locks pages that have been collected by migrate_vma_collect(). Once each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559)  * page is locked it is isolated from the lru (for non-device pages). Finally,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560)  * the ref taken by migrate_vma_collect() is dropped, as locked pages cannot be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561)  * migrated by concurrent kernel threads.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) static void migrate_vma_prepare(struct migrate_vma *migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) 	const unsigned long npages = migrate->npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) 	const unsigned long start = migrate->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) 	unsigned long addr, i, restore = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) 	bool allow_drain = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) 	lru_add_drain();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) 	for (i = 0; (i < npages) && migrate->cpages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) 		struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) 		bool remap = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) 		if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) 		if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) 			 * Because we are migrating several pages there can be a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) 			 * deadlock between two concurrent migrations where each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) 			 * is waiting on the other's page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) 			 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) 			 * Make migrate_vma() a best-effort thing and back off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) 			 * for any page we cannot lock right away.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) 			if (!trylock_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) 				migrate->src[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) 				migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) 				put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) 			remap = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) 			migrate->src[i] |= MIGRATE_PFN_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) 		/* ZONE_DEVICE pages are not on LRU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) 		if (!is_zone_device_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) 			if (!PageLRU(page) && allow_drain) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) 				/* Drain CPU's pagevec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) 				lru_add_drain_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) 				allow_drain = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) 			if (isolate_lru_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) 				if (remap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) 					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) 					migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) 					restore++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) 				} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) 					migrate->src[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) 					unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) 					migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) 					put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) 			/* Drop the reference we took in collect */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) 		if (!migrate_vma_check_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) 			if (remap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) 				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) 				migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) 				restore++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) 				if (!is_zone_device_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) 					get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) 					putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) 				migrate->src[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) 				unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) 				migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) 				if (!is_zone_device_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) 					putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) 				else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) 					put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) 	for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) 		struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) 		if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) 		remove_migration_pte(page, migrate->vma, addr, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) 		migrate->src[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) 		unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) 		put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) 		restore--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663)  * migrate_vma_unmap() - replace page mapping with special migration pte entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664)  * @migrate: migrate struct containing all migration information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666)  * Replace page mapping (CPU page table pte) with a special migration pte entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667)  * and check again if it has been pinned. Pinned pages are restored because we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668)  * cannot migrate them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670)  * This is the last step before we call the device driver callback to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671)  * destination memory and copy contents of original page over to new page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) static void migrate_vma_unmap(struct migrate_vma *migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) 	int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) 	const unsigned long npages = migrate->npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) 	const unsigned long start = migrate->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) 	unsigned long addr, i, restore = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) 	for (i = 0; i < npages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) 		struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) 		if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) 		if (page_mapped(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) 			try_to_unmap(page, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) 			if (page_mapped(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) 				goto restore;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) 		if (migrate_vma_check_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) restore:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) 		migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) 		migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) 		restore++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) 	for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) 		struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) 		if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) 		remove_migration_ptes(page, page, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) 		migrate->src[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) 		unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) 		restore--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) 		if (is_zone_device_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) 			putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721)  * migrate_vma_setup() - prepare to migrate a range of memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722)  * @args: contains the vma, start, and pfns arrays for the migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724)  * Returns: negative errno on failures, 0 when 0 or more pages were migrated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725)  * without an error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727)  * Prepare to migrate a virtual address range of memory by collecting all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728)  * the pages backing each virtual address in the range, saving them inside the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729)  * src array.  Then lock those pages and unmap them. Once the pages are locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730)  * and unmapped, check whether each page is pinned or not.  Pages that aren't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731)  * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732)  * corresponding src array entry.  Any pages that are pinned are then restored
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733)  * by remapping and unlocking them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735)  * The caller should then allocate destination memory and copy source memory to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736)  * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737)  * flag set).  Once these are allocated and copied, the caller must update each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738)  * corresponding entry in the dst array with the pfn value of the destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739)  * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740)  * (destination pages must have their struct pages locked, via lock_page()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742)  * Note that the caller does not have to migrate all the pages that are marked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743)  * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744)  * device memory to system memory.  If the caller cannot migrate a device page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745)  * back to system memory, then it must return VM_FAULT_SIGBUS, which has severe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746)  * consequences for the userspace process, so it must be avoided if at all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747)  * possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749)  * For empty entries inside CPU page table (pte_none() or pmd_none() is true) we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750)  * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751)  * allowing the caller to allocate device memory for those unbacked virtual
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752)  * addresses.  For this the caller simply has to allocate device memory and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753)  * properly set the destination entry like for regular migration.  Note that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754)  * this can still fail and thus the device driver must check if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755)  * migration was successful for those entries after calling migrate_vma_pages()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756)  * just like for regular migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758)  * After that, the caller must call migrate_vma_pages() to go over each entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759)  * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760)  * set. If the corresponding entry in the dst array has MIGRATE_PFN_VALID set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761)  * then migrate_vma_pages() migrates struct page information from the source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762)  * struct page to the destination struct page.  If it fails to migrate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763)  * struct page information, then it clears the MIGRATE_PFN_MIGRATE flag in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764)  * src array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766)  * At this point all successfully migrated pages have an entry in the src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767)  * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768)  * array entry with MIGRATE_PFN_VALID flag set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770)  * Once migrate_vma_pages() returns the caller may inspect which pages were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771)  * successfully migrated, and which were not.  Successfully migrated pages will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772)  * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774)  * It is safe to update device page table after migrate_vma_pages() because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775)  * both destination and source page are still locked, and the mmap_lock is held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776)  * in read mode (hence no one can unmap the range being migrated).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778)  * Once the caller is done cleaning up things and updating its page table (if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779)  * chose to do so, this is not an obligation) it finally calls
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780)  * migrate_vma_finalize() to update the CPU page table to point to new pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781)  * for successfully migrated pages or otherwise restore the CPU page table to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782)  * point to the original source pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) int migrate_vma_setup(struct migrate_vma *args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) 	long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) 	args->start &= PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) 	args->end &= PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) 	if (!args->vma || is_vm_hugetlb_page(args->vma) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) 	    (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) 	if (nr_pages <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) 	if (args->start < args->vma->vm_start ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) 	    args->start >= args->vma->vm_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) 	if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) 	if (!args->src || !args->dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) 	memset(args->src, 0, sizeof(*args->src) * nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) 	args->cpages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) 	args->npages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) 	migrate_vma_collect(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) 	if (args->cpages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) 		migrate_vma_prepare(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) 	if (args->cpages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) 		migrate_vma_unmap(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) 	 * At this point pages are locked and unmapped, and thus they have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) 	 * stable content and can safely be copied to destination memory that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) 	 * is allocated by the drivers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) EXPORT_SYMBOL(migrate_vma_setup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) 
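/*
 * Editor's illustrative sketch (not part of the original file): a minimal,
 * hedged example of the driver-side flow described above, migrating an
 * anonymous range of system memory into device memory.  Only
 * migrate_vma_setup(), migrate_vma_pages(), migrate_vma_finalize(),
 * migrate_pfn(), migrate_pfn_to_page() and the MIGRATE_PFN_* and
 * MIGRATE_VMA_SELECT_* flags come from this file; my_dev_alloc_page() and
 * my_dev_copy_to_page() are hypothetical driver helpers, and <linux/slab.h>
 * is assumed for kcalloc().  The caller is assumed to hold the mmap_lock in
 * read mode for the whole sequence.
 */
static int example_migrate_range_to_device(struct vm_area_struct *vma,
					   unsigned long start,
					   unsigned long end,
					   void *pgmap_owner)
{
	unsigned long npages = (end - start) >> PAGE_SHIFT;
	struct migrate_vma args = {
		.vma		= vma,
		.start		= start,
		.end		= end,
		.pgmap_owner	= pgmap_owner,
		.flags		= MIGRATE_VMA_SELECT_SYSTEM,
	};
	unsigned long i;
	int ret = 0;

	args.src = kcalloc(npages, sizeof(*args.src), GFP_KERNEL);
	args.dst = kcalloc(npages, sizeof(*args.dst), GFP_KERNEL);
	if (!args.src || !args.dst) {
		ret = -ENOMEM;
		goto out_free;
	}

	/* Collect, lock and unmap the source pages. */
	ret = migrate_vma_setup(&args);
	if (ret)
		goto out_free;

	for (i = 0; i < args.npages; i++) {
		struct page *spage = migrate_pfn_to_page(args.src[i]);
		struct page *dpage;

		/* Entries that were not collected, or are pinned, are skipped. */
		if (!(args.src[i] & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = my_dev_alloc_page();	/* hypothetical device private page */
		if (!dpage)
			continue;	/* dst stays 0, the source page is restored */

		/*
		 * Copy the data; spage is NULL for empty or zero-page entries,
		 * in which case the device page only needs to be cleared.
		 */
		my_dev_copy_to_page(dpage, spage);	/* hypothetical */

		/* Destination pages must be locked and flagged as described above. */
		lock_page(dpage);
		args.dst[i] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
	}

	/* Switch struct page metadata over, then fix up the CPU page table. */
	migrate_vma_pages(&args);
	migrate_vma_finalize(&args);

out_free:
	kfree(args.src);
	kfree(args.dst);
	return ret;
}
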
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825)  * This code closely matches the code in:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826)  *   __handle_mm_fault()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827)  *     handle_pte_fault()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828)  *       do_anonymous_page()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829)  * to map in an anonymous zero page but the struct page will be a ZONE_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830)  * private page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) static void migrate_vma_insert_page(struct migrate_vma *migrate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) 				    unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) 				    struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) 				    unsigned long *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) 				    unsigned long *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) 	struct vm_area_struct *vma = migrate->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) 	struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) 	bool flush = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) 	spinlock_t *ptl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) 	pte_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) 	pgd_t *pgdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) 	p4d_t *p4dp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) 	pud_t *pudp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) 	pmd_t *pmdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) 	pte_t *ptep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) 	/* Only allow populating anonymous memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) 	if (!vma_is_anonymous(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) 		goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) 	pgdp = pgd_offset(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) 	p4dp = p4d_alloc(mm, pgdp, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) 	if (!p4dp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) 		goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) 	pudp = pud_alloc(mm, p4dp, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) 	if (!pudp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) 		goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) 	pmdp = pmd_alloc(mm, pudp, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) 	if (!pmdp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) 		goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) 	if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) 		goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) 	 * Use pte_alloc() instead of pte_alloc_map().  We can't run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) 	 * pte_offset_map() on pmds where a huge pmd might be created
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) 	 * from a different thread.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) 	 * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) 	 * parallel threads are excluded by other means.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) 	 * Here we only have mmap_read_lock(mm).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) 	if (pte_alloc(mm, pmdp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) 		goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) 	/* See the comment in pte_alloc_one_map() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) 	if (unlikely(pmd_trans_unstable(pmdp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) 		goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) 	if (unlikely(anon_vma_prepare(vma)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) 		goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) 	if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) 		goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) 	 * The memory barrier inside __SetPageUptodate makes sure that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) 	 * preceding stores to the page contents become visible before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) 	 * the set_pte_at() write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) 	__SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) 	if (is_zone_device_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) 		if (is_device_private_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) 			swp_entry_t swp_entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) 			swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) 			entry = swp_entry_to_pte(swp_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) 			 * For now we only support migrating to un-addressable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) 			 * device memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) 			pr_warn_once("Unsupported ZONE_DEVICE page type.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) 			goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) 		entry = mk_pte(page, vma->vm_page_prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) 		if (vma->vm_flags & VM_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) 			entry = pte_mkwrite(pte_mkdirty(entry));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) 	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) 	if (check_stable_address_space(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) 		goto unlock_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) 	if (pte_present(*ptep)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) 		unsigned long pfn = pte_pfn(*ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) 		if (!is_zero_pfn(pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) 			goto unlock_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) 		flush = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) 	} else if (!pte_none(*ptep))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) 		goto unlock_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) 	 * Check for userfaultfd but do not deliver the fault. Instead,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) 	 * just back off.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) 	if (userfaultfd_missing(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) 		goto unlock_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) 	inc_mm_counter(mm, MM_ANONPAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) 	page_add_new_anon_rmap(page, vma, addr, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) 	if (!is_zone_device_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) 		lru_cache_add_inactive_or_unevictable(page, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) 	get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) 	if (flush) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) 		flush_cache_page(vma, addr, pte_pfn(*ptep));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) 		ptep_clear_flush_notify(vma, addr, ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) 		set_pte_at_notify(mm, addr, ptep, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) 		update_mmu_cache(vma, addr, ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) 		/* No need to invalidate - it was non-present before */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) 		set_pte_at(mm, addr, ptep, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) 		update_mmu_cache(vma, addr, ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) 	pte_unmap_unlock(ptep, ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) 	*src = MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) unlock_abort:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) 	pte_unmap_unlock(ptep, ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) abort:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) 	*src &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965)  * migrate_vma_pages() - migrate meta-data from src page to dst page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966)  * @migrate: migrate struct containing all migration information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968)  * This migrates struct page meta-data from source struct page to destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969)  * struct page. This effectively finishes the migration from source page to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970)  * destination page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) void migrate_vma_pages(struct migrate_vma *migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) 	const unsigned long npages = migrate->npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) 	const unsigned long start = migrate->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) 	struct mmu_notifier_range range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) 	unsigned long addr, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) 	bool notified = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) 	for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) 		struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) 		struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) 		struct address_space *mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) 		int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) 		if (!newpage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) 			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) 		if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) 			if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) 			if (!notified) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) 				notified = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) 				mmu_notifier_range_init(&range,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) 							MMU_NOTIFY_CLEAR, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) 							NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) 							migrate->vma->vm_mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) 							addr, migrate->end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) 				mmu_notifier_invalidate_range_start(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) 			migrate_vma_insert_page(migrate, addr, newpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) 						&migrate->src[i],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) 						&migrate->dst[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) 		mapping = page_mapping(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) 		if (is_zone_device_page(newpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) 			if (is_device_private_page(newpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) 				 * For now we only support private anonymous memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) 				 * when migrating to un-addressable device memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) 				if (mapping) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) 					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) 					continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) 				 * Other types of ZONE_DEVICE page are not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) 				 * supported.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) 				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) 		r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) 		if (r != MIGRATEPAGE_SUCCESS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) 			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) 	 * No need to call the mmu_notifier->invalidate_range() callback twice, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) 	 * the ptep_clear_flush_notify() inside migrate_vma_insert_page() above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) 	 * already called it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) 	if (notified)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) 		mmu_notifier_invalidate_range_only_end(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) EXPORT_SYMBOL(migrate_vma_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) 
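/*
 * Editor's illustrative sketch (not part of the original file), expanding on
 * the last two calls of the example after migrate_vma_setup(): the window
 * between migrate_vma_pages() and migrate_vma_finalize() is where a driver
 * may inspect which entries were actually migrated (MIGRATE_PFN_MIGRATE is
 * still set in src) and update its own device page tables, since both the
 * source and destination pages are still locked and the mmap_lock is held in
 * read mode.  my_dev_map_page() is a hypothetical driver helper.
 */
static void example_commit_device_mappings(struct migrate_vma *args)
{
	unsigned long i, addr;

	migrate_vma_pages(args);

	for (i = 0, addr = args->start; i < args->npages;
	     i++, addr += PAGE_SIZE) {
		struct page *dpage = migrate_pfn_to_page(args->dst[i]);

		/* Entries that lost MIGRATE_PFN_MIGRATE were not migrated. */
		if (!dpage || !(args->src[i] & MIGRATE_PFN_MIGRATE))
			continue;

		my_dev_map_page(dpage, addr);	/* hypothetical device PTE update */
	}

	/* Replace or restore the CPU page table entries and unlock the pages. */
	migrate_vma_finalize(args);
}
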
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048)  * migrate_vma_finalize() - restore CPU page table entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049)  * @migrate: migrate struct containing all migration information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051)  * This replaces the special migration pte entry with either a mapping to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052)  * new page if migration was successful for that page, or to the original page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053)  * otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055)  * This also unlocks the pages and puts them back on the lru, or drops the extra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056)  * refcount, for device pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) void migrate_vma_finalize(struct migrate_vma *migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) 	const unsigned long npages = migrate->npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) 	unsigned long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) 	for (i = 0; i < npages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) 		struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) 		struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) 		if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) 			if (newpage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) 				unlock_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) 				put_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) 		if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) 			if (newpage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) 				unlock_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) 				put_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) 			newpage = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) 		remove_migration_ptes(page, newpage, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) 		unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) 		if (is_zone_device_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) 			putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) 		if (newpage != page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) 			unlock_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) 			if (is_zone_device_page(newpage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) 				put_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) 				putback_lru_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) EXPORT_SYMBOL(migrate_vma_finalize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) #endif /* CONFIG_DEVICE_PRIVATE */