// SPDX-License-Identifier: GPL-2.0
/*
 * Memory Migration functionality - linux/mm/migrate.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter
 */

#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/pagewalk.h>
#include <linux/pfn_t.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/page_owner.h>
#include <linux/sched/mm.h>
#include <linux/ptrace.h>
#include <linux/oom.h>

#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>
#undef CREATE_TRACE_POINTS
#include <trace/hooks/mm.h>

#include "internal.h"

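/*
 * Attempt to isolate a non-LRU movable page for migration.
 *
 * Takes a reference on the page, verifies it is still movable and not
 * already isolated, and asks the owning driver (via
 * mapping->a_ops->isolate_page) to isolate it. Returns 0 on success and
 * -EBUSY if the page could not be isolated.
 */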
int isolate_movable_page(struct page *page, isolate_mode_t mode)
{
	struct address_space *mapping;

	/*
	 * Avoid burning cycles with pages that are yet under __free_pages(),
	 * or just got freed under us.
	 *
	 * In case we 'win' a race for a movable page being freed under us and
	 * raise its refcount, preventing __free_pages() from doing its job,
	 * the put_page() at the end of this block will take care of
	 * releasing this page, thus avoiding a nasty leakage.
	 */
	if (unlikely(!get_page_unless_zero(page)))
		goto out;

	/*
	 * Check PageMovable before taking the page lock because the page's
	 * owner assumes that nobody touches the PG_lock of a newly allocated
	 * page, so unconditionally grabbing the lock would break the owner's
	 * expectations.
	 */
	if (unlikely(!__PageMovable(page)))
		goto out_putpage;
	/*
	 * As movable pages are not isolated from LRU lists, concurrent
	 * compaction threads can race against page migration functions
	 * as well as against the release of a page.
	 *
	 * In order to avoid having an already isolated movable page
	 * being (wrongly) re-isolated while it is under migration,
	 * or to avoid attempting to isolate pages being released,
	 * let's be sure we have the page lock
	 * before proceeding with the movable page isolation steps.
	 */
	if (unlikely(!trylock_page(page)))
		goto out_putpage;

	if (!PageMovable(page) || PageIsolated(page))
		goto out_no_isolated;

	mapping = page_mapping(page);
	VM_BUG_ON_PAGE(!mapping, page);

	if (!mapping->a_ops->isolate_page(page, mode))
		goto out_no_isolated;

	/* Driver shouldn't use the PG_isolated bit of page->flags */
	WARN_ON_ONCE(PageIsolated(page));
	SetPageIsolated(page);
	unlock_page(page);

	return 0;

out_no_isolated:
	unlock_page(page);
out_putpage:
	put_page(page);
out:
	return -EBUSY;
}

/* Must be called on a locked page that is PG_movable */
void putback_movable_page(struct page *page)
{
	struct address_space *mapping;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);

	mapping = page_mapping(page);
	mapping->a_ops->putback_page(page);
	ClearPageIsolated(page);
}

/*
 * Put previously isolated pages back onto the appropriate lists
 * from where they were once taken off for compaction/migration.
 *
 * This function shall be used whenever the isolated pageset has been
 * built from LRU, balloon or hugetlbfs pages. See isolate_migratepages_range()
 * and isolate_huge_page().
 */
void putback_movable_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;

	list_for_each_entry_safe(page, page2, l, lru) {
		if (unlikely(PageHuge(page))) {
			putback_active_hugepage(page);
			continue;
		}
		list_del(&page->lru);
		/*
		 * We isolated a non-LRU movable page, so we can use
		 * __PageMovable here because an LRU page's mapping cannot
		 * have PAGE_MAPPING_MOVABLE set.
		 */
		if (unlikely(__PageMovable(page))) {
			VM_BUG_ON_PAGE(!PageIsolated(page), page);
			lock_page(page);
			if (PageMovable(page))
				putback_movable_page(page);
			else
				ClearPageIsolated(page);
			unlock_page(page);
			put_page(page);
		} else {
			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
					page_is_file_lru(page), -thp_nr_pages(page));
			putback_lru_page(page);
		}
	}
}
EXPORT_SYMBOL_GPL(putback_movable_pages);

/*
 * Restore a potential migration pte to a working pte entry
 */
static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
				 unsigned long addr, void *old)
{
	struct page_vma_mapped_walk pvmw = {
		.page = old,
		.vma = vma,
		.address = addr,
		.flags = PVMW_SYNC | PVMW_MIGRATION,
	};
	struct page *new;
	pte_t pte;
	swp_entry_t entry;

	VM_BUG_ON_PAGE(PageTail(page), page);
	while (page_vma_mapped_walk(&pvmw)) {
		if (PageKsm(page))
			new = page;
		else
			new = page - pvmw.page->index +
				linear_page_index(vma, pvmw.address);

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		/* PMD-mapped THP migration entry */
		if (!pvmw.pte) {
			VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
			remove_migration_pmd(&pvmw, new);
			continue;
		}
#endif

		get_page(new);
		pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
		if (pte_swp_soft_dirty(*pvmw.pte))
			pte = pte_mksoft_dirty(pte);

		/*
		 * Recheck the VMA as permissions may have changed since
		 * migration started.
		 */
		entry = pte_to_swp_entry(*pvmw.pte);
		if (is_write_migration_entry(entry))
			pte = maybe_mkwrite(pte, vma->vm_flags);
		else if (pte_swp_uffd_wp(*pvmw.pte))
			pte = pte_mkuffd_wp(pte);

		if (unlikely(is_device_private_page(new))) {
			entry = make_device_private_entry(new, pte_write(pte));
			pte = swp_entry_to_pte(entry);
			if (pte_swp_soft_dirty(*pvmw.pte))
				pte = pte_swp_mksoft_dirty(pte);
			if (pte_swp_uffd_wp(*pvmw.pte))
				pte = pte_swp_mkuffd_wp(pte);
		}

#ifdef CONFIG_HUGETLB_PAGE
		if (PageHuge(new)) {
			pte = pte_mkhuge(pte);
			pte = arch_make_huge_pte(pte, vma, new, 0);
			set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
			if (PageAnon(new))
				hugepage_add_anon_rmap(new, vma, pvmw.address);
			else
				page_dup_rmap(new, true);
		} else
#endif
		{
			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);

			if (PageAnon(new))
				page_add_anon_rmap(new, vma, pvmw.address, false);
			else
				page_add_file_rmap(new, false);
		}
		if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
			mlock_vma_page(new);

		if (PageTransHuge(page) && PageMlocked(page))
			clear_page_mlock(page);

		/* No need to invalidate - it was non-present before */
		update_mmu_cache(vma, pvmw.address, pvmw.pte);
	}

	return true;
}

/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */
void remove_migration_ptes(struct page *old, struct page *new, bool locked)
{
	struct rmap_walk_control rwc = {
		.rmap_one = remove_migration_pte,
		.arg = old,
	};

	if (locked)
		rmap_walk_locked(new, &rwc);
	else
		rmap_walk(new, &rwc);
}

/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function, the fault will be retried.
 */
void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
				spinlock_t *ptl)
{
	pte_t pte;
	swp_entry_t entry;
	struct page *page;

	spin_lock(ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto out;

	entry = pte_to_swp_entry(pte);
	if (!is_migration_entry(entry))
		goto out;

	page = migration_entry_to_page(entry);
	page = compound_head(page);

	/*
	 * Once the page cache replacement step of page migration has started,
	 * page_count is zero; but we must not call
	 * put_and_wait_on_page_locked() without a ref. Use
	 * get_page_unless_zero(), and just fault again if it fails.
	 */
	if (!get_page_unless_zero(page))
		goto out;
	pte_unmap_unlock(ptep, ptl);
	trace_android_vh_waiting_for_page_migration(page);
	put_and_wait_on_page_locked(page);
	return;
out:
	pte_unmap_unlock(ptep, ptl);
}

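/*
 * Wait on a migration entry mapped at @address: look up the pte and its
 * lock for the given pmd and defer to __migration_entry_wait().
 */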
void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
				unsigned long address)
{
	spinlock_t *ptl = pte_lockptr(mm, pmd);
	pte_t *ptep = pte_offset_map(pmd, address);
	__migration_entry_wait(mm, ptep, ptl);
}

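/*
 * Hugetlb variant: the pte lock is looked up via the vma's hstate before
 * waiting on the migration entry.
 */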
void migration_entry_wait_huge(struct vm_area_struct *vma,
		struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
	__migration_entry_wait(mm, pte, ptl);
}

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
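/*
 * Wait for a PMD-mapped THP migration entry to be resolved, taking a
 * reference on the page so it cannot be freed while we sleep on its lock.
 */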
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *ptl;
	struct page *page;

	ptl = pmd_lock(mm, pmd);
	if (!is_pmd_migration_entry(*pmd))
		goto unlock;
	page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
	if (!get_page_unless_zero(page))
		goto unlock;
	spin_unlock(ptl);
	put_and_wait_on_page_locked(page);
	return;
unlock:
	spin_unlock(ptl);
}
#endif

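/*
 * Compute how many references @page is expected to have for migration to
 * proceed: the base reference held by the caller, an extra one for
 * ZONE_DEVICE private pages, plus the page cache references (one per
 * subpage) and any private reference when the page has a mapping.
 */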
static int expected_page_refs(struct address_space *mapping, struct page *page)
{
	int expected_count = 1;

	/*
	 * Device private pages have an extra refcount as they are
	 * ZONE_DEVICE pages.
	 */
	expected_count += is_device_private_page(page);
	if (mapping)
		expected_count += thp_nr_pages(page) + page_has_private(page);

	return expected_count;
}

/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
 * 2 for pages with a mapping
 * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
 */
int migrate_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page, int extra_count)
{
	XA_STATE(xas, &mapping->i_pages, page_index(page));
	struct zone *oldzone, *newzone;
	int dirty;
	int expected_count = expected_page_refs(mapping, page) + extra_count;
	int nr = thp_nr_pages(page);

	if (!mapping) {
		/* Anonymous page without mapping */
		if (page_count(page) != expected_count)
			return -EAGAIN;

		/* No turning back from here */
		newpage->index = page->index;
		newpage->mapping = page->mapping;
		if (PageSwapBacked(page))
			__SetPageSwapBacked(newpage);

		return MIGRATEPAGE_SUCCESS;
	}

	oldzone = page_zone(page);
	newzone = page_zone(newpage);

	xas_lock_irq(&xas);
	if (page_count(page) != expected_count || xas_load(&xas) != page) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	if (!page_ref_freeze(page, expected_count)) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	/*
	 * Now we know that no one else is looking at the page:
	 * no turning back from here.
	 */
	newpage->index = page->index;
	newpage->mapping = page->mapping;
	page_ref_add(newpage, nr); /* add cache reference */
	if (PageSwapBacked(page)) {
		__SetPageSwapBacked(newpage);
		if (PageSwapCache(page)) {
			SetPageSwapCache(newpage);
			set_page_private(newpage, page_private(page));
		}
	} else {
		VM_BUG_ON_PAGE(PageSwapCache(page), page);
	}

	/* Move dirty while page refs frozen and newpage not yet exposed */
	dirty = PageDirty(page);
	if (dirty) {
		ClearPageDirty(page);
		SetPageDirty(newpage);
	}

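	/*
	 * A compound page occupies thp_nr_pages() consecutive slots in the
	 * page cache; make every slot point at newpage.
	 */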
	xas_store(&xas, newpage);
	if (PageTransHuge(page)) {
		int i;

		for (i = 1; i < nr; i++) {
			xas_next(&xas);
			xas_store(&xas, newpage);
		}
	}

	/*
	 * Drop cache reference from old page by unfreezing
	 * to one less reference.
	 * We know this isn't the last reference.
	 */
	page_ref_unfreeze(page, expected_count - nr);

	xas_unlock(&xas);
	/* Leave irq disabled to prevent preemption while updating stats */

	/*
	 * If moved to a different zone then also account
	 * the page for that zone. Other VM counters will be
	 * taken care of when we establish references to the
	 * new page and drop references to the old page.
	 *
	 * Note that anonymous pages are accounted for
	 * via NR_FILE_PAGES and NR_ANON_MAPPED if they
	 * are mapped to swap space.
	 */
	if (newzone != oldzone) {
		struct lruvec *old_lruvec, *new_lruvec;
		struct mem_cgroup *memcg;

		memcg = page_memcg(page);
		old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat);
		new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat);

		__mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr);
		__mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr);
		if (PageSwapBacked(page) && !PageSwapCache(page)) {
			__mod_lruvec_state(old_lruvec, NR_SHMEM, -nr);
			__mod_lruvec_state(new_lruvec, NR_SHMEM, nr);
		}
		if (dirty && mapping_can_writeback(mapping)) {
			__mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr);
			__mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr);
			__mod_lruvec_state(new_lruvec, NR_FILE_DIRTY, nr);
			__mod_zone_page_state(newzone, NR_ZONE_WRITE_PENDING, nr);
		}
	}
	local_irq_enable();

	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page_move_mapping);

/*
 * The expected number of remaining references is the same as that
 * of migrate_page_move_mapping().
 */
int migrate_huge_page_move_mapping(struct address_space *mapping,
				   struct page *newpage, struct page *page)
{
	XA_STATE(xas, &mapping->i_pages, page_index(page));
	int expected_count;

	xas_lock_irq(&xas);
	expected_count = 2 + page_has_private(page);
	if (page_count(page) != expected_count || xas_load(&xas) != page) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	if (!page_ref_freeze(page, expected_count)) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	newpage->index = page->index;
	newpage->mapping = page->mapping;

	get_page(newpage);

	xas_store(&xas, newpage);

	page_ref_unfreeze(page, expected_count - 1);

	xas_unlock_irq(&xas);

	return MIGRATEPAGE_SUCCESS;
}

/*
 * Gigantic pages are so large that we do not guarantee that page++ pointer
 * arithmetic will work across the entire page. We need something more
 * specialized.
 */
static void __copy_gigantic_page(struct page *dst, struct page *src,
				int nr_pages)
{
	int i;
	struct page *dst_base = dst;
	struct page *src_base = src;

	for (i = 0; i < nr_pages; ) {
		cond_resched();
		copy_highpage(dst, src);

		i++;
		dst = mem_map_next(dst, dst_base, i);
		src = mem_map_next(src, src_base, i);
	}
}

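/*
 * Copy the data of a hugetlbfs or transparent huge page, falling back to
 * the gigantic-page helper when the page spans more than
 * MAX_ORDER_NR_PAGES.
 */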
static void copy_huge_page(struct page *dst, struct page *src)
{
	int i;
	int nr_pages;

	if (PageHuge(src)) {
		/* hugetlbfs page */
		struct hstate *h = page_hstate(src);
		nr_pages = pages_per_huge_page(h);

		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
			__copy_gigantic_page(dst, src, nr_pages);
			return;
		}
	} else {
		/* thp page */
		BUG_ON(!PageTransHuge(src));
		nr_pages = thp_nr_pages(src);
	}

	for (i = 0; i < nr_pages; i++) {
		cond_resched();
		copy_highpage(dst + i, src + i);
	}
}

/*
 * Copy the page's flags and state to its new location.
 */
void migrate_page_states(struct page *newpage, struct page *page)
{
	int cpupid;

	trace_android_vh_migrate_page_states(page, newpage);

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (TestClearPageActive(page)) {
		VM_BUG_ON_PAGE(PageUnevictable(page), page);
		SetPageActive(newpage);
	} else if (TestClearPageUnevictable(page))
		SetPageUnevictable(newpage);
	if (PageWorkingset(page))
		SetPageWorkingset(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	/* Migrate dirty state for pages not handled by migrate_page_move_mapping() */
	if (PageDirty(page))
		SetPageDirty(newpage);

	if (page_is_young(page))
		set_page_young(newpage);
	if (page_is_idle(page))
		set_page_idle(newpage);

	/*
	 * Copy NUMA information to the new page, to prevent over-eager
	 * future migrations of this same page.
	 */
	cpupid = page_cpupid_xchg_last(page, -1);
	page_cpupid_xchg_last(newpage, cpupid);

	ksm_migrate_page(newpage, page);
	/*
	 * Please do not reorder this without considering how mm/ksm.c's
	 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
	 */
	if (PageSwapCache(page))
		ClearPageSwapCache(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);

	/*
	 * PG_readahead shares the same bit with PG_reclaim. The above
	 * end_page_writeback() may clear PG_readahead mistakenly, so set the
	 * bit after that.
	 */
	if (PageReadahead(page))
		SetPageReadahead(newpage);

	copy_page_owner(page, newpage);

	if (!PageHuge(page))
		mem_cgroup_migrate(page, newpage);
}
EXPORT_SYMBOL(migrate_page_states);

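/*
 * Copy both the data and the state of @page into @newpage.
 */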
void migrate_page_copy(struct page *newpage, struct page *page)
{
	if (PageHuge(page) || PageTransHuge(page))
		copy_huge_page(newpage, page);
	else
		copy_highpage(newpage, page);

	migrate_page_states(newpage, page);
}
EXPORT_SYMBOL(migrate_page_copy);

/************************************************************
 * Migration functions
 ***********************************************************/

/*
 * Common logic to directly migrate a single LRU page suitable for
 * pages that do not use PagePrivate/PagePrivate2.
 *
 * Pages are locked upon entry and exit.
 */
int migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_move_mapping(mapping, newpage, page, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);
	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page);

#ifdef CONFIG_BLOCK
/* Returns true if all buffers are successfully locked */
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
							enum migrate_mode mode)
{
	struct buffer_head *bh = head;

	/* Simple case, sync compaction */
	if (mode != MIGRATE_ASYNC) {
		do {
			lock_buffer(bh);
			bh = bh->b_this_page;

		} while (bh != head);

		return true;
	}

	/* Async case: we cannot block on lock_buffer(), so use trylock_buffer() */
	do {
		if (!trylock_buffer(bh)) {
			/*
			 * We failed to lock the buffer and cannot stall in
			 * async migration. Release the locks taken so far.
			 */
			struct buffer_head *failed_bh = bh;
			bh = head;
			while (bh != failed_bh) {
				unlock_buffer(bh);
				bh = bh->b_this_page;
			}
			return false;
		}

		bh = bh->b_this_page;
	} while (bh != head);
	return true;
}

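/*
 * Common implementation for buffer_migrate_page() and
 * buffer_migrate_page_norefs(): when @check_refs is true, also make sure
 * no buffer head on the page carries an elevated b_count before moving it.
 */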
static int __buffer_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode,
		bool check_refs)
{
	struct buffer_head *bh, *head;
	int rc;
	int expected_count;

	if (!page_has_buffers(page))
		return migrate_page(mapping, newpage, page, mode);

	/* Check that the page has no extra refs before we do more work */
	expected_count = expected_page_refs(mapping, page);
	if (page_count(page) != expected_count)
		return -EAGAIN;

	head = page_buffers(page);
	if (!buffer_migrate_lock_buffers(head, mode))
		return -EAGAIN;

	if (check_refs) {
		bool busy;
		bool invalidated = false;

recheck_buffers:
		busy = false;
		spin_lock(&mapping->private_lock);
		bh = head;
		do {
			if (atomic_read(&bh->b_count)) {
				busy = true;
				break;
			}
			bh = bh->b_this_page;
		} while (bh != head);
		if (busy) {
			if (invalidated) {
				rc = -EAGAIN;
				goto unlock_buffers;
			}
			spin_unlock(&mapping->private_lock);
			invalidate_bh_lrus();
			invalidated = true;
			goto recheck_buffers;
		}
	}

	rc = migrate_page_move_mapping(mapping, newpage, page, 0);
	if (rc != MIGRATEPAGE_SUCCESS)
		goto unlock_buffers;

	attach_page_private(newpage, detach_page_private(page));

	bh = head;
	do {
		set_bh_page(bh, newpage, bh_offset(bh));
		bh = bh->b_this_page;

	} while (bh != head);

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	rc = MIGRATEPAGE_SUCCESS;
unlock_buffers:
	if (check_refs)
		spin_unlock(&mapping->private_lock);
	bh = head;
	do {
		unlock_buffer(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	return rc;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) * Migration function for pages with buffers. This function can only be used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) * if the underlying filesystem guarantees that no other references to "page"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) * exist. For example, attached buffer heads are accessed only under the page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) int buffer_migrate_page(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) struct page *newpage, struct page *page, enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) return __buffer_migrate_page(mapping, newpage, page, mode, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) EXPORT_SYMBOL(buffer_migrate_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) * Same as above except that this variant is more careful and checks that there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) * are also no buffer head references. This function is the right one for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) * mappings where buffer heads are directly looked up and referenced (such as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) * block device mappings).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) int buffer_migrate_page_norefs(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) struct page *newpage, struct page *page, enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) return __buffer_migrate_page(mapping, newpage, page, mode, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) * Write back a page to clear its dirty state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) static int writeout(struct address_space *mapping, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) struct writeback_control wbc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) .sync_mode = WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) .nr_to_write = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) .range_start = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) .range_end = LLONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) .for_reclaim = 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) if (!mapping->a_ops->writepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) /* No write method for the address space */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) if (!clear_page_dirty_for_io(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) /* Someone else already triggered a write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) * A dirty page may imply that the underlying filesystem has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) * the page on some queue. So the page must be clean for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) * migration. Writeout may mean we lose the lock and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) * page state is no longer what we checked for earlier.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) * At this point we know that the migration attempt cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) * be successful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) remove_migration_ptes(page, page, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) rc = mapping->a_ops->writepage(page, &wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) if (rc != AOP_WRITEPAGE_ACTIVATE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) /* unlocked. Relock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880)
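/*
* Whether or not the writeout succeeded, the migration attempt has
* failed: report a hard error for I/O failures, otherwise ask the
* caller to retry.
*/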
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) return (rc < 0) ? -EIO : -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) * Default handling if a filesystem does not provide a migration function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) static int fallback_migrate_page(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) struct page *newpage, struct page *page, enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) if (PageDirty(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) /* Only writeback pages in full synchronous migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) switch (mode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) case MIGRATE_SYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) case MIGRATE_SYNC_NO_COPY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) return writeout(mapping, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) * Buffers may be managed in a filesystem-specific way.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) * We must have no buffers or drop them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) if (page_has_private(page) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) !try_to_release_page(page, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) return migrate_page(mapping, newpage, page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) * Move a page to a newly allocated page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) * The page is locked and all ptes have been successfully removed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) * The new page will have replaced the old page if this function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) * is successful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) * Return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) * < 0 - error code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) * MIGRATEPAGE_SUCCESS - success
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) static int move_to_new_page(struct page *newpage, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) struct address_space *mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) int rc = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) bool is_lru = !__PageMovable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) VM_BUG_ON_PAGE(!PageLocked(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) mapping = page_mapping(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) if (likely(is_lru)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) if (!mapping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) rc = migrate_page(mapping, newpage, page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) else if (mapping->a_ops->migratepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) * Most pages have a mapping and most filesystems
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) * provide a migratepage callback. Anonymous pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) * are part of swap space which also has its own
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) * migratepage callback. This is the most common path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) * for page migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) rc = mapping->a_ops->migratepage(mapping, newpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) rc = fallback_migrate_page(mapping, newpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) * A non-LRU page could have been released after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) * isolation step. In that case, we shouldn't try migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) VM_BUG_ON_PAGE(!PageIsolated(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) if (!PageMovable(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) rc = MIGRATEPAGE_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) ClearPageIsolated(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) rc = mapping->a_ops->migratepage(mapping, newpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) !PageIsolated(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) * When successful, old pagecache page->mapping must be cleared before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) * page is freed; but stats require that PageAnon be left as PageAnon.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) if (rc == MIGRATEPAGE_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) if (__PageMovable(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) VM_BUG_ON_PAGE(!PageIsolated(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) * We clear PG_movable under page_lock so any compactor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) * cannot try to migrate this page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) ClearPageIsolated(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) * Anonymous and movable page->mapping will be cleared by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) * free_pages_prepare(), so don't reset it here; this keeps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) * type checks such as PageAnon() working until the page is freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (!PageMappingFlags(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) page->mapping = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) if (likely(!is_zone_device_page(newpage)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) flush_dcache_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) static int __unmap_and_move(struct page *page, struct page *newpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) int force, enum migrate_mode mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) int rc = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) int page_was_mapped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) struct anon_vma *anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) bool is_lru = !__PageMovable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) if (!trylock_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) if (!force || mode == MIGRATE_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) * It's not safe for direct compaction to call lock_page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) * For example, during page readahead pages are added locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) * to the LRU. Later, when the IO completes the pages are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) * marked uptodate and unlocked. However, the queueing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) * could be merging multiple pages for one bio (e.g.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) * mpage_readahead). If an allocation happens for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) * second or third page, the process can end up locking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) * the same page twice and deadlocking. Rather than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * trying to be clever about what pages can be locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) * avoid the use of lock_page for direct compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * altogether.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) if (current->flags & PF_MEMALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) if (PageWriteback(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) * Only in the case of a full synchronous migration is it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) * necessary to wait for PageWriteback. In the async case,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) * the retry loop is too short and in the sync-light case,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) * the overhead of stalling is too much
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) switch (mode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) case MIGRATE_SYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) case MIGRATE_SYNC_NO_COPY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) rc = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) if (!force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) wait_on_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) * we cannot notice that the anon_vma is freed while we migrate a page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) * Taking a reference with page_get_anon_vma() delays freeing of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) * anon_vma until the end of migration. File cache pages are no problem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) * because they are protected by the page lock during migration, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) * only anonymous pages need this care.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) * Only page_get_anon_vma() understands the subtleties of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) * getting a hold on an anon_vma from outside one of its mms.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) * But if we cannot get anon_vma, then we won't need it anyway,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) * because that implies that the anon page is no longer mapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) * (and cannot be remapped so long as we hold the page lock).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) if (PageAnon(page) && !PageKsm(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) anon_vma = page_get_anon_vma(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) * Block others from accessing the new page when we get around to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) * establishing additional references. We are usually the only one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) * holding a reference to newpage at this point. We used to have a BUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) * here if trylock_page(newpage) fails, but would like to allow for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) * cases where there might be a race with the previous use of newpage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) * This is much like races on refcount of oldpage: just don't BUG().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) if (unlikely(!trylock_page(newpage)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) if (unlikely(!is_lru)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) rc = move_to_new_page(newpage, page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) goto out_unlock_both;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) * Corner case handling:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) * 1. When a new swap-cache page is read in, it is added to the LRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) * and treated as swapcache but it has no rmap yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) * Calling try_to_unmap() against a page->mapping==NULL page will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) * trigger a BUG. So handle it here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) * 2. An orphaned page (see truncate_complete_page) might have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) * fs-private metadata. The page can be picked up due to memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * offlining. Everywhere else except page reclaim, the page is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) * invisible to the VM, so the page cannot be migrated. Try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) * free the metadata so that the page itself can be freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) if (!page->mapping) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) VM_BUG_ON_PAGE(PageAnon(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (page_has_private(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) try_to_free_buffers(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) goto out_unlock_both;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) } else if (page_mapped(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) /* Establish migration ptes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) page_was_mapped = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
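/*
* Only move the page once no user mappings remain (either it was never
* mapped or try_to_unmap() replaced its mappings with migration entries).
*/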
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) if (!page_mapped(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) rc = move_to_new_page(newpage, page, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113)
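/*
* Restore the PTEs: point them at the new page on success, or back at
* the old page if migration failed.
*/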
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) if (page_was_mapped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) remove_migration_ptes(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) rc == MIGRATEPAGE_SUCCESS ? newpage : page, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) out_unlock_both:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) unlock_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) /* Drop an anon_vma reference if we took one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) if (anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) put_anon_vma(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) * If migration was successful, drop the reference we took on newpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) * this will not free the page because the new page owner holds its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) * own reference. If it is an LRU page, also put it back on the LRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * list here. Use the old state of the isolated source page to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * determine whether we migrated an LRU page. newpage was already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * unlocked and possibly modified by its owner - don't rely on its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) * state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) if (rc == MIGRATEPAGE_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) if (unlikely(!is_lru))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) put_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) putback_lru_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) * Obtain the lock on page, remove all ptes and migrate the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) * to the newly allocated page in newpage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) static int unmap_and_move(new_page_t get_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) free_page_t put_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) unsigned long private, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) int force, enum migrate_mode mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) enum migrate_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) int rc = MIGRATEPAGE_SUCCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) struct page *newpage = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) if (!thp_migration_supported() && PageTransHuge(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) if (page_count(page) == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) /* page was freed from under us. So we are done. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) ClearPageActive(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) ClearPageUnevictable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) if (unlikely(__PageMovable(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) if (!PageMovable(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) ClearPageIsolated(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) newpage = get_new_page(page, private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) if (!newpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) rc = __unmap_and_move(page, newpage, force, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) if (rc == MIGRATEPAGE_SUCCESS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) set_page_owner_migrate_reason(newpage, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) if (rc != -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) * A page that has been migrated has all references
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) * removed and will be freed. A page that has not been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) * migrated will have kept its references and be restored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) * Compaction can also migrate non-LRU pages, which are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) * not accounted in NR_ISOLATED_*. They can be recognized
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) * as __PageMovable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) if (likely(!__PageMovable(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) page_is_file_lru(page), -thp_nr_pages(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) * If migration was successful, release the reference grabbed during
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) * isolation. Otherwise, restore the page to the right list unless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * we want to retry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) if (rc == MIGRATEPAGE_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) if (reason != MR_MEMORY_FAILURE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) * We release the page in page_handle_poison.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) if (rc != -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) if (likely(!__PageMovable(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) goto put_new;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) if (PageMovable(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) putback_movable_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) ClearPageIsolated(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) put_new:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) if (put_new_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) put_new_page(newpage, private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) put_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) * Counterpart of unmap_and_move() for hugepage migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) * This function doesn't wait for the completion of hugepage I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) * because there is no race between I/O and migration for hugepage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) * Note that currently hugepage I/O occurs only in direct I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) * where no lock is held and PG_writeback is irrelevant,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) * and the writeback status of all subpages is counted in the reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) * count of the head page (i.e. if all subpages of a 2MB hugepage are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) * under direct I/O, the reference count of the head page is 512 and a bit more.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) * This means that when we try to migrate a hugepage whose subpages are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) * doing direct I/O, some references remain after try_to_unmap() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) * hugepage migration fails without data corruption.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) * There is also no race when direct I/O is issued on the page under migration,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) * because then pte is replaced with migration swap entry and direct I/O code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) * will wait in the page fault for migration to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) static int unmap_and_move_huge_page(new_page_t get_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) free_page_t put_new_page, unsigned long private,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) struct page *hpage, int force,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) enum migrate_mode mode, int reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) int rc = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) int page_was_mapped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) struct page *new_hpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) struct anon_vma *anon_vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) struct address_space *mapping = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) * Migratability of hugepages depends on the architecture and their size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) * This check is necessary because some callers of hugepage migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) * like soft offline and memory hotremove don't walk through page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) * tables or check whether the hugepage is pmd-based or not before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) * kicking migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) if (!hugepage_migration_supported(page_hstate(hpage))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) putback_active_hugepage(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) return -ENOSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) new_hpage = get_new_page(hpage, private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) if (!new_hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281)
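/*
* Only block on the page lock when the caller insists (force) and the
* migration mode allows a full synchronous wait.
*/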
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) if (!trylock_page(hpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) if (!force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) switch (mode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) case MIGRATE_SYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) case MIGRATE_SYNC_NO_COPY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) lock_page(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) * Check for pages which are in the process of being freed. Without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) * page_mapping() set, the hugetlbfs-specific move page routine will not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) * be called and we could leak usage counts for subpools.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) if (page_private(hpage) && !page_mapping(hpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) rc = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) if (PageAnon(hpage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) anon_vma = page_get_anon_vma(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) if (unlikely(!trylock_page(new_hpage)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) goto put_anon;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) if (page_mapped(hpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) bool mapping_locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) if (!PageAnon(hpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) * In shared mappings, try_to_unmap could potentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) * call huge_pmd_unshare(). Because of this, take the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) * mapping's i_mmap_rwsem in write mode here and set TTU_RMAP_LOCKED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) * to let lower levels know we have taken the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) mapping = hugetlb_page_mapping_lock_write(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) if (unlikely(!mapping))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) goto unlock_put_anon;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) mapping_locked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) ttu |= TTU_RMAP_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) try_to_unmap(hpage, ttu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) page_was_mapped = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) if (mapping_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) i_mmap_unlock_write(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) if (!page_mapped(hpage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) rc = move_to_new_page(new_hpage, hpage, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) if (page_was_mapped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) remove_migration_ptes(hpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) unlock_put_anon:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) unlock_page(new_hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) put_anon:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) if (anon_vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) put_anon_vma(anon_vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)
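/*
* On success, clear put_new_page so that new_hpage is released via
* putback_active_hugepage() below rather than handed to the caller's
* freeing callback.
*/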
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) if (rc == MIGRATEPAGE_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) move_hugetlb_state(hpage, new_hpage, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) put_new_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) unlock_page(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) if (rc != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) putback_active_hugepage(hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) * If migration was not successful and there's a freeing callback, use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) * it. Otherwise, putback_active_hugepage() will drop the reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) * held on new_hpage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) if (put_new_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) put_new_page(new_hpage, private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) putback_active_hugepage(new_hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) * migrate_pages - migrate the pages specified in a list to newly allocated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) * target pages obtained via get_new_page()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) * @from: The list of pages to be migrated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) * @get_new_page: The function used to allocate free pages to be used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) * as the target of the page migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) * @put_new_page: The function used to free target pages if migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) * fails, or NULL if no special handling is necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) * @private: Private data to be passed on to get_new_page()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) * @mode: The migration mode that specifies the constraints for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) * page migration, if any.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) * @reason: The reason for page migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) * The function returns after 10 attempts or if no pages are movable any more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) * because the list has become empty or no retryable pages remain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) * The caller should call putback_movable_pages() to return pages to the LRU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) * or free list only if ret != 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) * Returns the number of pages that were not migrated, or an error code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) int migrate_pages(struct list_head *from, new_page_t get_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) free_page_t put_new_page, unsigned long private,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) enum migrate_mode mode, int reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) int retry = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) int thp_retry = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) int nr_failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) int nr_succeeded = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) int nr_thp_succeeded = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) int nr_thp_failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) int nr_thp_split = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) int pass = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) bool is_thp = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) struct page *page2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) int swapwrite = current->flags & PF_SWAPWRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) int rc, nr_subpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) trace_mm_migrate_pages_start(mode, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415)
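/*
* PF_SWAPWRITE allows writing to swap from this context; set it for the
* duration of migration and restore the original state before returning.
*/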
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) if (!swapwrite)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) current->flags |= PF_SWAPWRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) retry = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) thp_retry = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) list_for_each_entry_safe(page, page2, from, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) * THP statistics are based on the source huge page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) * Capture required information that might get lost
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) * during migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) is_thp = PageTransHuge(page) && !PageHuge(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) nr_subpages = thp_nr_pages(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) if (PageHuge(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) rc = unmap_and_move_huge_page(get_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) put_new_page, private, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) pass > 2, mode, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) rc = unmap_and_move(get_new_page, put_new_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) private, page, pass > 2, mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) switch(rc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) case -ENOMEM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) * THP migration might be unsupported or the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) * allocation could've failed, so we should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) * retry the same page with the THP split
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) * into base pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) * Head page is retried immediately and tail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * pages are added to the tail of the list so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * we encounter them after the rest of the list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) * is processed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) if (is_thp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) rc = split_huge_page_to_list(page, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) if (!rc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) list_safe_reset_next(page, page2, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) nr_thp_split++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) nr_thp_failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) nr_failed += nr_subpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) nr_failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) case -EAGAIN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) if (is_thp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) thp_retry++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) retry++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) case MIGRATEPAGE_SUCCESS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) if (is_thp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) nr_thp_succeeded++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) nr_succeeded += nr_subpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) nr_succeeded++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) * Permanent failure (-EBUSY, -ENOSYS, etc.):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) * unlike the -EAGAIN case, the failed page is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) * removed from the migration page list and is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) * retried in the next outer loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) if (is_thp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) nr_thp_failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) nr_failed += nr_subpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) nr_failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) }
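/* Pages still pending retry after the final pass count as failures. */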
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) nr_failed += retry + thp_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) nr_thp_failed += thp_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) rc = nr_failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) count_vm_events(PGMIGRATE_FAIL, nr_failed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) trace_mm_migrate_pages(nr_succeeded, nr_failed, nr_thp_succeeded,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) nr_thp_failed, nr_thp_split, mode, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) if (!swapwrite)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) current->flags &= ~PF_SWAPWRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) EXPORT_SYMBOL_GPL(migrate_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) struct page *alloc_migration_target(struct page *page, unsigned long private)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) struct migration_target_control *mtc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) gfp_t gfp_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) unsigned int order = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) struct page *new_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) int zidx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) mtc = (struct migration_target_control *)private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) gfp_mask = mtc->gfp_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) nid = mtc->nid;
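/* Fall back to the source page's node if the caller did not pick one. */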
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) if (nid == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) nid = page_to_nid(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) if (PageHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) struct hstate *h = page_hstate(compound_head(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) if (PageTransHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) * clear __GFP_RECLAIM to make the migration callback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) * consistent with regular THP allocations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) gfp_mask &= ~__GFP_RECLAIM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) gfp_mask |= GFP_TRANSHUGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) order = HPAGE_PMD_ORDER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) }
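/*
* Allow the target page to come from highmem if the source page sits
* in a highmem or ZONE_MOVABLE zone.
*/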
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) zidx = zone_idx(page_zone(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) gfp_mask |= __GFP_HIGHMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) new_page = __alloc_pages_nodemask(gfp_mask, order, nid, mtc->nmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) if (new_page && PageTransHuge(new_page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) prep_transhuge_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) return new_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) static int store_status(int __user *status, int start, int value, int nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) while (nr-- > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) if (put_user(value, status + start))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) start++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) static int do_move_pages_to_node(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) struct list_head *pagelist, int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) int err;
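/*
* __GFP_THISNODE confines the allocation to the requested node, so
* pages are never silently placed on a different node.
*/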
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) struct migration_target_control mtc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) .nid = node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) err = migrate_pages(pagelist, alloc_migration_target, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) putback_movable_pages(pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) * Resolves the given address to a struct page, isolates it from the LRU and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) * puts it on the given pagelist.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) * Returns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) * errno - if the page cannot be found/isolated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) * 0 - when it doesn't have to be migrated because it is already on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) * target node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) * 1 - when it has been queued
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) int node, struct list_head *pagelist, bool migrate_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) unsigned int follflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) err = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) vma = find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) if (!vma || addr < vma->vm_start || !vma_migratable(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) /* FOLL_DUMP to ignore special (like zero) pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) follflags = FOLL_GET | FOLL_DUMP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) page = follow_page(vma, addr, follflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) err = PTR_ERR(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) if (IS_ERR(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) err = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) if (page_to_nid(page) == node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) goto out_putpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) err = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) if (page_mapcount(page) > 1 && !migrate_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) goto out_putpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) if (PageHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) if (PageHead(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) isolate_huge_page(page, pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) struct page *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) head = compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) err = isolate_lru_page(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) goto out_putpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) err = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) list_add_tail(&head->lru, pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) mod_node_page_state(page_pgdat(head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) NR_ISOLATED_ANON + page_is_file_lru(head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) thp_nr_pages(head));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) out_putpage:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) /*
	 * Drop the reference taken by follow_page().  If the page was
	 * isolated, the extra reference taken by isolate_lru_page() (or
	 * isolate_huge_page()) keeps it alive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) put_user_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) static int move_pages_and_store_status(struct mm_struct *mm, int node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) struct list_head *pagelist, int __user *status,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) int start, int i, unsigned long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) if (list_empty(pagelist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) err = do_move_pages_to_node(mm, pagelist, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) /*
			 * A positive err is the number of pages that failed
			 * to migrate.  Since we are going to abort and return
			 * the number of non-migrated pages, we need to
			 * include the rest of the nr_pages that have not been
			 * attempted as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) */
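			/*
			 * Illustrative example (hypothetical numbers): with
			 * nr_pages == 8 and i == 5, a return of 2 from
			 * do_move_pages_to_node() means two queued pages
			 * failed; the 8 - 5 - 1 = 2 entries after index i
			 * were never attempted, so 2 + 2 = 4 is returned.
			 */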
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) if (err > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) err += nr_pages - i - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) return store_status(status, start, node, i - start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) /*
 * Migrate an array of page addresses to their requested nodes and fill in
 * the corresponding array of status values.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) const void __user * __user *pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) const int __user *nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) int __user *status, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) int current_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) LIST_HEAD(pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) int start, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) int err = 0, err1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) lru_cache_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) for (i = start = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) const void __user *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) err = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) if (get_user(p, pages + i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) if (get_user(node, nodes + i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) addr = (unsigned long)untagged_addr(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) err = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) if (node < 0 || node >= MAX_NUMNODES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) if (!node_state(node, N_MEMORY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) err = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) if (!node_isset(node, task_nodes))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733)
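		/*
		 * Batch requests for the same target node: pages are
		 * collected on pagelist and migrated in one go when the
		 * requested node changes, when a non-queued result has to
		 * be reported, or when the input is exhausted.
		 */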
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) if (current_node == NUMA_NO_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) current_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) start = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) } else if (node != current_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) err = move_pages_and_store_status(mm, current_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) &pagelist, status, start, i, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) start = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) current_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) * Errors in the page lookup or isolation are not fatal and we simply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) * report them via status
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) err = add_page_for_migration(mm, addr, current_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) &pagelist, flags & MPOL_MF_MOVE_ALL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) if (err > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) /* The page is successfully queued for migration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) * If the page is already on the target node (!err), store the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) * node, otherwise, store the err.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) err = store_status(status, i, err ? : current_node, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) goto out_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) err = move_pages_and_store_status(mm, current_node, &pagelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) status, start, i, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) current_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) out_flush:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) /* Make sure we do not overwrite the existing error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) err1 = move_pages_and_store_status(mm, current_node, &pagelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) status, start, i, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) if (err >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) err = err1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) lru_cache_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) /*
 * Determine the nodes of an array of pages and store them in an array of
 * status values.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) const void __user **pages, int *status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) unsigned long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) unsigned long addr = (unsigned long)(*pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) int err = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) vma = find_vma(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) if (!vma || addr < vma->vm_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) goto set_status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) /* FOLL_DUMP to ignore special (like zero) pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) page = follow_page(vma, addr, FOLL_DUMP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) err = PTR_ERR(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) if (IS_ERR(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) goto set_status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) err = page ? page_to_nid(page) : -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) set_status:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) *status = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) status++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) /*
 * Determine the nodes of a user array of pages and store them in
 * a user array of status values.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) const void __user * __user *pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) int __user *status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) {
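	/*
	 * Process the user arrays in fixed-size chunks so that the on-stack
	 * staging buffers below stay small.
	 */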
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) #define DO_PAGES_STAT_CHUNK_NR 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) int chunk_status[DO_PAGES_STAT_CHUNK_NR];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) while (nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) unsigned long chunk_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) chunk_nr = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) chunk_nr = DO_PAGES_STAT_CHUNK_NR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) pages += chunk_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) status += chunk_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) nr_pages -= chunk_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) return nr_pages ? -EFAULT : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) struct mm_struct *mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) /*
 * There is no need to check if the current process has the right to modify
 * the specified process when they are the same.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) if (!pid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) mmget(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) *mem_nodes = cpuset_mems_allowed(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) return current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) /* Find the mm_struct */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) task = find_task_by_vpid(pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) if (!task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) return ERR_PTR(-ESRCH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) get_task_struct(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) * Check if this process has the right to modify the specified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) * process. Use the regular "ptrace_may_access()" checks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) mm = ERR_PTR(-EPERM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) mm = ERR_PTR(security_task_movememory(task));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) if (IS_ERR(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) *mem_nodes = cpuset_mems_allowed(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) mm = get_task_mm(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) put_task_struct(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) if (!mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) mm = ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) return mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) /*
 * Move a list of pages in the address space of the process identified by
 * @pid (the currently executing process when @pid is 0).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) */
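/*
 * Illustrative userspace usage of the move_pages(2) syscall handled below.
 * This is only a sketch, assuming libnuma's <numaif.h> wrapper, a 4 KiB page
 * size and an existing node 1; it is not part of the kernel build.
 *
 *	#include <numaif.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *
 *	long example(void)
 *	{
 *		void *buf = aligned_alloc(4096, 2 * 4096);
 *		void *pages[2] = { buf, (char *)buf + 4096 };
 *		int nodes[2] = { 1, 1 };	// requested target nodes
 *		int status[2];
 *
 *		memset(buf, 0, 2 * 4096);	// fault the pages in first
 *
 *		// pid == 0 acts on the calling process.  A positive return
 *		// value is the number of pages that were not migrated, a
 *		// negative one indicates an error.  status[i] receives the
 *		// node each page ended up on, or a negative errno value.
 *		return move_pages(0, 2, pages, nodes, status, MPOL_MF_MOVE);
 *	}
 *
 * Passing nodes == NULL instead reports the current node of each page via
 * status[] without migrating anything.
 */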
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) const void __user * __user *pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) const int __user *nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) int __user *status, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) struct mm_struct *mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) nodemask_t task_nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) /* Check flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) mm = find_mm_struct(pid, &task_nodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) if (IS_ERR(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) return PTR_ERR(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) if (nodes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) err = do_pages_move(mm, task_nodes, nr_pages, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) nodes, status, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) err = do_pages_stat(mm, nr_pages, pages, status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) mmput(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) const void __user * __user *, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) const int __user *, nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) int __user *, status, int, flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) compat_uptr_t __user *, pages32,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) const int __user *, nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) int __user *, status,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) int, flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) const void __user * __user *pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) pages = compat_alloc_user_space(nr_pages * sizeof(void *));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) compat_uptr_t p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) if (get_user(p, pages32 + i) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) put_user(compat_ptr(p), pages + i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) #endif /* CONFIG_COMPAT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) #ifdef CONFIG_NUMA_BALANCING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) /*
 * Returns true if this is a safe migration target node for misplaced NUMA
 * pages. Currently it only checks the watermarks, which is a crude check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) unsigned long nr_migrate_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) int z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) for (z = pgdat->nr_zones - 1; z >= 0; z--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) struct zone *zone = pgdat->node_zones + z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981)
		/* Avoid waking kswapd by allocating nr_migrate_pages pages. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) if (!zone_watermark_ok(zone, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) high_wmark_pages(zone) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) nr_migrate_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) ZONE_MOVABLE, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992)
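/*
 * Allocation callback for migrate_pages() below: allocate strictly on the
 * requested node, without reclaim or retries, so that a busy target node
 * simply makes the migration fail instead of causing memory pressure there.
 */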
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) static struct page *alloc_misplaced_dst_page(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) unsigned long data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) int nid = (int) data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) struct page *newpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) newpage = __alloc_pages_node(nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) (GFP_HIGHUSER_MOVABLE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) __GFP_THISNODE | __GFP_NOMEMALLOC |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) __GFP_NORETRY | __GFP_NOWARN) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) ~__GFP_RECLAIM, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) return newpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) int page_lru;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) /* Avoid migrating to a node that is nearly full */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) if (isolate_lru_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) * migrate_misplaced_transhuge_page() skips page migration's usual
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) * check on page_count(), so we must do it here, now that the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) * has been isolated: a GUP pin, or any other pin, prevents migration.
	 * The expected page count is 3: one for the page's mapcount, one for
	 * the caller's pin, and one for the reference taken by
	 * isolate_lru_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) if (PageTransHuge(page) && page_count(page) != 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) page_lru = page_is_file_lru(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) thp_nr_pages(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) * Isolating the page has taken another reference, so the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) * caller's reference can be safely dropped without the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) * disappearing underneath us during migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045)
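/*
 * The page of a THP that is being migrated is kept locked for the duration
 * of migrate_misplaced_transhuge_page(), so a locked pmd-mapped page
 * effectively serves as the "migration in progress" indicator here.
 */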
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) bool pmd_trans_migrating(pmd_t pmd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) struct page *page = pmd_page(pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) return PageLocked(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) * Attempt to migrate a misplaced page to the specified destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) * node. Caller is expected to have an elevated reference count on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) * the page that will be dropped by this function before returning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) */
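/*
 * As implemented below, the return value is 1 if the page was isolated and
 * then fully migrated, and 0 otherwise.
 */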
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) int migrate_misplaced_page(struct page *page, struct vm_fault *vmf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) pg_data_t *pgdat = NODE_DATA(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) int isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) int nr_remaining;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) LIST_HEAD(migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) * Don't migrate file pages that are mapped in multiple processes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) * with execute permissions as they are probably shared libraries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) if (page_mapcount(page) != 1 && page_is_file_lru(page) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) (vmf->vma_flags & VM_EXEC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) /*
	 * Also do not migrate dirty pages, as not all filesystems can move
	 * dirty pages in MIGRATE_ASYNC mode, so trying would be a waste of
	 * cycles.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) if (page_is_file_lru(page) && PageDirty(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) isolated = numamigrate_isolate_page(pgdat, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) if (!isolated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) list_add(&page->lru, &migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) NULL, node, MIGRATE_ASYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) MR_NUMA_MISPLACED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) if (nr_remaining) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) if (!list_empty(&migratepages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) dec_node_page_state(page, NR_ISOLATED_ANON +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) page_is_file_lru(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) count_vm_numa_event(NUMA_PAGE_MIGRATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) BUG_ON(!list_empty(&migratepages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) return isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) #endif /* CONFIG_NUMA_BALANCING */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) /*
 * Migrates a THP to a given target node.  The page must be locked and is
 * unlocked before returning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) */
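/*
 * Rough flow of the implementation below: allocate a THP on the target
 * node, isolate and copy the old page, then replace the pmd under the page
 * table lock; if anything fails, the old mapping is restored (when it is
 * still present) and the caller's page reference is dropped.
 */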
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) int migrate_misplaced_transhuge_page(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) pmd_t *pmd, pmd_t entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) unsigned long address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) struct page *page, int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) spinlock_t *ptl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) pg_data_t *pgdat = NODE_DATA(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) int isolated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) struct page *new_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) int page_lru = page_is_file_lru(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) unsigned long start = address & HPAGE_PMD_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) new_page = alloc_pages_node(node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) HPAGE_PMD_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) if (!new_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) goto out_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) prep_transhuge_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) isolated = numamigrate_isolate_page(pgdat, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) if (!isolated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) put_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) goto out_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) /* Prepare a page as a migration target */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) __SetPageLocked(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) if (PageSwapBacked(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) __SetPageSwapBacked(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) /* anon mapping, we can simply copy page->mapping to the new page: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) new_page->mapping = page->mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) new_page->index = page->index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) /* flush the cache before copying using the kernel virtual address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) flush_cache_range(vma, start, start + HPAGE_PMD_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) migrate_page_copy(new_page, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) WARN_ON(PageLRU(new_page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) /* Recheck the target PMD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) ptl = pmd_lock(mm, pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) /* Reverse changes made by migrate_page_copy() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) if (TestClearPageActive(new_page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) SetPageActive(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) if (TestClearPageUnevictable(new_page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) SetPageUnevictable(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) unlock_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) put_page(new_page); /* Free it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164)
		/* Retake the caller's reference and put the page back on the LRU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) mod_node_page_state(page_pgdat(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) entry = mk_huge_pmd(new_page, vma->vm_page_prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) * Overwrite the old entry under pagetable lock and establish
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) * the new PTE. Any parallel GUP will either observe the old
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) * page blocking on the page lock, block on the page table
	 * lock or observe the new page.  The SetPageUptodate on the
	 * new page and page_add_anon_rmap() guarantee the copy is
	 * visible before the pagetable update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) page_add_anon_rmap(new_page, vma, start, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) * At this point the pmd is numa/protnone (i.e. non present) and the TLB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) * has already been flushed globally. So no TLB can be currently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) * caching this non present pmd mapping. There's no need to clear the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) * pmd before doing set_pmd_at(), nor to flush the TLB after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) * set_pmd_at(). Clearing the pmd here would introduce a race
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) * condition against MADV_DONTNEED, because MADV_DONTNEED only holds the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) * mmap_lock for reading. If the pmd is set to NULL at any given time,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) * MADV_DONTNEED won't wait on the pmd lock and it'll skip clearing this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) * pmd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) set_pmd_at(mm, start, pmd, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) update_mmu_cache_pmd(vma, address, &entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) page_ref_unfreeze(page, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) mlock_migrate_page(new_page, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) page_remove_rmap(page, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) /* Take an "isolate" reference and put new page on the LRU. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) get_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) putback_lru_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) unlock_page(new_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) put_page(page); /* Drop the rmap reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) put_page(page); /* Drop the LRU isolation reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) mod_node_page_state(page_pgdat(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) NR_ISOLATED_ANON + page_lru,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) -HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) return isolated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) out_fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) ptl = pmd_lock(mm, pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) if (pmd_same(*pmd, entry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) entry = pmd_modify(entry, vma->vm_page_prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) set_pmd_at(mm, start, pmd, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) update_mmu_cache_pmd(vma, address, &entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) }
#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) #endif /* CONFIG_NUMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) #ifdef CONFIG_DEVICE_PRIVATE
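/*
 * A pte hole in an anonymous VMA is still a migration candidate: its src[]
 * entry is marked MIGRATE_PFN_MIGRATE with no backing pfn, so the caller
 * can provide a freshly allocated destination page for it.  Holes in
 * non-anonymous VMAs are skipped.
 */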
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) static int migrate_vma_collect_hole(unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) __always_unused int depth,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) struct migrate_vma *migrate = walk->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) /* Only allow populating anonymous memory. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) if (!vma_is_anonymous(walk->vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) for (addr = start; addr < end; addr += PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) migrate->src[migrate->npages] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) migrate->dst[migrate->npages] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) migrate->npages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) for (addr = start; addr < end; addr += PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) migrate->dst[migrate->npages] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) migrate->npages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) migrate->cpages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271)
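/*
 * Mark every page in the range as not migratable: src[] entries without
 * MIGRATE_PFN_MIGRATE are left alone by the rest of the migrate_vma
 * machinery.
 */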
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) static int migrate_vma_collect_skip(unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) struct migrate_vma *migrate = walk->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) for (addr = start; addr < end; addr += PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) migrate->dst[migrate->npages] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) migrate->src[migrate->npages++] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286)
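/*
 * Walk one pmd range and fill migrate->src[]/dst[] for every pte in it.
 * Transparent huge pages are split first (the huge zero page via
 * split_huge_pmd(), other THPs via split_huge_page()); for each remaining
 * pte, if the page can be locked here, it is unmapped and a migration entry
 * is installed right away.
 */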
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) static int migrate_vma_collect_pmd(pmd_t *pmdp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) struct mm_walk *walk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) struct migrate_vma *migrate = walk->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) struct vm_area_struct *vma = walk->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) unsigned long addr = start, unmapped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) spinlock_t *ptl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) pte_t *ptep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) if (pmd_none(*pmdp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) return migrate_vma_collect_hole(start, end, -1, walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) if (pmd_trans_huge(*pmdp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) ptl = pmd_lock(mm, pmdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) if (unlikely(!pmd_trans_huge(*pmdp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) page = pmd_page(*pmdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) if (is_huge_zero_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) split_huge_pmd(vma, pmdp, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) if (pmd_trans_unstable(pmdp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) return migrate_vma_collect_skip(start, end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) if (unlikely(!trylock_page(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) return migrate_vma_collect_skip(start, end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) ret = split_huge_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) return migrate_vma_collect_skip(start, end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) if (pmd_none(*pmdp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) return migrate_vma_collect_hole(start, end, -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) if (unlikely(pmd_bad(*pmdp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) return migrate_vma_collect_skip(start, end, walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) arch_enter_lazy_mmu_mode();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) for (; addr < end; addr += PAGE_SIZE, ptep++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) unsigned long mpfn = 0, pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) swp_entry_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) pte_t pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) pte = *ptep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) if (pte_none(pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) if (vma_is_anonymous(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) mpfn = MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) migrate->cpages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) if (!pte_present(pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) /*
			 * Only care about unaddressable device page special
			 * page table entries.  Other special swap entries are
			 * not migratable, and regular swapped out pages are
			 * ignored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) entry = pte_to_swp_entry(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) if (!is_device_private_entry(entry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) page = device_private_entry_to_page(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) if (!(migrate->flags &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) page->pgmap->owner != migrate->pgmap_owner)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) mpfn = migrate_pfn(page_to_pfn(page)) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) if (is_write_device_private_entry(entry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) mpfn |= MIGRATE_PFN_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) pfn = pte_pfn(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) if (is_zero_pfn(pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) mpfn = MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) migrate->cpages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) page = vm_normal_page(migrate->vma, addr, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) /* FIXME support THP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) if (!page || !page->mapping || PageTransCompound(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) mpfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) /*
		 * By getting a reference on the page we pin it and that blocks
		 * any kind of migration.  A side effect is that it "freezes"
		 * the pte.
		 *
		 * We drop this reference after isolating the page from the LRU
		 * for non-device pages (device pages are not on the LRU and
		 * thus can't be dropped from it).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) migrate->cpages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) * Optimize for the common case where page is only mapped once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) * in one process. If we can lock the page, then we can safely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) * set up a special migration page table entry now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) if (trylock_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) pte_t swp_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) mpfn |= MIGRATE_PFN_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) ptep_get_and_clear(mm, addr, ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) /* Set up a special migration page table entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) entry = make_migration_entry(page, mpfn &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) MIGRATE_PFN_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) swp_pte = swp_entry_to_pte(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) if (pte_present(pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) if (pte_soft_dirty(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) swp_pte = pte_swp_mksoft_dirty(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) if (pte_uffd_wp(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) swp_pte = pte_swp_mkuffd_wp(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) if (pte_swp_soft_dirty(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) swp_pte = pte_swp_mksoft_dirty(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) if (pte_swp_uffd_wp(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) swp_pte = pte_swp_mkuffd_wp(swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) set_pte_at(mm, addr, ptep, swp_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) * This is like a regular unmap: we remove the rmap and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) * drop the page refcount. The page won't be freed, as we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) * took a reference just above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) page_remove_rmap(page, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) if (pte_present(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) unmapped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) next:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) migrate->dst[migrate->npages] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) migrate->src[migrate->npages++] = mpfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) arch_leave_lazy_mmu_mode();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) pte_unmap_unlock(ptep - 1, ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) /* Only flush the TLB if we actually modified any entries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) if (unmapped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) flush_tlb_range(walk->vma, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) static const struct mm_walk_ops migrate_vma_walk_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) .pmd_entry = migrate_vma_collect_pmd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) .pte_hole = migrate_vma_collect_hole,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) };
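/*
 * Illustrative sketch: each entry that the walk above stores in migrate->src[]
 * is a pfn with MIGRATE_PFN_* flag bits packed into its low bits by
 * migrate_pfn(). Using only helpers that already appear in this file, an
 * entry is produced and later decoded roughly like this:
 *
 *	struct page *page;		// a page found by the walk
 *	struct page *decoded;
 *	unsigned long mpfn;
 *	bool writable;
 *
 *	// Encode a migratable, writable page.
 *	mpfn = migrate_pfn(page_to_pfn(page)) |
 *	       MIGRATE_PFN_MIGRATE | MIGRATE_PFN_WRITE;
 *
 *	// Decode. migrate_pfn_to_page() returns NULL for entries without a
 *	// valid pfn (e.g. the zero-pfn case above, where only
 *	// MIGRATE_PFN_MIGRATE is set).
 *	decoded = migrate_pfn_to_page(mpfn);
 *	writable = mpfn & MIGRATE_PFN_WRITE;
 *
 * A src[] entry of 0 means no migratable page was found for that address.
 */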
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) * migrate_vma_collect() - collect pages over a range of virtual addresses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) * @migrate: migrate struct containing all migration information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) * This will walk the CPU page table. For each virtual address backed by a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) * valid page, it updates the src array and takes a reference on the page, in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) * order to pin the page until we lock it and unmap it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) static void migrate_vma_collect(struct migrate_vma *migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) struct mmu_notifier_range range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) * Note that the pgmap_owner is passed to the mmu notifier callback so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) * that the registered device driver can skip invalidating device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) * private page mappings that won't be migrated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) mmu_notifier_range_init_migrate(&range, 0, migrate->vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) migrate->vma->vm_mm, migrate->start, migrate->end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) migrate->pgmap_owner);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) mmu_notifier_invalidate_range_start(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) &migrate_vma_walk_ops, migrate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) mmu_notifier_invalidate_range_end(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) * migrate_vma_check_page() - check if page is pinned or not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) * @page: struct page to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) * Pinned pages cannot be migrated. This is the same test as in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) * migrate_page_move_mapping(), except that here we allow migration of a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) * ZONE_DEVICE page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) static bool migrate_vma_check_page(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) * One extra ref because caller holds an extra reference, either from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) * isolate_lru_page() for a regular page, or migrate_vma_collect() for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) * a device page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) int extra = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) * FIXME support THP (transparent huge page), it is a bit more complex to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) * check them than regular pages, because they can be mapped with a pmd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) * or with a pte (split pte mapping).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) if (PageCompound(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) /* Pages from ZONE_DEVICE have one extra reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) if (is_zone_device_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) * Private pages can never be pinned as they have no valid pte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) * and GUP will fail for them. Yet if there is a pending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) * migration, a thread might wait on the pte migration entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) * and bump the page reference count. Sadly there is no way to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) * differentiate a regular pin from a migration wait. Hence, to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) * avoid two racing threads (one stopping migration because the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) * other is waiting on the pte migration entry) looping forever
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) * while migrating back to the CPU, we always return true here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) * FIXME the proper solution is to rework migration_entry_wait() so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) * that it does not need to take a reference on the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) return is_device_private_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) /* For file-backed pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) if (page_mapping(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) extra += 1 + page_has_private(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) if ((page_count(page) - extra) > page_mapcount(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) }
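/*
 * Worked example for the check above (illustrative): consider an anonymous
 * page mapped by exactly one pte, at the point where migrate_vma_prepare()
 * calls this function, i.e. after the page has been isolated from the lru
 * and the collect-time reference has been dropped. Then:
 *
 *	page_mapcount(page) == 1	// the single pte mapping
 *	page_count(page)    == 2	// the mapping + the isolation reference
 *	extra               == 1	// anonymous, so page_mapping() is NULL
 *
 *	page_count() - extra == 1 == page_mapcount()	-> not pinned
 *
 * Any additional reference, for example one taken by get_user_pages(),
 * raises page_count() without raising page_mapcount(), so the difference
 * exceeds the mapcount and the page is reported as pinned.
 */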
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) * migrate_vma_prepare() - lock pages and isolate them from the lru
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) * @migrate: migrate struct containing all migration information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) * This locks pages that have been collected by migrate_vma_collect(). Once each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) * page is locked it is isolated from the lru (for non-device pages). Finally,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) * the ref taken by migrate_vma_collect() is dropped, as locked pages cannot be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) * migrated by concurrent kernel threads.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) static void migrate_vma_prepare(struct migrate_vma *migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) const unsigned long npages = migrate->npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) const unsigned long start = migrate->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) unsigned long addr, i, restore = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) bool allow_drain = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) lru_add_drain();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) for (i = 0; (i < npages) && migrate->cpages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) bool remap = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) * Because we are migrating several pages there can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) * a deadlock between 2 concurrent migrations where each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) * is waiting on the other's page lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) * Make migrate_vma() a best-effort thing and back off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) * for any page we cannot lock right away.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) if (!trylock_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) migrate->src[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) remap = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) migrate->src[i] |= MIGRATE_PFN_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) /* ZONE_DEVICE pages are not on LRU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) if (!is_zone_device_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) if (!PageLRU(page) && allow_drain) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) /* Drain CPU's pagevec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) lru_add_drain_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) allow_drain = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) if (isolate_lru_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) if (remap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) restore++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) migrate->src[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) /* Drop the reference we took in collect */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) if (!migrate_vma_check_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) if (remap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) restore++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) if (!is_zone_device_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) migrate->src[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) if (!is_zone_device_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) remove_migration_pte(page, migrate->vma, addr, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) migrate->src[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) restore--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) * migrate_vma_unmap() - replace page mapping with special migration pte entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) * @migrate: migrate struct containing all migration information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) * Replace the page mapping (CPU page table pte) with a special migration pte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) * entry and check again whether the page has been pinned. Pinned pages are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) * restored because we cannot migrate them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) * This is the last step before we call the device driver callback to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) * destination memory and copy the contents of the original page to the new page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) static void migrate_vma_unmap(struct migrate_vma *migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) const unsigned long npages = migrate->npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) const unsigned long start = migrate->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) unsigned long addr, i, restore = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) for (i = 0; i < npages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) if (page_mapped(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) try_to_unmap(page, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) if (page_mapped(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) goto restore;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) if (migrate_vma_check_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) restore:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) migrate->cpages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) restore++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) remove_migration_ptes(page, page, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) migrate->src[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) restore--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) if (is_zone_device_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) * migrate_vma_setup() - prepare to migrate a range of memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) * @args: contains the vma, start, and pfns arrays for the migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) * Returns: negative errno on failures, 0 when 0 or more pages were migrated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) * without an error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) * Prepare to migrate a range of virtual addresses by collecting all the pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) * backing each virtual address in the range, saving them inside the src array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) * Then lock those pages and unmap them. Once the pages are locked and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) * unmapped, check whether each page is pinned or not. Pages that aren't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) * corresponding src array entry. Pages that are pinned are then restored by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) * remapping and unlocking them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) * The caller should then allocate destination memory and copy source memory to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) * flag set). Once these are allocated and copied, the caller must update each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) * corresponding entry in the dst array with the pfn value of the destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) * (destination pages must have their struct pages locked, via lock_page()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) * Note that the caller does not have to migrate all the pages that are marked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) * device memory to system memory. If the caller cannot migrate a device page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) * back to system memory, then it must return VM_FAULT_SIGBUS, which has severe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) * consequences for the userspace process, so it must be avoided if at all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) * possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) * For empty entries inside the CPU page table (pte_none() or pmd_none() is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) * true) we do set the MIGRATE_PFN_MIGRATE flag inside the corresponding source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) * array entry, thus allowing the caller to allocate device memory for those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) * unbacked virtual addresses. For this the caller simply has to allocate device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) * memory and properly set the destination entry like for regular migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) * Note that this can still fail, and thus the device driver must check if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) * migration was successful for those entries after calling migrate_vma_pages(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) * just like for regular migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) * After that, the caller must call migrate_vma_pages() to go over each entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) * set. If the corresponding entry in the dst array has the MIGRATE_PFN_VALID
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) * flag set, then migrate_vma_pages() migrates the struct page information from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) * the source struct page to the destination struct page. If it fails to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) * migrate the struct page information, then it clears the MIGRATE_PFN_MIGRATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) * flag in the src array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) * At this point all successfully migrated pages have an entry in the src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) * array entry with MIGRATE_PFN_VALID flag set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) * Once migrate_vma_pages() returns the caller may inspect which pages were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) * successfully migrated, and which were not. Successfully migrated pages will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) * It is safe to update the device page table after migrate_vma_pages() because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) * both the destination and source pages are still locked, and the mmap_lock is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) * held in read mode (hence no one can unmap the range being migrated).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) * Once the caller is done cleaning things up and updating its page table (if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) * chose to do so, this is not an obligation) it finally calls
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) * migrate_vma_finalize() to update the CPU page table to point to new pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) * for successfully migrated pages or otherwise restore the CPU page table to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) * point to the original source pages (a condensed usage sketch follows this function).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) int migrate_vma_setup(struct migrate_vma *args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) args->start &= PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) args->end &= PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) if (!args->vma || is_vm_hugetlb_page(args->vma) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) if (nr_pages <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) if (args->start < args->vma->vm_start ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) args->start >= args->vma->vm_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) if (!args->src || !args->dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) memset(args->src, 0, sizeof(*args->src) * nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) args->cpages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) args->npages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) migrate_vma_collect(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) if (args->cpages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) migrate_vma_prepare(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) if (args->cpages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) migrate_vma_unmap(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) * At this point pages are locked and unmapped, and thus they have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) * stable content and can safely be copied to destination memory that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) * is allocated by the drivers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) EXPORT_SYMBOL(migrate_vma_setup);
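/*
 * Illustrative sketch of the calling sequence documented above, roughly as a
 * device driver might implement it. alloc_device_dst_page() and
 * copy_to_dst_page() are hypothetical driver-side helpers (not kernel APIs),
 * and the range is assumed to span at most 64 pages:
 *
 *	static int example_migrate_range(struct vm_area_struct *vma,
 *					 unsigned long start, unsigned long end,
 *					 void *pgmap_owner)
 *	{
 *		unsigned long src[64] = {}, dst[64] = {};
 *		struct migrate_vma args = {
 *			.vma		= vma,
 *			.start		= start,
 *			.end		= end,
 *			.src		= src,
 *			.dst		= dst,
 *			.pgmap_owner	= pgmap_owner,
 *			.flags		= MIGRATE_VMA_SELECT_SYSTEM,
 *		};
 *		unsigned long i;
 *		int ret;
 *
 *		ret = migrate_vma_setup(&args);
 *		if (ret)
 *			return ret;
 *
 *		for (i = 0; i < args.npages; i++) {
 *			struct page *spage = migrate_pfn_to_page(src[i]);
 *			struct page *dpage;
 *
 *			if (!(src[i] & MIGRATE_PFN_MIGRATE))
 *				continue;
 *			dpage = alloc_device_dst_page();
 *			if (!dpage)
 *				continue;
 *			lock_page(dpage);
 *			if (spage)
 *				copy_to_dst_page(dpage, spage);
 *			// a real driver would clear dpage when spage is NULL
 *			dst[i] = migrate_pfn(page_to_pfn(dpage)) |
 *				 MIGRATE_PFN_LOCKED;
 *		}
 *
 *		migrate_vma_pages(&args);
 *		// Entries whose src[i] still has MIGRATE_PFN_MIGRATE set were
 *		// migrated; device page tables can be updated here.
 *		migrate_vma_finalize(&args);
 *		return 0;
 *	}
 */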
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) * This code closely matches the code in:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) * __handle_mm_fault()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) * handle_pte_fault()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) * do_anonymous_page()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) * to map in an anonymous zero page but the struct page will be a ZONE_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) * private page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) static void migrate_vma_insert_page(struct migrate_vma *migrate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) unsigned long *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) unsigned long *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) struct vm_area_struct *vma = migrate->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) bool flush = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) spinlock_t *ptl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) pte_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) pgd_t *pgdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) p4d_t *p4dp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) pud_t *pudp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) pmd_t *pmdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) pte_t *ptep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) /* Only allow populating anonymous memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) if (!vma_is_anonymous(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) pgdp = pgd_offset(mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) p4dp = p4d_alloc(mm, pgdp, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) if (!p4dp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) pudp = pud_alloc(mm, p4dp, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) if (!pudp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) pmdp = pmd_alloc(mm, pudp, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) if (!pmdp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) * Use pte_alloc() instead of pte_alloc_map(). We can't run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) * pte_offset_map() on pmds where a huge pmd might be created
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) * from a different thread.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) * parallel threads are excluded by other means.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) * Here we only have mmap_read_lock(mm).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) if (pte_alloc(mm, pmdp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) /* See the comment in pte_alloc_one_map() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) if (unlikely(pmd_trans_unstable(pmdp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) if (unlikely(anon_vma_prepare(vma)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) * The memory barrier inside __SetPageUptodate makes sure that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) * preceding stores to the page contents become visible before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) * the set_pte_at() write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) __SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) if (is_zone_device_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) if (is_device_private_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) swp_entry_t swp_entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) entry = swp_entry_to_pte(swp_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) * For now we only support migrating to un-addressable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) * device memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) pr_warn_once("Unsupported ZONE_DEVICE page type.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) entry = mk_pte(page, vma->vm_page_prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) if (vma->vm_flags & VM_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) entry = pte_mkwrite(pte_mkdirty(entry));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) if (check_stable_address_space(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) goto unlock_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) if (pte_present(*ptep)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) unsigned long pfn = pte_pfn(*ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) if (!is_zero_pfn(pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) goto unlock_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) flush = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) } else if (!pte_none(*ptep))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) goto unlock_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) * Check for userfaultfd but do not deliver the fault. Instead,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) * just back off.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) if (userfaultfd_missing(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) goto unlock_abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) inc_mm_counter(mm, MM_ANONPAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) page_add_new_anon_rmap(page, vma, addr, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) if (!is_zone_device_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) lru_cache_add_inactive_or_unevictable(page, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) if (flush) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) flush_cache_page(vma, addr, pte_pfn(*ptep));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) ptep_clear_flush_notify(vma, addr, ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) set_pte_at_notify(mm, addr, ptep, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) update_mmu_cache(vma, addr, ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) /* No need to invalidate - it was non-present before */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) set_pte_at(mm, addr, ptep, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) update_mmu_cache(vma, addr, ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) pte_unmap_unlock(ptep, ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) *src = MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) unlock_abort:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) pte_unmap_unlock(ptep, ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) abort:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) *src &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) * migrate_vma_pages() - migrate meta-data from src page to dst page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) * @migrate: migrate struct containing all migration information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) * This migrates struct page meta-data from the source struct page to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) * destination struct page. This effectively finishes the migration from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) * source page to the destination page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) void migrate_vma_pages(struct migrate_vma *migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) const unsigned long npages = migrate->npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) const unsigned long start = migrate->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) struct mmu_notifier_range range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) unsigned long addr, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) bool notified = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) struct address_space *mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) if (!newpage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) if (!notified) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) notified = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) mmu_notifier_range_init(&range,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) MMU_NOTIFY_CLEAR, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) migrate->vma->vm_mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) addr, migrate->end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) mmu_notifier_invalidate_range_start(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) migrate_vma_insert_page(migrate, addr, newpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) &migrate->src[i],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) &migrate->dst[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) mapping = page_mapping(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) if (is_zone_device_page(newpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) if (is_device_private_page(newpage)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) * For now we only support private anonymous pages when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) * migrating to un-addressable device memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) if (mapping) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) * Other types of ZONE_DEVICE page are not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) * supported.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) if (r != MIGRATEPAGE_SUCCESS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) * No need to call the mmu_notifier->invalidate_range() callback again, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) * the ptep_clear_flush_notify() above, inside migrate_vma_insert_page(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) * already called it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) if (notified)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) mmu_notifier_invalidate_range_only_end(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) EXPORT_SYMBOL(migrate_vma_pages);
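/*
 * As noted in the migrate_vma_setup() documentation, once migrate_vma_pages()
 * returns the caller may inspect which entries actually migrated before
 * updating its device page tables. An illustrative sketch:
 *
 *	for (i = 0; i < args->npages; i++) {
 *		if (!(args->src[i] & MIGRATE_PFN_MIGRATE))
 *			continue;	// this entry was not migrated
 *		// map migrate_pfn_to_page(args->dst[i]) into the device here
 *	}
 */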
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) * migrate_vma_finalize() - restore CPU page table entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) * @migrate: migrate struct containing all migration information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) * This replaces the special migration pte entry with either a mapping to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) * new page if migration was successful for that page, or to the original page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) * otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) * This also unlocks the pages and puts them back on the lru or, for device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) * pages, drops the extra refcount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) void migrate_vma_finalize(struct migrate_vma *migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) const unsigned long npages = migrate->npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) unsigned long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) for (i = 0; i < npages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) struct page *page = migrate_pfn_to_page(migrate->src[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) if (newpage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) unlock_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) put_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) if (newpage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) unlock_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) put_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) newpage = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) remove_migration_ptes(page, newpage, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) if (is_zone_device_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) putback_lru_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) if (newpage != page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) unlock_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) if (is_zone_device_page(newpage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) put_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) putback_lru_page(newpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) EXPORT_SYMBOL(migrate_vma_finalize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) #endif /* CONFIG_DEVICE_PRIVATE */