// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/mm/page_alloc.c
 *
 * Manages the free list; the system allocates free pages here.
 * Note that kmalloc() lives in slab.c
 *
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 * Swap reorganised 29.12.95, Stephen Tweedie
 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 * Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
 * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
 * Zone balancing, Kanoj Sarcar, SGI, Jan 2000
 * Per cpu hot/cold page lists, bulk allocation, Martin J. Bligh, Sept 2002
 * (lots of bits borrowed from Ingo Molnar & Andrew Morton)
 */

#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/jiffies.h>
#include <linux/memblock.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/kasan.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/oom.h>
#include <linux/topology.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/memory_hotplug.h>
#include <linux/nodemask.h>
#include <linux/vmalloc.h>
#include <linux/vmstat.h>
#include <linux/mempolicy.h>
#include <linux/memremap.h>
#include <linux/stop_machine.h>
#include <linux/random.h>
#include <linux/sort.h>
#include <linux/pfn.h>
#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
#include <linux/page-isolation.h>
#include <linux/debugobjects.h>
#include <linux/kmemleak.h>
#include <linux/compaction.h>
#include <trace/events/kmem.h>
#include <trace/events/oom.h>
#include <linux/prefetch.h>
#include <linux/mm_inline.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/sched/rt.h>
#include <linux/sched/mm.h>
#include <linux/page_owner.h>
#include <linux/page_pinner.h>
#include <linux/kthread.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
#include <linux/lockdep.h>
#include <linux/nmi.h>
#include <linux/psi.h>
#include <linux/padata.h>
#include <linux/khugepaged.h>
#include <trace/hooks/mm.h>

#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
#include "internal.h"
#include "shuffle.h"
#include "page_reporting.h"

/* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */
typedef int __bitwise fpi_t;

/* No special request */
#define FPI_NONE		((__force fpi_t)0)

/*
 * Skip free page reporting notification for the (possibly merged) page.
 * This does not hinder free page reporting from grabbing the page,
 * reporting it and marking it "reported" - it only skips notifying
 * the free page reporting infrastructure about a newly freed page. For
 * example, used when temporarily pulling a page from a freelist and
 * putting it back unmodified.
 */
#define FPI_SKIP_REPORT_NOTIFY	((__force fpi_t)BIT(0))

/*
 * Place the (possibly merged) page to the tail of the freelist. Will ignore
 * page shuffling (relevant code - e.g., memory onlining - is expected to
 * shuffle the whole zone).
 *
 * Note: No code should rely on this flag for correctness - it's purely
 * to allow for optimizations when handing back either fresh pages
 * (memory onlining) or untouched pages (page isolation, free page
 * reporting).
 */
#define FPI_TO_TAIL		((__force fpi_t)BIT(1))

/*
 * Don't poison memory with KASAN (only for the tag-based modes).
 * During boot, all non-reserved memblock memory is exposed to page_alloc.
 * Poisoning all that memory lengthens boot time, especially on systems with
 * a large amount of RAM. This flag is used to skip that poisoning.
 * This is only done for the tag-based KASAN modes, as those are able to
 * detect memory corruptions with the memory tags assigned by default.
 * All memory allocated normally after boot gets poisoned as usual.
 */
#define FPI_SKIP_KASAN_POISON	((__force fpi_t)BIT(2))
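
/*
 * Illustrative only (not a definitive API contract): fpi_t values are plain
 * bit flags and may be OR'ed together when calling the internal free path,
 * e.g. __free_pages_ok(page, order, FPI_TO_TAIL | FPI_SKIP_REPORT_NOTIFY)
 * for a page that is being handed straight back to the tail of a buddy
 * freelist without notifying free page reporting.
 */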

/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
static DEFINE_MUTEX(pcp_batch_high_lock);
#define MIN_PERCPU_PAGELIST_FRACTION	(8)

#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DEFINE_PER_CPU(int, numa_node);
EXPORT_PER_CPU_SYMBOL(numa_node);
#endif

DEFINE_STATIC_KEY_TRUE(vm_numa_stat_key);

#ifdef CONFIG_HAVE_MEMORYLESS_NODES
/*
 * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly.
 * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined.
 * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem()
 * defined in <linux/topology.h>.
 */
DEFINE_PER_CPU(int, _numa_mem_);	/* Kernel "local memory" node */
EXPORT_PER_CPU_SYMBOL(_numa_mem_);
#endif

/* work_structs for global per-cpu drains */
struct pcpu_drain {
	struct zone *zone;
	struct work_struct work;
};
static DEFINE_MUTEX(pcpu_drain_mutex);
static DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);

#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
volatile unsigned long latent_entropy __latent_entropy;
EXPORT_SYMBOL(latent_entropy);
#endif

/*
 * Array of node states.
 */
nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
	[N_POSSIBLE] = NODE_MASK_ALL,
	[N_ONLINE] = { { [0] = 1UL } },
#ifndef CONFIG_NUMA
	[N_NORMAL_MEMORY] = { { [0] = 1UL } },
#ifdef CONFIG_HIGHMEM
	[N_HIGH_MEMORY] = { { [0] = 1UL } },
#endif
	[N_MEMORY] = { { [0] = 1UL } },
	[N_CPU] = { { [0] = 1UL } },
#endif	/* NUMA */
};
EXPORT_SYMBOL(node_states);

atomic_long_t _totalram_pages __read_mostly;
EXPORT_SYMBOL(_totalram_pages);
unsigned long totalreserve_pages __read_mostly;
unsigned long totalcma_pages __read_mostly;

int percpu_pagelist_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
DEFINE_STATIC_KEY_FALSE(init_on_alloc);
EXPORT_SYMBOL(init_on_alloc);

DEFINE_STATIC_KEY_FALSE(init_on_free);
EXPORT_SYMBOL(init_on_free);

static bool _init_on_alloc_enabled_early __read_mostly
				= IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON);
static int __init early_init_on_alloc(char *buf)
{
	return kstrtobool(buf, &_init_on_alloc_enabled_early);
}
early_param("init_on_alloc", early_init_on_alloc);

static bool _init_on_free_enabled_early __read_mostly
				= IS_ENABLED(CONFIG_INIT_ON_FREE_DEFAULT_ON);
static int __init early_init_on_free(char *buf)
{
	return kstrtobool(buf, &_init_on_free_enabled_early);
}
early_param("init_on_free", early_init_on_free);

/*
 * A cached value of the page's pageblock's migratetype, used when the page is
 * put on a pcplist. Used to avoid the pageblock migratetype lookup when
 * freeing from pcplists in most cases, at the cost of possibly becoming stale.
 * Also the migratetype set in the page does not necessarily match the pcplist
 * index, e.g. page might have MIGRATE_CMA set but be on a pcplist with any
 * other index - this ensures that it will be put on the correct CMA freelist.
 */
static inline int get_pcppage_migratetype(struct page *page)
{
	return page->index;
}

static inline void set_pcppage_migratetype(struct page *page, int migratetype)
{
	page->index = migratetype;
}
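
/*
 * Sketch of the intended pairing (the pcplist code lives elsewhere in this
 * file): the pageblock migratetype is looked up once and cached with
 * set_pcppage_migratetype() when a page is buffered on a pcplist, and read
 * back cheaply with get_pcppage_migratetype() when the pcplist is drained
 * into the buddy freelists.
 */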

#ifdef CONFIG_PM_SLEEP
/*
 * The following functions are used by the suspend/hibernate code to temporarily
 * change gfp_allowed_mask in order to avoid using I/O during memory allocations
 * while devices are suspended. To avoid races with the suspend/hibernate code,
 * they should always be called with system_transition_mutex held
 * (gfp_allowed_mask also should only be modified with system_transition_mutex
 * held, unless the suspend/hibernate code is guaranteed not to run in parallel
 * with that modification).
 */

static gfp_t saved_gfp_mask;

void pm_restore_gfp_mask(void)
{
	WARN_ON(!mutex_is_locked(&system_transition_mutex));
	if (saved_gfp_mask) {
		gfp_allowed_mask = saved_gfp_mask;
		saved_gfp_mask = 0;
	}
}

void pm_restrict_gfp_mask(void)
{
	WARN_ON(!mutex_is_locked(&system_transition_mutex));
	WARN_ON(saved_gfp_mask);
	saved_gfp_mask = gfp_allowed_mask;
	gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
}

bool pm_suspended_storage(void)
{
	if ((gfp_allowed_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
		return false;
	return true;
}
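
/*
 * Typical usage by the suspend path (a sketch, based on the comment above):
 * pm_restrict_gfp_mask() is called with system_transition_mutex held before
 * devices are suspended, and pm_restore_gfp_mask() undoes it after resume,
 * so allocations made in between cannot issue I/O to suspended devices.
 */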
#endif /* CONFIG_PM_SLEEP */

#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
unsigned int pageblock_order __read_mostly;
#endif

static void __free_pages_ok(struct page *page, unsigned int order,
			    fpi_t fpi_flags);

/*
 * results with 256, 32 in the lowmem_reserve sysctl:
 *	1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
 *	1G machine -> (16M dma, 784M normal, 224M high)
 *	NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
 *	HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
 *	HIGHMEM allocation will leave (224M+784M)/256 of ram reserved in ZONE_DMA
 *
 * TBD: should special case ZONE_DMA32 machines here - in those we normally
 * don't need any ZONE_NORMAL reservation
 */
int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES] = {
#ifdef CONFIG_ZONE_DMA
	[ZONE_DMA] = 256,
#endif
#ifdef CONFIG_ZONE_DMA32
	[ZONE_DMA32] = 256,
#endif
	[ZONE_NORMAL] = 32,
#ifdef CONFIG_HIGHMEM
	[ZONE_HIGHMEM] = 0,
#endif
	[ZONE_MOVABLE] = 0,
};
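
/*
 * Worked example of the ratios above (numbers are illustrative): with
 * [ZONE_DMA] = 256, a ZONE_NORMAL allocation falling back into ZONE_DMA must
 * leave roughly managed_pages(DMA)/256 pages of ZONE_DMA free; with
 * [ZONE_NORMAL] = 32, a highmem/movable allocation falling back into
 * ZONE_NORMAL must leave about managed_pages(NORMAL)/32 pages free there.
 */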

static char * const zone_names[MAX_NR_ZONES] = {
#ifdef CONFIG_ZONE_DMA
	"DMA",
#endif
#ifdef CONFIG_ZONE_DMA32
	"DMA32",
#endif
	"Normal",
#ifdef CONFIG_HIGHMEM
	"HighMem",
#endif
	"Movable",
#ifdef CONFIG_ZONE_DEVICE
	"Device",
#endif
};

const char * const migratetype_names[MIGRATE_TYPES] = {
	"Unmovable",
	"Movable",
	"Reclaimable",
#ifdef CONFIG_CMA
	"CMA",
#endif
	"HighAtomic",
#ifdef CONFIG_MEMORY_ISOLATION
	"Isolate",
#endif
};

compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = {
	[NULL_COMPOUND_DTOR] = NULL,
	[COMPOUND_PAGE_DTOR] = free_compound_page,
#ifdef CONFIG_HUGETLB_PAGE
	[HUGETLB_PAGE_DTOR] = free_huge_page,
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	[TRANSHUGE_PAGE_DTOR] = free_transhuge_page,
#endif
};
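
/*
 * The index into this array is stored in the first tail page's
 * ->compound_dtor (see the compound page comment further down), and the
 * matching destructor is invoked when the compound page is freed.
 */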

/*
 * Try to keep at least this much lowmem free. Do not allow normal
 * allocations below this point, only high priority ones. Automatically
 * tuned according to the amount of memory in the system.
 */
int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
#ifdef CONFIG_DISCONTIGMEM
/*
 * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges
 * are not on separate NUMA nodes. Functionally this works but with
 * watermark_boost_factor, it can reclaim prematurely as the ranges can be
 * quite small. By default, do not boost watermarks on discontigmem as in
 * many cases very high-order allocations like THP are likely to be
 * unsupported and the premature reclaim offsets the advantage of long-term
 * fragmentation avoidance.
 */
int watermark_boost_factor __read_mostly;
#else
int watermark_boost_factor __read_mostly = 15000;
#endif
int watermark_scale_factor = 10;

/*
 * Extra memory for the system to try freeing. Used to temporarily
 * free memory, to make space for new workloads. Anyone can allocate
 * down to the min watermarks controlled by min_free_kbytes above.
 */
int extra_free_kbytes = 0;

static unsigned long nr_kernel_pages __initdata;
static unsigned long nr_all_pages __initdata;
static unsigned long dma_reserve __initdata;

static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata;
static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata;
static unsigned long required_kernelcore __initdata;
static unsigned long required_kernelcore_percent __initdata;
static unsigned long required_movablecore __initdata;
static unsigned long required_movablecore_percent __initdata;
static unsigned long zone_movable_pfn[MAX_NUMNODES] __initdata;
static bool mirrored_kernelcore __meminitdata;

/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
int movable_zone;
EXPORT_SYMBOL(movable_zone);

#if MAX_NUMNODES > 1
unsigned int nr_node_ids __read_mostly = MAX_NUMNODES;
unsigned int nr_online_nodes __read_mostly = 1;
EXPORT_SYMBOL(nr_node_ids);
EXPORT_SYMBOL(nr_online_nodes);
#endif

int page_group_by_mobility_disabled __read_mostly;

#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
 * During boot we initialize deferred pages on-demand, as needed, but once
 * page_alloc_init_late() has finished, the deferred pages are all initialized,
 * and we can permanently disable that path.
 */
static DEFINE_STATIC_KEY_TRUE(deferred_pages);

/*
 * Call kasan_poison_pages() only after deferred memory initialization
 * has completed. Poisoning pages during deferred memory init will greatly
 * lengthen the process and cause problems on large memory systems, as the
 * deferred pages initialization is done with interrupts disabled.
 *
 * Assuming that there will be no reference to those newly initialized
 * pages before they are ever allocated, this should have no effect on
 * KASAN memory tracking as the poison will be properly inserted at page
 * allocation time. The only corner case is when pages are allocated by
 * on-demand allocation and then freed again before the deferred pages
 * initialization is done, but this is not likely to happen.
 */
static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags)
{
	return static_branch_unlikely(&deferred_pages) ||
	       (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
		(fpi_flags & FPI_SKIP_KASAN_POISON)) ||
	       PageSkipKASanPoison(page);
}

/* Returns true if the struct page for the pfn is uninitialised */
static inline bool __meminit early_page_uninitialised(unsigned long pfn)
{
	int nid = early_pfn_to_nid(pfn);

	if (node_online(nid) && pfn >= NODE_DATA(nid)->first_deferred_pfn)
		return true;

	return false;
}

/*
 * Returns true when the remaining initialisation should be deferred until
 * later in the boot cycle when it can be parallelised.
 */
static bool __meminit
defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
{
	static unsigned long prev_end_pfn, nr_initialised;

	/*
	 * The prev_end_pfn static holds the end of the previous zone.
	 * No need to protect it: this is called very early in boot,
	 * before smp_init().
	 */
	if (prev_end_pfn != end_pfn) {
		prev_end_pfn = end_pfn;
		nr_initialised = 0;
	}

	/* Always populate low zones for address-constrained allocations */
	if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
		return false;

	if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
		return true;
	/*
	 * We start with only one section of pages; more pages are added as
	 * needed until the rest of the deferred pages are initialized.
	 */
	nr_initialised++;
	if ((nr_initialised > PAGES_PER_SECTION) &&
	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
		NODE_DATA(nid)->first_deferred_pfn = pfn;
		return true;
	}
	return false;
}
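
/*
 * Illustrative summary of the function above: every node eagerly initialises
 * its address-constrained low zones plus at least one full section
 * (PAGES_PER_SECTION pages) of the remaining memory; once that budget is
 * exceeded and pfn reaches a section boundary, first_deferred_pfn is
 * recorded and everything from there on is left for page_alloc_init_late().
 */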
#else
static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags)
{
	return (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
		(fpi_flags & FPI_SKIP_KASAN_POISON)) ||
	       PageSkipKASanPoison(page);
}

static inline bool early_page_uninitialised(unsigned long pfn)
{
	return false;
}

static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
{
	return false;
}
#endif

/* Return a pointer to the bitmap storing bits affecting a block of pages */
static inline unsigned long *get_pageblock_bitmap(struct page *page,
							unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
	return section_to_usemap(__pfn_to_section(pfn));
#else
	return page_zone(page)->pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
}

static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
	pfn &= (PAGES_PER_SECTION-1);
#else
	pfn = pfn - round_down(page_zone(page)->zone_start_pfn, pageblock_nr_pages);
#endif /* CONFIG_SPARSEMEM */
	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
}
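
/*
 * Example of the arithmetic above (values are illustrative): each pageblock
 * owns NR_PAGEBLOCK_BITS (4) consecutive bits, so a pfn in the third
 * pageblock of its section/zone yields bitidx = 2 * 4 = 8, i.e. bits 8-11
 * of the bitmap returned by get_pageblock_bitmap().
 */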

/**
 * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages
 * @page: The page within the block of interest
 * @pfn: The target page frame number
 * @mask: mask of bits that the caller is interested in
 *
 * Return: pageblock_bits flags
 */
static __always_inline
unsigned long __get_pfnblock_flags_mask(struct page *page,
					unsigned long pfn,
					unsigned long mask)
{
	unsigned long *bitmap;
	unsigned long bitidx, word_bitidx;
	unsigned long word;

	bitmap = get_pageblock_bitmap(page, pfn);
	bitidx = pfn_to_bitidx(page, pfn);
	word_bitidx = bitidx / BITS_PER_LONG;
	bitidx &= (BITS_PER_LONG-1);

	word = bitmap[word_bitidx];
	return (word >> bitidx) & mask;
}

unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
					unsigned long mask)
{
	return __get_pfnblock_flags_mask(page, pfn, mask);
}
EXPORT_SYMBOL_GPL(get_pfnblock_flags_mask);

int isolate_anon_lru_page(struct page *page)
{
	int ret;

	if (!PageLRU(page) || !PageAnon(page))
		return -EINVAL;

	if (!get_page_unless_zero(page))
		return -EINVAL;

	ret = isolate_lru_page(page);
	put_page(page);

	return ret;
}
EXPORT_SYMBOL_GPL(isolate_anon_lru_page);

static __always_inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn)
{
	return __get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
}

/**
 * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
 * @page: The page within the block of interest
 * @flags: The flags to set
 * @pfn: The target page frame number
 * @mask: mask of bits that the caller is interested in
 */
void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
					unsigned long pfn,
					unsigned long mask)
{
	unsigned long *bitmap;
	unsigned long bitidx, word_bitidx;
	unsigned long old_word, word;

	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));

	bitmap = get_pageblock_bitmap(page, pfn);
	bitidx = pfn_to_bitidx(page, pfn);
	word_bitidx = bitidx / BITS_PER_LONG;
	bitidx &= (BITS_PER_LONG-1);

	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);

	mask <<= bitidx;
	flags <<= bitidx;

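	/*
	 * Lock-free read-modify-write: re-read the word and retry the
	 * cmpxchg() until no other updater has raced with us, so concurrent
	 * updates to other pageblocks sharing this word are not lost.
	 */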
	word = READ_ONCE(bitmap[word_bitidx]);
	for (;;) {
		old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
		if (word == old_word)
			break;
		word = old_word;
	}
}

void set_pageblock_migratetype(struct page *page, int migratetype)
{
	if (unlikely(page_group_by_mobility_disabled &&
		     migratetype < MIGRATE_PCPTYPES))
		migratetype = MIGRATE_UNMOVABLE;

	set_pfnblock_flags_mask(page, (unsigned long)migratetype,
				page_to_pfn(page), MIGRATETYPE_MASK);
}
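
/*
 * Example usage (illustrative): CMA setup marks its pageblocks with
 * set_pageblock_migratetype(page, MIGRATE_CMA), and memory isolation
 * temporarily switches blocks to MIGRATE_ISOLATE through the same helper.
 */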

#ifdef CONFIG_DEBUG_VM
static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
{
	int ret = 0;
	unsigned seq;
	unsigned long pfn = page_to_pfn(page);
	unsigned long sp, start_pfn;

	do {
		seq = zone_span_seqbegin(zone);
		start_pfn = zone->zone_start_pfn;
		sp = zone->spanned_pages;
		if (!zone_spans_pfn(zone, pfn))
			ret = 1;
	} while (zone_span_seqretry(zone, seq));

	if (ret)
		pr_err("page 0x%lx outside node %d zone %s [ 0x%lx - 0x%lx ]\n",
			pfn, zone_to_nid(zone), zone->name,
			start_pfn, start_pfn + sp);

	return ret;
}

static int page_is_consistent(struct zone *zone, struct page *page)
{
	if (!pfn_valid_within(page_to_pfn(page)))
		return 0;
	if (zone != page_zone(page))
		return 0;

	return 1;
}
/*
 * Temporary debugging check for pages not lying within a given zone.
 */
static int __maybe_unused bad_range(struct zone *zone, struct page *page)
{
	if (page_outside_zone_boundaries(zone, page))
		return 1;
	if (!page_is_consistent(zone, page))
		return 1;

	return 0;
}
#else
static inline int __maybe_unused bad_range(struct zone *zone, struct page *page)
{
	return 0;
}
#endif

static void bad_page(struct page *page, const char *reason)
{
	static unsigned long resume;
	static unsigned long nr_shown;
	static unsigned long nr_unshown;

	/*
	 * Allow a burst of 60 reports, then keep quiet for that minute;
	 * or allow a steady drip of one report per second.
	 */
	if (nr_shown == 60) {
		if (time_before(jiffies, resume)) {
			nr_unshown++;
			goto out;
		}
		if (nr_unshown) {
			pr_alert(
			      "BUG: Bad page state: %lu messages suppressed\n",
				nr_unshown);
			nr_unshown = 0;
		}
		nr_shown = 0;
	}
	if (nr_shown++ == 0)
		resume = jiffies + 60 * HZ;

	pr_alert("BUG: Bad page state in process %s pfn:%05lx\n",
		current->comm, page_to_pfn(page));
	__dump_page(page, reason);
	dump_page_owner(page);

	print_modules();
	dump_stack();
out:
	/* Leave bad fields for debug, except PageBuddy could make trouble */
	page_mapcount_reset(page); /* remove PageBuddy */
	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
/*
 * Higher-order pages are called "compound pages". They are structured thusly:
 *
 * The first PAGE_SIZE page is called the "head page" and has PG_head set.
 *
 * The remaining PAGE_SIZE pages are called "tail pages". PageTail() is encoded
 * in bit 0 of page->compound_head. The rest of the bits form a pointer to the
 * head page.
 *
 * The first tail page's ->compound_dtor holds the offset in the array of
 * compound page destructors. See compound_page_dtors.
 *
 * The first tail page's ->compound_order holds the order of allocation.
 * This usage means that zero-order pages may not be compound.
 */
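
/*
 * Illustrative layout for an order-2 compound page (4 pages): page[0] is the
 * head page with PG_head set; page[1..3] are tail pages whose ->compound_head
 * points back at page[0] with bit 0 set; page[1], the first tail page, also
 * carries ->compound_dtor and ->compound_order (== 2).
 */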
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) void free_compound_page(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) mem_cgroup_uncharge(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) __free_pages_ok(page, compound_order(page), FPI_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) void prep_compound_page(struct page *page, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) int nr_pages = 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) __SetPageHead(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) for (i = 1; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) struct page *p = page + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) set_page_count(p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) p->mapping = TAIL_MAPPING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) set_compound_head(p, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) set_compound_order(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) atomic_set(compound_mapcount_ptr(page), -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) if (hpage_pincount_available(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) atomic_set(compound_pincount_ptr(page), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) #ifdef CONFIG_DEBUG_PAGEALLOC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) unsigned int _debug_guardpage_minorder;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) bool _debug_pagealloc_enabled_early __read_mostly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) EXPORT_SYMBOL(_debug_pagealloc_enabled_early);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) EXPORT_SYMBOL(_debug_pagealloc_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) static int __init early_debug_pagealloc(char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) return kstrtobool(buf, &_debug_pagealloc_enabled_early);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) early_param("debug_pagealloc", early_debug_pagealloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) static int __init debug_guardpage_minorder_setup(char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) unsigned long res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) if (kstrtoul(buf, 10, &res) < 0 || res > MAX_ORDER / 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) pr_err("Bad debug_guardpage_minorder value\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) _debug_guardpage_minorder = res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) pr_info("Setting debug_guardpage_minorder to %lu\n", res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
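
/*
 * Example (sketch): booting a CONFIG_DEBUG_PAGEALLOC kernel with
 * "debug_pagealloc=on debug_guardpage_minorder=1" makes
 * init_mem_debugging_and_hardening() below flip _debug_guardpage_enabled,
 * and set_page_guard() will then only ever guard order-0 buddies; guard
 * pages drop out of the freepage accounting until clear_page_guard()
 * hands them back.
 */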
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) static inline bool set_page_guard(struct zone *zone, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) unsigned int order, int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) if (!debug_guardpage_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) if (order >= debug_guardpage_minorder())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) __SetPageGuard(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) INIT_LIST_HEAD(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) set_page_private(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) /* Guard pages are not available for any usage */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) __mod_zone_freepage_state(zone, -(1 << order), migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) static inline void clear_page_guard(struct zone *zone, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) unsigned int order, int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) if (!debug_guardpage_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) __ClearPageGuard(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) set_page_private(page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) if (!is_migrate_isolate(migratetype))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) __mod_zone_freepage_state(zone, (1 << order), migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) static inline bool set_page_guard(struct zone *zone, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) unsigned int order, int migratetype) { return false; }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) static inline void clear_page_guard(struct zone *zone, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) unsigned int order, int migratetype) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) * Enable static keys related to various memory debugging and hardening options.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) * Some override others, and depend on early params that are evaluated in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) * order of appearance. So we need to first gather the full picture of what was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) * enabled, and then make decisions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) void init_mem_debugging_and_hardening(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) bool page_poisoning_requested = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) #ifdef CONFIG_PAGE_POISONING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) * Page poisoning doubles as debug page alloc for arches that don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) * support the latter. If either of those options is enabled, enable poisoning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) if (page_poisoning_enabled() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) debug_pagealloc_enabled())) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) static_branch_enable(&_page_poisoning_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) page_poisoning_requested = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) if (_init_on_alloc_enabled_early) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) if (page_poisoning_requested)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) "will take precedence over init_on_alloc\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) static_branch_enable(&init_on_alloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) if (_init_on_free_enabled_early) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) if (page_poisoning_requested)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) "will take precedence over init_on_free\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) static_branch_enable(&init_on_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) #ifdef CONFIG_DEBUG_PAGEALLOC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) if (!debug_pagealloc_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) static_branch_enable(&_debug_pagealloc_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) if (!debug_guardpage_minorder())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) static_branch_enable(&_debug_guardpage_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) }
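
/*
 * Illustration of the precedence above (a sketch, assuming the usual
 * early params behind these knobs): booting with
 * "page_poison=1 init_on_alloc=1 init_on_free=1" enables page poisoning
 * only and prints the two "will take precedence" messages, while the
 * same line without page_poison=1 enables both the init_on_alloc and
 * init_on_free static keys.
 */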
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) static inline void set_buddy_order(struct page *page, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) set_page_private(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) __SetPageBuddy(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) * This function checks whether a page is free && is the buddy of the given page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) * We can coalesce a page and its buddy if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) * (a) the buddy is not in a hole (check before calling!) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) * (b) the buddy is in the buddy system &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) * (c) a page and its buddy have the same order &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) * (d) a page and its buddy are in the same zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) * For recording whether a page is in the buddy system, we set PageBuddy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) * Setting, clearing, and testing PageBuddy is serialized by zone->lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) * For recording page's order, we use page_private(page).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) static inline bool page_is_buddy(struct page *page, struct page *buddy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) if (!page_is_guard(buddy) && !PageBuddy(buddy))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) if (buddy_order(buddy) != order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) * zone check is done late to avoid uselessly calculating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) * zone/node ids for pages that could never merge.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) if (page_zone_id(page) != page_zone_id(buddy))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) }
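
/*
 * Note that a guard page (page_is_guard()) passes the test above as well;
 * that is what lets __free_one_page() merge across a guarded buddy by
 * calling clear_page_guard() instead of del_page_from_free_list().
 */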
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) #ifdef CONFIG_COMPACTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) static inline struct capture_control *task_capc(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) struct capture_control *capc = current->capture_control;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) return unlikely(capc) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) !(current->flags & PF_KTHREAD) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) !capc->page &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) capc->cc->zone == zone ? capc : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) compaction_capture(struct capture_control *capc, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) int order, int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) if (!capc || order != capc->cc->order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) /* Do not accidentally pollute CMA or isolated regions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) if (is_migrate_cma(migratetype) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) is_migrate_isolate(migratetype))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) * Do not let lower-order allocations pollute a movable pageblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) * This might let an unmovable request use a reclaimable pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) * and vice versa, but no more than the normal fallback logic, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) * can have trouble finding a high-order free page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) if (order < pageblock_order && migratetype == MIGRATE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) capc->page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) }
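
/*
 * In short: a task doing direct compaction publishes a capture_control
 * via current->capture_control. When __free_one_page() below is freeing
 * a block of exactly the order that task is compacting for, and the
 * migratetype checks above allow it, the page is handed straight to the
 * compacting task via capc->page instead of going back onto the free
 * lists. The consumer side of this handshake lives in mm/compaction.c.
 */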
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) static inline struct capture_control *task_capc(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) compaction_capture(struct capture_control *capc, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) int order, int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) #endif /* CONFIG_COMPACTION */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) /* Used for pages not on another list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) static inline void add_to_free_list(struct page *page, struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) unsigned int order, int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) struct free_area *area = &zone->free_area[order];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) list_add(&page->lru, &area->free_list[migratetype]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) area->nr_free++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) /* Used for pages not on another list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) unsigned int order, int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) struct free_area *area = &zone->free_area[order];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) list_add_tail(&page->lru, &area->free_list[migratetype]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) area->nr_free++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) * Used for pages which are on another list. Move the pages to the tail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) * of the list - so the moved pages won't immediately be considered for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) * allocation again (e.g., optimization for memory onlining).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) static inline void move_to_free_list(struct page *page, struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) unsigned int order, int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) struct free_area *area = &zone->free_area[order];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) list_move_tail(&page->lru, &area->free_list[migratetype]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) static inline void del_page_from_free_list(struct page *page, struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) /* clear reported state and update reported page count */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) if (page_reported(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) __ClearPageReported(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) __ClearPageBuddy(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) set_page_private(page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) zone->free_area[order].nr_free--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) * If this is not the largest possible page, check if the buddy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) * of the next-highest order is free. If it is, it's possible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) * that pages are being freed that will coalesce soon. In case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) * that is happening, add the free page to the tail of the list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) * so it's less likely to be used soon and more likely to be merged
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) * as a higher-order page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) struct page *page, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) struct page *higher_page, *higher_buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) unsigned long combined_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) if (order >= MAX_ORDER - 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) if (!pfn_valid_within(buddy_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) combined_pfn = buddy_pfn & pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) higher_page = page + (combined_pfn - pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) higher_buddy = higher_page + (buddy_pfn - combined_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) return pfn_valid_within(buddy_pfn) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) page_is_buddy(higher_page, higher_buddy, order + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * Freeing function for a buddy system allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * The concept of a buddy system is to maintain a direct-mapped table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) * (containing bit values) for memory blocks of various "orders".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) * The bottom level table contains the map for the smallest allocatable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) * units of memory (here, pages), and each level above it describes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) * pairs of units from the levels below, hence, "buddies".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * At a high level, all that happens here is marking the table entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) * at the bottom level available, and propagating the changes upward
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) * as necessary, plus some accounting needed to play nicely with other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) * parts of the VM system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) * At each level, we keep a list of pages, which are heads of contiguous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) * free page runs of length (1 << order) and marked with PageBuddy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) * A page's order is recorded in the page_private(page) field.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) * So when we are allocating or freeing one, we can derive the state of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) * other. That is, if we allocate a small block, and both were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) * free, the remainder of the region must be split into blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) * If a block is freed, and its buddy is also free, then this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) * triggers coalescing into a block of larger size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) * -- nyc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) */
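
/*
 * Worked example of the pfn arithmetic used below (a sketch; the real
 * helper is __find_buddy_pfn()): the buddy of a block is found by
 * flipping bit 'order' of its pfn, and the merged block starts at the
 * lower pfn of the pair:
 *
 *	buddy_pfn    = pfn ^ (1 << order);	e.g. pfn 8, order 1 -> buddy_pfn 10
 *	combined_pfn = buddy_pfn & pfn;		-> 8, the head of the order-2 block
 */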
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) static inline void __free_one_page(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) unsigned long pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) struct zone *zone, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) int migratetype, fpi_t fpi_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) struct capture_control *capc = task_capc(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) unsigned long buddy_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) unsigned long combined_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) unsigned int max_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) struct page *buddy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) bool to_tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) max_order = min_t(unsigned int, MAX_ORDER - 1, pageblock_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) VM_BUG_ON(!zone_is_initialized(zone));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) VM_BUG_ON(migratetype == -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) if (likely(!is_migrate_isolate(migratetype)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) __mod_zone_freepage_state(zone, 1 << order, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) VM_BUG_ON_PAGE(bad_range(zone, page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) continue_merging:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) while (order < max_order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) if (compaction_capture(capc, page, order, migratetype)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) __mod_zone_freepage_state(zone, -(1 << order),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) buddy_pfn = __find_buddy_pfn(pfn, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) buddy = page + (buddy_pfn - pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) if (!pfn_valid_within(buddy_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) goto done_merging;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) if (!page_is_buddy(page, buddy, order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) goto done_merging;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) * merge with it and move up one order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) if (page_is_guard(buddy))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) clear_page_guard(zone, buddy, order, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) del_page_from_free_list(buddy, zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) combined_pfn = buddy_pfn & pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) page = page + (combined_pfn - pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) pfn = combined_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) order++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (order < MAX_ORDER - 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) /* If we are here, it means order is >= pageblock_order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) * We want to prevent merge between freepages on isolate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) * pageblock and normal pageblock. Without this, pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) * isolation could cause incorrect freepage or CMA accounting.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * We don't want to hit this code for the more frequent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) * low-order merging.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) if (unlikely(has_isolate_pageblock(zone))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) int buddy_mt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) buddy_pfn = __find_buddy_pfn(pfn, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) buddy = page + (buddy_pfn - pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) buddy_mt = get_pageblock_migratetype(buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) if (migratetype != buddy_mt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) && (is_migrate_isolate(migratetype) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) is_migrate_isolate(buddy_mt)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) goto done_merging;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) max_order = order + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) goto continue_merging;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) done_merging:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) set_buddy_order(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) if (fpi_flags & FPI_TO_TAIL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) to_tail = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) else if (is_shuffle_order(order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) to_tail = shuffle_pick_tail();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) if (to_tail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) add_to_free_list_tail(page, zone, order, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) add_to_free_list(page, zone, order, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) /* Notify page reporting subsystem of freed page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) page_reporting_notify_free(order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) * A bad page could be caused by any of a number of fields. Instead of multiple branches,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) * try to check multiple fields with one check. The caller must do a detailed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) * check if necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) static inline bool page_expected_state(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) unsigned long check_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) if (unlikely(atomic_read(&page->_mapcount) != -1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) if (unlikely((unsigned long)page->mapping |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) page_ref_count(page) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) #ifdef CONFIG_MEMCG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) (unsigned long)page->mem_cgroup |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) (page->flags & check_flags)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) }
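
/*
 * E.g. for a genuinely free page we expect mapping == NULL, _refcount == 0,
 * no mem_cgroup and none of the check_flags bits set; OR-ing them together
 * yields 0, so a single branch covers the common "all fine" case and the
 * detailed per-field diagnosis in page_bad_reason() below only runs when
 * something is off. (_mapcount is tested separately above because its
 * "unused" value is -1, not 0.)
 */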
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) static const char *page_bad_reason(struct page *page, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) const char *bad_reason = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) if (unlikely(atomic_read(&page->_mapcount) != -1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) bad_reason = "nonzero mapcount";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) if (unlikely(page->mapping != NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) bad_reason = "non-NULL mapping";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) if (unlikely(page_ref_count(page) != 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) bad_reason = "nonzero _refcount";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) if (unlikely(page->flags & flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) if (flags == PAGE_FLAGS_CHECK_AT_PREP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag(s) set";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) #ifdef CONFIG_MEMCG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) if (unlikely(page->mem_cgroup))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) bad_reason = "page still charged to cgroup";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) return bad_reason;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) static void check_free_page_bad(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) bad_page(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) page_bad_reason(page, PAGE_FLAGS_CHECK_AT_FREE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) static inline int check_free_page(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) if (likely(page_expected_state(page, PAGE_FLAGS_CHECK_AT_FREE)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) /* Something has gone sideways, find it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) check_free_page_bad(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) static int free_tail_pages_check(struct page *head_page, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) int ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) * We rely on page->lru.next never having bit 0 set, unless the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) * is PageTail(). Let's make sure that's true even for poisoned ->lru.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) if (!IS_ENABLED(CONFIG_DEBUG_VM)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) switch (page - head_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) case 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) /* the first tail page: ->mapping may be compound_mapcount() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) if (unlikely(compound_mapcount(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) bad_page(page, "nonzero compound_mapcount");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) case 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) * the second tail page: ->mapping is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) * deferred_list.next -- ignore value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) if (page->mapping != TAIL_MAPPING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) bad_page(page, "corrupted mapping in tail page");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) if (unlikely(!PageTail(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) bad_page(page, "PageTail not set");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) if (unlikely(compound_head(page) != head_page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) bad_page(page, "compound_head not consistent");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) page->mapping = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) clear_compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) if (zero_tags) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) for (i = 0; i < numpages; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) tag_clear_highpage(page + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) /* s390's use of memset() could override KASAN redzones. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) kasan_disable_current();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) for (i = 0; i < numpages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) u8 tag = page_kasan_tag(page + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) page_kasan_tag_reset(page + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) clear_highpage(page + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) page_kasan_tag_set(page + i, tag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) kasan_enable_current();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) static __always_inline bool free_pages_prepare(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) unsigned int order, bool check_free, fpi_t fpi_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) int bad = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) VM_BUG_ON_PAGE(PageTail(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) trace_mm_page_free(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) if (unlikely(PageHWPoison(page)) && !order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) * Do not let hwpoison pages hit pcplists/buddy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) * Untie memcg state and reset the page's owner.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) if (memcg_kmem_enabled() && PageKmemcg(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) __memcg_kmem_uncharge_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) reset_page_owner(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) free_page_pinner(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) * Check tail pages before head page information is cleared to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) * avoid checking PageCompound for order-0 pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) if (unlikely(order)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) bool compound = PageCompound(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) if (compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) ClearPageDoubleMap(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) for (i = 1; i < (1 << order); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) if (compound)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) bad += free_tail_pages_check(page, page + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) if (unlikely(check_free_page(page + i))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) bad++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) (page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) if (PageMappingFlags(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) page->mapping = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) if (memcg_kmem_enabled() && PageKmemcg(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) __memcg_kmem_uncharge_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) if (check_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) bad += check_free_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) if (bad)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) page_cpupid_reset_last(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) reset_page_owner(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) free_page_pinner(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) if (!PageHighMem(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) debug_check_no_locks_freed(page_address(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) PAGE_SIZE << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) debug_check_no_obj_freed(page_address(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) PAGE_SIZE << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) kernel_poison_pages(page, 1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) * As memory initialization might be integrated into KASAN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) * kasan_free_pages and kernel_init_free_pages must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) * kept together to avoid discrepancies in behavior.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) * With hardware tag-based KASAN, memory tags must be set before the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) * page becomes unavailable via debug_pagealloc or arch_free_page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) if (kasan_has_integrated_init()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) if (!skip_kasan_poison)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) kasan_free_pages(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) bool init = want_init_on_free();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) if (init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) kernel_init_free_pages(page, 1 << order, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) if (!skip_kasan_poison)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) kasan_poison_pages(page, order, init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) * arch_free_page() can make the page's contents inaccessible. s390
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) * does this. So nothing which can access the page's contents should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) * happen after this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) arch_free_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) debug_pagealloc_unmap_pages(page, 1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) #ifdef CONFIG_DEBUG_VM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) * With DEBUG_VM enabled, order-0 pages are checked immediately when being freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) * to pcp lists. With debug_pagealloc also enabled, they are also rechecked when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) * moved from pcp lists to free lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) static bool free_pcp_prepare(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) return free_pages_prepare(page, 0, true, FPI_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) static bool bulkfree_pcp_prepare(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) if (debug_pagealloc_enabled_static())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) return check_free_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) * With DEBUG_VM disabled, order-0 pages being freed are checked only when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) * moving from pcp lists to the free list, in order to reduce overhead. With
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) * debug_pagealloc enabled, they are checked also immediately when being freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * to the pcp lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) static bool free_pcp_prepare(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) if (debug_pagealloc_enabled_static())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) return free_pages_prepare(page, 0, true, FPI_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) return free_pages_prepare(page, 0, false, FPI_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) static bool bulkfree_pcp_prepare(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) return check_free_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) #endif /* CONFIG_DEBUG_VM */
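
/*
 * Summary of the two variants above - where an order-0 page freed to the
 * pcplists gets sanity-checked (sketch):
 *
 *			on free to pcp			on pcp -> buddy drain
 *  DEBUG_VM		always				only with debug_pagealloc
 *  !DEBUG_VM		only with debug_pagealloc	always
 */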
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)
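/*
 * Prefetch the order-0 buddy of @page (pfn ^ 1, i.e. the other page of the
 * pair) so that its struct page is already cache-warm when
 * free_pcppages_bulk() merges it under zone->lock (see the comment there).
 */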
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) static inline void prefetch_buddy(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) unsigned long pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) unsigned long buddy_pfn = __find_buddy_pfn(pfn, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) struct page *buddy = page + (buddy_pfn - pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) prefetch(buddy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) * Frees a number of pages from the PCP lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) * Assumes all pages on the list are in the same zone, and of the same order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) * count is the number of pages to free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) * If the zone was previously in an "all pages pinned" state then look to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) * see if this freeing clears that state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) * And clear the zone's pages_scanned counter, to hold off the "all pages are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) * pinned" detection logic.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) */
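/*
 * Sketch of the round-robin below, assuming the usual three pcp
 * migratetypes: if the first list probed is empty, batch_free reaches 2
 * before a non-empty list is found and up to two pages are then taken
 * from that list in this pass; if batch_free reaches MIGRATE_PCPTYPES,
 * every other list was empty, so the remaining 'count' pages all come
 * from the single non-empty list.
 */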
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) static void free_pcppages_bulk(struct zone *zone, int count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) struct per_cpu_pages *pcp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) int migratetype = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) int batch_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) int prefetch_nr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) bool isolated_pageblocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) struct page *page, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) LIST_HEAD(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) * Ensure a proper count is passed; otherwise we would get stuck in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) * while (list_empty(list)) loop below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) count = min(pcp->count, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) while (count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) struct list_head *list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * Remove pages from lists in a round-robin fashion. A
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) * batch_free count is maintained that is incremented when an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) * empty list is encountered. This is so more pages are freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) * off fuller lists instead of spinning excessively around empty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) * lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) batch_free++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) if (++migratetype == MIGRATE_PCPTYPES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) migratetype = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) list = &pcp->lists[migratetype];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) } while (list_empty(list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) /* This is the only non-empty list. Free them all. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) if (batch_free == MIGRATE_PCPTYPES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) batch_free = count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) page = list_last_entry(list, struct page, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) /* must delete to avoid corrupting pcp list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) pcp->count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) if (bulkfree_pcp_prepare(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) list_add_tail(&page->lru, &head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) * We are going to put the page back to the global
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) * pool, prefetch its buddy to speed up later access
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) * under zone->lock. It is believed the overhead of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) * an additional test and calculating buddy_pfn here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) * can be offset by reduced memory latency later. To
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) * avoid excessive prefetching due to large count, only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) * prefetch buddy for the first pcp->batch nr of pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) if (prefetch_nr++ < pcp->batch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) prefetch_buddy(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) } while (--count && --batch_free && !list_empty(list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) spin_lock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) isolated_pageblocks = has_isolate_pageblock(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) * Use safe version since after __free_one_page(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) * page->lru.next will not point to the original list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) list_for_each_entry_safe(page, tmp, &head, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) int mt = get_pcppage_migratetype(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) /* MIGRATE_ISOLATE page should not go to pcplists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) /* Pageblock could have been isolated meanwhile */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) if (unlikely(isolated_pageblocks))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) mt = get_pageblock_migratetype(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) trace_mm_page_pcpu_drain(page, 0, mt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) spin_unlock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) static void free_one_page(struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) struct page *page, unsigned long pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) int migratetype, fpi_t fpi_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) spin_lock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) if (unlikely(has_isolate_pageblock(zone) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) is_migrate_isolate(migratetype))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) migratetype = get_pfnblock_migratetype(page, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) __free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) spin_unlock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) static void __meminit __init_single_page(struct page *page, unsigned long pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) unsigned long zone, int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) bool zero_page_struct __maybe_unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) #ifdef CONFIG_ROCKCHIP_THUNDER_BOOT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) if (zero_page_struct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) mm_zero_struct_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) mm_zero_struct_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) set_page_links(page, zone, nid, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) init_page_count(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) page_mapcount_reset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) page_cpupid_reset_last(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) page_kasan_tag_reset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) INIT_LIST_HEAD(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) #ifdef WANT_PAGE_VIRTUAL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) /* The shift won't overflow because ZONE_NORMAL is below 4G. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) if (!is_highmem_idx(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) set_page_address(page, __va(pfn << PAGE_SHIFT));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) static void __meminit init_reserved_page(unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) pg_data_t *pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) int nid, zid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) if (!early_page_uninitialised(pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) nid = early_pfn_to_nid(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) for (zid = 0; zid < MAX_NR_ZONES; zid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) struct zone *zone = &pgdat->node_zones[zid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) __init_single_page(pfn_to_page(pfn), pfn, zid, nid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) static inline void init_reserved_page(unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) * Initialised pages do not have PageReserved set. This function is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) * called for each range allocated by the bootmem allocator and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) * marks the pages PageReserved. The remaining valid pages are later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) * sent to the buddy page allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) unsigned long start_pfn = PFN_DOWN(start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) unsigned long end_pfn = PFN_UP(end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) for (; start_pfn < end_pfn; start_pfn++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) if (pfn_valid(start_pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) struct page *page = pfn_to_page(start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) init_reserved_page(start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) /* Avoid false-positive PageTail() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) INIT_LIST_HEAD(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) * No need for an atomic set_bit because the struct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) * page is not visible yet, so nobody else should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) * be accessing it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) __SetPageReserved(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) }
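/*
 * A worked example of the rounding above, assuming a 4 KiB PAGE_SIZE
 * (hypothetical addresses): reserve_bootmem_region(0x1234, 0x5678) gives
 * start_pfn = PFN_DOWN(0x1234) = 1 and end_pfn = PFN_UP(0x5678) = 6, so the
 * struct pages for pfns 1..5 are marked PageReserved -- the rounding always
 * covers the partially used pages at both edges of the region.
 */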
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) static void __free_pages_ok(struct page *page, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) fpi_t fpi_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) int migratetype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) unsigned long pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) if (!free_pages_prepare(page, order, true, fpi_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) migratetype = get_pfnblock_migratetype(page, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) __count_vm_events(PGFREE, 1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) free_one_page(page_zone(page), page, pfn, order, migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) fpi_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) void __free_pages_core(struct page *page, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) unsigned int nr_pages = 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) struct page *p = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) unsigned int loop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) * When initializing the memmap, __init_single_page() sets the refcount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) * of all pages to 1 ("allocated"/"not free"). We have to set the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) * refcount of all involved pages to 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) prefetchw(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) prefetchw(p + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) __ClearPageReserved(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) set_page_count(p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) __ClearPageReserved(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) set_page_count(p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) * Bypass PCP and place fresh pages right to the tail, primarily
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) * relevant for memory onlining.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) __free_pages_ok(page, order, FPI_TO_TAIL | FPI_SKIP_KASAN_POISON);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) #ifdef CONFIG_NEED_MULTIPLE_NODES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) int __meminit __early_pfn_to_nid(unsigned long pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) struct mminit_pfnnid_cache *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) if (state->last_start <= pfn && pfn < state->last_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) return state->last_nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) if (nid != NUMA_NO_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) state->last_start = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) state->last_end = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) state->last_nid = nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) return nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) int __meminit early_pfn_to_nid(unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) static DEFINE_SPINLOCK(early_pfn_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) spin_lock(&early_pfn_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) if (nid < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) nid = first_online_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) spin_unlock(&early_pfn_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) return nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) #endif /* CONFIG_NEED_MULTIPLE_NODES */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) void __init memblock_free_pages(struct page *page, unsigned long pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) if (early_page_uninitialised(pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) __free_pages_core(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) * Check that the whole (or subset of) a pageblock given by the interval of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) * with the migration or free compaction scanner. The scanners then need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) * use only the pfn_valid_within() check for arches that allow holes within
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) * pageblocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) * Return the struct page pointer for start_pfn, or NULL if the checks do not pass.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) * It's possible on some configurations to have a setup like node0 node1 node0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) * i.e. it's possible that not all pages within a zone's range of pfns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) * belong to a single zone. We assume that a border between node0 and node1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) * can occur within a single pageblock, but not a node0 node1 node0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) * interleaving within a single pageblock. It is therefore sufficient to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) * the first and last page of a pageblock and avoid checking each individual
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) * page in a pageblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) unsigned long end_pfn, struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) struct page *start_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) struct page *end_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) /* end_pfn is one past the range we are checking */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) end_pfn--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) start_page = pfn_to_online_page(start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) if (!start_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) if (page_zone(start_page) != zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) end_page = pfn_to_page(end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) /* This gives shorter code than deriving page_zone(end_page) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) if (page_zone_id(start_page) != page_zone_id(end_page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) return start_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) }
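/*
 * An illustrative example of the node-border assumption (hypothetical pfns,
 * assuming pageblock_nr_pages == 512): with a node0/node1 border at pfn
 * 0x18100, the naturally aligned pageblock [0x18000, 0x18200) straddles the
 * border. Its first page then lies in a node0 zone and its last page in a
 * node1 zone, so the page_zone_id() comparison above fails and NULL is
 * returned without inspecting the 510 pages in between.
 */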
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) void set_zone_contiguous(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) unsigned long block_start_pfn = zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) unsigned long block_end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) for (; block_start_pfn < zone_end_pfn(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) block_start_pfn = block_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) block_end_pfn += pageblock_nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) if (!__pageblock_pfn_to_page(block_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) block_end_pfn, zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) /* We confirm that there are no holes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) zone->contiguous = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) }
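/*
 * A worked example of the walk above (hypothetical values, assuming
 * pageblock_nr_pages == 512): for a zone starting at pfn 0x10070,
 * ALIGN(0x10070 + 1, 512) = 0x10200, so the first step checks the partial
 * block [0x10070, 0x10200) and each later step checks one full, naturally
 * aligned pageblock, with the last one clamped to zone_end_pfn() by the min().
 */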
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) void clear_zone_contiguous(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) zone->contiguous = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) static void __init deferred_free_range(unsigned long pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) unsigned long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) unsigned long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) if (!nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) page = pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) /* Free a large naturally-aligned chunk if possible */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) if (nr_pages == pageblock_nr_pages &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) (pfn & (pageblock_nr_pages - 1)) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) set_pageblock_migratetype(page, MIGRATE_MOVABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) __free_pages_core(page, pageblock_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) for (i = 0; i < nr_pages; i++, page++, pfn++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) if ((pfn & (pageblock_nr_pages - 1)) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) set_pageblock_migratetype(page, MIGRATE_MOVABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) __free_pages_core(page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) }
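/*
 * An illustrative example, assuming pageblock_nr_pages == 512 and
 * pageblock_order == 9 (hypothetical pfns): deferred_free_range(0x20000, 512)
 * takes the fast path above and frees one naturally aligned order-9 block,
 * while deferred_free_range(0x20001, 511) is not aligned and falls back to
 * freeing 511 individual order-0 pages.
 */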
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) /* Completion tracking for deferred_init_memmap() threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) static atomic_t pgdat_init_n_undone __initdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) static __initdata DECLARE_COMPLETION(pgdat_init_all_done_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) static inline void __init pgdat_init_report_one_done(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) if (atomic_dec_and_test(&pgdat_init_n_undone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) complete(&pgdat_init_all_done_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) * Returns true if the page needs to be initialized or freed to the buddy allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) * First we check if the pfn is valid on architectures where it is possible to have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) * holes within pageblock_nr_pages. On systems where that is not possible, this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) * function is optimized out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) * Then we check whether the current large page is valid by checking only the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) * validity of its head pfn.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) static inline bool __init deferred_pfn_valid(unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) if (!pfn_valid_within(pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) }
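/*
 * Note that pfn_valid() above is only consulted for the first pfn of each
 * pageblock: with a hypothetical pageblock_nr_pages == 512, only pfn 0x20000
 * of the block 0x20000..0x201ff is checked. This relies on validity being
 * uniform within a pageblock on arches where pfn_valid_within() is always
 * true, as the comment above describes.
 */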
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) * Free pages to the buddy allocator. Try to free aligned chunks of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) * pageblock_nr_pages pages at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) static void __init deferred_free_pages(unsigned long pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) unsigned long end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) unsigned long nr_pgmask = pageblock_nr_pages - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) unsigned long nr_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) for (; pfn < end_pfn; pfn++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) if (!deferred_pfn_valid(pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) deferred_free_range(pfn - nr_free, nr_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) nr_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) } else if (!(pfn & nr_pgmask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) deferred_free_range(pfn - nr_free, nr_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) nr_free = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) nr_free++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) /* Free the last block of pages to allocator */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) deferred_free_range(pfn - nr_free, nr_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) }
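/*
 * A short trace of the accumulation above (hypothetical pfns, assuming
 * pageblock_nr_pages == 512 and all pfns valid): walking [0x20000, 0x20400),
 * nr_free reaches 512 after pfn 0x201ff; at pfn 0x20200 the boundary branch
 * flushes [0x20000, 0x20200) as one aligned 512-page block and restarts
 * nr_free at 1, and the final deferred_free_range() call after the loop
 * flushes the remaining [0x20200, 0x20400).
 */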
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) * Initialize struct pages. We minimize pfn page lookups and scheduler checks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) * by performing them only once every pageblock_nr_pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) * Returns the number of pages initialized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) static unsigned long __init deferred_init_pages(struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) unsigned long pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) unsigned long end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) unsigned long nr_pgmask = pageblock_nr_pages - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) int nid = zone_to_nid(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) unsigned long nr_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) int zid = zone_idx(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) for (; pfn < end_pfn; pfn++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) if (!deferred_pfn_valid(pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) } else if (!page || !(pfn & nr_pgmask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) page = pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) page++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) __init_single_page(page, pfn, zid, nid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) nr_pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) }
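/*
 * The "page++" arm above is what keeps this cheap: struct pages are
 * contiguous in the memmap within a pageblock, so pfn_to_page() only needs
 * to be recomputed at a pageblock boundary or after an invalid pfn has reset
 * the cursor to NULL.
 */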
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) * This function is meant to pre-load the iterator for the zone init.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) * Specifically, it walks through the ranges until it has caught up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) * first_init_pfn and exits there. If we never reach that value we return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) * false, indicating there are no valid ranges left.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) static bool __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) unsigned long *spfn, unsigned long *epfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) unsigned long first_init_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) u64 j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) * Start out by walking through the ranges in this zone that have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) * already been initialized. We don't need to do anything with them,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) * so we simply skip past them to reach the uninitialized ranges.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) if (*epfn <= first_init_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) if (*spfn < first_init_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) *spfn = first_init_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) *i = j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) * Initialize and free pages. We do it in two loops: first we initialize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) * the struct pages, then free them to the buddy allocator, because while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) * freeing pages we can access pages that are ahead (when computing the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) * buddy page in __free_one_page()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) * To try to keep some memory in the cache, the loop is broken along
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) * max page order boundaries. This way we will not cause any issues with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) * the buddy page computation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) static unsigned long __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) unsigned long *end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) unsigned long mo_pfn = ALIGN(*start_pfn + 1, MAX_ORDER_NR_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) unsigned long spfn = *start_pfn, epfn = *end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) unsigned long nr_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) u64 j = *i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) /* First we loop through and initialize the page values */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) for_each_free_mem_pfn_range_in_zone_from(j, zone, start_pfn, end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) unsigned long t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) if (mo_pfn <= *start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) t = min(mo_pfn, *end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) nr_pages += deferred_init_pages(zone, *start_pfn, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) if (mo_pfn < *end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) *start_pfn = mo_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) /* Reset values and now loop through freeing pages as needed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) swap(j, *i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) for_each_free_mem_pfn_range_in_zone_from(j, zone, &spfn, &epfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) unsigned long t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) if (mo_pfn <= spfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) t = min(mo_pfn, epfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) deferred_free_pages(spfn, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) if (mo_pfn <= epfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) }
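/*
 * A worked example of the chunking above (hypothetical values, assuming
 * MAX_ORDER_NR_PAGES == 1024): for *start_pfn == 0x20350,
 * mo_pfn = ALIGN(0x20350 + 1, 1024) = 0x20400, so this call initializes and
 * then frees pfns only up to 0x20400 before returning, keeping both loops
 * inside a single max-order block as the comment above requires.
 */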
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) static void __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) unsigned long spfn, epfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) struct zone *zone = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) u64 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) * Initialize and free pages in MAX_ORDER sized increments so that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) * can avoid introducing any issues with the buddy allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) while (spfn < end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) deferred_init_maxorder(&i, zone, &spfn, &epfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) /* An arch may override for more concurrency. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) __weak int __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) deferred_page_init_max_threads(const struct cpumask *node_cpumask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) }
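/*
 * A minimal sketch of what an arch override of the weak function above might
 * look like (an assumption, not taken from this tree), allowing one thread
 * per CPU that is local to the node:
 *
 *	int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask)
 *	{
 *		return max_t(int, cpumask_weight(node_cpumask), 1);
 *	}
 */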
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) /* Initialise remaining memory on a node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) static int __init deferred_init_memmap(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) pg_data_t *pgdat = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) unsigned long spfn = 0, epfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) unsigned long first_init_pfn, flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) unsigned long start = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) int zid, max_threads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) u64 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) /* Bind memory initialisation thread to a local node if possible */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) if (!cpumask_empty(cpumask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) set_cpus_allowed_ptr(current, cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) pgdat_resize_lock(pgdat, &flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) first_init_pfn = pgdat->first_deferred_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) if (first_init_pfn == ULONG_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) pgdat_resize_unlock(pgdat, &flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) pgdat_init_report_one_done();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) /* Sanity check boundaries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) pgdat->first_deferred_pfn = ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) * Once we unlock here, the zone cannot be grown any more; thus if an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) * interrupt thread must allocate this early in boot, the zone must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) * pre-grown prior to the start of deferred page initialization.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) pgdat_resize_unlock(pgdat, &flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) /* Only the highest zone is deferred so find it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) for (zid = 0; zid < MAX_NR_ZONES; zid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) zone = pgdat->node_zones + zid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) if (first_init_pfn < zone_end_pfn(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) /* If the zone is empty somebody else may have cleared out the zone */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) first_init_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) goto zone_empty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) max_threads = deferred_page_init_max_threads(cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) while (spfn < epfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) struct padata_mt_job job = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) .thread_fn = deferred_init_memmap_chunk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) .fn_arg = zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) .start = spfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) .size = epfn_align - spfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) .align = PAGES_PER_SECTION,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) .min_chunk = PAGES_PER_SECTION,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) .max_threads = max_threads,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) padata_do_multithreaded(&job);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) epfn_align);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) zone_empty:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) /* Sanity check that the next zone really is unpopulated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) pr_info("node %d deferred pages initialised in %ums\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) pgdat->node_id, jiffies_to_msecs(jiffies - start));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) pgdat_init_report_one_done();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) * If this zone has deferred pages, try to grow it by initializing enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) * deferred pages to satisfy the allocation specified by order, rounded up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) * the nearest PAGES_PER_SECTION boundary. So we're adding memory in increments
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) * of SECTION_SIZE bytes by initializing struct pages in increments of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) * PAGES_PER_SECTION * sizeof(struct page) bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) * Return true when the zone was grown, otherwise return false. We return true
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) * even when we grow less than requested, to let the caller decide if there are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) * enough pages to satisfy the allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) * Note: we use noinline because this function is needed only during boot, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) * it is called from the __ref function _deferred_grow_zone. This way we make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) * sure that it is not inlined into the permanent text section.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) */
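/*
 * A worked example of the rounding described above (assuming a hypothetical
 * PAGES_PER_SECTION == 32768, i.e. 128 MiB sections with 4 KiB pages): an
 * order-3 allocation needs 8 pages, but nr_pages_needed becomes
 * ALIGN(8, 32768) == 32768, so each successful call grows the zone by
 * roughly a full section's worth of initialized struct pages.
 */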
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) static noinline bool __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) deferred_grow_zone(struct zone *zone, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) pg_data_t *pgdat = zone->zone_pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) unsigned long spfn, epfn, flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) unsigned long nr_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) u64 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) /* Only the last zone may have deferred pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) pgdat_resize_lock(pgdat, &flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) * If someone grew this zone while we were waiting for the spinlock, return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) * true, as there might be enough pages already.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) if (first_deferred_pfn != pgdat->first_deferred_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) pgdat_resize_unlock(pgdat, &flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) /* If the zone is empty somebody else may have cleared out the zone */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) first_deferred_pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) pgdat->first_deferred_pfn = ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) pgdat_resize_unlock(pgdat, &flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) /* Retry only once. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) return first_deferred_pfn != ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) * Initialize and free pages in MAX_ORDER sized increments so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) * that we can avoid introducing any issues with the buddy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) * allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) while (spfn < epfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) /* update our first deferred PFN for this section */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) first_deferred_pfn = spfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) touch_nmi_watchdog();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) /* We should only stop along section boundaries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) /* If our quota has been met we can stop here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) if (nr_pages >= nr_pages_needed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) pgdat->first_deferred_pfn = spfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) pgdat_resize_unlock(pgdat, &flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) return nr_pages > 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) * deferred_grow_zone() is __init, but it is called from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) * get_page_from_freelist() during early boot until deferred_pages permanently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) * disables this call. This is why we have the __ref wrapper: it avoids a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) * section mismatch warning and still lets the __init function body be unloaded.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) static bool __ref
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) _deferred_grow_zone(struct zone *zone, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) return deferred_grow_zone(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) void __init page_alloc_init_late(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) /* There will be num_node_state(N_MEMORY) threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) for_each_node_state(nid, N_MEMORY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) /* Block until all are initialised */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) wait_for_completion(&pgdat_init_all_done_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) * The number of managed pages has changed due to the initialisation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) * so the pcpu batch and high limits need to be updated or the limits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) * will be artificially small.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) for_each_populated_zone(zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) zone_pcp_update(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) * We initialized the rest of the deferred pages. Permanently disable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) * on-demand struct page initialization.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) static_branch_disable(&deferred_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) /* Reinit limits that are based on free pages after the kernel is up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) files_maxfiles_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) /* Discard memblock private memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) memblock_discard();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) for_each_node_state(nid, N_MEMORY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) shuffle_free_memory(NODE_DATA(nid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) for_each_populated_zone(zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) set_zone_contiguous(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) void __init init_cma_reserved_pageblock(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) unsigned i = pageblock_nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) struct page *p = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) __ClearPageReserved(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) set_page_count(p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) } while (++p, --i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) set_pageblock_migratetype(page, MIGRATE_CMA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227)
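/*
 * When the pageblock is larger than the largest buddy order, it has to be
 * handed back in MAX_ORDER - 1 sized pieces. A hypothetical example: with
 * pageblock_order == 11 and MAX_ORDER == 11 (MAX_ORDER_NR_PAGES == 1024),
 * the 2048-page pageblock is freed below as two order-10 chunks; otherwise
 * the whole pageblock is freed in a single call.
 */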
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) if (pageblock_order >= MAX_ORDER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) i = pageblock_nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) p = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) set_page_refcounted(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) __free_pages(p, MAX_ORDER - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) p += MAX_ORDER_NR_PAGES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) } while (i -= MAX_ORDER_NR_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) set_page_refcounted(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) __free_pages(page, pageblock_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) adjust_managed_page_count(page, pageblock_nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) page_zone(page)->cma_pages += pageblock_nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) * The order of subdivision here is critical for the IO subsystem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) * Please do not alter this order without good reasons and regression
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) * testing. Specifically, as large blocks of memory are subdivided,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) * the order in which smaller blocks are delivered depends on the order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) * they're subdivided in this function. This is the primary factor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) * influencing the order in which pages are delivered to the IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) * subsystem according to empirical testing, and this is also justified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) * by considering the behavior of a buddy system containing a single
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) * large block of memory acted on by a series of small allocations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) * This behavior is a critical factor in sglist merging's success.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) * -- nyc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) */
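/*
 * A short trace of expand() below (hypothetical request): splitting an
 * order-5 block to satisfy an order-2 request, i.e.
 * expand(zone, page, 2, 5, migratetype), puts the upper halves back on the
 * free lists as buddies of order 4, 3 and 2 (at page + 16, page + 8 and
 * page + 4 respectively), leaving the first four pages for the caller.
 */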
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) static inline void expand(struct zone *zone, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) int low, int high, int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) unsigned long size = 1 << high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) while (high > low) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) high--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) size >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) * Mark as guard pages (or page), which will allow them to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) * merged back into the allocator when the buddy is freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) * The corresponding page table entries will not be touched;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) * the pages stay not present in the virtual address space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) if (set_page_guard(zone, &page[size], high, migratetype))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) add_to_free_list(&page[size], zone, high, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) set_buddy_order(&page[size], high);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) static void check_new_page_bad(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) if (unlikely(page->flags & __PG_HWPOISON)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) /* Don't complain about hwpoisoned pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) page_mapcount_reset(page); /* remove PageBuddy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) bad_page(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) page_bad_reason(page, PAGE_FLAGS_CHECK_AT_PREP));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) * This page is about to be returned from the page allocator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) static inline int check_new_page(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) if (likely(page_expected_state(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) PAGE_FLAGS_CHECK_AT_PREP|__PG_HWPOISON)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) check_new_page_bad(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) #ifdef CONFIG_DEBUG_VM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) * With DEBUG_VM enabled, order-0 pages are checked for expected state when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) * being allocated from pcp lists. With debug_pagealloc also enabled, they are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) * also checked when pcp lists are refilled from the free lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) static inline bool check_pcp_refill(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) if (debug_pagealloc_enabled_static())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) return check_new_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) static inline bool check_new_pcp(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) return check_new_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) * With DEBUG_VM disabled, free order-0 pages are checked for expected state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) * when pcp lists are being refilled from the free lists. With debug_pagealloc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) * enabled, they are also checked when being allocated from the pcp lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) static inline bool check_pcp_refill(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) return check_new_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) static inline bool check_new_pcp(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) if (debug_pagealloc_enabled_static())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) return check_new_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) #endif /* CONFIG_DEBUG_VM */
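/*
 * Summary of where the order-0 allocation-path checks above actually run:
 *
 *                    refill of pcp lists         alloc from pcp lists
 *   DEBUG_VM=y       only with debug_pagealloc   always
 *   DEBUG_VM=n       always                      only with debug_pagealloc
 *
 * Either way a page is normally checked once on its way from the buddy
 * lists to the caller, and twice when debug_pagealloc is also enabled.
 */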
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) static bool check_new_pages(struct page *page, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) for (i = 0; i < (1 << order); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) struct page *p = page + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) if (unlikely(check_new_page(p)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) inline void post_alloc_hook(struct page *page, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) gfp_t gfp_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) set_page_private(page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) set_page_refcounted(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) arch_alloc_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) debug_pagealloc_map_pages(page, 1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) * Page unpoisoning must happen before memory initialization.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) * Otherwise, the poison pattern will be overwritten for __GFP_ZERO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) * allocations and the page unpoisoning code will complain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) kernel_unpoison_pages(page, 1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) * As memory initialization might be integrated into KASAN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) * kasan_alloc_pages and kernel_init_free_pages must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) * kept together to avoid discrepancies in behavior.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) if (kasan_has_integrated_init()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) kasan_alloc_pages(page, order, gfp_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) kasan_unpoison_pages(page, order, init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) if (init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) kernel_init_free_pages(page, 1 << order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) gfp_flags & __GFP_ZEROTAGS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) set_page_owner(page, order, gfp_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) unsigned int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) post_alloc_hook(page, order, gfp_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) if (order && (gfp_flags & __GFP_COMP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) prep_compound_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) * page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) * allocate the page. The expectation is that the caller is taking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) * steps that will free more memory. The caller should avoid the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) * being used for !PFMEMALLOC purposes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) if (alloc_flags & ALLOC_NO_WATERMARKS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) set_page_pfmemalloc(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) clear_page_pfmemalloc(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) }
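/*
 * Usage sketch (not part of the allocator itself): a compound allocation
 * such as
 *
 *	page = alloc_pages(GFP_KERNEL | __GFP_COMP, 2);
 *
 * reaches prep_new_page() with order == 2, so post_alloc_hook() covers the
 * whole block and prep_compound_page() links the tail pages to the head.
 * Without __GFP_COMP the caller simply gets 1 << order independent pages
 * that happen to be physically contiguous.
 */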
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) * Go through the free lists for the given migratetype and remove
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) * the smallest available page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) static __always_inline
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) unsigned int current_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) struct free_area *area;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) /* Find a page of the appropriate size in the preferred list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) for (current_order = order; current_order < MAX_ORDER; ++current_order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) area = &(zone->free_area[current_order]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) page = get_page_from_free_area(area, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) del_page_from_free_list(page, zone, current_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) expand(zone, page, order, current_order, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) set_pcppage_migratetype(page, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) }
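/*
 * Example: an order-0 request that only finds an order-3 block free. The
 * order-3 block (8 pages) is deleted from its free list and expand() gives
 * back an order-2, an order-1 and an order-0 buddy, leaving one order-0
 * page to hand to the caller. Larger blocks are only split when every
 * lower-order list for the migratetype is empty.
 */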
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) * This array describes the order in which the free lists are fallen back to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) * when the free lists for the desired migratetype are depleted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) static int fallbacks[MIGRATE_TYPES][3] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_TYPES },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) #ifdef CONFIG_MEMORY_ISOLATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) [MIGRATE_ISOLATE] = { MIGRATE_TYPES }, /* Never used */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) };
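/*
 * For example, a MIGRATE_UNMOVABLE request whose own lists are empty tries
 * MIGRATE_RECLAIMABLE first and then MIGRATE_MOVABLE; the trailing
 * MIGRATE_TYPES entry is the end-of-list sentinel checked by
 * find_suitable_fallback(). CMA and isolated pageblocks are never used as
 * fallback sources.
 */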
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) static __always_inline struct page *__rmqueue_cma_fallback(struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) return __rmqueue_smallest(zone, order, MIGRATE_CMA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) unsigned int order) { return NULL; }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) * Move the free pages in a range to the freelist tail of the requested type.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) * Note that start_page and end_page are not aligned on a pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) * boundary. If alignment is required, use move_freepages_block()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) static int move_freepages(struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) struct page *start_page, struct page *end_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) int migratetype, int *num_movable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) int pages_moved = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) for (page = start_page; page <= end_page;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) if (!pfn_valid_within(page_to_pfn(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) page++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) if (!PageBuddy(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) * We assume that pages that could be isolated for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) * migration are movable. But we don't actually try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) * isolating, as that would be expensive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) if (num_movable &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) (PageLRU(page) || __PageMovable(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) (*num_movable)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) page++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) /* Make sure we are not inadvertently changing nodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) VM_BUG_ON_PAGE(page_zone(page) != zone, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) order = buddy_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) move_to_free_list(page, zone, order, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) page += 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) pages_moved += 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) return pages_moved;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) int move_freepages_block(struct zone *zone, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) int migratetype, int *num_movable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) struct page *start_page, *end_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) if (num_movable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) *num_movable = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) start_pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) start_pfn = start_pfn & ~(pageblock_nr_pages-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) start_page = pfn_to_page(start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) end_page = start_page + pageblock_nr_pages - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) end_pfn = start_pfn + pageblock_nr_pages - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) /* Do not cross zone boundaries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) if (!zone_spans_pfn(zone, start_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) start_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) if (!zone_spans_pfn(zone, end_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) return move_freepages(zone, start_page, end_page, migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) num_movable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) }
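/*
 * The block is found by rounding the pfn down to a pageblock boundary. For
 * example, with pageblock_nr_pages == 512 (a typical value with 4K pages,
 * i.e. 2MB blocks), a page at pfn 1000 gives start_pfn == 512 and
 * end_pfn == 1023. If the block runs past the end of the zone nothing is
 * moved; if it starts before the zone, only the part from @page onwards is
 * considered.
 */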
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) static void change_pageblock_range(struct page *pageblock_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) int start_order, int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) int nr_pageblocks = 1 << (start_order - pageblock_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) while (nr_pageblocks--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) set_pageblock_migratetype(pageblock_page, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) pageblock_page += pageblock_nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) * When we are falling back to another migratetype during allocation, try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) * steal extra free pages from the same pageblocks to satisfy further
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) * allocations, instead of polluting multiple pageblocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) * If we are stealing a relatively large buddy page, it is likely there will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) * be more free pages in the pageblock, so try to steal them all. For
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) * reclaimable and unmovable allocations, we steal regardless of page size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) * as fragmentation caused by those allocations polluting movable pageblocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) * is worse than movable allocations stealing from unmovable and reclaimable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) * pageblocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) static bool can_steal_fallback(unsigned int order, int start_mt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) * This order check is intentionally kept, even though the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) * check uses a more relaxed order threshold. The reason is that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) * we can actually steal the whole pageblock if this condition is met,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) * whereas the check below does not guarantee it and is only a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) * heuristic, so it could be changed at any time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) if (order >= pageblock_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) if (order >= pageblock_order / 2 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) start_mt == MIGRATE_RECLAIMABLE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) start_mt == MIGRATE_UNMOVABLE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) page_group_by_mobility_disabled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) }
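/*
 * Worked example, assuming pageblock_order == 9 (2MB pageblocks with 4K
 * pages): a MIGRATE_MOVABLE request may only claim a whole block when the
 * fallback buddy it found has order 4 or higher (pageblock_order / 2),
 * whereas MIGRATE_RECLAIMABLE and MIGRATE_UNMOVABLE requests may always
 * claim it, because letting them fragment a movable pageblock is the more
 * damaging outcome. If page_group_by_mobility_disabled is set, stealing is
 * always allowed.
 */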
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) static inline bool boost_watermark(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) unsigned long max_boost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) if (!watermark_boost_factor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) * Don't bother in zones that are unlikely to produce results.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) * On small machines, including kdump capture kernels running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) * in a small area, boosting the watermark can cause an out of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) * memory situation immediately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) if ((pageblock_nr_pages * 4) > zone_managed_pages(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) watermark_boost_factor, 10000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) * The high watermark may be uninitialised if fragmentation occurs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) * very early in boot, so do not boost. We do not fall
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) * through and boost by pageblock_nr_pages because failing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) * allocations that early means that reclaim is not going
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) * to help, and it may even be impossible to reclaim the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) * boosted watermark, resulting in a hang.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) if (!max_boost)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) max_boost = max(pageblock_nr_pages, max_boost);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) max_boost);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) }
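/*
 * Rough numbers, assuming the default watermark_boost_factor of 15000:
 * max_boost works out to 150% of the zone's high watermark, and each
 * fallback event bumps watermark_boost by one pageblock (512 pages on a
 * typical 4K/2MB configuration) until that cap is hit. The boost is
 * expected to be wound back down by kswapd once it has reclaimed above the
 * boosted watermark.
 */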
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) * This function implements actual steal behaviour. If order is large enough,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) * we can steal whole pageblock. If not, we first move freepages in this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) * pageblock to our migratetype and determine how many already-allocated pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) * are there in the pageblock with a compatible migratetype. If at least half
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) * of pages are free or compatible, we can change migratetype of the pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) * itself, so pages freed in the future will be put on the correct free list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) static void steal_suitable_fallback(struct zone *zone, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) unsigned int alloc_flags, int start_type, bool whole_block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) unsigned int current_order = buddy_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) int free_pages, movable_pages, alike_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) int old_block_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) old_block_type = get_pageblock_migratetype(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) * This can happen due to races and we want to prevent broken
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) * highatomic accounting.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) if (is_migrate_highatomic(old_block_type))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) goto single_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) /* Take ownership for orders >= pageblock_order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) if (current_order >= pageblock_order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) change_pageblock_range(page, current_order, start_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) goto single_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) * Boost watermarks to increase reclaim pressure to reduce the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) * likelihood of future fallbacks. Wake kswapd now as the node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) * may be balanced overall and kswapd will not wake naturally.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) if (boost_watermark(zone) && (alloc_flags & ALLOC_KSWAPD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) /* We are not allowed to try stealing from the whole block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) if (!whole_block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) goto single_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) free_pages = move_freepages_block(zone, page, start_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) &movable_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) * Determine how many pages are compatible with our allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) * For movable allocation, it's the number of movable pages which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) * we just obtained. For other types it's a bit more tricky.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) if (start_type == MIGRATE_MOVABLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) alike_pages = movable_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) * If we are falling back a RECLAIMABLE or UNMOVABLE allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) * to MOVABLE pageblock, consider all non-movable pages as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) * compatible. If it's UNMOVABLE falling back to RECLAIMABLE or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) * vice versa, be conservative since we can't distinguish the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) * exact migratetype of non-movable pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) if (old_block_type == MIGRATE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) alike_pages = pageblock_nr_pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) - (free_pages + movable_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) alike_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) /* moving whole block can fail due to zone boundary conditions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) if (!free_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) goto single_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) * If a sufficient number of pages in the block are either free or of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) * comparable migratability as our allocation, claim the whole block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) page_group_by_mobility_disabled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) set_pageblock_migratetype(page, start_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) single_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) move_to_free_list(page, zone, current_order, start_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) }
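/*
 * Example of the claim threshold above: with pageblock_order == 9 the
 * block holds 512 pages, so its migratetype is changed once
 * free_pages + alike_pages >= 256, i.e. at least half the block is already
 * free or compatibly allocated. Otherwise only the buddy page that
 * triggered the fallback is moved (the single_page path).
 */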
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) * Check whether there is a suitable fallback freepage with requested order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) * If only_stealable is true, this function returns fallback_mt only if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) * we can steal all the other freepages as well. This helps to reduce
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) * fragmentation due to mixed migratetype pages in one pageblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) int find_suitable_fallback(struct free_area *area, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) int migratetype, bool only_stealable, bool *can_steal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) int fallback_mt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) if (area->nr_free == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) *can_steal = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) for (i = 0;; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) fallback_mt = fallbacks[migratetype][i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) if (fallback_mt == MIGRATE_TYPES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) if (free_area_empty(area, fallback_mt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) if (can_steal_fallback(order, migratetype))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) *can_steal = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) if (!only_stealable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) return fallback_mt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) if (*can_steal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) return fallback_mt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) }
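/*
 * Return value summary: -1 when no populated fallback free list exists for
 * this order, otherwise the fallback migratetype; *can_steal then tells the
 * caller whether claiming the whole pageblock is allowed. With
 * only_stealable == true, candidates that cannot be stolen wholesale are
 * skipped rather than returned.
 */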
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) * Reserve a pageblock for exclusive use of high-order atomic allocations if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) * there are no empty page blocks that contain a page with a suitable order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) unsigned int alloc_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) int mt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) unsigned long max_managed, flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) * Limit the number of pages reserved to roughly 1% of the zone plus one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) * pageblock. The check is race-prone but harmless.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) max_managed = (zone_managed_pages(zone) / 100) + pageblock_nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) if (zone->nr_reserved_highatomic >= max_managed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) spin_lock_irqsave(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) /* Recheck the nr_reserved_highatomic limit under the lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) if (zone->nr_reserved_highatomic >= max_managed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) /* Yoink! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) mt = get_pageblock_migratetype(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) if (!is_migrate_highatomic(mt) && !is_migrate_isolate(mt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) && !is_migrate_cma(mt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) zone->nr_reserved_highatomic += pageblock_nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) spin_unlock_irqrestore(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) }
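/*
 * Sizing sketch: on a zone with about one million managed pages (~4GB of
 * 4K pages), max_managed is roughly 10000 + 512 pages, i.e. at most about
 * 20 highatomic pageblocks. Even a tiny zone can reserve at least one full
 * pageblock before hitting the limit.
 */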
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) * Used when an allocation is about to fail under memory pressure. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) * potentially hurts the reliability of high-order allocations when under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) * intense memory pressure but failed atomic allocations should be easier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) * to recover from than an OOM.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) * If @force is true, try to unreserve a pageblock even if that exhausts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) * the highatomic reserve completely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) bool force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) struct zonelist *zonelist = ac->zonelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) bool ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) ac->nodemask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) * Preserve at least one pageblock unless memory pressure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) * is really high.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) if (!force && zone->nr_reserved_highatomic <=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) pageblock_nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) spin_lock_irqsave(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) for (order = 0; order < MAX_ORDER; order++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) struct free_area *area = &(zone->free_area[order]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) * In the page freeing path, the migratetype change is racy, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) * we can encounter several free pages in a pageblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) * in this loop although we changed the pageblock type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) * from highatomic to ac->migratetype. So we should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) * adjust the count only once.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) if (is_migrate_highatomic_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) * It should never happen but changes to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) * locking could inadvertently allow a per-cpu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) * drain to add pages to MIGRATE_HIGHATOMIC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) * while unreserving so be safe and watch for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) * underflows.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) zone->nr_reserved_highatomic -= min(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) pageblock_nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) zone->nr_reserved_highatomic);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) * Convert to ac->migratetype and avoid the normal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) * pageblock stealing heuristics. Minimally, the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) * is doing the work and needs the pages. More
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) * importantly, if the block was always converted to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) * MIGRATE_UNMOVABLE or another type then the number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) * of pageblocks that cannot be completely freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) * may increase.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) set_pageblock_migratetype(page, ac->migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) ret = move_freepages_block(zone, page, ac->migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) spin_unlock_irqrestore(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) spin_unlock_irqrestore(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) * Try finding a free buddy page on the fallback list and put it on the free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) * list of requested migratetype, possibly along with other pages from the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) * block, depending on fragmentation avoidance heuristics. Returns true if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) * fallback was found so that __rmqueue_smallest() can grab it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) * The use of signed ints for order and current_order is a deliberate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) * deviation from the rest of this file, to make the for loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) * condition simpler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) static __always_inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) unsigned int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) struct free_area *area;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) int current_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) int min_order = order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) int fallback_mt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) bool can_steal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) * Do not steal pages from freelists belonging to other pageblocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) * i.e. orders < pageblock_order. If there are no local zones free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) * the zonelists will be reiterated without ALLOC_NOFRAGMENT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) if (alloc_flags & ALLOC_NOFRAGMENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) min_order = pageblock_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) * Find the largest available free page in the other list. This roughly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) * approximates finding the pageblock with the most free pages, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) * would be too costly to do exactly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) for (current_order = MAX_ORDER - 1; current_order >= min_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) --current_order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) area = &(zone->free_area[current_order]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) fallback_mt = find_suitable_fallback(area, current_order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) start_migratetype, false, &can_steal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) if (fallback_mt == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) * If we cannot steal all free pages from the pageblock and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) * requested migratetype is movable, it is better to steal and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) * split the smallest available page instead of the largest one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) * because even if the next movable allocation falls back into a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) * different pageblock than this one, it won't cause permanent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) * fragmentation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) if (!can_steal && start_migratetype == MIGRATE_MOVABLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) && current_order > order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) goto find_smallest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) goto do_steal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) find_smallest:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) for (current_order = order; current_order < MAX_ORDER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) current_order++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) area = &(zone->free_area[current_order]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) fallback_mt = find_suitable_fallback(area, current_order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) start_migratetype, false, &can_steal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) if (fallback_mt != -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) * This should not happen - we already found a suitable fallback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) * when looking for the largest page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) VM_BUG_ON(current_order == MAX_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) do_steal:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) page = get_page_from_free_area(area, fallback_mt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) steal_suitable_fallback(zone, page, alloc_flags, start_migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) can_steal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) trace_mm_page_alloc_extfrag(page, order, current_order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) start_migratetype, fallback_mt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) * Do the hard work of removing an element from the buddy allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) * Call me with the zone->lock already held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) static __always_inline struct page *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) __rmqueue(struct zone *zone, unsigned int order, int migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) unsigned int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) page = __rmqueue_smallest(zone, order, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) if (unlikely(!page) && __rmqueue_fallback(zone, order, migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) alloc_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) trace_mm_page_alloc_zone_locked(page, order, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) }
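/*
 * Typical flow: __rmqueue_smallest() fails for the requested migratetype,
 * __rmqueue_fallback() then moves suitable pages from another migratetype
 * onto our free lists (possibly claiming the whole pageblock), and the
 * retried __rmqueue_smallest() picks one of them up. If the fallback finds
 * nothing either, NULL is returned (the tracepoint still fires with a NULL
 * page).
 */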
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) static struct page *__rmqueue_cma(struct zone *zone, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) int migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) unsigned int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) struct page *page = __rmqueue_cma_fallback(zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) trace_mm_page_alloc_zone_locked(page, order, MIGRATE_CMA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) static inline struct page *__rmqueue_cma(struct zone *zone, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) int migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) unsigned int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) * Obtain a specified number of elements from the buddy allocator, all under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) * a single hold of the lock, for efficiency. Add them to the supplied list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) * Returns the number of new pages which were placed at *list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) static int rmqueue_bulk(struct zone *zone, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) unsigned long count, struct list_head *list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) int migratetype, unsigned int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) int i, alloced = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) spin_lock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) for (i = 0; i < count; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) if (is_migrate_cma(migratetype))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) page = __rmqueue_cma(zone, order, migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) page = __rmqueue(zone, order, migratetype, alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) if (unlikely(page == NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) if (unlikely(check_pcp_refill(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) * Split buddy pages returned by expand() are received here in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) * physical page order. The page is added to the tail of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) * caller's list, so from the caller's perspective the linked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) * list is, under some conditions, ordered by page number. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) * is useful for IO devices that walk the list from the head and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) * can therefore merge IO requests when the physical pages are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) * ordered properly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) list_add_tail(&page->lru, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) alloced++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) if (is_migrate_cma(get_pcppage_migratetype(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) __mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) -(1 << order));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) * i pages were removed from the buddy list even if some leaked due
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) * to check_pcp_refill() failing, so adjust NR_FREE_PAGES based
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) * on i. Do not confuse this with 'alloced', which is the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) * pages actually added to the pcp list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) spin_unlock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) return alloced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) }
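/*
 * Example of the accounting above: a refill of count == 8 order-0 pages
 * where one page fails check_pcp_refill() ends with i == 8 but
 * alloced == 7. NR_FREE_PAGES is decreased by all 8 pages because they all
 * left the buddy lists, while only 7 were handed to the pcp list (the bad
 * page is deliberately leaked).
 */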
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) * Return the pcp list that corresponds to the migrate type if that list isn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) * empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) * If the list is empty return NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) static struct list_head *get_populated_pcp_list(struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) unsigned int order, struct per_cpu_pages *pcp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) int migratetype, unsigned int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) struct list_head *list = &pcp->lists[migratetype];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) if (list_empty(list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) pcp->count += rmqueue_bulk(zone, order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) pcp->batch, list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) migratetype, alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) if (list_empty(list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) list = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) return list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) * Called from the vmstat counter updater to drain pagesets of this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) * currently executing processor on remote nodes after they have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) * expired.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) * Note that this function must be called with the thread pinned to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) * a single processor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) int to_drain, batch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) batch = READ_ONCE(pcp->batch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) to_drain = min(pcp->count, batch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) if (to_drain > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) free_pcppages_bulk(zone, to_drain, pcp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) * Drain pcplists of the indicated processor and zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) * The processor must either be the current processor, with the calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) * thread pinned to it, or a processor that is not online.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) static void drain_pages_zone(unsigned int cpu, struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) struct per_cpu_pageset *pset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) struct per_cpu_pages *pcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) pset = per_cpu_ptr(zone->pageset, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) pcp = &pset->pcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) if (pcp->count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) free_pcppages_bulk(zone, pcp->count, pcp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) * Drain pcplists of all zones on the indicated processor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) * The processor must either be the current processor, with the calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) * thread pinned to it, or a processor that is not online.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) static void drain_pages(unsigned int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) for_each_populated_zone(zone) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) drain_pages_zone(cpu, zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) * Spill all of this CPU's per-cpu pages back into the buddy allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) * The CPU has to be pinned. When the zone parameter is non-NULL, spill just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) * that single zone's pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) void drain_local_pages(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) int cpu = smp_processor_id();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) if (zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) drain_pages_zone(cpu, zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) drain_pages(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140)
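/*
 * Workqueue callback used by drain_all_pages(): drains the local pcplists
 * for the zone recorded in the per-cpu pcpu_drain work item, or for all
 * zones when that zone is NULL.
 */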
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) static void drain_local_pages_wq(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) struct pcpu_drain *drain;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) drain = container_of(work, struct pcpu_drain, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) * drain_all_pages doesn't use proper cpu hotplug protection so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) * we can race with cpu offline when the WQ can move this from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) * a cpu pinned worker to an unbound one. We can operate on a different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) * cpu, which is alright, but we also have to make sure not to move to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) * a different one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) preempt_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) drain_local_pages(drain->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) preempt_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) * When the zone parameter is non-NULL, spill just the single zone's pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) * Note that this can be extremely slow as the draining happens in a workqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) void drain_all_pages(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) * Allocate in the BSS so we won't require allocation in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) * direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) static cpumask_t cpus_with_pcps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) * Make sure nobody triggers this path before mm_percpu_wq is fully
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) * initialized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) if (WARN_ON_ONCE(!mm_percpu_wq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) * Do not drain if one is already in progress unless it's specific to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) * a zone. Such callers are primarily CMA and memory hotplug and need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) * the drain to be complete when the call returns.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) if (unlikely(!mutex_trylock(&pcpu_drain_mutex))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) if (!zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) mutex_lock(&pcpu_drain_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) * We don't care about racing with CPU hotplug events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) * as the offline notification will cause the notified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) * cpu to drain that CPU's pcps, and on_each_cpu_mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) * disables preemption as part of its processing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) for_each_online_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) struct per_cpu_pageset *pcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) struct zone *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) bool has_pcps = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) if (zone) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) pcp = per_cpu_ptr(zone->pageset, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) if (pcp->pcp.count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) has_pcps = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) for_each_populated_zone(z) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) pcp = per_cpu_ptr(z->pageset, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) if (pcp->pcp.count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) has_pcps = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) if (has_pcps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) cpumask_set_cpu(cpu, &cpus_with_pcps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) cpumask_clear_cpu(cpu, &cpus_with_pcps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) for_each_cpu(cpu, &cpus_with_pcps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) drain->zone = zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) INIT_WORK(&drain->work, drain_local_pages_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) queue_work_on(cpu, mm_percpu_wq, &drain->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) for_each_cpu(cpu, &cpus_with_pcps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) mutex_unlock(&pcpu_drain_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) #ifdef CONFIG_HIBERNATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) * Touch the watchdog for every WD_PAGE_COUNT pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) #define WD_PAGE_COUNT (128*1024)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244)
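/*
 * Used by hibernation: record which pages are free at snapshot time so they
 * can be skipped. First clear the "free" bit for every valid pfn in the
 * zone (unless the page is forbidden), then set it again for each page that
 * is actually sitting on a buddy free list.
 */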
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) void mark_free_pages(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) unsigned long pfn, max_zone_pfn, page_count = WD_PAGE_COUNT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) unsigned int order, t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) if (zone_is_empty(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) spin_lock_irqsave(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) max_zone_pfn = zone_end_pfn(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) if (pfn_valid(pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) page = pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) if (!--page_count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) touch_nmi_watchdog();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) page_count = WD_PAGE_COUNT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) if (page_zone(page) != zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) if (!swsusp_page_is_forbidden(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) swsusp_unset_page_free(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) for_each_migratetype_order(order, t) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) list_for_each_entry(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) &zone->free_area[order].free_list[t], lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) unsigned long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) for (i = 0; i < (1UL << order); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) if (!--page_count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) touch_nmi_watchdog();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) page_count = WD_PAGE_COUNT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) swsusp_set_page_free(pfn_to_page(pfn + i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) spin_unlock_irqrestore(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) #endif /* CONFIG_HIBERNATION */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292)
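/*
 * Run the free-page sanity checks and cache the pageblock's migratetype in
 * the page for later use by free_unref_page_commit(). Returns false if the
 * page fails the checks and must not be freed.
 */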
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) int migratetype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) if (!free_pcp_prepare(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) migratetype = get_pfnblock_migratetype(page, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) set_pcppage_migratetype(page, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304)
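/*
 * Add a prepared order-0 page to the current CPU's pcplist for its cached
 * migratetype. Must be called with interrupts disabled. Isolated pages (and
 * CMA pages when the vendor hook requests it) bypass the pcplist and go
 * straight back to the buddy allocator; once the pcplist reaches pcp->high,
 * a batch of pages is returned to the buddy allocator as well.
 */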
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) static void free_unref_page_commit(struct page *page, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) struct zone *zone = page_zone(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) struct per_cpu_pages *pcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) int migratetype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) bool pcp_skip_cma_pages = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) migratetype = get_pcppage_migratetype(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) __count_vm_event(PGFREE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) * We only track unmovable, reclaimable and movable on pcp lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) * Free ISOLATE pages back to the allocator because they are being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) * offlined, but treat HIGHATOMIC as movable pages so we can get those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) * areas back if necessary. Otherwise, we may have to free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) * excessively into the page allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) if (migratetype >= MIGRATE_PCPTYPES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) trace_android_vh_pcplist_add_cma_pages_bypass(migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) &pcp_skip_cma_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) if (unlikely(is_migrate_isolate(migratetype)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) pcp_skip_cma_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) free_one_page(zone, page, pfn, 0, migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) FPI_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) migratetype = MIGRATE_MOVABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) pcp = &this_cpu_ptr(zone->pageset)->pcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) list_add(&page->lru, &pcp->lists[migratetype]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) pcp->count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) if (pcp->count >= pcp->high) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) unsigned long batch = READ_ONCE(pcp->batch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) free_pcppages_bulk(zone, batch, pcp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) * Free a 0-order page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) void free_unref_page(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) unsigned long pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) if (!free_unref_page_prepare(page, pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) free_unref_page_commit(page, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) * Free a list of 0-order pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) void free_unref_page_list(struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) struct page *page, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) unsigned long flags, pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) int batch_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) /* Prepare pages for freeing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) list_for_each_entry_safe(page, next, list, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) if (!free_unref_page_prepare(page, pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) set_page_private(page, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) list_for_each_entry_safe(page, next, list, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) unsigned long pfn = page_private(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) set_page_private(page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) trace_mm_page_free_batched(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) free_unref_page_commit(page, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) * Guard against excessive IRQ disabled times when we get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) * a large list of pages to free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) if (++batch_count == SWAP_CLUSTER_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) batch_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) * split_page takes a non-compound higher-order page, and splits it into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) * n (1<<order) sub-pages: page[0..n-1].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) * Each sub-page must be freed individually.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) * Note: this is probably too low level an operation for use in drivers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) * Please consult with lkml before using this in your driver.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) void split_page(struct page *page, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) VM_BUG_ON_PAGE(PageCompound(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) VM_BUG_ON_PAGE(!page_count(page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) for (i = 1; i < (1 << order); i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) set_page_refcounted(page + i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) split_page_owner(page, 1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) split_page_memcg(page, 1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) EXPORT_SYMBOL_GPL(split_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418)
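/*
 * Remove a free page of the given order from the buddy free lists, subject
 * to a watermark check unless its pageblock is already isolated. Expects
 * zone->lock to be held by the caller. Returns the number of base pages
 * removed (1UL << order) on success, 0 on failure.
 */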
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) int __isolate_free_page(struct page *page, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) unsigned long watermark;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) int mt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) BUG_ON(!PageBuddy(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) zone = page_zone(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) mt = get_pageblock_migratetype(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) if (!is_migrate_isolate(mt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) * Obey watermarks as if the page was being allocated. We can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) * emulate a high-order watermark check with a raised order-0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) * watermark, because we already know our high-order page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) * exists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) watermark = zone->_watermark[WMARK_MIN] + (1UL << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) __mod_zone_freepage_state(zone, -(1UL << order), mt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) /* Remove page from free list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) del_page_from_free_list(page, zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) * Set the pageblock's migratetype to MIGRATE_MOVABLE if the isolated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) * page is at least half of a pageblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) if (order >= pageblock_order - 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) struct page *endpage = page + (1 << order) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) for (; page < endpage; page += pageblock_nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) int mt = get_pageblock_migratetype(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) && !is_migrate_highatomic(mt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) set_pageblock_migratetype(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) MIGRATE_MOVABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) return 1UL << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) * __putback_isolated_page - Return a now-isolated page back where we got it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) * @page: Page that was isolated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) * @order: Order of the isolated page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) * @mt: The page's pageblock's migratetype
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) * This function is meant to return a page pulled from the free lists via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) * __isolate_free_page back to the free list it was pulled from.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) void __putback_isolated_page(struct page *page, unsigned int order, int mt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) struct zone *zone = page_zone(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) /* zone lock should be held when this function is called */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) lockdep_assert_held(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) /* Return isolated page to tail of freelist. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) __free_one_page(page, page_to_pfn(page), zone, order, mt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) FPI_SKIP_REPORT_NOTIFY | FPI_TO_TAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) * Update NUMA hit/miss statistics
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) * Must be called with interrupts disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) enum numa_stat_item local_stat = NUMA_LOCAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) /* skip NUMA counter updates if NUMA stats are disabled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) if (!static_branch_likely(&vm_numa_stat_key))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) if (zone_to_nid(z) != numa_node_id())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) local_stat = NUMA_OTHER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) if (zone_to_nid(z) == zone_to_nid(preferred_zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) __inc_numa_state(z, NUMA_HIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) __inc_numa_state(z, NUMA_MISS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) __inc_numa_state(preferred_zone, NUMA_FOREIGN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) __inc_numa_state(z, local_stat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) /* Remove page from the per-cpu list, caller must protect the list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) unsigned int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) struct per_cpu_pages *pcp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) gfp_t gfp_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) struct list_head *list = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) /* First try to get CMA pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) if (migratetype == MIGRATE_MOVABLE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) alloc_flags & ALLOC_CMA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) list = get_populated_pcp_list(zone, 0, pcp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) get_cma_migrate_type(), alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) if (list == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) * Either CMA is not suitable or there are no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) * free CMA pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) list = get_populated_pcp_list(zone, 0, pcp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) migratetype, alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) if (unlikely(list == NULL) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) unlikely(list_empty(list)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) page = list_first_entry(list, struct page, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) pcp->count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) } while (check_new_pcp(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) /* Lock and remove page from the per-cpu list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) static struct page *rmqueue_pcplist(struct zone *preferred_zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) struct zone *zone, gfp_t gfp_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) int migratetype, unsigned int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) struct per_cpu_pages *pcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) pcp = &this_cpu_ptr(zone->pageset)->pcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) gfp_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) __count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) zone_statistics(preferred_zone, zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) * Allocate a page from the given zone. Use pcplists for order-0 allocations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) static inline
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) struct page *rmqueue(struct zone *preferred_zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) struct zone *zone, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) gfp_t gfp_flags, unsigned int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) if (likely(order == 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) migratetype, alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) * We most definitely don't want callers attempting to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) * allocate greater than order-1 page units with __GFP_NOFAIL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) spin_lock_irqsave(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) * order-0 request can reach here when the pcplist is skipped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) * due to non-CMA allocation context. HIGHATOMIC area is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) * reserved for high-order atomic allocation, so order-0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) * request should skip it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) if (order > 0 && alloc_flags & ALLOC_HARDER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) trace_mm_page_alloc_zone_locked(page, order, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) if (migratetype == MIGRATE_MOVABLE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) alloc_flags & ALLOC_CMA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) page = __rmqueue_cma(zone, order, migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) page = __rmqueue(zone, order, migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) } while (page && check_new_pages(page, order));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) spin_unlock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) __mod_zone_freepage_state(zone, -(1 << order),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) get_pcppage_migratetype(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) zone_statistics(preferred_zone, zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) trace_android_vh_rmqueue(preferred_zone, zone, order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) gfp_flags, alloc_flags, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) /* Separate test+clear to avoid unnecessary atomics */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) if (test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) wakeup_kswapd(zone, 0, 0, zone_idx(zone));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) failed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) #ifdef CONFIG_FAIL_PAGE_ALLOC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649)
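/*
 * Fault-injection knobs for failing page allocations: a generic fault_attr
 * plus filters to ignore __GFP_HIGHMEM and __GFP_DIRECT_RECLAIM requests
 * and to only fail allocations of at least min_order.
 */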
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) static struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) struct fault_attr attr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) bool ignore_gfp_highmem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) bool ignore_gfp_reclaim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) u32 min_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) } fail_page_alloc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) .attr = FAULT_ATTR_INITIALIZER,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) .ignore_gfp_reclaim = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) .ignore_gfp_highmem = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) .min_order = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662)
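/*
 * "fail_page_alloc=" boot parameter; it takes the common fault-injection
 * format, e.g. fail_page_alloc=<interval>,<probability>,<space>,<times>
 * (see the fault-injection documentation for details).
 */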
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) static int __init setup_fail_page_alloc(char *str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) return setup_fault_attr(&fail_page_alloc.attr, str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) __setup("fail_page_alloc=", setup_fail_page_alloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) if (order < fail_page_alloc.min_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) if (gfp_mask & __GFP_NOFAIL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) if (fail_page_alloc.ignore_gfp_reclaim &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) (gfp_mask & __GFP_DIRECT_RECLAIM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) return should_fail(&fail_page_alloc.attr, 1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) static int __init fail_page_alloc_debugfs(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) umode_t mode = S_IFREG | 0600;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) struct dentry *dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) &fail_page_alloc.attr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) debugfs_create_bool("ignore-gfp-wait", mode, dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) &fail_page_alloc.ignore_gfp_reclaim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) debugfs_create_bool("ignore-gfp-highmem", mode, dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) &fail_page_alloc.ignore_gfp_highmem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) debugfs_create_u32("min-order", mode, dir, &fail_page_alloc.min_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) late_initcall(fail_page_alloc_debugfs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) #else /* CONFIG_FAIL_PAGE_ALLOC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) #endif /* CONFIG_FAIL_PAGE_ALLOC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) return __should_fail_alloc_page(gfp_mask, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721)
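/*
 * Compute how many of the zone's free pages should be treated as unusable
 * for this watermark check: a slack of (1 << order) - 1 pages for the
 * requested order, the highatomic reserve when the caller is not entitled
 * to ALLOC_HARDER/ALLOC_OOM, and free CMA pages when the allocation may not
 * use CMA areas.
 */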
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) static inline long __zone_watermark_unusable_free(struct zone *z,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) unsigned int order, unsigned int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) long unusable_free = (1 << order) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) * If the caller does not have rights to ALLOC_HARDER then subtract
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) * the high-atomic reserves. This will over-estimate the size of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) * atomic reserve but it avoids a search.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) if (likely(!alloc_harder))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) unusable_free += z->nr_reserved_highatomic;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) /* If allocation can't use CMA areas don't use free CMA pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) if (!(alloc_flags & ALLOC_CMA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) return unusable_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) * Return true if free base pages are above 'mark'. For high-order checks it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) * will return true if the order-0 watermark is reached and there is at least
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) * one free page of a suitable size. Checking now avoids taking the zone lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) * to check in the allocation paths if no pages are free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) int highest_zoneidx, unsigned int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) long free_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) long min = mark;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) int o;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) /* free_pages may go negative - that's OK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) if (alloc_flags & ALLOC_HIGH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) min -= min / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) if (unlikely(alloc_harder)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) * OOM victims can try even harder than normal ALLOC_HARDER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) * users on the grounds that it's definitely going to be in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) * the exit path shortly and free memory. Any allocation it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) * makes during the free path will be small and short-lived.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) if (alloc_flags & ALLOC_OOM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) min -= min / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) min -= min / 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) }
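/*
 * Illustrative numbers only: with mark == 1024, ALLOC_HIGH halves min to
 * 512 and a non-OOM ALLOC_HARDER caller then drops another quarter,
 * leaving an effective min of 384 (an OOM victim would get 256 instead).
 */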
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) * Check watermarks for an order-0 allocation request. If these
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) * are not met, then a high-order request also cannot go ahead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) * even if a suitable page happened to be free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) if (free_pages <= min + z->lowmem_reserve[highest_zoneidx])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) /* If this is an order-0 request then the watermark is fine */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) if (!order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) /* For a high-order request, check at least one suitable page is free */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) for (o = order; o < MAX_ORDER; o++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) struct free_area *area = &z->free_area[o];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) int mt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) if (!area->nr_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) * Note that this check is needed only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) * when MIGRATE_CMA < MIGRATE_PCPTYPES.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) if (mt == MIGRATE_CMA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) if (!free_area_empty(area, mt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) if ((alloc_flags & ALLOC_CMA) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) !free_area_empty(area, MIGRATE_CMA)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) int highest_zoneidx, unsigned int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) return __zone_watermark_ok(z, order, mark, highest_zoneidx, alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) zone_page_state(z, NR_FREE_PAGES));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) EXPORT_SYMBOL_GPL(zone_watermark_ok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830)
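/*
 * Fast-path variant of zone_watermark_ok(): for order-0 requests, first try
 * a cheap check against NR_FREE_PAGES minus the unusable reserves; fall
 * back to the full __zone_watermark_ok() calculation otherwise, and finally
 * retry GFP_ATOMIC order-0 requests against the unboosted min watermark.
 */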
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) unsigned long mark, int highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) unsigned int alloc_flags, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) long free_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) free_pages = zone_page_state(z, NR_FREE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) * Fast check for order-0 only. If this fails then the reserves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) * need to be calculated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) if (!order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) long fast_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) fast_free = free_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) if (fast_free > mark + z->lowmem_reserve[highest_zoneidx])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) if (__zone_watermark_ok(z, order, mark, highest_zoneidx, alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) free_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) * Ignore watermark boosting for GFP_ATOMIC order-0 allocations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) * when checking the min watermark. The min watermark is the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) * point where boosting is ignored so that kswapd is woken up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) * when below the low watermark.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) if (unlikely(!order && (gfp_mask & __GFP_ATOMIC) && z->watermark_boost
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) && ((alloc_flags & ALLOC_WMARK_MASK) == WMARK_MIN))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) mark = z->_watermark[WMARK_MIN];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) return __zone_watermark_ok(z, order, mark, highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) alloc_flags, free_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) unsigned long mark, int highest_zoneidx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) long free_pages = zone_page_state(z, NR_FREE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) return __zone_watermark_ok(z, order, mark, highest_zoneidx, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) free_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) EXPORT_SYMBOL_GPL(zone_watermark_ok_safe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) node_reclaim_distance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) #else /* CONFIG_NUMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891) static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) #endif /* CONFIG_NUMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) * The restriction on ZONE_DMA32 as being a suitable zone to use to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) * fragmentation is subtle. If the preferred zone was HIGHMEM then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) * premature use of a lower zone may cause lowmem pressure problems that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) * are worse than fragmentation. If the next zone is ZONE_DMA then it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) * probably too small. It only makes sense to spread allocations to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) * fragmentation between the Normal and DMA32 zones.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) static inline unsigned int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) unsigned int alloc_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) * __GFP_KSWAPD_RECLAIM is assumed to be the same as ALLOC_KSWAPD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) * to save a branch.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) alloc_flags = (__force int) (gfp_mask & __GFP_KSWAPD_RECLAIM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) #ifdef CONFIG_ZONE_DMA32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) if (!zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) return alloc_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) if (zone_idx(zone) != ZONE_NORMAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) return alloc_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) * If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) * the pointer is within zone->zone_pgdat->node_zones[]. Also assume
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) * on UMA that if Normal is populated then so is DMA32.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) if (nr_online_nodes > 1 && !populated_zone(--zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) return alloc_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) alloc_flags |= ALLOC_NOFRAGMENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) #endif /* CONFIG_ZONE_DMA32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) return alloc_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936)
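/*
 * Apply per-task context to the allocation flags: movable __GFP_CMA
 * allocations may use CMA pageblocks (ALLOC_CMA) unless the task is in a
 * PF_MEMALLOC_NOCMA scope.
 */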
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) static inline unsigned int current_alloc_flags(gfp_t gfp_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) unsigned int alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) unsigned int pflags = current->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) if (!(pflags & PF_MEMALLOC_NOCMA) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) gfp_mask & __GFP_CMA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) alloc_flags |= ALLOC_CMA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) return alloc_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) * get_page_from_freelist goes through the zonelist trying to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) * a page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) static struct page *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) const struct alloc_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) struct pglist_data *last_pgdat_dirty_limit = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) bool no_fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) * Scan zonelist, looking for a zone with enough free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) * See also __cpuset_node_allowed() comment in kernel/cpuset.c.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) z = ac->preferred_zoneref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) for_next_zone_zonelist_nodemask(zone, z, ac->highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) ac->nodemask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) unsigned long mark;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) if (cpusets_enabled() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) (alloc_flags & ALLOC_CPUSET) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) !__cpuset_zone_allowed(zone, gfp_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) * When allocating a page cache page for writing, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) * want to get it from a node that is within its dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) * limit, such that no single node holds more than its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) * proportional share of globally allowed dirty pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) * The dirty limits take into account the node's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) * lowmem reserves and high watermark so that kswapd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) * should be able to balance it without having to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) * write pages from its LRU list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) * XXX: For now, allow allocations to potentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) * exceed the per-node dirty limit in the slowpath
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) * (spread_dirty_pages unset) before going into reclaim,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) * which is important when on a NUMA setup the allowed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) * nodes are together not big enough to reach the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) * global limit. The proper fix for these situations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) * will require awareness of nodes in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) * dirty-throttling and the flusher threads.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) if (ac->spread_dirty_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) if (last_pgdat_dirty_limit == zone->zone_pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) if (!node_dirty_ok(zone->zone_pgdat)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) last_pgdat_dirty_limit = zone->zone_pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) if (no_fallback && nr_online_nodes > 1 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) zone != ac->preferred_zoneref->zone) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) int local_nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) * If moving to a remote node, retry but allow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) * fragmenting fallbacks. Locality is more important
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) * than fragmentation avoidance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) local_nid = zone_to_nid(ac->preferred_zoneref->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) if (zone_to_nid(zone) != local_nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) alloc_flags &= ~ALLOC_NOFRAGMENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) if (!zone_watermark_fast(zone, order, mark,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) ac->highest_zoneidx, alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) gfp_mask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) * Watermark failed for this zone, but see if we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) * grow this zone if it contains deferred pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) if (static_branch_unlikely(&deferred_pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) if (_deferred_grow_zone(zone, order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) goto try_this_zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) /* Checked here to keep the fast path fast */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) if (alloc_flags & ALLOC_NO_WATERMARKS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) goto try_this_zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) if (node_reclaim_mode == 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) !zone_allows_reclaim(ac->preferred_zoneref->zone, zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) ret = node_reclaim(zone->zone_pgdat, gfp_mask, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) switch (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) case NODE_RECLAIM_NOSCAN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) /* did not scan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) case NODE_RECLAIM_FULL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) /* scanned but unreclaimable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) /* did we reclaim enough? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) if (zone_watermark_ok(zone, order, mark,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) ac->highest_zoneidx, alloc_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) goto try_this_zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) try_this_zone:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) page = rmqueue(ac->preferred_zoneref->zone, zone, order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) gfp_mask, alloc_flags, ac->migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) prep_new_page(page, order, gfp_mask, alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076) * If this is a high-order atomic allocation then check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) * if the pageblock should be reserved for the future
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) reserve_highatomic_pageblock(page, zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) /* Try again if zone has deferred pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) if (static_branch_unlikely(&deferred_pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) if (_deferred_grow_zone(zone, order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) goto try_this_zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) * It's possible on a UMA machine to get through all zones that are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) * fragmented. If avoiding fragmentation, reset and try again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) if (no_fallback) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) alloc_flags &= ~ALLOC_NOFRAGMENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105)
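/*
 * Show the memory state for a failed allocation. Remote nodes are filtered
 * out of the report unless the allocating context may legitimately allocate
 * outside current's set of allowed nodes.
 */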
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) unsigned int filter = SHOW_MEM_FILTER_NODES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) * This documents exceptions given to allocations in certain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) * contexts that are allowed to allocate outside current's set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) * of allowed nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) if (!(gfp_mask & __GFP_NOMEMALLOC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) if (tsk_is_oom_victim(current) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) (current->flags & (PF_MEMALLOC | PF_EXITING)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) filter &= ~SHOW_MEM_FILTER_NODES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) filter &= ~SHOW_MEM_FILTER_NODES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) show_mem(filter, nodemask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127) struct va_format vaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) va_list args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) static DEFINE_RATELIMIT_STATE(nopage_rs, 10*HZ, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) if ((gfp_mask & __GFP_NOWARN) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) !__ratelimit(&nopage_rs) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) ((gfp_mask & __GFP_DMA) && !has_managed_dma()))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) va_start(args, fmt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) vaf.fmt = fmt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) vaf.va = &args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) current->comm, &vaf, gfp_mask, &gfp_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) nodemask_pr_args(nodemask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) va_end(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) cpuset_print_current_mems_allowed();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) pr_cont("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147) warn_alloc_show_mem(gfp_mask, nodemask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) static inline struct page *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) __alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) unsigned int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) const struct alloc_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) page = get_page_from_freelist(gfp_mask, order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) alloc_flags|ALLOC_CPUSET, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) * Fall back to ignoring the cpuset restriction if our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) * nodes are depleted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) page = get_page_from_freelist(gfp_mask, order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) alloc_flags, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169)
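/*
 * OOM slow path: retry the freelists under the oom_lock with a very high
 * watermark to catch a parallel OOM kill, then invoke the OOM killer only
 * when doing so can plausibly help this allocation.
 */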
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) static inline struct page *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) const struct alloc_context *ac, unsigned long *did_some_progress)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174) struct oom_control oc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) .zonelist = ac->zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) .nodemask = ac->nodemask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) .memcg = NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178) .gfp_mask = gfp_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) .order = order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) *did_some_progress = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) * Acquire the oom lock. If that fails, somebody else is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) * making progress for us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) if (!mutex_trylock(&oom_lock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) *did_some_progress = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) schedule_timeout_uninterruptible(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) * Go through the zonelist yet one more time, keeping a very high
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) * watermark here; this is only to catch a parallel OOM killing, and we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) * must fail if we're still under heavy pressure. But make sure this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) * reclaim attempt does not itself become a __GFP_DIRECT_RECLAIM &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) * !__GFP_NORETRY allocation, which would never fail while oom_lock is held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) page = get_page_from_freelist((gfp_mask | __GFP_HARDWALL) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) ~__GFP_DIRECT_RECLAIM, order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) ALLOC_WMARK_HIGH|ALLOC_CPUSET, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) /* Coredumps can quickly deplete all memory reserves */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) if (current->flags & PF_DUMPCORE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) /* The OOM killer will not help higher order allocs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) if (order > PAGE_ALLOC_COSTLY_ORDER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) * We have already exhausted all our reclaim opportunities without any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) * success so it is time to admit defeat. We will skip the OOM killer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) * because it is very likely that the caller has a more reasonable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) * fallback than shooting a random task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) * The OOM killer may not free memory on a specific node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) if (gfp_mask & (__GFP_RETRY_MAYFAIL | __GFP_THISNODE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) /* The OOM killer does not needlessly kill tasks for lowmem */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225) if (ac->highest_zoneidx < ZONE_NORMAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) if (pm_suspended_storage())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) * XXX: GFP_NOFS allocations should rather fail than rely on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) * other requests to make forward progress.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) * We are in an unfortunate situation where out_of_memory cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) * do much for this context but let's try it to at least get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) * access to memory reserved if the current task is killed (see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) * out_of_memory). Once filesystems are ready to handle allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) * failures more gracefully we should just bail out here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) /* Exhausted what can be done so it's blame time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240) if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) *did_some_progress = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) * Help non-failing allocations by giving them access to memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) * reserves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) if (gfp_mask & __GFP_NOFAIL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) page = __alloc_pages_cpuset_fallback(gfp_mask, order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) ALLOC_NO_WATERMARKS, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) mutex_unlock(&oom_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) * Maximum number of compaction retries with progress before the OOM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) * killer is considered the only way to move forward.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) #define MAX_COMPACT_RETRIES 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) #ifdef CONFIG_COMPACTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) /* Try memory compaction for high-order allocations before reclaim */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) static struct page *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) unsigned int alloc_flags, const struct alloc_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) enum compact_priority prio, enum compact_result *compact_result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) unsigned long pflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) unsigned int noreclaim_flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) if (!order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) psi_memstall_enter(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) noreclaim_flag = memalloc_noreclaim_save();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) prio, &page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) memalloc_noreclaim_restore(noreclaim_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) psi_memstall_leave(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) * Compaction wasn't deferred or skipped in at least one zone, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) * count a compaction stall.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) count_vm_event(COMPACTSTALL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) /* Prep a captured page if available */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) prep_new_page(page, order, gfp_mask, alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) /* Try get a page from the freelist if available */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) struct zone *zone = page_zone(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) zone->compact_blockskip_flush = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) compaction_defer_reset(zone, order, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) count_vm_event(COMPACTSUCCESS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) * It's bad if a compaction run occurs and fails. The most likely reason
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) * is that pages exist, but not enough of them to satisfy the watermarks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) count_vm_event(COMPACTFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318)
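/*
 * Decide whether another direct compaction attempt is worthwhile based on
 * the previous compact_result, and escalate the compaction priority once
 * the retry budget at the current priority is exhausted.
 */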
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) enum compact_result compact_result,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) enum compact_priority *compact_priority,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) int *compaction_retries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) int max_retries = MAX_COMPACT_RETRIES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) int min_priority;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) int retries = *compaction_retries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) enum compact_priority priority = *compact_priority;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) if (!order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) if (compaction_made_progress(compact_result))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) (*compaction_retries)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) * Compaction considers all the zones as desperately out of memory,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) * so it doesn't really make much sense to retry except when the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340) * failure could be caused by insufficient compaction priority.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) if (compaction_failed(compact_result))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) goto check_priority;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) * compaction was skipped because there are not enough order-0 pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) * to work with, so we retry only if it looks like reclaim can help.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) if (compaction_needs_reclaim(compact_result)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) ret = compaction_zonelist_suitable(ac, order, alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355) * Make sure the compaction wasn't deferred and didn't bail out early
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356) * due to lock contention before we declare that we should give up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357) * But the next retry should use a higher priority if allowed, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) * we don't just keep bailing out endlessly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) if (compaction_withdrawn(compact_result))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361) goto check_priority;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) * !costly requests are much more important than __GFP_RETRY_MAYFAIL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) * costly ones because they are de facto nofail and invoke the OOM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) * killer to move on, while costly allocations can fail and their users
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) * are prepared to cope with that. 1/4 of the retries is rather
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) * arbitrary, but we would need much more detailed feedback from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) * compaction to make a better decision.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) if (order > PAGE_ALLOC_COSTLY_ORDER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) max_retries /= 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) if (*compaction_retries <= max_retries) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) * Make sure there are attempts at the highest priority if we exhausted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) * all retries or failed at the lower priorities.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) check_priority:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) if (*compact_priority > min_priority) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) (*compact_priority)--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) *compaction_retries = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) trace_compact_retry(order, priority, compact_result, retries, max_retries, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) static inline struct page *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398) __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) unsigned int alloc_flags, const struct alloc_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) enum compact_priority prio, enum compact_result *compact_result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) *compact_result = COMPACT_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407) should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) enum compact_result compact_result,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) enum compact_priority *compact_priority,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) int *compaction_retries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) if (!order || order > PAGE_ALLOC_COSTLY_ORDER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) * There are setups with compaction disabled that would prefer to loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) * inside the allocator rather than hit the OOM killer prematurely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) * Give them some hope and keep retrying while the order-0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) * watermarks are OK.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424) for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) ac->highest_zoneidx, ac->nodemask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) if (zone_watermark_ok(zone, 0, min_wmark_pages(zone),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) ac->highest_zoneidx, alloc_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) #endif /* CONFIG_COMPACTION */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433)
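/*
 * fs_reclaim is a lockdep-only pseudo-lock taken around direct reclaim so
 * that lockdep can flag allocations that may recurse into filesystem
 * reclaim while holding locks also taken on the reclaim path.
 */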
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) #ifdef CONFIG_LOCKDEP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) static struct lockdep_map __fs_reclaim_map =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) static bool __need_fs_reclaim(gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) gfp_mask = current_gfp_context(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) /* no reclaim without waiting on it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) /* this guy won't enter reclaim */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447) if (current->flags & PF_MEMALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) /* We're only interested in __GFP_FS allocations for now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) if (!(gfp_mask & __GFP_FS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) if (gfp_mask & __GFP_NOLOCKDEP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460) void __fs_reclaim_acquire(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) lock_map_acquire(&__fs_reclaim_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) void __fs_reclaim_release(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) lock_map_release(&__fs_reclaim_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) void fs_reclaim_acquire(gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) if (__need_fs_reclaim(gfp_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) __fs_reclaim_acquire();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475) EXPORT_SYMBOL_GPL(fs_reclaim_acquire);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) void fs_reclaim_release(gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) if (__need_fs_reclaim(gfp_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) __fs_reclaim_release();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) EXPORT_SYMBOL_GPL(fs_reclaim_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) /* Perform direct synchronous page reclaim */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) static unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487) __perform_reclaim(gfp_t gfp_mask, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488) const struct alloc_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) unsigned int noreclaim_flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) unsigned long progress;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) /* We now go into synchronous reclaim */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) cpuset_memory_pressure_bump();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497) fs_reclaim_acquire(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) noreclaim_flag = memalloc_noreclaim_save();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) progress = try_to_free_pages(ac->zonelist, order, gfp_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) ac->nodemask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) memalloc_noreclaim_restore(noreclaim_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) fs_reclaim_release(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) return progress;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) /* The really slow allocator path where we enter direct reclaim */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) static inline struct page *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) unsigned int alloc_flags, const struct alloc_context *ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) unsigned long *did_some_progress)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) unsigned long pflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) bool drained = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) bool skip_pcp_drain = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) psi_memstall_enter(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523) *did_some_progress = __perform_reclaim(gfp_mask, order, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) if (unlikely(!(*did_some_progress)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) * If an allocation failed after direct reclaim, it could be because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532) * pages are pinned on the per-cpu lists or in high alloc reserves.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) * Shrink them and try again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) if (!page && !drained) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) unreserve_highatomic_pageblock(ac, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) trace_android_vh_drain_all_pages_bypass(gfp_mask, order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538) alloc_flags, ac->migratetype, *did_some_progress, &skip_pcp_drain);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) if (!skip_pcp_drain)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) drain_all_pages(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) drained = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) psi_memstall_leave(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549)
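/*
 * Wake kswapd on every node covered by the zonelist and nodemask, avoiding
 * duplicate wakeups for consecutive zones of the same node.
 */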
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) const struct alloc_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) pg_data_t *last_pgdat = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) enum zone_type highest_zoneidx = ac->highest_zoneidx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, highest_zoneidx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559) ac->nodemask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4560) if (last_pgdat != zone->zone_pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561) wakeup_kswapd(zone, gfp_mask, order, highest_zoneidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562) last_pgdat = zone->zone_pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4565)
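/*
 * Derive the internal ALLOC_* flags used on the slowpath from the gfp mask:
 * start from the min watermark with cpuset enforcement, then adjust for
 * __GFP_HIGH, __GFP_KSWAPD_RECLAIM, __GFP_ATOMIC and realtime callers.
 */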
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566) static inline unsigned int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) gfp_to_alloc_flags(gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569) unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) * __GFP_HIGH is assumed to be the same as ALLOC_HIGH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573) * and __GFP_KSWAPD_RECLAIM is assumed to be the same as ALLOC_KSWAPD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574) * to save two branches.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) BUILD_BUG_ON(__GFP_KSWAPD_RECLAIM != (__force gfp_t) ALLOC_KSWAPD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) * The caller may dip into page reserves a bit more if it cannot run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581) * direct reclaim, has a realtime scheduling policy, or is asking for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) * __GFP_HIGH memory. GFP_ATOMIC requests will set both ALLOC_HARDER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) * (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) alloc_flags |= (__force int)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586) (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588) if (gfp_mask & __GFP_ATOMIC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590) * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) * if it can't schedule.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) if (!(gfp_mask & __GFP_NOMEMALLOC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) alloc_flags |= ALLOC_HARDER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596) * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) * comment for __cpuset_node_allowed().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) alloc_flags &= ~ALLOC_CPUSET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600) } else if (unlikely(rt_task(current)) && !in_interrupt())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) alloc_flags |= ALLOC_HARDER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603) alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) return alloc_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607)
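/* May this task, as an OOM victim, dip into memory reserves? */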
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) static bool oom_reserves_allowed(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) if (!tsk_is_oom_victim(tsk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) * !MMU configurations don't have an oom reaper, so give access to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615) * memory reserves only to the thread with TIF_MEMDIE set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617) if (!IS_ENABLED(CONFIG_MMU) && !test_thread_flag(TIF_MEMDIE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624) * Distinguish requests which really need access to full memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625) * reserves from oom victims which can live with a portion of it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) static inline int __gfp_pfmemalloc_flags(gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) if (unlikely(gfp_mask & __GFP_NOMEMALLOC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631) if (gfp_mask & __GFP_MEMALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) return ALLOC_NO_WATERMARKS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633) if (in_serving_softirq() && (current->flags & PF_MEMALLOC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) return ALLOC_NO_WATERMARKS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) if (!in_interrupt()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636) if (current->flags & PF_MEMALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637) return ALLOC_NO_WATERMARKS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638) else if (oom_reserves_allowed(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639) return ALLOC_OOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644)
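/* Can this allocation dip into memory reserves, fully or partially? */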
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) return !!__gfp_pfmemalloc_flags(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) * Checks whether it makes sense to retry the reclaim to make a forward progress
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) * for the given allocation request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654) * We give up when we either have tried MAX_RECLAIM_RETRIES in a row
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655) * without success, or when we couldn't even meet the watermark if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) * reclaimed all remaining pages on the LRU lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) * Returns true if a retry is viable or false to enter the oom path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) should_reclaim_retry(gfp_t gfp_mask, unsigned order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) struct alloc_context *ac, int alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) bool did_some_progress, int *no_progress_loops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666) struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667) bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) * Costly allocations might have made some progress, but due to high
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) * fragmentation this doesn't mean their order will become available, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672) * always increment the no-progress counter for them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675) *no_progress_loops = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) (*no_progress_loops)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) * Make sure we converge to OOM if we cannot make any progress
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) * several times in a row.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) if (*no_progress_loops > MAX_RECLAIM_RETRIES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) /* Before OOM, exhaust highatomic_reserve */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) return unreserve_highatomic_pageblock(ac, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) * Keep reclaiming pages while there is a chance this will lead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) * somewhere. If none of the target zones can satisfy our allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691) * request even if all reclaimable pages are considered then we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) * screwed and have to go OOM.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694) for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) ac->highest_zoneidx, ac->nodemask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) unsigned long available;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697) unsigned long reclaimable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698) unsigned long min_wmark = min_wmark_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) bool wmark;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) available = reclaimable = zone_reclaimable_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705) * Would the allocation succeed if we reclaimed all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) * reclaimable pages?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) wmark = __zone_watermark_ok(zone, order, min_wmark,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) ac->highest_zoneidx, alloc_flags, available);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710) trace_reclaim_retry_zone(z, order, reclaimable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) available, min_wmark, *no_progress_loops, wmark);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712) if (wmark) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714) * If we didn't make any progress and have a lot of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715) * dirty + writeback pages then we should wait for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) * IO to complete to slow down the reclaim and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) * prevent premature OOM.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719) if (!did_some_progress) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) unsigned long write_pending;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) write_pending = zone_page_state_snapshot(zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) NR_ZONE_WRITE_PENDING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) if (2 * write_pending > reclaimable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726) congestion_wait(BLK_RW_ASYNC, HZ/10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) * Memory allocation/reclaim might be called from a WQ context and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) * current implementation of the WQ concurrency control doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740) * recognize that a particular WQ is congested if the worker thread is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) * looping without ever sleeping. Therefore we have to do a short sleep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742) * here rather than calling cond_resched().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) if (current->flags & PF_WQ_WORKER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) schedule_timeout_uninterruptible(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) }
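
/*
 * Worked example of the per-zone retry check above, with hypothetical
 * numbers and lowmem_reserve/highatomic reserves ignored: if min_wmark is
 * 4000 pages, the zone has 1000 free pages and 5000 reclaimable pages,
 * the watermark is tested against available = 1000 + 5000 = 6000 pages
 * and an order-0 retry is considered worthwhile. With only 2000
 * reclaimable pages, available = 3000 < 4000 and this zone would not
 * justify another reclaim round.
 */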
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752) check_retry_cpuset(int cpuset_mems_cookie, struct alloc_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755) * It's possible that cpuset's mems_allowed and the nodemask from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756) * mempolicy don't intersect. This should normally be dealt with by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757) * policy_nodemask(), but it's possible to race with a cpuset update in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) * such a way that the check therein was true, and then it became false
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759) * before we got our cpuset_mems_cookie here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760) * This assumes that for all allocations, ac->nodemask can come only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) * from MPOL_BIND mempolicy (whose documented semantics are to be ignored
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762) * when it does not intersect with the cpuset restrictions) or the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) * caller can deal with a violated nodemask.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) if (cpusets_enabled() && ac->nodemask &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766) !cpuset_nodemask_valid_mems_allowed(ac->nodemask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767) ac->nodemask = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) * When updating a task's mems_allowed or mempolicy nodemask, it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773) * possible to race with parallel threads in such a way that our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) * allocation can fail while the mask is being updated. If we are about
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) * to fail, check if the cpuset changed during allocation and if so,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776) * retry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778) if (read_mems_allowed_retry(cpuset_mems_cookie))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) static inline struct page *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) struct alloc_context *ac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788) bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) unsigned int alloc_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792) unsigned long did_some_progress;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) enum compact_priority compact_priority;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) enum compact_result compact_result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) int compaction_retries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) int no_progress_loops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797) unsigned int cpuset_mems_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) int reserve_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) * We sanity check to catch abuse of atomic reserves being used by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802) * callers that are not in atomic context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804) if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806) gfp_mask &= ~__GFP_ATOMIC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4808) retry_cpuset:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4809) compaction_retries = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) no_progress_loops = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) compact_priority = DEF_COMPACT_PRIORITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) cpuset_mems_cookie = read_mems_allowed_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) * The fast path uses conservative alloc_flags to succeed only until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816) * kswapd needs to be woken up, and to avoid the cost of setting up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) * alloc_flags precisely. So we do that now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) alloc_flags = gfp_to_alloc_flags(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) * We need to recalculate the starting point for the zonelist iterator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823) * because we might have used different nodemask in the fast path, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) * there was a cpuset modification and we are retrying - otherwise we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) * could end up iterating over non-eligible zones endlessly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827) ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) ac->highest_zoneidx, ac->nodemask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) if (!ac->preferred_zoneref->zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830) goto nopage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) if (alloc_flags & ALLOC_KSWAPD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) wake_all_kswapds(order, gfp_mask, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) * The adjusted alloc_flags might result in immediate success, so try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) * that first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839) page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841) goto got_pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844) * For costly allocations, try direct compaction first, as it's likely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) * that we have enough base pages and don't need to reclaim. For non-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) * movable high-order allocations, do that as well, as compaction will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847) * try to prevent permanent fragmentation by migrating from blocks of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) * same migratetype.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) * Don't try this for allocations that are allowed to ignore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) if (can_direct_reclaim &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) (costly_order ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) (order > 0 && ac->migratetype != MIGRATE_MOVABLE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855) && !gfp_pfmemalloc_allowed(gfp_mask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) page = __alloc_pages_direct_compact(gfp_mask, order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) alloc_flags, ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) INIT_COMPACT_PRIORITY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859) &compact_result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861) goto got_pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) * Checks for costly allocations with __GFP_NORETRY, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) * includes some THP page fault allocations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) if (costly_order && (gfp_mask & __GFP_NORETRY)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869) * If allocating entire pageblock(s) and compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) * failed because all zones are below low watermarks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) * or compaction is prohibited because it recently failed at this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) * order, fail immediately unless the allocator has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873) * requested compaction and reclaim retry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) * Reclaim is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876) * - potentially very expensive because zones are far
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) * below their low watermarks or this is part of very
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) * bursty high order allocations,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879) * - not guaranteed to help because isolate_freepages()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) * may not iterate over freed pages as part of its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) * linear scan, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) * - unlikely to make entire pageblocks free on its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) * own.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885) if (compact_result == COMPACT_SKIPPED ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) compact_result == COMPACT_DEFERRED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) goto nopage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) * Looks like reclaim/compaction is worth trying, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) * sync compaction could be very expensive, so keep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892) * using async compaction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894) compact_priority = INIT_COMPACT_PRIORITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) if (alloc_flags & ALLOC_KSWAPD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901) wake_all_kswapds(order, gfp_mask, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903) reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) if (reserve_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905) alloc_flags = current_alloc_flags(gfp_mask, reserve_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908) * Reset the nodemask and zonelist iterators if memory policies can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909) * ignored. These allocations are high priority and system-oriented
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) * rather than user-oriented.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913) ac->nodemask = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) ac->highest_zoneidx, ac->nodemask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918) /* Attempt with potentially adjusted zonelist and alloc_flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919) page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) goto got_pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) /* Caller is not willing to reclaim, we can't balance anything */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) if (!can_direct_reclaim)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925) goto nopage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) /* Avoid recursion of direct reclaim */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928) if (current->flags & PF_MEMALLOC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929) goto nopage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931) /* Try direct reclaim and then allocating */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) &did_some_progress);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935) goto got_pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937) /* Try direct compaction and then allocating */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) compact_priority, &compact_result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941) goto got_pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) /* Do not loop if specifically requested */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944) if (gfp_mask & __GFP_NORETRY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) goto nopage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) * Do not retry costly high order allocations unless they are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949) * __GFP_RETRY_MAYFAIL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952) goto nopage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954) if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) did_some_progress > 0, &no_progress_loops))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959) * It doesn't make any sense to retry compaction if the order-0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) * reclaim is not able to make any progress, because the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) * implementation of compaction depends on a sufficient amount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962) * of free memory (see __compaction_suitable()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) if (did_some_progress > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965) should_compact_retry(ac, order, alloc_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966) compact_result, &compact_priority,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) &compaction_retries))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) /* Deal with possible cpuset update races before we start OOM killing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) if (check_retry_cpuset(cpuset_mems_cookie, ac))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) goto retry_cpuset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975) /* Reclaim has failed us, start killing things */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) goto got_pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980) /* Avoid allocations with no watermarks from looping endlessly */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981) if (tsk_is_oom_victim(current) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) (alloc_flags & ALLOC_OOM ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) (gfp_mask & __GFP_NOMEMALLOC)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984) goto nopage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) /* Retry as long as the OOM killer is making progress */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) if (did_some_progress) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988) no_progress_loops = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992) nopage:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993) /* Deal with possible cpuset update races before we fail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) if (check_retry_cpuset(cpuset_mems_cookie, ac))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) goto retry_cpuset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998) * Make sure that a __GFP_NOFAIL request doesn't leak out and make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999) * we always retry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) if (gfp_mask & __GFP_NOFAIL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003) * All existing users of __GFP_NOFAIL are blockable, so warn
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) * about any new users that actually require GFP_NOWAIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) if (WARN_ON_ONCE(!can_direct_reclaim))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) * A PF_MEMALLOC request from this context is rather bizarre
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011) * because we cannot reclaim anything and can only loop waiting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012) * for somebody to do the work for us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) WARN_ON_ONCE(current->flags & PF_MEMALLOC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) * Non-failing costly orders are a hard requirement which we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) * are not well prepared for, so let's warn about these users
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019) * so that we can identify them and convert them to something
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) * else.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022) WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) * Help non-failing allocations by giving them access to memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026) * reserves, but do not use ALLOC_NO_WATERMARKS because this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) * could deplete the whole memory reserves, which would just make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028) * the situation worse.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032) goto got_pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) warn_alloc(gfp_mask, ac->nodemask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) "page allocation failure: order:%u", order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040) got_pg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045) int preferred_nid, nodemask_t *nodemask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046) struct alloc_context *ac, gfp_t *alloc_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) unsigned int *alloc_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) ac->highest_zoneidx = gfp_zone(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050) ac->zonelist = node_zonelist(preferred_nid, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051) ac->nodemask = nodemask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) ac->migratetype = gfp_migratetype(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054) if (cpusets_enabled()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055) *alloc_mask |= __GFP_HARDWALL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057) * When we are in interrupt context, the cpuset of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058) * current task is irrelevant, so any node is OK.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060) if (!in_interrupt() && !ac->nodemask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) ac->nodemask = &cpuset_current_mems_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063) *alloc_flags |= ALLOC_CPUSET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066) fs_reclaim_acquire(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067) fs_reclaim_release(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) if (should_fail_alloc_page(gfp_mask, order))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074) *alloc_flags = current_alloc_flags(gfp_mask, *alloc_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076) /* Dirty zone balancing only done in the fast path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080) * The preferred zone is used for statistics but crucially it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081) * also used as the starting point for the zonelist iterator. It
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082) * may get reset for allocations that ignore memory policies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5083) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084) ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085) ac->highest_zoneidx, ac->nodemask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091) * This is the 'heart' of the zoned buddy allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5093) struct page *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5094) __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5095) nodemask_t *nodemask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) unsigned int alloc_flags = ALLOC_WMARK_LOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099) gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5100) struct alloc_context ac = { };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103) * There are several places where we assume that the order value is sane
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) * so bail out early if the request is out of bounds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) if (unlikely(order >= MAX_ORDER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107) WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111) gfp_mask &= gfp_allowed_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112) alloc_mask = gfp_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117) * Forbid the first pass from falling back to types that fragment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) * memory until all local zones are considered.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) /* First allocation attempt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) if (likely(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) * Apply scoped allocation constraints. This is mainly about GFP_NOFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) * and GFP_NOIO, which have to be inherited for all allocation requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130) * from a particular context which has been marked by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) * memalloc_no{fs,io}_{save,restore}.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133) alloc_mask = current_gfp_context(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134) ac.spread_dirty_pages = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) * Restore the original nodemask if it was potentially replaced with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138) * &cpuset_current_mems_allowed to optimize the fast-path attempt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) ac.nodemask = nodemask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142) page = __alloc_pages_slowpath(alloc_mask, order, &ac);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145) if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) unlikely(__memcg_kmem_charge_page(page, gfp_mask, order) != 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) __free_pages(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148) page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) EXPORT_SYMBOL(__alloc_pages_nodemask);
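
/*
 * Illustrative usage sketch (not part of the allocator itself): callers
 * normally reach this entry point through wrappers such as alloc_pages(),
 * e.g. to grab and later release an order-2 (4 page) block:
 *
 *	struct page *page = alloc_pages(GFP_KERNEL, 2);
 *
 *	if (!page)
 *		return -ENOMEM;
 *	...
 *	__free_pages(page, 2);
 */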
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158) * Common helper functions. Never use with __GFP_HIGHMEM because the returned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) * address cannot represent highmem pages. Use alloc_pages and then kmap if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) * you need to access highmem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169) return (unsigned long) page_address(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) EXPORT_SYMBOL(__get_free_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173) unsigned long get_zeroed_page(gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175) return __get_free_pages(gfp_mask | __GFP_ZERO, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177) EXPORT_SYMBOL(get_zeroed_page);
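
/*
 * Illustrative usage sketch: these helpers hand back a kernel virtual
 * address rather than a struct page, so the matching release helpers are
 * free_page()/free_pages(), e.g. for a single zeroed scratch page:
 *
 *	unsigned long addr = get_zeroed_page(GFP_KERNEL);
 *
 *	if (!addr)
 *		return -ENOMEM;
 *	...
 *	free_page(addr);
 */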
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179) static inline void free_the_page(struct page *page, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) if (order == 0) /* Via pcp? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182) free_unref_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184) __free_pages_ok(page, order, FPI_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186)
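/*
 * If put_page_testzero() below drops the last reference, the whole
 * allocation is returned to the buddy allocator. If it does not, some
 * other user still holds a reference; for a non-compound page that
 * reference can only pin the first 0-order page, so the tail of the
 * higher-order block is split into smaller power-of-two chunks and
 * freed to avoid leaking it.
 */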
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) void __free_pages(struct page *page, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189) trace_android_vh_free_pages(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190) if (put_page_testzero(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191) free_the_page(page, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) else if (!PageHead(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193) while (order-- > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) free_the_page(page + (1 << order), order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) EXPORT_SYMBOL(__free_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198) void free_pages(unsigned long addr, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) if (addr != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) VM_BUG_ON(!virt_addr_valid((void *)addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202) __free_pages(virt_to_page((void *)addr), order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206) EXPORT_SYMBOL(free_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) * Page Fragment:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210) * An arbitrary-length arbitrary-offset area of memory which resides
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211) * within a 0 or higher order page. Multiple fragments within that page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212) * are individually refcounted using the page's reference counter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) * The page_frag functions below provide a simple allocation framework for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215) * page fragments. This is used by the network stack and network device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216) * drivers to provide a backing region of memory for use as either an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217) * sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) */
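
/*
 * Illustrative usage sketch (hypothetical caller): a user keeps a
 * page_frag_cache, zero-initialized so that ->va starts out NULL, and
 * carves small buffers out of it. Real users must serialize access to
 * the cache themselves (e.g. per-cpu caches used with interrupts or
 * softirqs disabled):
 *
 *	static struct page_frag_cache frag_cache;
 *
 *	void *buf = page_frag_alloc(&frag_cache, 256, GFP_ATOMIC);
 *	if (!buf)
 *		return NULL;
 *	...
 *	page_frag_free(buf);
 */
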
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219) static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220) gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) gfp_t gfp = gfp_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5225) #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5226) gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5227) __GFP_NOMEMALLOC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5228) page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5229) PAGE_FRAG_CACHE_MAX_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5230) nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5231) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5232) if (unlikely(!page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5233) page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5235) nc->va = page ? page_address(page) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5237) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5240) void __page_frag_cache_drain(struct page *page, unsigned int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5241) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5242) VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5244) if (page_ref_sub_and_test(page, count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5245) free_the_page(page, compound_order(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5247) EXPORT_SYMBOL(__page_frag_cache_drain);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5249) void *page_frag_alloc(struct page_frag_cache *nc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5250) unsigned int fragsz, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5251) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5252) unsigned int size = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5253) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5254) int offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5256) if (unlikely(!nc->va)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5257) refill:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5258) page = __page_frag_cache_refill(nc, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5259) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5260) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5262) #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5263) /* if size can vary use size else just use PAGE_SIZE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5264) size = nc->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5265) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5266) /* Even if we own the page, we do not use atomic_set().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5267) * This would break get_page_unless_zero() users.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5268) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5269) page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5271) /* reset page count bias and offset to start of new frag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) nc->pfmemalloc = page_is_pfmemalloc(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5274) nc->offset = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5277) offset = nc->offset - fragsz;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5278) if (unlikely(offset < 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5279) page = virt_to_page(nc->va);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5281) if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) goto refill;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5284) if (unlikely(nc->pfmemalloc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5285) free_the_page(page, compound_order(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5286) goto refill;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5289) #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5290) /* if size can vary use size else just use PAGE_SIZE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5291) size = nc->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5292) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5293) /* OK, page count is 0, we can safely set it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5294) set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5296) /* reset page count bias and offset to start of new frag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5297) nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5298) offset = size - fragsz;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5301) nc->pagecnt_bias--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5302) nc->offset = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5304) return nc->va + offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5306) EXPORT_SYMBOL(page_frag_alloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5308) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5309) * Frees a page fragment allocated out of either a compound or order 0 page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5310) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5311) void page_frag_free(void *addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5312) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5313) struct page *page = virt_to_head_page(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5315) if (unlikely(put_page_testzero(page)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5316) free_the_page(page, compound_order(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5318) EXPORT_SYMBOL(page_frag_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5320) static void *make_alloc_exact(unsigned long addr, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5321) size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5322) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5323) if (addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5324) unsigned long alloc_end = addr + (PAGE_SIZE << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5325) unsigned long used = addr + PAGE_ALIGN(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5327) split_page(virt_to_page((void *)addr), order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5328) while (used < alloc_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5329) free_page(used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5330) used += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5333) return (void *)addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5334) }
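
/*
 * Worked example: a request for 5 pages worth of memory is rounded up by
 * the callers below to an order-3 (8 page) block. make_alloc_exact()
 * then splits that block into individual pages and frees the trailing
 * 8 - 5 = 3 pages, so only the PAGE_ALIGN()ed part of the request stays
 * allocated.
 */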
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5336) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5337) * alloc_pages_exact - allocate an exact number of physically-contiguous pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5338) * @size: the number of bytes to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5339) * @gfp_mask: GFP flags for the allocation, must not contain __GFP_COMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5340) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5341) * This function is similar to alloc_pages(), except that it allocates the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5342) * minimum number of pages to satisfy the request. alloc_pages() can only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5343) * allocate memory in power-of-two pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5344) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5345) * This function is also limited by MAX_ORDER.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5346) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5347) * Memory allocated by this function must be released by free_pages_exact().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5348) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5349) * Return: pointer to the allocated area or %NULL in case of error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5350) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5351) void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5352) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5353) unsigned int order = get_order(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5354) unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5356) if (WARN_ON_ONCE(gfp_mask & __GFP_COMP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5357) gfp_mask &= ~__GFP_COMP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5359) addr = __get_free_pages(gfp_mask, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5360) return make_alloc_exact(addr, order, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5362) EXPORT_SYMBOL(alloc_pages_exact);
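
/*
 * Illustrative usage sketch (sizes are hypothetical): a caller that needs
 * a 20 KB physically contiguous buffer, which with 4 KB pages would
 * otherwise cost a full 32 KB order-3 block, could use:
 *
 *	void *buf = alloc_pages_exact(20 * 1024, GFP_KERNEL);
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	...
 *	free_pages_exact(buf, 20 * 1024);
 */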
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5364) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5365) * alloc_pages_exact_nid - allocate an exact number of physically-contiguous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5366) * pages on a node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5367) * @nid: the preferred node ID where memory should be allocated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5368) * @size: the number of bytes to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5369) * @gfp_mask: GFP flags for the allocation, must not contain __GFP_COMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5370) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5371) * Like alloc_pages_exact(), but try to allocate on node nid first before falling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5372) * back.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5373) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5374) * Return: pointer to the allocated area or %NULL in case of error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5375) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5376) void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5377) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5378) unsigned int order = get_order(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5379) struct page *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5381) if (WARN_ON_ONCE(gfp_mask & __GFP_COMP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5382) gfp_mask &= ~__GFP_COMP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5384) p = alloc_pages_node(nid, gfp_mask, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5385) if (!p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5386) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5387) return make_alloc_exact((unsigned long)page_address(p), order, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5388) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5390) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5391) * free_pages_exact - release memory allocated via alloc_pages_exact()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5392) * @virt: the value returned by alloc_pages_exact.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5393) * @size: size of allocation, same value as passed to alloc_pages_exact().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5394) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5395) * Release the memory allocated by a previous call to alloc_pages_exact.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5396) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5397) void free_pages_exact(void *virt, size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5399) unsigned long addr = (unsigned long)virt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5400) unsigned long end = addr + PAGE_ALIGN(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5402) while (addr < end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5403) free_page(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5404) addr += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5407) EXPORT_SYMBOL(free_pages_exact);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5409) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5410) * nr_free_zone_pages - count number of pages beyond high watermark
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5411) * @offset: The zone index of the highest zone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5412) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5413) * nr_free_zone_pages() counts the number of pages which are beyond the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5414) * high watermark within all zones at or below a given zone index. For each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5415) * zone, the number of pages is calculated as:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5416) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5417) * nr_free_zone_pages = managed_pages - high_pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5418) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5419) * Return: number of pages beyond high watermark.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5420) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5421) static unsigned long nr_free_zone_pages(int offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5422) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5423) struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5424) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5426) /* Just pick one node, since fallback list is circular */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5427) unsigned long sum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5429) struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5431) for_each_zone_zonelist(zone, z, zonelist, offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5432) unsigned long size = zone_managed_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5433) unsigned long high = high_wmark_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5434) if (size > high)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5435) sum += size - high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5438) return sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5439) }
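
/*
 * Worked example (illustrative numbers): a zone with 1,048,576 managed pages
 * and a high watermark of 16,384 pages contributes
 * 1,048,576 - 16,384 = 1,032,192 pages to the sum, while a zone whose managed
 * page count is at or below its high watermark contributes nothing.
 */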
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5441) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5442) * nr_free_buffer_pages - count number of pages beyond high watermark
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5443) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5444) * nr_free_buffer_pages() counts the number of pages which are beyond the high
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5445) * watermark within ZONE_DMA and ZONE_NORMAL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5446) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5447) * Return: number of pages beyond high watermark within ZONE_DMA and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5448) * ZONE_NORMAL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5449) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5450) unsigned long nr_free_buffer_pages(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5451) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5452) return nr_free_zone_pages(gfp_zone(GFP_USER));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5454) EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5456) static inline void show_node(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5458) if (IS_ENABLED(CONFIG_NUMA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5459) printk("Node %d ", zone_to_nid(zone));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5462) long si_mem_available(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5463) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5464) long available;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5465) unsigned long pagecache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5466) unsigned long wmark_low = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5467) unsigned long pages[NR_LRU_LISTS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5468) unsigned long reclaimable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5469) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5470) int lru;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5472) for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5473) pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5475) for_each_zone(zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5476) wmark_low += low_wmark_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5478) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5479) * Estimate the amount of memory available for userspace allocations,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5480) * without causing swapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5481) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5482) available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5485) * Not all the page cache can be freed, otherwise the system will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5486) * start swapping. Assume at least half of the page cache, or the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5487) * low watermark worth of cache, needs to stay.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5488) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5489) pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5490) pagecache -= min(pagecache / 2, wmark_low);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5491) available += pagecache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5493) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5494) * Part of the reclaimable slab and other kernel memory consists of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5495) * items that are in use, and cannot be freed. Cap this estimate at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5496) * low watermark.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5497) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5498) reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5499) global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5500) available += reclaimable - min(reclaimable / 2, wmark_low);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5502) if (available < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5503) available = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5504) return available;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5506) EXPORT_SYMBOL_GPL(si_mem_available);
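
/*
 * Worked example of the estimate above (illustrative numbers, all in pages):
 * with NR_FREE_PAGES = 100,000, totalreserve_pages = 10,000, file LRU pages
 * = 200,000, reclaimable kernel memory = 40,000 and a system-wide low
 * watermark of 20,000:
 *
 *	available  = 100,000 - 10,000                   =  90,000
 *	available += 200,000 - min(200,000 / 2, 20,000) = 270,000
 *	available +=  40,000 - min( 40,000 / 2, 20,000) = 290,000
 *
 * so roughly 290,000 pages would be reported as available.
 */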
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5508) void si_meminfo(struct sysinfo *val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5509) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5510) val->totalram = totalram_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5511) val->sharedram = global_node_page_state(NR_SHMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5512) val->freeram = global_zone_page_state(NR_FREE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5513) val->bufferram = nr_blockdev_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5514) val->totalhigh = totalhigh_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5515) val->freehigh = nr_free_highpages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5516) val->mem_unit = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5517) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5519) EXPORT_SYMBOL(si_meminfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5521) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5522) void si_meminfo_node(struct sysinfo *val, int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5523) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5524) int zone_type; /* needs to be signed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5525) unsigned long managed_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5526) unsigned long managed_highpages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5527) unsigned long free_highpages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5528) pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5530) for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5531) managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5532) val->totalram = managed_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5533) val->sharedram = node_page_state(pgdat, NR_SHMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5534) val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5535) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5536) for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5537) struct zone *zone = &pgdat->node_zones[zone_type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5539) if (is_highmem(zone)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5540) managed_highpages += zone_managed_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5541) free_highpages += zone_page_state(zone, NR_FREE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5544) val->totalhigh = managed_highpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5545) val->freehigh = free_highpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5546) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5547) val->totalhigh = managed_highpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5548) val->freehigh = free_highpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5549) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5550) val->mem_unit = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5552) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5554) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5555) * Determine whether the node should be displayed or not, depending on whether
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5556) * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5557) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5558) static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5559) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5560) if (!(flags & SHOW_MEM_FILTER_NODES))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5561) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5563) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5564) * No nodemask was given, i.e. the implicit memory NUMA policy applies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5565) * Do not bother with synchronization (read_mems_allowed_begin()) because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5566) * we do not have to be precise here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5567) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5568) if (!nodemask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5569) nodemask = &cpuset_current_mems_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5571) return !node_isset(nid, *nodemask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5572) }
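
/*
 * For example (hypothetical setup): with SHOW_MEM_FILTER_NODES set and no
 * explicit nodemask, a task whose cpuset allows only node 0 gets node 1
 * filtered out of the show_free_areas() output below.
 */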
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5573)
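/* Convert a page count to KiB, e.g. K(3) == 12 with 4K pages (PAGE_SHIFT == 12). */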
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5574) #define K(x) ((x) << (PAGE_SHIFT-10))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5576) static void show_migration_types(unsigned char type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5577) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5578) static const char types[MIGRATE_TYPES] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5579) [MIGRATE_UNMOVABLE] = 'U',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5580) [MIGRATE_MOVABLE] = 'M',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5581) [MIGRATE_RECLAIMABLE] = 'E',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5582) [MIGRATE_HIGHATOMIC] = 'H',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5583) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5584) [MIGRATE_CMA] = 'C',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5585) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5586) #ifdef CONFIG_MEMORY_ISOLATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5587) [MIGRATE_ISOLATE] = 'I',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5588) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5589) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5590) char tmp[MIGRATE_TYPES + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5591) char *p = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5592) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5594) for (i = 0; i < MIGRATE_TYPES; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5595) if (type & (1 << i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5596) *p++ = types[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5597) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5599) *p = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5600) printk(KERN_CONT "(%s) ", tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5603) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5604) * Show the free area list (used by the show-memory keyboard/SysRq handlers).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5605) * For each populated zone we also print a per-order breakdown of the free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5606) * lists, which gives a rough picture of external fragmentation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5607) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5608) * Bits in @filter:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5609) * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5610) * cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5611) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5612) void show_free_areas(unsigned int filter, nodemask_t *nodemask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5613) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5614) unsigned long free_pcp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5615) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5616) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5617) pg_data_t *pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5619) for_each_populated_zone(zone) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5620) if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5621) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5623) for_each_online_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5624) free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5627) printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5628) " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5629) " unevictable:%lu dirty:%lu writeback:%lu\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5630) " slab_reclaimable:%lu slab_unreclaimable:%lu\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5631) " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5632) " free:%lu free_pcp:%lu free_cma:%lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5633) global_node_page_state(NR_ACTIVE_ANON),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5634) global_node_page_state(NR_INACTIVE_ANON),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5635) global_node_page_state(NR_ISOLATED_ANON),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5636) global_node_page_state(NR_ACTIVE_FILE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5637) global_node_page_state(NR_INACTIVE_FILE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5638) global_node_page_state(NR_ISOLATED_FILE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5639) global_node_page_state(NR_UNEVICTABLE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5640) global_node_page_state(NR_FILE_DIRTY),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5641) global_node_page_state(NR_WRITEBACK),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5642) global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5643) global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5644) global_node_page_state(NR_FILE_MAPPED),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5645) global_node_page_state(NR_SHMEM),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5646) global_zone_page_state(NR_PAGETABLE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5647) global_zone_page_state(NR_BOUNCE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5648) global_zone_page_state(NR_FREE_PAGES),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5649) free_pcp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5650) global_zone_page_state(NR_FREE_CMA_PAGES));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5652) for_each_online_pgdat(pgdat) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5653) if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5654) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5656) printk("Node %d"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5657) " active_anon:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5658) " inactive_anon:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5659) " active_file:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5660) " inactive_file:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5661) " unevictable:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5662) " isolated(anon):%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5663) " isolated(file):%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5664) " mapped:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5665) " dirty:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5666) " writeback:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5667) " shmem:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5668) #ifdef CONFIG_TRANSPARENT_HUGEPAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5669) " shmem_thp: %lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5670) " shmem_pmdmapped: %lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5671) " anon_thp: %lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5672) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5673) " writeback_tmp:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5674) " kernel_stack:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5675) #ifdef CONFIG_SHADOW_CALL_STACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5676) " shadow_call_stack:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5677) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5678) " all_unreclaimable? %s"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5679) "\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5680) pgdat->node_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5681) K(node_page_state(pgdat, NR_ACTIVE_ANON)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5682) K(node_page_state(pgdat, NR_INACTIVE_ANON)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5683) K(node_page_state(pgdat, NR_ACTIVE_FILE)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5684) K(node_page_state(pgdat, NR_INACTIVE_FILE)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5685) K(node_page_state(pgdat, NR_UNEVICTABLE)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5686) K(node_page_state(pgdat, NR_ISOLATED_ANON)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5687) K(node_page_state(pgdat, NR_ISOLATED_FILE)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5688) K(node_page_state(pgdat, NR_FILE_MAPPED)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5689) K(node_page_state(pgdat, NR_FILE_DIRTY)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5690) K(node_page_state(pgdat, NR_WRITEBACK)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5691) K(node_page_state(pgdat, NR_SHMEM)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5692) #ifdef CONFIG_TRANSPARENT_HUGEPAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5693) K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5694) K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5695) * HPAGE_PMD_NR),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5696) K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5697) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5698) K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5699) node_page_state(pgdat, NR_KERNEL_STACK_KB),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5700) #ifdef CONFIG_SHADOW_CALL_STACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5701) node_page_state(pgdat, NR_KERNEL_SCS_KB),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5702) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5703) pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5704) "yes" : "no");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5705) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5707) for_each_populated_zone(zone) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5708) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5710) if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5711) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5713) free_pcp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5714) for_each_online_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5715) free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5717) show_node(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5718) printk(KERN_CONT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5719) "%s"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5720) " free:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5721) " min:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5722) " low:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5723) " high:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5724) " reserved_highatomic:%luKB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5725) " active_anon:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5726) " inactive_anon:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5727) " active_file:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5728) " inactive_file:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5729) " unevictable:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5730) " writepending:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5731) " present:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5732) " managed:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5733) " mlocked:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5734) " pagetables:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5735) " bounce:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5736) " free_pcp:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5737) " local_pcp:%ukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5738) " free_cma:%lukB"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5739) "\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5740) zone->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5741) K(zone_page_state(zone, NR_FREE_PAGES)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5742) K(min_wmark_pages(zone)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5743) K(low_wmark_pages(zone)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5744) K(high_wmark_pages(zone)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5745) K(zone->nr_reserved_highatomic),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5746) K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5747) K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5748) K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5749) K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5750) K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5751) K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5752) K(zone->present_pages),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5753) K(zone_managed_pages(zone)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5754) K(zone_page_state(zone, NR_MLOCK)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5755) K(zone_page_state(zone, NR_PAGETABLE)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5756) K(zone_page_state(zone, NR_BOUNCE)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5757) K(free_pcp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5758) K(this_cpu_read(zone->pageset->pcp.count)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5759) K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5760) printk("lowmem_reserve[]:");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5761) for (i = 0; i < MAX_NR_ZONES; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5762) printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5763) printk(KERN_CONT "\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5766) for_each_populated_zone(zone) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5767) unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5768) unsigned long nr[MAX_ORDER], flags, total = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5769) unsigned char types[MAX_ORDER];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5771) if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5772) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5773) show_node(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5774) printk(KERN_CONT "%s: ", zone->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5776) spin_lock_irqsave(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5777) for (order = 0; order < MAX_ORDER; order++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5778) struct free_area *area = &zone->free_area[order];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5779) int type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5781) nr[order] = area->nr_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5782) total += nr[order] << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5784) types[order] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5785) for (type = 0; type < MIGRATE_TYPES; type++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5786) if (!free_area_empty(area, type))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5787) types[order] |= 1 << type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5789) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5790) spin_unlock_irqrestore(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5791) for (order = 0; order < MAX_ORDER; order++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5792) printk(KERN_CONT "%lu*%lukB ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5793) nr[order], K(1UL) << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5794) if (nr[order])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5795) show_migration_types(types[order]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5796) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5797) printk(KERN_CONT "= %lukB\n", K(total));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5798) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5800) hugetlb_show_meminfo();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5802) printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5804) show_swap_cache_info();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5805) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5807) static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5808) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5809) zoneref->zone = zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5810) zoneref->zone_idx = zone_idx(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5813) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5814) * Builds allocation fallback zone lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5815) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5816) * Add all populated zones of a node to the zonelist.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5817) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5818) static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5819) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5820) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5821) enum zone_type zone_type = MAX_NR_ZONES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5822) int nr_zones = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5824) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5825) zone_type--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5826) zone = pgdat->node_zones + zone_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5827) if (managed_zone(zone)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5828) zoneref_set_zone(zone, &zonerefs[nr_zones++]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5829) check_highest_zone(zone_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5830) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5831) } while (zone_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5833) return nr_zones;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5834) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5836) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5838) static int __parse_numa_zonelist_order(char *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5839) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5840) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5841) * We used to support different zonelist modes, but they turned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5842) * out not to be useful. Keep the warning in place in case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5843) * somebody still uses the command-line parameter, so that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5844) * do not fail silently.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5845) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5846) if (!(*s == 'd' || *s == 'D' || *s == 'n' || *s == 'N')) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5847) pr_warn("Ignoring unsupported numa_zonelist_order value: %s\n", s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5848) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5850) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5853) char numa_zonelist_order[] = "Node";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5855) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5856) * sysctl handler for numa_zonelist_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5857) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5858) int numa_zonelist_order_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5859) void *buffer, size_t *length, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5860) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5861) if (write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5862) return __parse_numa_zonelist_order(buffer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5863) return proc_dostring(table, write, buffer, length, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5867) #define MAX_NODE_LOAD (nr_online_nodes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5868) static int node_load[MAX_NUMNODES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5870) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5871) * find_next_best_node - find the next node that should appear in a given node's fallback list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5872) * @node: node whose fallback list we're appending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5873) * @used_node_mask: nodemask_t of already used nodes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5874) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5875) * We use a number of factors to determine which is the next node that should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5876) * appear on a given node's fallback list. The node should not have appeared
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5877) * already in @node's fallback list, and it should be the next closest node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5878) * according to the distance array (which contains arbitrary distance values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5879) * from each node to each node in the system), and should also prefer nodes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5880) * with no CPUs, since presumably they'll have very little allocation pressure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5881) * on them otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5882) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5883) * Return: node id of the found node or %NUMA_NO_NODE if no node is found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5884) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5885) static int find_next_best_node(int node, nodemask_t *used_node_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5886) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5887) int n, val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5888) int min_val = INT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5889) int best_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5891) /* Use the local node if we haven't already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5892) if (!node_isset(node, *used_node_mask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5893) node_set(node, *used_node_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5894) return node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5897) for_each_node_state(n, N_MEMORY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5899) /* Don't want a node to appear more than once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5900) if (node_isset(n, *used_node_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5901) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5903) /* Use the distance array to find the distance */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5904) val = node_distance(node, n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5906) /* Penalize nodes under us ("prefer the next node") */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5907) val += (n < node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5909) /* Give preference to headless and unused nodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5910) if (!cpumask_empty(cpumask_of_node(n)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5911) val += PENALTY_FOR_NODE_WITH_CPUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5912)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5913) /* Slight preference for less loaded node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5914) val *= (MAX_NODE_LOAD*MAX_NUMNODES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5915) val += node_load[n];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5917) if (val < min_val) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5918) min_val = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5919) best_node = n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5923) if (best_node >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5924) node_set(best_node, *used_node_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5926) return best_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5927) }
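
/*
 * Worked example of the scoring above (illustrative values, assuming
 * PENALTY_FOR_NODE_WITH_CPUS is 1): while filling node 0's fallback list,
 * a candidate node 2 with node_distance(0, 2) = 20, CPUs attached and
 * node_load[2] = 0 scores
 *
 *	val = (20 + 0 + 1) * MAX_NODE_LOAD * MAX_NUMNODES + 0
 *
 * so distance and the CPU penalty dominate, and node_load[] only breaks
 * ties between otherwise equally attractive nodes.
 */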
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5930) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5931) * Build zonelists ordered by node and zones within node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5932) * This results in maximum locality--normal zone overflows into local
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5933) * DMA zone, if any--but risks exhausting DMA zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5934) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5935) static void build_zonelists_in_node_order(pg_data_t *pgdat, int *node_order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5936) unsigned nr_nodes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5937) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5938) struct zoneref *zonerefs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5939) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5941) zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5943) for (i = 0; i < nr_nodes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5944) int nr_zones;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5946) pg_data_t *node = NODE_DATA(node_order[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5948) nr_zones = build_zonerefs_node(node, zonerefs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5949) zonerefs += nr_zones;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5951) zonerefs->zone = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5952) zonerefs->zone_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5953) }
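
/*
 * Illustrative result (hypothetical two-node machine where each node has a
 * populated ZONE_NORMAL and ZONE_DMA32): with node_order = {0, 1}, the
 * ZONELIST_FALLBACK zonerefs come out as
 *
 *	node0/Normal, node0/DMA32, node1/Normal, node1/DMA32, <NULL terminator>
 *
 * i.e. every zone of the closest node is tried before any zone of the next
 * node, with zones inside a node ordered from highest to lowest.
 */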
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5955) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5956) * Build gfp_thisnode zonelists
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5957) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5958) static void build_thisnode_zonelists(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5959) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5960) struct zoneref *zonerefs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5961) int nr_zones;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5963) zonerefs = pgdat->node_zonelists[ZONELIST_NOFALLBACK]._zonerefs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5964) nr_zones = build_zonerefs_node(pgdat, zonerefs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5965) zonerefs += nr_zones;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5966) zonerefs->zone = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5967) zonerefs->zone_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5970) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5971) * Build the complete set of zonelists for a node: the ZONELIST_FALLBACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5972) * list, ordered by node as computed below (all zones of the closest node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5973) * before any zone of the next node), and the ZONELIST_NOFALLBACK list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5974) * containing only the node's own zones for __GFP_THISNODE allocations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5975) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5977) static void build_zonelists(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5978) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5979) static int node_order[MAX_NUMNODES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5980) int node, load, nr_nodes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5981) nodemask_t used_mask = NODE_MASK_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5982) int local_node, prev_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5984) /* NUMA-aware ordering of nodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5985) local_node = pgdat->node_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5986) load = nr_online_nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5987) prev_node = local_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5989) memset(node_order, 0, sizeof(node_order));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5990) while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5991) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5992) * We don't want to put allocation pressure on one particular node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5993) * so add a penalty to the first node in each distance group in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5994) * order to round-robin among nodes at the same distance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5995) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5996) if (node_distance(local_node, node) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5997) node_distance(local_node, prev_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5998) node_load[node] = load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6000) node_order[nr_nodes++] = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6001) prev_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6002) load--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6005) build_zonelists_in_node_order(pgdat, node_order, nr_nodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6006) build_thisnode_zonelists(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6007) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6009) #ifdef CONFIG_HAVE_MEMORYLESS_NODES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6010) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6011) * Return the node id of the node used for "local" allocations,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6012) * i.e. the node of the first zone in the argument node's generic zonelist.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6013) * Used for initializing percpu 'numa_mem', which is used primarily
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6014) * for kernel allocations, so use GFP_KERNEL flags to locate the zonelist.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6015) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6016) int local_memory_node(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6017) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6018) struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6020) z = first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6021) gfp_zone(GFP_KERNEL),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6022) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6023) return zone_to_nid(z->zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6024) }
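
/*
 * For example (hypothetical topology): on a memoryless node 2 whose fallback
 * zonelist starts with a zone of node 1, local_memory_node(2) returns 1; for
 * a node that has its own memory it simply returns that node's id.
 */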
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6025) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6027) static void setup_min_unmapped_ratio(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6028) static void setup_min_slab_ratio(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6029) #else /* CONFIG_NUMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6031) static void build_zonelists(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6032) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6033) int node, local_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6034) struct zoneref *zonerefs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6035) int nr_zones;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6037) local_node = pgdat->node_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6039) zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6040) nr_zones = build_zonerefs_node(pgdat, zonerefs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6041) zonerefs += nr_zones;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6043) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6044) * Now we build the zonelist so that it contains the zones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6045) * of all the other nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6046) * We don't want to pressure a particular node, so when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6047) * building the zones for node N, we make sure that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6048) * zones coming right after the local ones are those from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6049) * node N+1 and so on, wrapping around to node 0 after the last node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6050) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6051) for (node = local_node + 1; node < MAX_NUMNODES; node++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6052) if (!node_online(node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6053) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6054) nr_zones = build_zonerefs_node(NODE_DATA(node), zonerefs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6055) zonerefs += nr_zones;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6057) for (node = 0; node < local_node; node++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6058) if (!node_online(node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6059) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6060) nr_zones = build_zonerefs_node(NODE_DATA(node), zonerefs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6061) zonerefs += nr_zones;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6062) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6064) zonerefs->zone = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6065) zonerefs->zone_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6066) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6068) #endif /* CONFIG_NUMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6070) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6071) * Boot pageset table. One per cpu which is going to be used for all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6072) * zones and all nodes. The parameters will be set in such a way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6073) * that an item put on a list will immediately be handed over to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6074) * the buddy list. This is safe since pageset manipulation is done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6075) * with interrupts disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6076) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6077) * The boot_pagesets must be kept even after bootup is complete for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6078) * unused processors and/or zones. They do play a role for bootstrapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6079) * hotplugged processors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6080) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6081) * zoneinfo_show() and maybe other functions do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6082) * not check if the processor is online before following the pageset pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6083) * Other parts of the kernel may not check if the zone is available.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6084) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6085) static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6086) static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6087) static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6089) static void __build_all_zonelists(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6090) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6091) int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6092) int __maybe_unused cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6093) pg_data_t *self = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6094) static DEFINE_SPINLOCK(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6096) spin_lock(&lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6098) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6099) memset(node_load, 0, sizeof(node_load));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6100) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6102) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6103) * This node is hotadded and no memory is yet present. So just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6104) * building zonelists is fine - no need to touch other nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6105) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6106) if (self && !node_online(self->node_id)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6107) build_zonelists(self);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6108) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6109) for_each_online_node(nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6110) pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6112) build_zonelists(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6115) #ifdef CONFIG_HAVE_MEMORYLESS_NODES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6116) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6117) * We now know the "local memory node" for each node--
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6118) * i.e., the node of the first zone in the generic zonelist.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6119) * Set up numa_mem percpu variable for on-line cpus. During
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6120) * boot, only the boot cpu should be on-line; we'll init the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6121) * secondary cpus' numa_mem as they come on-line. During
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6122) * node/memory hotplug, we'll fixup all on-line cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6123) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6124) for_each_online_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6125) set_cpu_numa_mem(cpu, local_memory_node(cpu_to_node(cpu)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6126) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6129) spin_unlock(&lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6132) static noinline void __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6133) build_all_zonelists_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6134) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6135) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6137) __build_all_zonelists(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6139) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6140) * Initialize the boot_pagesets that are going to be used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6141) * for bootstrapping processors. The real pagesets for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6142) * each zone will be allocated later when the per cpu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6143) * allocator is available.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6144) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6145) * boot_pagesets are used also for bootstrapping offline
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6146) * cpus if the system is already booted because the pagesets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6147) * are needed to initialize allocators on a specific cpu too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6148) * E.g. the percpu allocator needs the page allocator, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6149) * needs the percpu allocator in order to allocate its pagesets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6150) * (a chicken-egg dilemma).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6151) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6152) for_each_possible_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6153) setup_pageset(&per_cpu(boot_pageset, cpu), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6155) mminit_verify_zonelist();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6156) cpuset_init_current_mems_allowed();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6159) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6160) * Rebuild the zonelists. This runs at boot and again on memory/node hotplug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6161) * the hotplug path is taken unless system_state == SYSTEM_BOOTING.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6162) * __ref due to call of __init annotated helper build_all_zonelists_init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6163) * [protected by SYSTEM_BOOTING].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6164) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6165) void __ref build_all_zonelists(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6166) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6167) unsigned long vm_total_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6169) if (system_state == SYSTEM_BOOTING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6170) build_all_zonelists_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6171) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6172) __build_all_zonelists(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6173) /* cpuset refresh routine should be here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6174) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6175) /* Get the number of free pages beyond high watermark in all zones. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6176) vm_total_pages = nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6177) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6178) * Disable grouping by mobility if the number of pages in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6179) * system is too low to allow the mechanism to work. It would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6180) * more accurate, but expensive to check per-zone. This check is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6181) * made on memory-hotadd so a system can start with mobility
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6182) * disabled and enable it later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6183) */
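	/*
	 * Worked threshold example (illustrative, config-dependent values):
	 * with 4K pages, pageblock_order = 9 (pageblock_nr_pages = 512) and
	 * MIGRATE_TYPES = 6, grouping is disabled below 512 * 6 = 3072 pages,
	 * i.e. roughly 12 MiB of memory.
	 */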
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6184) if (vm_total_pages < (pageblock_nr_pages * MIGRATE_TYPES))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6185) page_group_by_mobility_disabled = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6186) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6187) page_group_by_mobility_disabled = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6189) pr_info("Built %u zonelists, mobility grouping %s. Total pages: %ld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6190) nr_online_nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6191) page_group_by_mobility_disabled ? "off" : "on",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6192) vm_total_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6193) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6194) pr_info("Policy zone: %s\n", zone_names[policy_zone]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6195) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6198) /* If zone is ZONE_MOVABLE but memory is mirrored, it is an overlapped init */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6199) static bool __meminit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6200) overlap_memmap_init(unsigned long zone, unsigned long *pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6201) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6202) static struct memblock_region *r;
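/*
 * Note: the region cursor is static so that, for consecutive pfns that
 * fall inside the same memblock region, the lookup below can be skipped
 * instead of rescanning memblock on every call.
 */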
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6204) if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6205) if (!r || *pfn >= memblock_region_memory_end_pfn(r)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6206) for_each_mem_region(r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6207) if (*pfn < memblock_region_memory_end_pfn(r))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6208) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6211) if (*pfn >= memblock_region_memory_base_pfn(r) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6212) memblock_is_mirror(r)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6213) *pfn = memblock_region_memory_end_pfn(r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6214) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6217) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6220) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6221) * Initially all pages are reserved - free ones are freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6222) * up by memblock_free_all() once the early boot process is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6223) * done. Non-atomic initialization, single-pass.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6224) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6225) * All aligned pageblocks are initialized to the specified migratetype
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6226) * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6227) * zone stats (e.g., nr_isolate_pageblock) are touched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6228) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6229) void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6230) unsigned long start_pfn, unsigned long zone_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6231) enum meminit_context context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6232) struct vmem_altmap *altmap, int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6233) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6234) unsigned long pfn, end_pfn = start_pfn + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6235) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6237) if (highest_memmap_pfn < end_pfn - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6238) highest_memmap_pfn = end_pfn - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6240) #ifdef CONFIG_ZONE_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6241) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6242) * Honor reservation requested by the driver for this ZONE_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6243) * memory. We limit the total number of pages to initialize to just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6244) * those that might contain the memory mapping. We will defer the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6245) * ZONE_DEVICE page initialization until after we have released
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6246) * the hotplug lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6247) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6248) if (zone == ZONE_DEVICE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6249) if (!altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6250) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6252) if (start_pfn == altmap->base_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6253) start_pfn += altmap->reserve;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6254) end_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6255) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6256) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6258) #ifdef CONFIG_ROCKCHIP_THUNDER_BOOT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6259) /* Zero all struct pages in advance */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6260) memset(pfn_to_page(start_pfn), 0, sizeof(struct page) * size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6261) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6263) for (pfn = start_pfn; pfn < end_pfn; ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6264) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6265) * There can be holes in boot-time mem_map[]s handed to this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6266) * function. They do not exist on hotplugged memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6267) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6268) if (context == MEMINIT_EARLY) {
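/*
 * For early init, mirrored memory overlapping ZONE_MOVABLE is skipped
 * (see overlap_memmap_init()) and, when deferred struct page init is
 * enabled, defer_init() may cut this loop short so the remaining pages
 * are initialized later.
 */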
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6269) if (overlap_memmap_init(zone, &pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6270) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6271) if (defer_init(nid, pfn, zone_end_pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6272) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6275) page = pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6276) __init_single_page(page, pfn, zone, nid, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6277) if (context == MEMINIT_HOTPLUG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6278) __SetPageReserved(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6280) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6281) * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6282) * such that unmovable allocations won't be scattered all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6283) * over the place during system boot.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6284) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6285) if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6286) set_pageblock_migratetype(page, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6287) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6289) pfn++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6293) #ifdef CONFIG_ZONE_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6294) void __ref memmap_init_zone_device(struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6295) unsigned long start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6296) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6297) struct dev_pagemap *pgmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6298) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6299) unsigned long pfn, end_pfn = start_pfn + nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6300) struct pglist_data *pgdat = zone->zone_pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6301) struct vmem_altmap *altmap = pgmap_altmap(pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6302) unsigned long zone_idx = zone_idx(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6303) unsigned long start = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6304) int nid = pgdat->node_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6306) if (WARN_ON_ONCE(!pgmap || zone_idx(zone) != ZONE_DEVICE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6307) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6309) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6310) * The call to memmap_init should have already taken care
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6311) * of the pages reserved for the memmap, so we can just jump to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6312) * the end of that region and start processing the device pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6313) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6314) if (altmap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6315) start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6316) nr_pages = end_pfn - start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6319) for (pfn = start_pfn; pfn < end_pfn; pfn++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6320) struct page *page = pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6322) __init_single_page(page, pfn, zone_idx, nid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6324) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6325) * Mark the page reserved as it will need to wait for the onlining
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6326) * phase before it is fully associated with a zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6327) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6328) * We can use the non-atomic __set_bit operation for setting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6329) * the flag as we are still initializing the pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6330) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6331) __SetPageReserved(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6333) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6334) * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6335) * and zone_device_data. It is a bug if a ZONE_DEVICE page is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6336) * ever freed or placed on a driver-private list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6337) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6338) page->pgmap = pgmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6339) page->zone_device_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6341) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6342) * Mark the block movable so that blocks are reserved for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6343) * movable at startup. This will force kernel allocations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6344) * to reserve their blocks rather than leaking throughout
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6345) * the address space during boot when many long-lived
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6346) * kernel allocations are made.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6347) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6348) * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6349) * because this is done early in section_activate()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6350) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6351) if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6352) set_pageblock_migratetype(page, MIGRATE_MOVABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6353) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6357) pr_info("%s initialised %lu pages in %ums\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6358) nr_pages, jiffies_to_msecs(jiffies - start));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6361) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6362) static void __meminit zone_init_free_lists(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6363) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6364) unsigned int order, t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6365) for_each_migratetype_order(order, t) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6366) INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6367) zone->free_area[order].nr_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6369) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6371) #if !defined(CONFIG_FLAT_NODE_MEM_MAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6372) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6373) * Only struct pages that correspond to ranges defined by memblock.memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6374) * are zeroed and initialized by going through __init_single_page() during
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6375) * memmap_init_zone_range().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6376) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6377) * But, there could be struct pages that correspond to holes in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6378) * memblock.memory. This can happen because of the following reasons:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6379) * - physical memory bank size is not necessarily the exact multiple of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6380) * arbitrary section size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6381) * - early reserved memory may not be listed in memblock.memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6382) * - memory layouts defined with memmap= kernel parameter may not align
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6383) * nicely with memmap sections
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6384) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6385) * Explicitly initialize those struct pages so that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6386) * - PG_Reserved is set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6387) * - zone and node links point to zone and node that span the page if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6388) * hole is in the middle of a zone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6389) * - zone and node links point to adjacent zone/node if the hole falls on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6390) * the zone boundary; the pages in such holes will be prepended to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6391) * zone/node above the hole except for the trailing pages in the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6392) * section that will be appended to the zone/node below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6393) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6394) static void __init init_unavailable_range(unsigned long spfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6395) unsigned long epfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6396) int zone, int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6397) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6398) unsigned long pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6399) u64 pgcnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6401) for (pfn = spfn; pfn < epfn; pfn++) {
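/*
 * Check pfn_valid() once per pageblock: if the block-aligned start has
 * no memmap, jump to the block's last pfn and let the loop increment
 * move on to the next pageblock.
 */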
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6402) if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6403) pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6404) + pageblock_nr_pages - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6405) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6407) __init_single_page(pfn_to_page(pfn), pfn, zone, node, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6408) __SetPageReserved(pfn_to_page(pfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6409) pgcnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6410) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6412) if (pgcnt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6413) pr_info("On node %d, zone %s: %lld pages in unavailable ranges\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6414) node, zone_names[zone], pgcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6416) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6417) static inline void init_unavailable_range(unsigned long spfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6418) unsigned long epfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6419) int zone, int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6420) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6422) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6424) static void __init memmap_init_zone_range(struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6425) unsigned long start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6426) unsigned long end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6427) unsigned long *hole_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6428) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6429) unsigned long zone_start_pfn = zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6430) unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6431) int nid = zone_to_nid(zone), zone_id = zone_idx(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6433) start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6434) end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6436) if (start_pfn >= end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6437) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6439) memmap_init_zone(end_pfn - start_pfn, nid, zone_id, start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6440) zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6442) if (*hole_pfn < start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6443) init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6445) *hole_pfn = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6448) void __init __weak memmap_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6449) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6450) unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6451) unsigned long hole_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6452) int i, j, zone_id, nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6454) for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6455) struct pglist_data *node = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6457) for (j = 0; j < MAX_NR_ZONES; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6458) struct zone *zone = node->node_zones + j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6460) if (!populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6461) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6463) memmap_init_zone_range(zone, start_pfn, end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6464) &hole_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6465) zone_id = j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6466) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6469) #ifdef CONFIG_SPARSEMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6470) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6471) * Initialize the memory map for the hole in the range [memory_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6472) * section_end].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6473) * Append the pages in this hole to the highest zone in the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6474) * node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6475) * The call to init_unavailable_range() is outside the ifdef to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6476) * silence the compiler warning about zone_id set but not used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6477) * for FLATMEM it is a nop anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6478) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6479) end_pfn = round_up(end_pfn, PAGES_PER_SECTION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6480) if (hole_pfn < end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6481) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6482) init_unavailable_range(hole_pfn, end_pfn, zone_id, nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6485) /* A stub for backwards compatibility with a custom implementation on IA-64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6486) void __meminit __weak arch_memmap_init(unsigned long size, int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6487) unsigned long zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6488) unsigned long range_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6492) static int zone_batchsize(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6493) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6494) #ifdef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6495) int batch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6497) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6498) * The per-cpu-pages pools are set to around 1000th of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6499) * size of the zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6500) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6501) batch = zone_managed_pages(zone) / 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6502) /* But no more than a meg. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6503) if (batch * PAGE_SIZE > 1024 * 1024)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6504) batch = (1024 * 1024) / PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6505) batch /= 4; /* We effectively *= 4 below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6506) if (batch < 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6507) batch = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6509) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6510) * Clamp the batch to a 2^n - 1 value. Having a power
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6511) * of 2 value was found to be more likely to have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6512) * suboptimal cache aliasing properties in some cases.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6513) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6514) * For example if 2 tasks are alternately allocating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6515) * batches of pages, one task can end up with a lot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6516) * of pages of one half of the possible page colors
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6517) * and the other with pages of the other colors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6518) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6519) batch = rounddown_pow_of_two(batch + batch/2) - 1;
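/*
 * Rough worked example (assuming 4 KiB pages): a zone with ~4 GiB of
 * managed memory gives an initial batch of 1024, the 1 MiB cap reduces
 * it to 256, the division by 4 to 64, and the final rounding to 2^n - 1
 * yields rounddown_pow_of_two(64 + 32) - 1 = 63.
 */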
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6521) return batch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6523) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6524) /* The deferral and batching of frees should be suppressed under NOMMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6525) * conditions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6526) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6527) * The problem is that NOMMU needs to be able to allocate large chunks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6528) * of contiguous memory as there's no hardware page translation to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6529) * assemble apparent contiguous memory from discontiguous pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6530) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6531) * Queueing large contiguous runs of pages for batching, however,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6532) * causes the pages to actually be freed in smaller chunks. As there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6533) * can be a significant delay between the individual batches being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6534) * recycled, this leads to the once large chunks of space being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6535) * fragmented and becoming unavailable for high-order allocations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6536) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6537) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6538) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6541) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6542) * pcp->high and pcp->batch values are related and dependent on one another:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6543) * ->batch must never be higher than ->high.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6544) * The following function updates them in a safe manner without read side
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6545) * locking.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6546) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6547) * Any new users of pcp->batch and pcp->high should ensure they can cope with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6548) * those fields changing asynchronously (according to the above rule).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6549) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6550) * mutex_is_locked(&pcp_batch_high_lock) required when calling this function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6551) * outside of boot time (or some other assurance that no concurrent updaters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6552) * exist).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6553) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6554) static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6555) unsigned long batch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6556) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6557) /* start with a fail safe value for batch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6558) pcp->batch = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6559) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6561) /* Update high, then batch, in order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6562) pcp->high = high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6563) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6564)
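/*
 * Ordered this way, the stored pair never passes through a state where
 * ->batch exceeds ->high: it moves from (old high, old batch) to
 * (old high, 1), then (new high, 1), and finally (new high, new batch).
 */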
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6565) pcp->batch = batch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6568) /* a companion to pageset_set_high() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6569) static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6570) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6571) pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6572) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6574) static void pageset_init(struct per_cpu_pageset *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6575) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6576) struct per_cpu_pages *pcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6577) int migratetype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6579) memset(p, 0, sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6581) pcp = &p->pcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6582) for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6583) INIT_LIST_HEAD(&pcp->lists[migratetype]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6586) static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6587) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6588) pageset_init(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6589) pageset_set_batch(p, batch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6592) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6593) * pageset_set_high() sets the high water mark for hot per_cpu_pagelist
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6594) * to the value high for the pageset p.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6595) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6596) static void pageset_set_high(struct per_cpu_pageset *p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6597) unsigned long high)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6598) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6599) unsigned long batch = max(1UL, high / 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6600) if ((high / 4) > (PAGE_SHIFT * 8))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6601) batch = PAGE_SHIFT * 8;
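/*
 * E.g. with 4 KiB pages (PAGE_SHIFT == 12, an assumption) the cap is
 * 96 pages, so high = 1024 yields batch = 96 rather than 256.
 */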
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6603) pageset_update(&p->pcp, high, batch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6606) static void pageset_set_high_and_batch(struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6607) struct per_cpu_pageset *pcp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6608) {
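/*
 * When the percpu_pagelist_fraction sysctl is set to e.g. 8, each CPU's
 * pcp high mark becomes 1/8th of the zone's managed pages; otherwise the
 * defaults derived from zone_batchsize() (high = 6 * batch) are used.
 */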
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6609) if (percpu_pagelist_fraction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6610) pageset_set_high(pcp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6611) (zone_managed_pages(zone) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6612) percpu_pagelist_fraction));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6613) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6614) pageset_set_batch(pcp, zone_batchsize(zone));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6615) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6617) static void __meminit zone_pageset_init(struct zone *zone, int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6618) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6619) struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6621) pageset_init(pcp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6622) pageset_set_high_and_batch(zone, pcp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6625) void __meminit setup_zone_pageset(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6626) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6627) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) zone->pageset = alloc_percpu(struct per_cpu_pageset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6629) for_each_possible_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6630) zone_pageset_init(zone, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6631) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6633) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6634) * Allocate per cpu pagesets and initialize them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6635) * Before this call only boot pagesets were available.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6636) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6637) void __init setup_per_cpu_pageset(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6638) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6639) struct pglist_data *pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6640) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6641) int __maybe_unused cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6643) for_each_populated_zone(zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6644) setup_zone_pageset(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6646) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6647) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6648) * Unpopulated zones continue using the boot pagesets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6649) * The numa stats for these pagesets need to be reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6650) * Otherwise, they will end up skewing the stats of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6651) * the nodes these zones are associated with.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6652) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6653) for_each_possible_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6654) struct per_cpu_pageset *pcp = &per_cpu(boot_pageset, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6655) memset(pcp->vm_numa_stat_diff, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6656) sizeof(pcp->vm_numa_stat_diff));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6657) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6658) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6660) for_each_online_pgdat(pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6661) pgdat->per_cpu_nodestats =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6662) alloc_percpu(struct per_cpu_nodestat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6663) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6665) static __meminit void zone_pcp_init(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6666) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6667) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6668) * per cpu subsystem is not up at this point. The following code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6669) * relies on the ability of the linker to provide the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6670) * offset of a (static) per cpu variable into the per cpu area.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6671) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6672) zone->pageset = &boot_pageset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6674) if (populated_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6675) printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6676) zone->name, zone->present_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6677) zone_batchsize(zone));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6680) void __meminit init_currently_empty_zone(struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6681) unsigned long zone_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) unsigned long size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6683) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6684) struct pglist_data *pgdat = zone->zone_pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6685) int zone_idx = zone_idx(zone) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6687) if (zone_idx > pgdat->nr_zones)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6688) pgdat->nr_zones = zone_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6690) zone->zone_start_pfn = zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6692) mminit_dprintk(MMINIT_TRACE, "memmap_init",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6693) "Initialising map node %d zone %lu pfns %lu -> %lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6694) pgdat->node_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) (unsigned long)zone_idx(zone),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6696) zone_start_pfn, (zone_start_pfn + size));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6698) zone_init_free_lists(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6699) zone->initialized = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6702) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6703) * get_pfn_range_for_nid - Return the start and end page frames for a node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6704) * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6705) * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6706) * @end_pfn: Passed by reference. On return, it will have the node end_pfn.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6707) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6708) * It returns the start and end page frame of a node based on information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6709) * provided by memblock_set_node(). If called for a node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6710) * with no available memory, a warning is printed and the start and end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6711) * PFNs will be 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6712) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6713) void __init get_pfn_range_for_nid(unsigned int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6714) unsigned long *start_pfn, unsigned long *end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6715) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6716) unsigned long this_start_pfn, this_end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6717) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6719) *start_pfn = -1UL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6720) *end_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6722) for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6723) *start_pfn = min(*start_pfn, this_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6724) *end_pfn = max(*end_pfn, this_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6727) if (*start_pfn == -1UL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6728) *start_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6729) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6731) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6732) * This finds a zone that can be used for ZONE_MOVABLE pages. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6733) * assumption is made that zones within a node are ordered in monotonically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6734) * increasing memory addresses so that the "highest" populated zone is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6735) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6736) static void __init find_usable_zone_for_movable(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6737) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6738) int zone_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6739) for (zone_index = MAX_NR_ZONES - 1; zone_index >= 0; zone_index--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6740) if (zone_index == ZONE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6741) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6743) if (arch_zone_highest_possible_pfn[zone_index] >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6744) arch_zone_lowest_possible_pfn[zone_index])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6745) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6746) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6748) VM_BUG_ON(zone_index == -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6749) movable_zone = zone_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6750) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6752) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6753) * The zone ranges provided by the architecture do not include ZONE_MOVABLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6754) * because it is sized independently of the architecture. Unlike the other zones,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6755) * the starting point for ZONE_MOVABLE is not fixed. It may be different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6756) * in each node depending on the size of each node and how evenly kernelcore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6757) * is distributed. This helper function adjusts the zone ranges
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6758) * provided by the architecture for a given node by using the end of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6759) * highest usable zone for ZONE_MOVABLE. This preserves the assumption that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6760) * zones within a node are in monotonically increasing order of memory addresses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6761) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6762) static void __init adjust_zone_range_for_zone_movable(int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6763) unsigned long zone_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6764) unsigned long node_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6765) unsigned long node_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6766) unsigned long *zone_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6767) unsigned long *zone_end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6768) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6769) /* Only adjust if ZONE_MOVABLE is on this node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6770) if (zone_movable_pfn[nid]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6771) /* Size ZONE_MOVABLE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6772) if (zone_type == ZONE_MOVABLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6773) *zone_start_pfn = zone_movable_pfn[nid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6774) *zone_end_pfn = min(node_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6775) arch_zone_highest_possible_pfn[movable_zone]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6777) /* Adjust for ZONE_MOVABLE starting within this range */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6778) } else if (!mirrored_kernelcore &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6779) *zone_start_pfn < zone_movable_pfn[nid] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6780) *zone_end_pfn > zone_movable_pfn[nid]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6781) *zone_end_pfn = zone_movable_pfn[nid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6783) /* Check if this whole range is within ZONE_MOVABLE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6784) } else if (*zone_start_pfn >= zone_movable_pfn[nid])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6785) *zone_start_pfn = *zone_end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6787) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6789) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6790) * Return the number of pages a zone spans in a node, including holes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6791) * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6792) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6793) static unsigned long __init zone_spanned_pages_in_node(int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6794) unsigned long zone_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6795) unsigned long node_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6796) unsigned long node_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6797) unsigned long *zone_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6798) unsigned long *zone_end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6799) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6801) unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6802) /* When hotadding a new node from cpu_up(), the node should be empty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6803) if (!node_start_pfn && !node_end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6804) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6806) /* Get the start and end of the zone */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6807) *zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6808) *zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6809) adjust_zone_range_for_zone_movable(nid, zone_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6810) node_start_pfn, node_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6811) zone_start_pfn, zone_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6813) /* Check that this node has pages within the zone's required range */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6814) if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6815) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6817) /* Move the zone boundaries inside the node if necessary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6818) *zone_end_pfn = min(*zone_end_pfn, node_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6819) *zone_start_pfn = max(*zone_start_pfn, node_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6821) /* Return the spanned pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6822) return *zone_end_pfn - *zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6825) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6826) * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6827) * then all holes in the requested range will be accounted for.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6828) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6829) unsigned long __init __absent_pages_in_range(int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6830) unsigned long range_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6831) unsigned long range_end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6832) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6833) unsigned long nr_absent = range_end_pfn - range_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6834) unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6835) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6836)
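/*
 * Start by treating the whole range as a hole, then subtract every
 * memblock.memory range (clamped to the request) that intersects it.
 */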
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6837) for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6838) start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6839) end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6840) nr_absent -= end_pfn - start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6841) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6842) return nr_absent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6845) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6846) * absent_pages_in_range - Return number of page frames in holes within a range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6847) * @start_pfn: The start PFN to start searching for holes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6848) * @end_pfn: The end PFN to stop searching for holes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6849) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6850) * Return: the number of page frames in memory holes within a range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6851) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6852) unsigned long __init absent_pages_in_range(unsigned long start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6853) unsigned long end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6858) /* Return the number of page frames in holes in a zone on a node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6859) static unsigned long __init zone_absent_pages_in_node(int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6860) unsigned long zone_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6861) unsigned long node_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6862) unsigned long node_end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6863) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6864) unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6865) unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6866) unsigned long zone_start_pfn, zone_end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6867) unsigned long nr_absent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6869) /* When hotadding a new node from cpu_up(), the node should be empty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6870) if (!node_start_pfn && !node_end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6871) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6873) zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6874) zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6876) adjust_zone_range_for_zone_movable(nid, zone_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6877) node_start_pfn, node_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6878) &zone_start_pfn, &zone_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6879) nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6881) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6882) * ZONE_MOVABLE handling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6883) * Treat pages that will end up in ZONE_MOVABLE but fall within
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6884) * ZONE_NORMAL's range as absent there, and vice versa.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6885) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6886) if (mirrored_kernelcore && zone_movable_pfn[nid]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6887) unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6888) struct memblock_region *r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6890) for_each_mem_region(r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6891) start_pfn = clamp(memblock_region_memory_base_pfn(r),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6892) zone_start_pfn, zone_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6893) end_pfn = clamp(memblock_region_memory_end_pfn(r),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6894) zone_start_pfn, zone_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6896) if (zone_type == ZONE_MOVABLE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6897) memblock_is_mirror(r))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6898) nr_absent += end_pfn - start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6900) if (zone_type == ZONE_NORMAL &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6901) !memblock_is_mirror(r))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6902) nr_absent += end_pfn - start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6906) return nr_absent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6909) static void __init calculate_node_totalpages(struct pglist_data *pgdat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6910) unsigned long node_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6911) unsigned long node_end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6912) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6913) unsigned long realtotalpages = 0, totalpages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6914) enum zone_type i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6916) for (i = 0; i < MAX_NR_ZONES; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6917) struct zone *zone = pgdat->node_zones + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6918) unsigned long zone_start_pfn, zone_end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6919) unsigned long spanned, absent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6920) unsigned long size, real_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6922) spanned = zone_spanned_pages_in_node(pgdat->node_id, i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6923) node_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6924) node_end_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6925) &zone_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6926) &zone_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6927) absent = zone_absent_pages_in_node(pgdat->node_id, i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6928) node_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6929) node_end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6931) size = spanned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6932) real_size = size - absent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6934) if (size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6935) zone->zone_start_pfn = zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6936) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6937) zone->zone_start_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6938) zone->spanned_pages = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6939) zone->present_pages = real_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6941) totalpages += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6942) realtotalpages += real_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6945) pgdat->node_spanned_pages = totalpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6946) pgdat->node_present_pages = realtotalpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6947) printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6948) realtotalpages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6951) #ifndef CONFIG_SPARSEMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6952) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6953) * Calculate the size of the zone->blockflags rounded to an unsigned long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6954) * Start by making sure zonesize is a multiple of pageblock_nr_pages by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6955) * rounding up. Then use NR_PAGEBLOCK_BITS worth of bits per pageblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6956) * round the result in bits up to the nearest unsigned long, and finally
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6957) * return it in bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6958) */
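/*
 * Hypothetical example, assuming 4KiB pages, pageblock_order == 9
 * (pageblock_nr_pages == 512) and NR_PAGEBLOCK_BITS == 4: a 4GiB zone
 * starting on a pageblock boundary spans 1048576 pages, i.e. 2048
 * pageblocks.  2048 * 4 = 8192 bits, already a multiple of
 * 8 * sizeof(unsigned long) on 64-bit, so usemap_size() returns
 * 8192 / 8 = 1024 bytes.
 */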
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6959) static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6960) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6961) unsigned long usemapsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6963) zonesize += zone_start_pfn & (pageblock_nr_pages-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6964) usemapsize = roundup(zonesize, pageblock_nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6965) usemapsize = usemapsize >> pageblock_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6966) usemapsize *= NR_PAGEBLOCK_BITS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6967) usemapsize = roundup(usemapsize, 8 * sizeof(unsigned long));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6969) return usemapsize / 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6970) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6972) static void __ref setup_usemap(struct pglist_data *pgdat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6973) struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6974) unsigned long zone_start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6975) unsigned long zonesize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6976) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6977) unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6978) zone->pageblock_flags = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6979) if (usemapsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6980) zone->pageblock_flags =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6981) memblock_alloc_node(usemapsize, SMP_CACHE_BYTES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6982) pgdat->node_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6983) if (!zone->pageblock_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6984) panic("Failed to allocate %ld bytes for zone %s pageblock flags on node %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6985) usemapsize, zone->name, pgdat->node_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6988) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6989) static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6990) unsigned long zone_start_pfn, unsigned long zonesize) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6991) #endif /* CONFIG_SPARSEMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6993) #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6995) /* Initialise pageblock_order, which sets the number of pages represented by one group of NR_PAGEBLOCK_BITS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6996) void __init set_pageblock_order(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6997) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6998) unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7000) /* Check that pageblock_order has not already been set up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7001) if (pageblock_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7002) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7004) if (HPAGE_SHIFT > PAGE_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7005) order = HUGETLB_PAGE_ORDER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7006) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7007) order = MAX_ORDER - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7009) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7010) * Assume the largest contiguous order of interest is a huge page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7011) * This value may be variable depending on boot parameters on IA64 and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7012) * powerpc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7013) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7014) pageblock_order = order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7016) #else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7018) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7019) * When CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is not set, set_pageblock_order()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7020) * is unused as pageblock_order is set at compile-time. See
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7021) * include/linux/pageblock-flags.h for the values of pageblock_order based on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7022) * the kernel config
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7023) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7024) void __init set_pageblock_order(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7025) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7028) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7030) static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7031) unsigned long present_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7032) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7033) unsigned long pages = spanned_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7035) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7036) * Provide a more accurate estimation if there are holes within
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7037) * the zone and SPARSEMEM is in use. If there are holes within the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7038) * zone, each populated memory region may cost us one or two extra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7039) * memmap pages due to alignment because memmap pages for each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7040) * populated region may not be naturally aligned on a page boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7041) * So the (present_pages >> 4) heuristic is a tradeoff for that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7042) */
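/*
 * For instance (hypothetical numbers): spanned_pages = 1048576 and
 * present_pages = 917504 gives 917504 + (917504 >> 4) = 974848, which
 * is below spanned_pages, so the estimate is based on present_pages.
 */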
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7043) if (spanned_pages > present_pages + (present_pages >> 4) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7044) IS_ENABLED(CONFIG_SPARSEMEM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7045) pages = present_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7047) return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7050) #ifdef CONFIG_TRANSPARENT_HUGEPAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7051) static void pgdat_init_split_queue(struct pglist_data *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7052) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7053) struct deferred_split *ds_queue = &pgdat->deferred_split_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7054)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7055) spin_lock_init(&ds_queue->split_queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7056) INIT_LIST_HEAD(&ds_queue->split_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7057) ds_queue->split_queue_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7059) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7060) static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7061) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7063) #ifdef CONFIG_COMPACTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7064) static void pgdat_init_kcompactd(struct pglist_data *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7065) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7066) init_waitqueue_head(&pgdat->kcompactd_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7067) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7068) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7069) static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7070) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7072) static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7073) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7074) pgdat_resize_init(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7076) pgdat_init_split_queue(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7077) pgdat_init_kcompactd(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7079) init_waitqueue_head(&pgdat->kswapd_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7080) init_waitqueue_head(&pgdat->pfmemalloc_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7082) pgdat_page_ext_init(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7083) spin_lock_init(&pgdat->lru_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7084) lruvec_init(&pgdat->__lruvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7085) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7087) static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7088) unsigned long remaining_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7089) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7090) atomic_long_set(&zone->managed_pages, remaining_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7091) zone_set_nid(zone, nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7092) zone->name = zone_names[idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7093) zone->zone_pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7094) spin_lock_init(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7095) zone_seqlock_init(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7096) zone_pcp_init(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7099) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7100) * Set up the zone data structures
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7101) * - init pgdat internals
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7102) * - init all zones belonging to this node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7103) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7104) * NOTE: this function is only called during memory hotplug
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7105) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7106) #ifdef CONFIG_MEMORY_HOTPLUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7107) void __ref free_area_init_core_hotplug(int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7109) enum zone_type z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7110) pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7112) pgdat_init_internals(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7113) for (z = 0; z < MAX_NR_ZONES; z++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7114) zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7115) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7116) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7118) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7119) * Set up the zone data structures:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7120) * - mark all pages reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7121) * - mark all memory queues empty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7122) * - clear the memory bitmaps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7123) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7124) * NOTE: pgdat should get zeroed by caller.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7125) * NOTE: this function is only called during early init.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7126) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7127) static void __init free_area_init_core(struct pglist_data *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7129) enum zone_type j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7130) int nid = pgdat->node_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7132) pgdat_init_internals(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7133) pgdat->per_cpu_nodestats = &boot_nodestats;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7135) for (j = 0; j < MAX_NR_ZONES; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7136) struct zone *zone = pgdat->node_zones + j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7137) unsigned long size, freesize, memmap_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7138) unsigned long zone_start_pfn = zone->zone_start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7140) size = zone->spanned_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7141) freesize = zone->present_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7143) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7144) * Adjust freesize so that it accounts for how much memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7145) * is used by this zone for memmap. This affects the watermark
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7146) * and per-cpu initialisations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7147) */
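/*
 * As a rough, hypothetical figure: with 4KiB pages and a 64-byte
 * struct page, a 1GiB zone (262144 pages) needs 262144 * 64 bytes
 * = 16MiB of memmap, i.e. 4096 pages subtracted from freesize here.
 */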
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7148) memmap_pages = calc_memmap_size(size, freesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7149) if (!is_highmem_idx(j)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7150) if (freesize >= memmap_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7151) freesize -= memmap_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7152) if (memmap_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7153) printk(KERN_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7154) " %s zone: %lu pages used for memmap\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7155) zone_names[j], memmap_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7156) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7157) pr_warn(" %s zone: %lu pages exceeds freesize %lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7158) zone_names[j], memmap_pages, freesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7161) /* Account for reserved pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7162) if (j == 0 && freesize > dma_reserve) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7163) freesize -= dma_reserve;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7164) printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7165) zone_names[0], dma_reserve);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7168) if (!is_highmem_idx(j))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7169) nr_kernel_pages += freesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7170) /* Charge for highmem memmap if there are enough kernel pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7171) else if (nr_kernel_pages > memmap_pages * 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7172) nr_kernel_pages -= memmap_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) nr_all_pages += freesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7175) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7176) * Set an approximate value for lowmem here; it will be adjusted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7177) * when the bootmem allocator frees pages into the buddy system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7178) * And all highmem pages will be managed by the buddy system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7179) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7180) zone_init_internals(zone, j, nid, freesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7182) if (!size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7183) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7185) set_pageblock_order();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7186) setup_usemap(pgdat, zone, zone_start_pfn, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7187) init_currently_empty_zone(zone, zone_start_pfn, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7188) arch_memmap_init(size, nid, j, zone_start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7192) #ifdef CONFIG_FLAT_NODE_MEM_MAP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7193) static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7195) unsigned long __maybe_unused start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7196) unsigned long __maybe_unused offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7198) /* Skip empty nodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7199) if (!pgdat->node_spanned_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7200) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201)
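/*
 * Hypothetical example: with MAX_ORDER_NR_PAGES == 1024 and
 * node_start_pfn == 0x1234, start becomes 0x1000 and offset 0x234;
 * the map allocated below covers pages from 'start' onwards and
 * node_mem_map is pointed 'offset' entries into it.
 */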
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7202) start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7203) offset = pgdat->node_start_pfn - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7204) /* ia64 gets its own node_mem_map, before this, without bootmem */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7205) if (!pgdat->node_mem_map) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7206) unsigned long size, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7207) struct page *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7209) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7210) * The zone's endpoints aren't required to be MAX_ORDER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7211) * aligned, but the node_mem_map endpoints must be, in order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7212) * for the buddy allocator to function correctly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7213) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7214) end = pgdat_end_pfn(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7215) end = ALIGN(end, MAX_ORDER_NR_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7216) size = (end - start) * sizeof(struct page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7217) map = memblock_alloc_node(size, SMP_CACHE_BYTES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7218) pgdat->node_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7219) if (!map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7220) panic("Failed to allocate %ld bytes for node %d memory map\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7221) size, pgdat->node_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7222) pgdat->node_mem_map = map + offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7224) pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7225) __func__, pgdat->node_id, (unsigned long)pgdat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7226) (unsigned long)pgdat->node_mem_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7227) #ifndef CONFIG_NEED_MULTIPLE_NODES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7228) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7229) * With no DISCONTIG, the global mem_map is just set as node 0's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7230) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7231) if (pgdat == NODE_DATA(0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7232) mem_map = NODE_DATA(0)->node_mem_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7233) if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7234) mem_map -= offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7236) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7238) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7239) static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7240) #endif /* CONFIG_FLAT_NODE_MEM_MAP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7242) #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7243) static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7244) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7245) pgdat->first_deferred_pfn = ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7247) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7248) static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7249) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7251) static void __init free_area_init_node(int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7252) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7253) pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7254) unsigned long start_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7255) unsigned long end_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7257) /* pg_data_t should be reset to zero when it's allocated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7258) WARN_ON(pgdat->nr_zones || pgdat->kswapd_highest_zoneidx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7260) get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7262) pgdat->node_id = nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7263) pgdat->node_start_pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7264) pgdat->per_cpu_nodestats = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7266) pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7267) (u64)start_pfn << PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7268) end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7269) calculate_node_totalpages(pgdat, start_pfn, end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7271) alloc_node_mem_map(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7272) pgdat_set_deferred_range(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7274) free_area_init_core(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7277) void __init free_area_init_memoryless_node(int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7279) free_area_init_node(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7280) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7282) #if MAX_NUMNODES > 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7283) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7284) * Figure out the number of possible node ids.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7285) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7286) void __init setup_nr_node_ids(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7287) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7288) unsigned int highest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7290) highest = find_last_bit(node_possible_map.bits, MAX_NUMNODES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7291) nr_node_ids = highest + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7293) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7295) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7296) * node_map_pfn_alignment - determine the maximum internode alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7297) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7298) * This function should be called after node map is populated and sorted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7299) * It calculates the maximum power of two alignment which can distinguish
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7300) * all the nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7301) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7302) * For example, if all nodes are 1GiB and aligned to 1GiB, the return value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7303) * would indicate 1GiB alignment with (1 << (30 - PAGE_SHIFT)). If the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7304) * nodes are shifted by 256MiB, the result is 256MiB. Note that if only the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7305) * last node is shifted, 1GiB is enough and this function will indicate so.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7306) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7307) * This is used to test whether pfn -> nid mapping of the chosen memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7308) * model has fine enough granularity to avoid incorrect mapping for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7309) * populated node map.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7310) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7311) * Return: the determined alignment in pfn's. 0 if there is no alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7312) * requirement (single node).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7313) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7314) unsigned long __init node_map_pfn_alignment(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7315) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7316) unsigned long accl_mask = 0, last_end = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7317) unsigned long start, end, mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7318) int last_nid = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7319) int i, nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7321) for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7322) if (!start || last_nid < 0 || last_nid == nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7323) last_nid = nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7324) last_end = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7325) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7328) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7329) * Start with a mask granular enough to pin-point to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7330) * start pfn and tick off bits one-by-one until it becomes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7331) * too coarse to separate the current node from the last.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7332) */
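/*
 * Concretely: (start & (mask << 1)) is start rounded down to the next
 * coarser power-of-two boundary; as long as the previous node's end
 * does not reach past that boundary, the coarser granularity still
 * separates the two nodes and the mask can keep growing.
 */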
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7333) mask = ~((1 << __ffs(start)) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7334) while (mask && last_end <= (start & (mask << 1)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7335) mask <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7337) /* accumulate all internode masks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7338) accl_mask |= mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7341) /* convert mask to number of pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7342) return ~accl_mask + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7345) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7346) * find_min_pfn_with_active_regions - Find the minimum PFN registered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7347) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7348) * Return: the minimum PFN based on information provided via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7349) * memblock_set_node().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7350) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7351) unsigned long __init find_min_pfn_with_active_regions(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7352) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7353) return PHYS_PFN(memblock_start_of_DRAM());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7356) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7357) * early_calculate_totalpages()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7358) * Sum pages in active regions for movable zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7359) * Populate N_MEMORY for calculating usable_nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7360) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7361) static unsigned long __init early_calculate_totalpages(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7362) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7363) unsigned long totalpages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7364) unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7365) int i, nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7367) for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7368) unsigned long pages = end_pfn - start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7370) totalpages += pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7371) if (pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7372) node_set_state(nid, N_MEMORY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7374) return totalpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7375) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7377) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7378) * Find the PFN at which ZONE_MOVABLE begins in each node. Kernel memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7379) * is spread evenly between nodes as long as the nodes have enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7380) * memory. When they don't, some nodes will have more kernelcore than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7381) * others.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7382) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7383) static void __init find_zone_movable_pfns_for_nodes(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7384) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7385) int i, nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7386) unsigned long usable_startpfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7387) unsigned long kernelcore_node, kernelcore_remaining;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7388) /* save the state before borrowing the nodemask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7389) nodemask_t saved_node_state = node_states[N_MEMORY];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7390) unsigned long totalpages = early_calculate_totalpages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7391) int usable_nodes = nodes_weight(node_states[N_MEMORY]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7392) struct memblock_region *r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7394) /* Need to find movable_zone earlier when movable_node is specified. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7395) find_usable_zone_for_movable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7397) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7398) * If movable_node is specified, ignore kernelcore and movablecore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7399) * options.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7400) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7401) if (movable_node_is_enabled()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7402) for_each_mem_region(r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7403) if (!memblock_is_hotpluggable(r))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7404) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7406) nid = memblock_get_region_node(r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7408) usable_startpfn = PFN_DOWN(r->base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7409) zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7410) min(usable_startpfn, zone_movable_pfn[nid]) :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7411) usable_startpfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7414) goto out2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7417) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7418) * If kernelcore=mirror is specified, ignore the movablecore option.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7419) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7420) if (mirrored_kernelcore) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7421) bool mem_below_4gb_not_mirrored = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7423) for_each_mem_region(r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7424) if (memblock_is_mirror(r))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7425) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7427) nid = memblock_get_region_node(r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7429) usable_startpfn = memblock_region_memory_base_pfn(r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7430)
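/*
 * 0x100000 PFNs equals 4GiB with 4KiB pages: non-mirrored regions
 * below that mark are not used for ZONE_MOVABLE placement here; they
 * only trigger the "unmirrored kernel memory" warning below.
 */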
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7431) if (usable_startpfn < 0x100000) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7432) mem_below_4gb_not_mirrored = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7433) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7436) zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7437) min(usable_startpfn, zone_movable_pfn[nid]) :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7438) usable_startpfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7441) if (mem_below_4gb_not_mirrored)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7442) pr_warn("This configuration results in unmirrored kernel memory.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7444) goto out2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7447) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7448) * If kernelcore=nn% or movablecore=nn% was specified, calculate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7449) * amount of necessary memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7450) */
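/*
 * Hypothetical example: kernelcore=30% on a machine with 4194304
 * total pages yields (4194304 * 100 * 30) / 10000 = 1258291 pages of
 * required_kernelcore.
 */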
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7451) if (required_kernelcore_percent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7452) required_kernelcore = (totalpages * 100 * required_kernelcore_percent) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7453) 10000UL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7454) if (required_movablecore_percent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7455) required_movablecore = (totalpages * 100 * required_movablecore_percent) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7456) 10000UL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7458) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7459) * If movablecore= was specified, calculate the size of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7460) * kernelcore it corresponds to so that memory usable for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7461) * any allocation type is evenly spread. If both kernelcore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7462) * and movablecore are specified, then the value of kernelcore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7463) * will be used for required_kernelcore if it's greater than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7464) * what movablecore would have allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7465) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7466) if (required_movablecore) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7467) unsigned long corepages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7469) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7470) * Round-up so that ZONE_MOVABLE is at least as large as what
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7471) * was requested by the user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7472) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7473) required_movablecore =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7474) roundup(required_movablecore, MAX_ORDER_NR_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7475) required_movablecore = min(totalpages, required_movablecore);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7476) corepages = totalpages - required_movablecore;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7478) required_kernelcore = max(required_kernelcore, corepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7479) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7481) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7482) * If kernelcore was not specified or kernelcore size is larger
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7483) * than totalpages, there is no ZONE_MOVABLE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7484) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7485) if (!required_kernelcore || required_kernelcore >= totalpages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7486) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7488) /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7489) usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7491) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7492) /* Spread kernelcore memory as evenly as possible throughout nodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7493) kernelcore_node = required_kernelcore / usable_nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7494) for_each_node_state(nid, N_MEMORY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7495) unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7497) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7498) * Recalculate kernelcore_node if the division per node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7499) * now exceeds what is necessary to satisfy the requested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7500) * amount of memory for the kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7501) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7502) if (required_kernelcore < kernelcore_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7503) kernelcore_node = required_kernelcore / usable_nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7505) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7506) * As the map is walked, we track how much memory is usable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7507) * by the kernel using kernelcore_remaining. When it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7508) * 0, the rest of the node is usable by ZONE_MOVABLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7509) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7510) kernelcore_remaining = kernelcore_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7512) /* Go through each range of PFNs within this node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7513) for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7514) unsigned long size_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7516) start_pfn = max(start_pfn, zone_movable_pfn[nid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7517) if (start_pfn >= end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7518) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7520) /* Account for what is only usable for kernelcore */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7521) if (start_pfn < usable_startpfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7522) unsigned long kernel_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7523) kernel_pages = min(end_pfn, usable_startpfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7524) - start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7526) kernelcore_remaining -= min(kernel_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7527) kernelcore_remaining);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7528) required_kernelcore -= min(kernel_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7529) required_kernelcore);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7531) /* Continue if range is now fully accounted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7532) if (end_pfn <= usable_startpfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7534) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7535) * Push zone_movable_pfn to the end so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7536) * that if we have to rebalance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7537) * kernelcore across nodes, we will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7538) * not double account here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7539) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7540) zone_movable_pfn[nid] = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7541) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7543) start_pfn = usable_startpfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7544) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7546) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7547) * The usable PFN range for ZONE_MOVABLE is from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7548) * start_pfn->end_pfn. Calculate size_pages as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7549) * number of pages used as kernelcore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7550) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7551) size_pages = end_pfn - start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7552) if (size_pages > kernelcore_remaining)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7553) size_pages = kernelcore_remaining;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7554) zone_movable_pfn[nid] = start_pfn + size_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7556) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7557) * Some kernelcore has been met, update counts and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7558) * break if the kernelcore for this node has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7559) * satisfied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7560) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7561) required_kernelcore -= min(required_kernelcore,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7562) size_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7563) kernelcore_remaining -= size_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7564) if (!kernelcore_remaining)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7565) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7569) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7570) * If there is still required_kernelcore, we do another pass with one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7571) * less node in the count. This will push zone_movable_pfn[nid] further
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7572) * along on the nodes that still have memory until kernelcore is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7573) * satisfied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7574) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7575) usable_nodes--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7576) if (usable_nodes && required_kernelcore > usable_nodes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7577) goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7579) out2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7580) /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7581) for (nid = 0; nid < MAX_NUMNODES; nid++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7582) unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7584) zone_movable_pfn[nid] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7585) roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7587) get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7588) if (zone_movable_pfn[nid] >= end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7589) zone_movable_pfn[nid] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7592) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7593) /* restore the node_state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7594) node_states[N_MEMORY] = saved_node_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7597) /* Any regular or high memory on that node? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7598) static void check_for_memory(pg_data_t *pgdat, int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7599) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7600) enum zone_type zone_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7602) for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7603) struct zone *zone = &pgdat->node_zones[zone_type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7604) if (populated_zone(zone)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7605) if (IS_ENABLED(CONFIG_HIGHMEM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7606) node_set_state(nid, N_HIGH_MEMORY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7607) if (zone_type <= ZONE_NORMAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7608) node_set_state(nid, N_NORMAL_MEMORY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7609) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7612) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7614) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7615) * Some architectures, e.g. ARC, may have ZONE_HIGHMEM below ZONE_NORMAL. For
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7616) * such cases we allow max_zone_pfn to be sorted in descending order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7617) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7618) bool __weak arch_has_descending_max_zone_pfns(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7619) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7620) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7623) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7624) * free_area_init - Initialise all pg_data_t and zone data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7625) * @max_zone_pfn: an array of max PFNs for each zone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7626) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7627) * This will call free_area_init_node() for each active node in the system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7628) * Using the page ranges provided by memblock_set_node(), the size of each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7629) * zone in each node and their holes is calculated. If the maximum PFNs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7630) * of two adjacent zones match, the higher zone is assumed to be empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7631) * For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7632) * that arch_max_dma32_pfn has no pages. It is also assumed that a zone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7633) * starts where the previous one ended. For example, ZONE_DMA32 starts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7634) * at arch_max_dma_pfn.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7635) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7636) void __init free_area_init(unsigned long *max_zone_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7637) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7638) unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7639) int i, nid, zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7640) bool descending;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7642) /* Record where the zone boundaries are */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7643) memset(arch_zone_lowest_possible_pfn, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7644) sizeof(arch_zone_lowest_possible_pfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7645) memset(arch_zone_highest_possible_pfn, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7646) sizeof(arch_zone_highest_possible_pfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7648) start_pfn = find_min_pfn_with_active_regions();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7649) descending = arch_has_descending_max_zone_pfns();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7650)
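/*
 * Walk the zones low to high (or high to low on descending layouts),
 * handing each zone the PFN range [start_pfn, max_zone_pfn[zone]) and
 * starting the next zone where this one ended.  Purely illustrative
 * layout: max_zone_pfn = { 4096, 1048576, 4194304 } would give
 * ZONE_DMA [min_pfn, 4096), ZONE_DMA32 [4096, 1048576) and
 * ZONE_NORMAL [1048576, 4194304).
 */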
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7651) for (i = 0; i < MAX_NR_ZONES; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7652) if (descending)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7653) zone = MAX_NR_ZONES - i - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7654) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7655) zone = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7657) if (zone == ZONE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7658) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7660) end_pfn = max(max_zone_pfn[zone], start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7661) arch_zone_lowest_possible_pfn[zone] = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7662) arch_zone_highest_possible_pfn[zone] = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7664) start_pfn = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7667) /* Find the PFNs that ZONE_MOVABLE begins at in each node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7668) memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7669) find_zone_movable_pfns_for_nodes();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7671) /* Print out the zone ranges */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7672) pr_info("Zone ranges:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7673) for (i = 0; i < MAX_NR_ZONES; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7674) if (i == ZONE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7675) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7676) pr_info(" %-8s ", zone_names[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7677) if (arch_zone_lowest_possible_pfn[i] ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7678) arch_zone_highest_possible_pfn[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7679) pr_cont("empty\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7680) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7681) pr_cont("[mem %#018Lx-%#018Lx]\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7682) (u64)arch_zone_lowest_possible_pfn[i]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7683) << PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7684) ((u64)arch_zone_highest_possible_pfn[i]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7685) << PAGE_SHIFT) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7688) /* Print out the PFNs ZONE_MOVABLE begins at in each node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7689) pr_info("Movable zone start for each node\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7690) for (i = 0; i < MAX_NUMNODES; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7691) if (zone_movable_pfn[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7692) pr_info(" Node %d: %#018Lx\n", i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7693) (u64)zone_movable_pfn[i] << PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7694) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7696) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7697) * Print out the early node map, and initialize the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7698) * subsection-map relative to active online memory ranges to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7699) * enable future "sub-section" extensions of the memory map.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7700) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7701) pr_info("Early memory node ranges\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7702) for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7703) pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7704) (u64)start_pfn << PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7705) ((u64)end_pfn << PAGE_SHIFT) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7706) subsection_map_init(start_pfn, end_pfn - start_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7707) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7709) /* Initialise every node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7710) mminit_verify_pageflags_layout();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7711) setup_nr_node_ids();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7712) for_each_online_node(nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7713) pg_data_t *pgdat = NODE_DATA(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7714) free_area_init_node(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7716) /* Any memory on that node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7717) if (pgdat->node_present_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7718) node_set_state(nid, N_MEMORY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7719) check_for_memory(pgdat, nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7722) memmap_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7725) static int __init cmdline_parse_core(char *p, unsigned long *core,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7726) unsigned long *percent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7727) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7728) unsigned long long coremem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7729) char *endptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7731) if (!p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7732) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7734) /* Value may be a percentage of total memory, otherwise bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7735) coremem = simple_strtoull(p, &endptr, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7736) if (*endptr == '%') {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7737) /* Paranoid check for percent values greater than 100 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7738) WARN_ON(coremem > 100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7740) *percent = coremem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7741) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7742) coremem = memparse(p, &p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7743) /* Paranoid check that UL is enough for the coremem value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7744) WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7746) *core = coremem >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7747) *percent = 0UL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7748) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7749) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7750) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7752) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7753) * kernelcore=size sets the amount of memory for use by allocations that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7754) * cannot be reclaimed or migrated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7755) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7756) static int __init cmdline_parse_kernelcore(char *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7757) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7758) /* parse kernelcore=mirror */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7759) if (parse_option_str(p, "mirror")) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7760) mirrored_kernelcore = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7761) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7764) return cmdline_parse_core(p, &required_kernelcore,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7765) &required_kernelcore_percent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7766) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7768) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7769) * movablecore=size sets the amount of memory for use by allocations that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7770) * can be reclaimed or migrated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7771) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7772) static int __init cmdline_parse_movablecore(char *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7773) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7774) return cmdline_parse_core(p, &required_movablecore,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7775) &required_movablecore_percent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7776) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7777)
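/*
 * Both options accept either an absolute size, with the usual memparse()
 * suffixes, or a percentage of total memory, e.g. "kernelcore=512M" or
 * "movablecore=10%" (the values here are purely illustrative). The special
 * form "kernelcore=mirror" instead asks for kernel allocations to be kept
 * in mirrored memory regions.
 */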
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7778) early_param("kernelcore", cmdline_parse_kernelcore);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7779) early_param("movablecore", cmdline_parse_movablecore);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7781) void adjust_managed_page_count(struct page *page, long count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7783) atomic_long_add(count, &page_zone(page)->managed_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7784) totalram_pages_add(count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7785) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7786) if (PageHighMem(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7787) totalhigh_pages_add(count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7788) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7789) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7790) EXPORT_SYMBOL(adjust_managed_page_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7791)
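/*
 * Free the reserved range [start, end) back to the page allocator. Each page
 * is optionally filled with @poison first (values outside 0..0xFF, e.g. a
 * negative poison, skip the memset), and @s, when non-NULL, names the region
 * in the "Freeing ... memory" log line. Returns the number of pages released.
 */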
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7792) unsigned long free_reserved_area(void *start, void *end, int poison, const char *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7793) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7794) void *pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7795) unsigned long pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7797) start = (void *)PAGE_ALIGN((unsigned long)start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7798) end = (void *)((unsigned long)end & PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7799) for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7800) struct page *page = virt_to_page(pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7801) void *direct_map_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7803) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7804) * 'direct_map_addr' might be different from 'pos'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7805) * because some architectures' virt_to_page()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7806) * work with aliases. Getting the direct map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7807) * address ensures that we get a _writeable_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7808) * alias for the memset().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7809) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7810) direct_map_addr = page_address(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7811) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7812) * Perform a kasan-unchecked memset() since this memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7813) * has not been initialized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7814) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7815) direct_map_addr = kasan_reset_tag(direct_map_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7816) if ((unsigned int)poison <= 0xFF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7817) memset(direct_map_addr, poison, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7819) free_reserved_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7820) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7822) if (pages && s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7823) pr_info("Freeing %s memory: %ldK\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7824) s, pages << (PAGE_SHIFT - 10));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7826) return pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7829) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7830) void free_highmem_page(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7831) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7832) __free_reserved_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7833) totalram_pages_inc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7834) atomic_long_inc(&page_zone(page)->managed_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7835) totalhigh_pages_inc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7837) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7840) void __init mem_init_print_info(const char *str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7841) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7842) unsigned long physpages, codesize, datasize, rosize, bss_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7843) unsigned long init_code_size, init_data_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7845) physpages = get_num_physpages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7846) codesize = _etext - _stext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7847) datasize = _edata - _sdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7848) rosize = __end_rodata - __start_rodata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7849) bss_size = __bss_stop - __bss_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7850) init_data_size = __init_end - __init_begin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7851) init_code_size = _einittext - _sinittext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7853) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7854) * Detect special cases and adjust section sizes accordingly:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7855) * 1) .init.* may be embedded into .data sections
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7856) * 2) .init.text.* may be out of [__init_begin, __init_end],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7857) * please refer to arch/tile/kernel/vmlinux.lds.S.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7858) * 3) .rodata.* may be embedded into .text or .data sections.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7859) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7860) #define adj_init_size(start, end, size, pos, adj) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7861) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7862) if (&start[0] <= &pos[0] && &pos[0] < &end[0] && size > adj) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7863) size -= adj; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7864) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7866) adj_init_size(__init_begin, __init_end, init_data_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7867) _sinittext, init_code_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7868) adj_init_size(_stext, _etext, codesize, _sinittext, init_code_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7869) adj_init_size(_sdata, _edata, datasize, __init_begin, init_data_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7870) adj_init_size(_stext, _etext, codesize, __start_rodata, rosize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7871) adj_init_size(_sdata, _edata, datasize, __start_rodata, rosize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7873) #undef adj_init_size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7875) pr_info("Memory: %luK/%luK available (%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7876) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7877) ", %luK highmem"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7878) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7879) "%s%s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7880) nr_free_pages() << (PAGE_SHIFT - 10),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7881) physpages << (PAGE_SHIFT - 10),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7882) codesize >> 10, datasize >> 10, rosize >> 10,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7883) (init_data_size + init_code_size) >> 10, bss_size >> 10,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7884) (physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7885) totalcma_pages << (PAGE_SHIFT - 10),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7886) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7887) totalhigh_pages() << (PAGE_SHIFT - 10),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7888) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7889) str ? ", " : "", str ? str : "");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7892) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7893) * set_dma_reserve - set the specified number of pages reserved in the first zone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7894) * @new_dma_reserve: The number of pages to mark reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7895) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7896) * The per-cpu batchsize and zone watermarks are determined by managed_pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7897) * In the DMA zone, a significant percentage may be consumed by kernel image
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7898) * and other unfreeable allocations which can skew the watermarks badly. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7899) * function may optionally be used to account for unfreeable pages in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7900) * first zone (e.g., ZONE_DMA). The effect will be lower watermarks and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7901) * smaller per-cpu batchsize.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7902) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7903) void __init set_dma_reserve(unsigned long new_dma_reserve)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7905) dma_reserve = new_dma_reserve;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7907)
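/*
 * CPU hotplug "dead" callback: once a CPU has gone offline, drain its LRU
 * caches and per-cpu page lists, and fold its vm event and vmstat deltas
 * into the remaining CPUs so nothing stays stranded on the dead CPU.
 */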
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7908) static int page_alloc_cpu_dead(unsigned int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7909) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7911) lru_add_drain_cpu(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7912) drain_pages(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7914) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7915) * Spill the event counters of the dead processor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7916) * into the current processor's event counters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7917) * This artificially elevates the count of the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7918) * processor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7919) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7920) vm_events_fold_cpu(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7922) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7923) * Zero the differential counters of the dead processor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7924) * so that the vm statistics are consistent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7925) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7926) * This is only okay since the processor is dead and cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7927) * race with what we are doing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7928) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7929) cpu_vm_stats_fold(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7930) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7931) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7933) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7934) int hashdist = HASHDIST_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7936) static int __init set_hashdist(char *str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7937) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7938) if (!str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7939) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7940) hashdist = simple_strtoul(str, &str, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7941) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7943) __setup("hashdist=", set_hashdist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7944) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7945)
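/*
 * Early page-allocator init: disable hashdist when only a single node has
 * memory, and register the CPU hotplug "dead" callback above.
 */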
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7946) void __init page_alloc_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7947) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7948) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7950) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7951) if (num_node_state(N_MEMORY) == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7952) hashdist = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7953) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7955) ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7956) "mm/page_alloc:dead", NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7957) page_alloc_cpu_dead);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7958) WARN_ON(ret < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7959) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7961) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7962) * calculate_totalreserve_pages - called when sysctl_lowmem_reserve_ratio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7963) * or min_free_kbytes changes.
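*
* For each node this sums, over all zones, the largest lowmem_reserve[]
* entry plus the high watermark (capped at the zone's managed pages) and
* caches the result in pgdat->totalreserve_pages and the global
* totalreserve_pages.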
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7964) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7965) static void calculate_totalreserve_pages(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7966) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7967) struct pglist_data *pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7968) unsigned long reserve_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7969) enum zone_type i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7971) for_each_online_pgdat(pgdat) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7973) pgdat->totalreserve_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7975) for (i = 0; i < MAX_NR_ZONES; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7976) struct zone *zone = pgdat->node_zones + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7977) long max = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7978) unsigned long managed_pages = zone_managed_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7980) /* Find valid and maximum lowmem_reserve in the zone */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7981) for (j = i; j < MAX_NR_ZONES; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7982) if (zone->lowmem_reserve[j] > max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7983) max = zone->lowmem_reserve[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7986) /* we treat the high watermark as reserved pages. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7987) max += high_wmark_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7989) if (max > managed_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7990) max = managed_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7992) pgdat->totalreserve_pages += max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7994) reserve_pages += max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7997) totalreserve_pages = reserve_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8000) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8001) * setup_per_zone_lowmem_reserve - called whenever
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8002) * sysctl_lowmem_reserve_ratio changes. Ensures that each zone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8003) * has correct lowmem_reserve[] values, so an adequate number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8004) * pages are left in the zone after a successful __alloc_pages().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8005) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8006) static void setup_per_zone_lowmem_reserve(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8007) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8008) struct pglist_data *pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8009) enum zone_type i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8011) for_each_online_pgdat(pgdat) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8012) for (i = 0; i < MAX_NR_ZONES - 1; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8013) struct zone *zone = &pgdat->node_zones[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8014) int ratio = sysctl_lowmem_reserve_ratio[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8015) bool clear = !ratio || !zone_managed_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8016) unsigned long managed_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8018) for (j = i + 1; j < MAX_NR_ZONES; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8019) struct zone *upper_zone = &pgdat->node_zones[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8021) managed_pages += zone_managed_pages(upper_zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8023) if (clear)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8024) zone->lowmem_reserve[j] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8025) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8026) zone->lowmem_reserve[j] = managed_pages / ratio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8031) /* update totalreserve_pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8032) calculate_totalreserve_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8034)
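/*
 * Distribute min_free_kbytes (and, via pages_low, extra_free_kbytes) across
 * all zones in proportion to each zone's managed pages, then derive the
 * min/low/high watermarks from that share plus the watermark_scale_factor
 * term. Highmem zones only get a small, capped WMARK_MIN since __GFP_HIGH
 * and PF_MEMALLOC allocations usually do not need highmem.
 */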
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8035) static void __setup_per_zone_wmarks(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8036) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8037) unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8038) unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8039) unsigned long lowmem_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8040) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8041) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8043) /* Calculate total number of !ZONE_HIGHMEM pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8044) for_each_zone(zone) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8045) if (!is_highmem(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8046) lowmem_pages += zone_managed_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8047) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8049) for_each_zone(zone) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8050) u64 tmp, low;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8052) spin_lock_irqsave(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8053) tmp = (u64)pages_min * zone_managed_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8054) do_div(tmp, lowmem_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8055) low = (u64)pages_low * zone_managed_pages(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8056) do_div(low, nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8057) if (is_highmem(zone)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8058) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8059) * __GFP_HIGH and PF_MEMALLOC allocations usually don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8060) * need highmem pages, so cap pages_min to a small
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8061) * value here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8062) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8063) * The (WMARK_HIGH - WMARK_LOW) and (WMARK_LOW - WMARK_MIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8064) * deltas control async page reclaim, and so should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8065) * not be capped for highmem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8066) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8067) unsigned long min_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8069) min_pages = zone_managed_pages(zone) / 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8070) min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8071) zone->_watermark[WMARK_MIN] = min_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8072) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8073) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8074) * If it's a lowmem zone, reserve a number of pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8075) * proportionate to the zone's size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8076) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8077) zone->_watermark[WMARK_MIN] = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8078) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8080) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8081) * Set the kswapd watermarks distance according to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8082) * scale factor in proportion to available memory, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8083) * ensure a minimum size on small systems.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8084) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8085) tmp = max_t(u64, tmp >> 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8086) mult_frac(zone_managed_pages(zone),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8087) watermark_scale_factor, 10000));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8089) zone->watermark_boost = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8090) zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) + low + tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8091) zone->_watermark[WMARK_HIGH] = min_wmark_pages(zone) + low + tmp * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8093) spin_unlock_irqrestore(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8096) /* update totalreserve_pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8097) calculate_totalreserve_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8098) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8100) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8101) * setup_per_zone_wmarks - called when min_free_kbytes changes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8102) * or when memory is hot-{added|removed}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8103) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8104) * Ensures that the watermark[min,low,high] values for each zone are set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8105) * correctly with respect to min_free_kbytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8106) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8107) void setup_per_zone_wmarks(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8108) {
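/* Serialize concurrent updates (sysctl writes, memory hotplug, init). */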
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8109) static DEFINE_SPINLOCK(lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8111) spin_lock(&lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8112) __setup_per_zone_wmarks();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8113) spin_unlock(&lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8116) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8117) * Initialise min_free_kbytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8118) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8119) * For small machines we want it small (128k min). For large machines
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8120) * we want it large (256MB max). But it is not linear, because network
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8121) * bandwidth does not increase linearly with machine size. We use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8122) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8123) * min_free_kbytes = 4 * sqrt(lowmem_kbytes), for better accuracy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8124) * min_free_kbytes = sqrt(lowmem_kbytes * 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8125) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8126) * which yields
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8127) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8128) * 16MB: 512k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8129) * 32MB: 724k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8130) * 64MB: 1024k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8131) * 128MB: 1448k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8132) * 256MB: 2048k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8133) * 512MB: 2896k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8134) * 1024MB: 4096k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8135) * 2048MB: 5792k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8136) * 4096MB: 8192k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8137) * 8192MB: 11584k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8138) * 16384MB: 16384k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8139) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8140) int __meminit init_per_zone_wmark_min(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8141) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8142) unsigned long lowmem_kbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8143) int new_min_free_kbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8145) lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8146) new_min_free_kbytes = int_sqrt(lowmem_kbytes * 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8148) if (new_min_free_kbytes > user_min_free_kbytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8149) min_free_kbytes = new_min_free_kbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8150) if (min_free_kbytes < 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8151) min_free_kbytes = 128;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8152) if (min_free_kbytes > 262144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8153) min_free_kbytes = 262144;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8154) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8155) pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8156) new_min_free_kbytes, user_min_free_kbytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8158) setup_per_zone_wmarks();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8159) refresh_zone_stat_thresholds();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8160) setup_per_zone_lowmem_reserve();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8162) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8163) setup_min_unmapped_ratio();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8164) setup_min_slab_ratio();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8165) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8167) khugepaged_min_free_kbytes_update();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8169) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8171) postcore_initcall(init_per_zone_wmark_min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8173) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8174) * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec_minmax()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8175) * so that we can recompute the per-zone watermarks whenever min_free_kbytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8176) * or extra_free_kbytes changes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8177) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8178) int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8179) void *buffer, size_t *length, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8181) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8183) rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8184) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8185) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8187) if (write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8188) user_min_free_kbytes = min_free_kbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8189) setup_per_zone_wmarks();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8191) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8194) int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8195) void *buffer, size_t *length, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8197) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8199) rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8200) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8201) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8203) if (write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8204) setup_per_zone_wmarks();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8206) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8209) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8210) static void setup_min_unmapped_ratio(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8211) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8212) pg_data_t *pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8213) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8215) for_each_online_pgdat(pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8216) pgdat->min_unmapped_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8218) for_each_zone(zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8219) zone->zone_pgdat->min_unmapped_pages += (zone_managed_pages(zone) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8220) sysctl_min_unmapped_ratio) / 100;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8221) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8224) int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8225) void *buffer, size_t *length, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8226) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8227) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8229) rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8230) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8231) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8233) setup_min_unmapped_ratio();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8235) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8238) static void setup_min_slab_ratio(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8239) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8240) pg_data_t *pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8241) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8243) for_each_online_pgdat(pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8244) pgdat->min_slab_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8246) for_each_zone(zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8247) zone->zone_pgdat->min_slab_pages += (zone_managed_pages(zone) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8248) sysctl_min_slab_ratio) / 100;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8251) int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8252) void *buffer, size_t *length, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8253) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8254) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8256) rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8257) if (rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8258) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8260) setup_min_slab_ratio();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8262) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8264) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8266) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8267) * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8268) * proc_dointvec_minmax() so that we can call setup_per_zone_lowmem_reserve()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8269) * whenever sysctl_lowmem_reserve_ratio changes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8270) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8271) * The reserve ratio has no relation to the minimum watermarks; it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8272) * only meaningful in relation to the zone sizes that were established
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8273) * at boot time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8274) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8275) int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8276) void *buffer, size_t *length, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8277) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8278) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8280) proc_dointvec_minmax(table, write, buffer, length, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8282) for (i = 0; i < MAX_NR_ZONES; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8283) if (sysctl_lowmem_reserve_ratio[i] < 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8284) sysctl_lowmem_reserve_ratio[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8287) setup_per_zone_lowmem_reserve();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8288) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8290)
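/* Recalculate pcp->high and pcp->batch for every per-cpu pageset of @zone. */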
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8291) static void __zone_pcp_update(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8292) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8293) unsigned int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8295) for_each_possible_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8296) pageset_set_high_and_batch(zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8297) per_cpu_ptr(zone->pageset, cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8300) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8301) * percpu_pagelist_fraction - changes the pcp->high for each zone on each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8302) * cpu. It is the fraction of total pages in each zone that a hot per-cpu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8303) * pagelist can hold before it gets flushed back to the buddy allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8304) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8305) int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8306) void *buffer, size_t *length, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8307) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8308) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8309) int old_percpu_pagelist_fraction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8310) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8312) mutex_lock(&pcp_batch_high_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8313) old_percpu_pagelist_fraction = percpu_pagelist_fraction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8315) ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8316) if (!write || ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8317) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8319) /* Sanity checking to avoid pcp imbalance */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8320) if (percpu_pagelist_fraction &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8321) percpu_pagelist_fraction < MIN_PERCPU_PAGELIST_FRACTION) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8322) percpu_pagelist_fraction = old_percpu_pagelist_fraction;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8323) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8324) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8327) /* No change? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8328) if (percpu_pagelist_fraction == old_percpu_pagelist_fraction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8329) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8331) for_each_populated_zone(zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8332) __zone_pcp_update(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8333) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8334) mutex_unlock(&pcp_batch_high_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8335) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8338) #ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8339) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8340) * Returns the number of pages that the architecture has reserved but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8341) * that are not known to alloc_large_system_hash().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8342) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8343) static unsigned long __init arch_reserved_kernel_pages(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8344) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8345) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8346) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8347) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8349) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8350) * Adaptive scale is meant to reduce the sizes of hash tables on large-memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8351) * machines. As memory size is increased, the scale is also increased, but at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8352) * a slower pace. Starting from ADAPT_SCALE_BASE (64G), every time memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8353) * quadruples the scale is increased by one, which means the size of the hash table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8354) * only doubles, instead of quadrupling as well.
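* (For example, going from just over 64G to just over 256G of memory
* quadruples the number of pages but raises the scale by one, so the hash
* table ends up only about twice as large.)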
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8355) * Because 32-bit systems cannot have large physical memory, where this scaling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8356) * makes sense, it is disabled on such platforms.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8357) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8358) #if __BITS_PER_LONG > 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8359) #define ADAPT_SCALE_BASE (64ul << 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8360) #define ADAPT_SCALE_SHIFT 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8361) #define ADAPT_SCALE_NPAGES (ADAPT_SCALE_BASE >> PAGE_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8362) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8364) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8365) * allocate a large system hash table from bootmem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8366) * - it is assumed that the hash table must contain an exact power-of-2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8367) * quantity of entries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8368) * - limit is the number of hash buckets, not the total allocation size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8369) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8370) void *__init alloc_large_system_hash(const char *tablename,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8371) unsigned long bucketsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8372) unsigned long numentries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8373) int scale,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8374) int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8375) unsigned int *_hash_shift,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8376) unsigned int *_hash_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8377) unsigned long low_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8378) unsigned long high_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8379) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8380) unsigned long long max = high_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8381) unsigned long log2qty, size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8382) void *table = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8383) gfp_t gfp_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8384) bool virt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8386) /* allow the kernel cmdline to have a say */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8387) if (!numentries) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8388) /* round applicable memory size up to nearest megabyte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8389) numentries = nr_kernel_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8390) numentries -= arch_reserved_kernel_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8392) /* It isn't necessary when PAGE_SIZE >= 1MB */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8393) if (PAGE_SHIFT < 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8394) numentries = round_up(numentries, (1<<20)/PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8396) #if __BITS_PER_LONG > 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8397) if (!high_limit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8398) unsigned long adapt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8400) for (adapt = ADAPT_SCALE_NPAGES; adapt < numentries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8401) adapt <<= ADAPT_SCALE_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8402) scale++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8404) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8406) /* limit to 1 bucket per 2^scale bytes of low memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8407) if (scale > PAGE_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8408) numentries >>= (scale - PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8409) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8410) numentries <<= (PAGE_SHIFT - scale);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8412) /* Make sure we've got at least a 0-order allocation.. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8413) if (unlikely(flags & HASH_SMALL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8414) /* Makes no sense without HASH_EARLY */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8415) WARN_ON(!(flags & HASH_EARLY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8416) if (!(numentries >> *_hash_shift)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8417) numentries = 1UL << *_hash_shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8418) BUG_ON(!numentries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8419) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8420) } else if (unlikely((numentries * bucketsize) < PAGE_SIZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8421) numentries = PAGE_SIZE / bucketsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8423) numentries = roundup_pow_of_two(numentries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8425) /* limit allocation size to 1/16 total memory by default */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8426) if (max == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8427) max = ((unsigned long long)nr_all_pages << PAGE_SHIFT) >> 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8428) do_div(max, bucketsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8430) max = min(max, 0x80000000ULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8432) if (numentries < low_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8433) numentries = low_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8434) if (numentries > max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8435) numentries = max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8437) log2qty = ilog2(numentries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8439) gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC;
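/*
 * Try progressively smaller tables until an allocation succeeds: early
 * (HASH_EARLY) tables come from memblock, very large or hashdist tables
 * from vmalloc, and everything else from the buddy allocator via
 * alloc_pages_exact(). Each retry halves the number of entries.
 */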
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8440) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8441) virt = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8442) size = bucketsize << log2qty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8443) if (flags & HASH_EARLY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8444) if (flags & HASH_ZERO)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8445) table = memblock_alloc(size, SMP_CACHE_BYTES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8446) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8447) table = memblock_alloc_raw(size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8448) SMP_CACHE_BYTES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8449) } else if (get_order(size) >= MAX_ORDER || hashdist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8450) table = __vmalloc(size, gfp_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8451) virt = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8452) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8453) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8454) * If bucketsize is not a power-of-two, we may free
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8455) * some pages at the end of hash table which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8456) * alloc_pages_exact() automatically does
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8457) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8458) table = alloc_pages_exact(size, gfp_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8459) kmemleak_alloc(table, size, 1, gfp_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8461) } while (!table && size > PAGE_SIZE && --log2qty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8463) if (!table)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8464) panic("Failed to allocate %s hash table\n", tablename);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8466) pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8467) tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8468) virt ? "vmalloc" : "linear");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8470) if (_hash_shift)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8471) *_hash_shift = log2qty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8472) if (_hash_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8473) *_hash_mask = (1 << log2qty) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8475) return table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8478) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8479) * This function checks whether the pageblock includes unmovable pages or not.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8480) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8481) * A PageLRU check without isolation or the lru_lock can race, so a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8482) * MIGRATE_MOVABLE block might include unmovable pages, and a __PageMovable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8483) * check without lock_page may miss some movable non-LRU pages when racing,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8484) * so this function cannot be expected to be exact.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8485) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8486) * Returns NULL if only movable pages were found, otherwise the first page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8487) * that looks unmovable, without holding a reference. If the caller wants to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8488) * dereference that page (e.g., for dumping), it has to make sure that it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8489) * cannot get removed (e.g., via memory unplug) concurrently.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8490) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8491) struct page *has_unmovable_pages(struct zone *zone, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8492) int migratetype, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8493) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8494) unsigned long iter = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8495) unsigned long pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8496) unsigned long offset = pfn % pageblock_nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8498) if (is_migrate_cma_page(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8499) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8500) * CMA allocations (alloc_contig_range) really need to mark CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8501) * pageblocks as isolated even when they are in fact not movable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8502) * so consider them movable here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8503) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8504) if (is_migrate_cma(migratetype))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8505) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8507) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8510) for (; iter < pageblock_nr_pages - offset; iter++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8511) if (!pfn_valid_within(pfn + iter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8512) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8514) page = pfn_to_page(pfn + iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8516) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8517) * Both bootmem allocations and memory holes are marked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8518) * PG_reserved and are unmovable. We can even have unmovable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8519) * allocations inside ZONE_MOVABLE, for example when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8520) * specifying "movablecore".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8521) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8522) if (PageReserved(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8523) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8525) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8526) * If the zone is movable and we have ruled out all reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8527) * pages then it should be reasonably safe to assume the rest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8528) * is movable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8529) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8530) if (zone_idx(zone) == ZONE_MOVABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8531) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8533) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8534) * Hugepages are not in LRU lists, but they're movable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8535) * THPs are on the LRU, but need to be counted as the number of small pages they span.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8536) * We need not scan over tail pages because we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8537) * handle each tail page individually in migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8538) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8539) if (PageHuge(page) || PageTransCompound(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8540) struct page *head = compound_head(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8541) unsigned int skip_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8543) if (PageHuge(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8544) if (!hugepage_migration_supported(page_hstate(head)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8545) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8546) } else if (!PageLRU(head) && !__PageMovable(head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8547) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8550) skip_pages = compound_nr(head) - (page - head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8551) iter += skip_pages - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8552) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8553) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8555) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8556) * We can't use page_count without pinning the page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8557) * because another CPU could free the compound page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8558) * This check already skips compound tails of THPs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8559) * because their page->_refcount is zero at all times.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8560) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8561) if (!page_ref_count(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8562) if (PageBuddy(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8563) iter += (1 << buddy_order(page)) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8564) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8567) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8568) * The HWPoisoned page may not be in the buddy system, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8569) * its page_count() is not 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8570) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8571) if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8572) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8574) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8575) * We treat all PageOffline() pages as movable when offlining
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8576) * to give drivers a chance to decrement their reference count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8577) * in MEM_GOING_OFFLINE in order to indicate that these pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8578) * can be offlined as there are no direct references anymore.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8579) * For actually unmovable PageOffline() where the driver does
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8580) * not support this, we will fail later when trying to actually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8581) * move these pages that still have a reference count > 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8582) * (false negatives in this function only)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8583) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8584) if ((flags & MEMORY_OFFLINE) && PageOffline(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8585) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8587) if (__PageMovable(page) || PageLRU(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8588) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8590) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8591) * If there are RECLAIMABLE pages, we need to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8592) * them. But for now, memory offlining itself doesn't call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8593) * shrink_node_slabs(), and that still needs to be fixed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8594) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8595) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8596) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8597) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8598) }
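
/*
 * Illustrative sketch of a hypothetical caller (not lifted from
 * page_isolation.c): how the result is typically consumed. As noted
 * above, the returned page is not pinned, so it is only dumped while the
 * caller otherwise keeps it from vanishing (e.g. while isolating the
 * pageblock under zone->lock).
 *
 *	struct page *unmovable;
 *
 *	unmovable = has_unmovable_pages(zone, page, migratetype, isol_flags);
 *	if (unmovable) {
 *		if (isol_flags & REPORT_FAILURE)
 *			dump_page(unmovable, "unmovable page");
 *		return -EBUSY;
 *	}
 */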
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8600) #ifdef CONFIG_CONTIG_ALLOC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8601) static unsigned long pfn_max_align_down(unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8602) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8603) return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8604) pageblock_nr_pages) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8607) unsigned long pfn_max_align_up(unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8608) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8609) return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8610) pageblock_nr_pages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8611) }
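
/*
 * Worked example with illustrative numbers: on a common x86_64
 * configuration MAX_ORDER_NR_PAGES is 1024 and pageblock_nr_pages is 512,
 * so the alignment unit above is 1024 pfns (0x400):
 *
 *	pfn_max_align_down(0x12345) == 0x12000	(0x12345 & ~(0x400 - 1))
 *	pfn_max_align_up(0x12345)   == 0x12400	(ALIGN(0x12345, 0x400))
 */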
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8613) #if defined(CONFIG_DYNAMIC_DEBUG) || \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8614) (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8615) /* Usage: See admin-guide/dynamic-debug-howto.rst */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8616) static void alloc_contig_dump_pages(struct list_head *page_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8617) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8618) DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, "migrate failure");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8620) if (DYNAMIC_DEBUG_BRANCH(descriptor)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8621) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8622) unsigned long nr_skip = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8623) unsigned long nr_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8625) dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8626) list_for_each_entry(page, page_list, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8627) nr_pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8628) /* The page will be freed by putback_movable_pages soon */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8629) if (page_count(page) == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8630) nr_skip++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8631) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8633) dump_page(page, "migration failure");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8634) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8635) pr_warn("total dump_pages %lu skipping %lu\n", nr_pages, nr_skip);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8636) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8637) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8638) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8639) static inline void alloc_contig_dump_pages(struct list_head *page_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8640) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8642) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8644) /* [start, end) must belong to a single zone. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8645) static int __alloc_contig_migrate_range(struct compact_control *cc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8646) unsigned long start, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8647) struct acr_info *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8648) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8649) /* This function is based on compact_zone() from compaction.c. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8650) unsigned int nr_reclaimed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8651) unsigned long pfn = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8652) unsigned int tries = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8653) unsigned int max_tries = 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8654) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8655) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8656) struct migration_target_control mtc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8657) .nid = zone_to_nid(cc->zone),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8658) .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8659) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8661) if (cc->alloc_contig && cc->mode == MIGRATE_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8662) max_tries = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8664) lru_cache_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8666) while (pfn < end || !list_empty(&cc->migratepages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8667) if (fatal_signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8668) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8669) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8672) if (list_empty(&cc->migratepages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8673) cc->nr_migratepages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8674) pfn = isolate_migratepages_range(cc, pfn, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8675) if (!pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8676) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8677) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8679) tries = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8680) } else if (++tries == max_tries) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8681) ret = ret < 0 ? ret : -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8682) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8683) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8685) nr_reclaimed = reclaim_clean_pages_from_list(cc->zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8686) &cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8687) info->nr_reclaimed += nr_reclaimed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8688) cc->nr_migratepages -= nr_reclaimed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8690) list_for_each_entry(page, &cc->migratepages, lru)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8691) info->nr_mapped += page_mapcount(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8693) ret = migrate_pages(&cc->migratepages, alloc_migration_target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8694) NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8695) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8696) info->nr_migrated += cc->nr_migratepages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8697) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8699) lru_cache_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8700) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8701) if (ret == -EBUSY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8702) alloc_contig_dump_pages(&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8703) page_pinner_mark_migration_failed_pages(&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8706) if (!list_empty(&cc->migratepages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8707) page = list_first_entry(&cc->migratepages, struct page, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8708) info->failed_pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8711) putback_movable_pages(&cc->migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8712) info->err |= ACR_ERR_MIGRATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8713) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8714) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8715) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8716) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8718) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8719) * alloc_contig_range() -- tries to allocate given range of pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8720) * @start: start PFN to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8721) * @end: one-past-the-last PFN to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8722) * @migratetype: migratetype of the underlying pageblocks (either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8723) * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8724) * in range must have the same migratetype and it must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8725) * be either of the two.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8726) * @gfp_mask: GFP mask to use during compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8727) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8728) * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8729) * aligned. The PFN range must belong to a single zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8730) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8731) * The first thing this routine does is attempt to MIGRATE_ISOLATE all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8732) * pageblocks in the range. Once isolated, the pageblocks should not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8733) * be modified by others.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8734) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8735) * Return: zero on success or negative error code. On success all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8736) * pages whose PFN is in [start, end) are allocated for the caller and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8737) * need to be freed with free_contig_range().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8738) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8739) int alloc_contig_range(unsigned long start, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8740) unsigned migratetype, gfp_t gfp_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8741) struct acr_info *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8742) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8743) unsigned long outer_start, outer_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8744) unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8745) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8746) bool skip_drain_all_pages = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8748) struct compact_control cc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8749) .nr_migratepages = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8750) .order = -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8751) .zone = page_zone(pfn_to_page(start)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8752) .mode = gfp_mask & __GFP_NORETRY ? MIGRATE_ASYNC : MIGRATE_SYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8753) .ignore_skip_hint = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8754) .no_set_skip_hint = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8755) .gfp_mask = current_gfp_context(gfp_mask),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8756) .alloc_contig = true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8757) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8758) INIT_LIST_HEAD(&cc.migratepages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8760) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8761) * What we do here is mark all pageblocks in the range as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8762) * MIGRATE_ISOLATE. Because pageblocks and max-order pages may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8763) * have different sizes, and due to the way the page allocator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8764) * works, we align the range to the bigger of the two so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8765) * that the page allocator won't try to merge buddies from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8766) * different pageblocks and change MIGRATE_ISOLATE to some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8767) * other migration type.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8768) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8769) * Once the pageblocks are marked as MIGRATE_ISOLATE, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8770) * migrate the pages from the unaligned range (i.e. the pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8771) * we are interested in). This puts all the pages in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8772) * range back into the page allocator as MIGRATE_ISOLATE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8773) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8774) * When this is done, we take the pages in the range from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8775) * page allocator, removing them from the buddy system. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8776) * way the page allocator will never consider using them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8777) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8778) * This lets us mark the pageblocks back as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8779) * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8780) * aligned range but not in the unaligned, original range are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8781) * put back into the page allocator so that buddy can use them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8782) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8784) ret = start_isolate_page_range(pfn_max_align_down(start),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8785) pfn_max_align_up(end), migratetype, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8786) &info->failed_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8787) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8788) info->err |= ACR_ERR_ISOLATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8789) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8790) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8792) trace_android_vh_cma_drain_all_pages_bypass(migratetype,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8793) &skip_drain_all_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8794) if (!skip_drain_all_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8795) drain_all_pages(cc.zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8797) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8798) * In case of -EBUSY, we'd like to know which page causes the problem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8799) * So, just fall through. test_pages_isolated() has a tracepoint
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8800) * which will report the busy page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8801) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8802) * It is possible that busy pages could become available before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8803) * the call to test_pages_isolated, and the range will actually be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8804) * allocated. So, if we fall through, be sure to clear ret so that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8805) * -EBUSY is not accidentally used or returned to the caller.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8806) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8807) ret = __alloc_contig_migrate_range(&cc, start, end, info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8808) if (ret && (ret != -EBUSY || (gfp_mask & __GFP_NORETRY)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8809) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8810) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8812) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8813) * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8814) * aligned blocks that are marked as MIGRATE_ISOLATE. What's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8815) * more, all pages in [start, end) are free in page allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8816) * What we are going to do is to allocate all pages from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8817) * [start, end) (that is remove them from page allocator).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8818) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8819) * The only problem is that pages at the beginning and at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8820) * end of the interesting range may not be aligned with pages that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8821) * the page allocator holds, i.e. they can be part of higher order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8822) * pages. Because of this, we reserve the bigger range and,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8823) * once this is done, free the pages we are not interested in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8824) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8825) * We don't have to hold zone->lock here because the pages are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8826) * isolated and thus won't get removed from the buddy allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8827) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8829) order = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8830) outer_start = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8831) while (!PageBuddy(pfn_to_page(outer_start))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8832) if (++order >= MAX_ORDER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8833) outer_start = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8834) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8835) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8836) outer_start &= ~0UL << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8839) if (outer_start != start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8840) order = buddy_order(pfn_to_page(outer_start));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8842) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8843) * The outer_start page could be a small-order buddy page that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8844) * doesn't include the start page. Adjust outer_start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8845) * in this case so the failed page is reported properly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8846) * by the tracepoint in test_pages_isolated().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8847) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8848) if (outer_start + (1UL << order) <= start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8849) outer_start = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8852) /* Make sure the range is really isolated. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8853) if (test_pages_isolated(outer_start, end, 0, &info->failed_pfn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8854) pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8855) __func__, outer_start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8856) ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8857) info->err |= ACR_ERR_TEST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8858) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8859) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8861) /* Grab isolated pages from freelists. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8862) outer_end = isolate_freepages_range(&cc, outer_start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8863) if (!outer_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8864) ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8865) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8868) /* Free head and tail (if any) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8869) if (start != outer_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8870) free_contig_range(outer_start, start - outer_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8871) if (end != outer_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8872) free_contig_range(end, outer_end - end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8874) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8875) undo_isolate_page_range(pfn_max_align_down(start),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8876) pfn_max_align_up(end), migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8877) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8879) EXPORT_SYMBOL(alloc_contig_range);
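
/*
 * Illustrative sketch of a caller (local names are made up): this variant
 * of alloc_contig_range() also takes a struct acr_info, so the failing
 * stage (ACR_ERR_ISOLATE/ACR_ERR_MIGRATE/ACR_ERR_TEST) and the offending
 * pfn can be inspected on error. The pr_debug() format assumes err is an
 * unsigned int and failed_pfn an unsigned long.
 *
 *	struct acr_info info = {0};
 *	int ret;
 *
 *	ret = alloc_contig_range(start_pfn, start_pfn + nr_pages,
 *				 MIGRATE_MOVABLE, GFP_KERNEL, &info);
 *	if (ret) {
 *		pr_debug("acr: err=%#x failed_pfn=%#lx\n",
 *			 info.err, info.failed_pfn);
 *		return ret;
 *	}
 *	...use the nr_pages pages starting at pfn_to_page(start_pfn)...
 *	free_contig_range(start_pfn, nr_pages);
 */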
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8881) static int __alloc_contig_pages(unsigned long start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8882) unsigned long nr_pages, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8883) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8884) struct acr_info dummy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8885) unsigned long end_pfn = start_pfn + nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8887) return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8888) gfp_mask, &dummy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8891) static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8892) unsigned long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8893) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8894) unsigned long i, end_pfn = start_pfn + nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8895) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8897) for (i = start_pfn; i < end_pfn; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8898) page = pfn_to_online_page(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8899) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8900) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8902) if (page_zone(page) != z)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8903) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8905) if (PageReserved(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8906) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8908) if (page_count(page) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8909) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8911) if (PageHuge(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8912) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8914) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8915) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8917) static bool zone_spans_last_pfn(const struct zone *zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8918) unsigned long start_pfn, unsigned long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8919) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8920) unsigned long last_pfn = start_pfn + nr_pages - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8922) return zone_spans_pfn(zone, last_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8925) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8926) * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8927) * @nr_pages: Number of contiguous pages to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8928) * @gfp_mask: GFP mask to limit search and used during compaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8929) * @nid: Target node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8930) * @nodemask: Mask for other possible nodes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8931) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8932) * This routine is a wrapper around alloc_contig_range(). It scans over zones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8933) * on an applicable zonelist to find a contiguous pfn range which can then be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8934) * tried for allocation with alloc_contig_range(). This routine is intended
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8935) * for allocation requests which cannot be fulfilled by the buddy allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8936) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8937) * The allocated memory is always aligned to a page boundary. If nr_pages is a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8938) * power of two then the alignment is guaranteed to be to the given nr_pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8939) * (e.g. 1GB request would be aligned to 1GB).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8940) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8941) * Allocated pages can be freed with free_contig_range() or by manually calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8942) * __free_page() on each allocated page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8943) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8944) * Return: pointer to contiguous pages on success, or NULL if not successful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8945) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8946) struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8947) int nid, nodemask_t *nodemask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8948) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8949) unsigned long ret, pfn, flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8950) struct zonelist *zonelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8951) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8952) struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8954) zonelist = node_zonelist(nid, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8955) for_each_zone_zonelist_nodemask(zone, z, zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8956) gfp_zone(gfp_mask), nodemask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8957) spin_lock_irqsave(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8959) pfn = ALIGN(zone->zone_start_pfn, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8960) while (zone_spans_last_pfn(zone, pfn, nr_pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8961) if (pfn_range_valid_contig(zone, pfn, nr_pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8962) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8963) * We release the zone lock here because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8964) * alloc_contig_range() will also lock the zone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8965) * at some point. If there's an allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8966) * spinning on this lock, it may win the race
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8967) * and cause alloc_contig_range() to fail...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8968) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8969) spin_unlock_irqrestore(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8970) ret = __alloc_contig_pages(pfn, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8971) gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8972) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8973) return pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8974) spin_lock_irqsave(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8975) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8976) pfn += nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8978) spin_unlock_irqrestore(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8979) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8980) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8981) }
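
/*
 * Illustrative sketch of a caller (names are made up): grab nr_pages
 * physically contiguous pages from node nid and release them again.
 *
 *	struct page *pages;
 *
 *	pages = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE,
 *				   nid, NULL);
 *	if (!pages)
 *		return -ENOMEM;
 *	...
 *	free_contig_range(page_to_pfn(pages), nr_pages);
 */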
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8982) #endif /* CONFIG_CONTIG_ALLOC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8984) void free_contig_range(unsigned long pfn, unsigned int nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8985) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8986) unsigned int count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8988) for (; nr_pages--; pfn++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8989) struct page *page = pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8991) count += page_count(page) != 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8992) __free_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8993) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8994) WARN(count != 0, "%d pages are still in use!\n", count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8996) EXPORT_SYMBOL(free_contig_range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8998) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8999) * The zone indicated has a new number of managed_pages; batch sizes and percpu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9000) * page high values need to be recalculated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9001) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9002) void __meminit zone_pcp_update(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9004) mutex_lock(&pcp_batch_high_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9005) __zone_pcp_update(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9006) mutex_unlock(&pcp_batch_high_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9007) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9009) void zone_pcp_reset(struct zone *zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9010) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9011) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9012) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9013) struct per_cpu_pageset *pset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9015) /* avoid races with drain_pages() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9016) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9017) if (zone->pageset != &boot_pageset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9018) for_each_online_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9019) pset = per_cpu_ptr(zone->pageset, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9020) drain_zonestat(zone, pset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9022) free_percpu(zone->pageset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9023) zone->pageset = &boot_pageset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9024) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9025) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9028) #ifdef CONFIG_MEMORY_HOTREMOVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9029) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9030) * All pages in the range must be in a single zone, must not contain holes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9031) * must span full sections, and must be isolated before calling this function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9032) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9033) void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9034) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9035) unsigned long pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9036) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9037) struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9038) unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9039) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9041) offline_mem_sections(pfn, end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9042) zone = page_zone(pfn_to_page(pfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9043) spin_lock_irqsave(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9044) while (pfn < end_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9045) page = pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9046) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9047) * The HWPoisoned page may not be in the buddy system, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9048) * its page_count() is not 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9049) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9050) if (unlikely(!PageBuddy(page) && PageHWPoison(page))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9051) pfn++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9052) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9053) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9054) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9055) * At this point all remaining PageOffline() pages have a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9056) * reference count of 0 and can simply be skipped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9057) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9058) if (PageOffline(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9059) BUG_ON(page_count(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9060) BUG_ON(PageBuddy(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9061) pfn++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9062) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9063) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9065) BUG_ON(page_count(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9066) BUG_ON(!PageBuddy(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9067) order = buddy_order(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9068) del_page_from_free_list(page, zone, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9069) pfn += (1 << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9071) spin_unlock_irqrestore(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9073) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9075) bool is_free_buddy_page(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9076) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9077) struct zone *zone = page_zone(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9078) unsigned long pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9079) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9080) unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9082) spin_lock_irqsave(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9083) for (order = 0; order < MAX_ORDER; order++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9084) struct page *page_head = page - (pfn & ((1 << order) - 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9086) if (PageBuddy(page_head) && buddy_order(page_head) >= order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9087) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9088) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9089) spin_unlock_irqrestore(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9091) return order < MAX_ORDER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9092) }
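
/*
 * Worked example for the head lookup above (numbers are illustrative):
 * for pfn 0x1234 at order 4, pfn & ((1 << 4) - 1) == 0x4, so page_head
 * points 4 pages back, at the potential order-4 buddy starting at pfn
 * 0x1230. The page is reported free iff one such head is PageBuddy()
 * with buddy_order() >= order.
 */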
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9094) #ifdef CONFIG_MEMORY_FAILURE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9095) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9096) * Break down a higher-order page into sub-pages, and keep our target out of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9097) * the buddy allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9098) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9099) static void break_down_buddy_pages(struct zone *zone, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9100) struct page *target, int low, int high,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9101) int migratetype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9102) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9103) unsigned long size = 1 << high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9104) struct page *current_buddy, *next_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9106) while (high > low) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9107) high--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9108) size >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9110) if (target >= &page[size]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9111) next_page = page + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9112) current_buddy = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9113) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9114) next_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9115) current_buddy = page + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9118) if (set_page_guard(zone, current_buddy, high, migratetype))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9119) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9121) if (current_buddy != target) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9122) add_to_free_list(current_buddy, zone, high, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9123) set_buddy_order(current_buddy, high);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9124) page = next_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9127) }
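
/*
 * Worked example (illustrative numbers): splitting an order-3 buddy at
 * pfn "base" while keeping target == base + 5 out of the free lists.
 * Each step halves the block and returns the half that does not contain
 * the target:
 *
 *	high 3 -> 2: [base + 0, base + 4) freed as an order-2 block,
 *		     the walk continues in [base + 4, base + 8)
 *	high 2 -> 1: [base + 6, base + 8) freed as an order-1 block
 *	high 1 -> 0: base + 4 freed as an order-0 page
 *
 * so only base + 5 (the target) is left out of the buddy lists.
 */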
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9130) * Take a page that will be marked as poisoned off the buddy allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9131) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9132) bool take_page_off_buddy(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9133) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9134) struct zone *zone = page_zone(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9135) unsigned long pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9136) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9137) unsigned int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9138) bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9140) spin_lock_irqsave(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9141) for (order = 0; order < MAX_ORDER; order++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9142) struct page *page_head = page - (pfn & ((1 << order) - 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9143) int page_order = buddy_order(page_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9145) if (PageBuddy(page_head) && page_order >= order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9146) unsigned long pfn_head = page_to_pfn(page_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9147) int migratetype = get_pfnblock_migratetype(page_head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9148) pfn_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9150) del_page_from_free_list(page_head, zone, page_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9151) break_down_buddy_pages(zone, page_head, page, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9152) page_order, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9153) if (!is_migrate_isolate(migratetype))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9154) __mod_zone_freepage_state(zone, -1, migratetype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9155) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9156) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9158) if (page_count(page_head) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9159) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9161) spin_unlock_irqrestore(&zone->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9162) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9163) }
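
/*
 * Illustrative sketch (simplified, not copied from memory-failure.c) of
 * the expected shape of a memory-poisoning caller: pull the free page out
 * of the buddy lists before marking it, so it can never be allocated
 * again.
 *
 *	if (take_page_off_buddy(page)) {
 *		SetPageHWPoison(page);
 *		...account the poisoned page...
 *	}
 */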
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9164) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9166) #ifdef CONFIG_ZONE_DMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9167) bool has_managed_dma(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9168) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9169) struct pglist_data *pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9171) for_each_online_pgdat(pgdat) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9172) struct zone *zone = &pgdat->node_zones[ZONE_DMA];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9174) if (managed_zone(zone))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9175) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9177) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9179) #endif /* CONFIG_ZONE_DMA */