Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@ucw.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/pagemap.h>
#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/memremap.h>
#include <linux/nmi.h>
#include <linux/gfp.h>
#include <linux/kcore.h>

#include <asm/processor.h>
#include <asm/bios_ebda.h>
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820/api.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>
#include <asm/set_memory.h>
#include <asm/init.h>
#include <asm/uv/uv.h>
#include <asm/setup.h>
#include <asm/ftrace.h>

#include "mm_internal.h"

#include "ident_map.c"

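/*
 * The DEFINE_POPULATE/DEFINE_ENTRY macros below generate *_init() wrappers
 * around the page-table populate/set helpers.  When 'init' is true (the
 * initial construction of the direct mapping) the *_safe() variants are
 * used, which sanity-check that no valid entry is silently overwritten;
 * otherwise the plain helpers are used so existing entries may be updated.
 */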
#define DEFINE_POPULATE(fname, type1, type2, init)		\
static inline void fname##_init(struct mm_struct *mm,		\
		type1##_t *arg1, type2##_t *arg2, bool init)	\
{								\
	if (init)						\
		fname##_safe(mm, arg1, arg2);			\
	else							\
		fname(mm, arg1, arg2);				\
}

DEFINE_POPULATE(p4d_populate, p4d, pud, init)
DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
DEFINE_POPULATE(pud_populate, pud, pmd, init)
DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)

#define DEFINE_ENTRY(type1, type2, init)			\
static inline void set_##type1##_init(type1##_t *arg1,		\
			type2##_t arg2, bool init)		\
{								\
	if (init)						\
		set_##type1##_safe(arg1, arg2);			\
	else							\
		set_##type1(arg1, arg2);			\
}

DEFINE_ENTRY(p4d, p4d, init)
DEFINE_ENTRY(pud, pud, init)
DEFINE_ENTRY(pmd, pmd, init)
DEFINE_ENTRY(pte, pte, init)


/*
 * NOTE: pagetable_init() allocates all the fixmap pagetables contiguously
 * in physical space, so we can cache the location of the first one and
 * move around without checking the pgd every time.
 */

/* Bits supported by the hardware: */
pteval_t __supported_pte_mask __read_mostly = ~0;
/* Bits allowed in normal kernel mappings: */
pteval_t __default_kernel_pte_mask __read_mostly = ~0;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
EXPORT_SYMBOL(__default_kernel_pte_mask);

int force_personality32;

/*
 * noexec32=on|off
 * Control the non-executable heap for 32-bit processes.
 * To control the stack too, use noexec=off.
 *
 * on	PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
 * off	PROT_READ implies PROT_EXEC
 */
static int __init nonx32_setup(char *str)
{
	if (!strcmp(str, "on"))
		force_personality32 &= ~READ_IMPLIES_EXEC;
	else if (!strcmp(str, "off"))
		force_personality32 |= READ_IMPLIES_EXEC;
	return 1;
}
__setup("noexec32=", nonx32_setup);

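/*
 * Propagate kernel PGD entries covering [start, end] into every PGD on
 * pgd_list when running with 5-level paging; sync_global_pgds_l4() below
 * does the same at the p4d level when the p4d is folded (4-level paging).
 */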
static void sync_global_pgds_l5(unsigned long start, unsigned long end)
{
	unsigned long addr;

	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
		const pgd_t *pgd_ref = pgd_offset_k(addr);
		struct page *page;

		/* Check for overflow */
		if (addr < start)
			break;

		if (pgd_none(*pgd_ref))
			continue;

		spin_lock(&pgd_lock);
		list_for_each_entry(page, &pgd_list, lru) {
			pgd_t *pgd;
			spinlock_t *pgt_lock;

			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
			/* the pgt_lock only for Xen */
			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
			spin_lock(pgt_lock);

			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));

			if (pgd_none(*pgd))
				set_pgd(pgd, *pgd_ref);

			spin_unlock(pgt_lock);
		}
		spin_unlock(&pgd_lock);
	}
}

static void sync_global_pgds_l4(unsigned long start, unsigned long end)
{
	unsigned long addr;

	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
		pgd_t *pgd_ref = pgd_offset_k(addr);
		const p4d_t *p4d_ref;
		struct page *page;

		/*
		 * With a folded p4d, pgd_none() is always false, so we need
		 * to handle synchronization at the p4d level.
		 */
		MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
		p4d_ref = p4d_offset(pgd_ref, addr);

		if (p4d_none(*p4d_ref))
			continue;

		spin_lock(&pgd_lock);
		list_for_each_entry(page, &pgd_list, lru) {
			pgd_t *pgd;
			p4d_t *p4d;
			spinlock_t *pgt_lock;

			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
			p4d = p4d_offset(pgd, addr);
			/* the pgt_lock only for Xen */
			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
			spin_lock(pgt_lock);

			if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
				BUG_ON(p4d_page_vaddr(*p4d)
				       != p4d_page_vaddr(*p4d_ref));

			if (p4d_none(*p4d))
				set_p4d(p4d, *p4d_ref);

			spin_unlock(pgt_lock);
		}
		spin_unlock(&pgd_lock);
	}
}

/*
 * When memory is added, make sure all processes' MMs have suitable
 * PGD entries in the local PGD-level page.
 */
static void sync_global_pgds(unsigned long start, unsigned long end)
{
	if (pgtable_l5_enabled())
		sync_global_pgds_l5(start, end);
	else
		sync_global_pgds_l4(start, end);
}

/*
 * NOTE: This function is marked __ref because it calls an __init function
 * (memblock_alloc). It's safe to do so ONLY when after_bootmem == 0.
 */
static __ref void *spp_getpage(void)
{
	void *ptr;

	if (after_bootmem)
		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
	else
		ptr = memblock_alloc(PAGE_SIZE, PAGE_SIZE);

	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) {
		panic("set_pte_phys: cannot allocate page data %s\n",
			after_bootmem ? "after bootmem" : "");
	}

	pr_debug("spp_getpage %p\n", ptr);

	return ptr;
}

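/*
 * The fill_*() helpers below return the page-table entry for 'vaddr' at the
 * next lower level, allocating and hooking up a new table page via
 * spp_getpage() if the current entry is empty.
 */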
static p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr)
{
	if (pgd_none(*pgd)) {
		p4d_t *p4d = (p4d_t *)spp_getpage();
		pgd_populate(&init_mm, pgd, p4d);
		if (p4d != p4d_offset(pgd, 0))
			printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
			       p4d, p4d_offset(pgd, 0));
	}
	return p4d_offset(pgd, vaddr);
}

static pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr)
{
	if (p4d_none(*p4d)) {
		pud_t *pud = (pud_t *)spp_getpage();
		p4d_populate(&init_mm, p4d, pud);
		if (pud != pud_offset(p4d, 0))
			printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
			       pud, pud_offset(p4d, 0));
	}
	return pud_offset(p4d, vaddr);
}

static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
{
	if (pud_none(*pud)) {
		pmd_t *pmd = (pmd_t *) spp_getpage();
		pud_populate(&init_mm, pud, pmd);
		if (pmd != pmd_offset(pud, 0))
			printk(KERN_ERR "PAGETABLE BUG #02! %p <-> %p\n",
			       pmd, pmd_offset(pud, 0));
	}
	return pmd_offset(pud, vaddr);
}

static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
{
	if (pmd_none(*pmd)) {
		pte_t *pte = (pte_t *) spp_getpage();
		pmd_populate_kernel(&init_mm, pmd, pte);
		if (pte != pte_offset_kernel(pmd, 0))
			printk(KERN_ERR "PAGETABLE BUG #03!\n");
	}
	return pte_offset_kernel(pmd, vaddr);
}

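/*
 * Install 'new_pte' for 'vaddr' underneath 'pud', allocating the pmd/pte
 * levels if necessary, then flush the single kernel TLB entry.
 */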
static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)
{
	pmd_t *pmd = fill_pmd(pud, vaddr);
	pte_t *pte = fill_pte(pmd, vaddr);

	set_pte(pte, new_pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	flush_tlb_one_kernel(vaddr);
}

void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
{
	p4d_t *p4d = p4d_page + p4d_index(vaddr);
	pud_t *pud = fill_pud(p4d, vaddr);

	__set_pte_vaddr(pud, vaddr, new_pte);
}

void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
{
	pud_t *pud = pud_page + pud_index(vaddr);

	__set_pte_vaddr(pud, vaddr, new_pte);
}

void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
{
	pgd_t *pgd;
	p4d_t *p4d_page;

	pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));

	pgd = pgd_offset_k(vaddr);
	if (pgd_none(*pgd)) {
		printk(KERN_ERR
			"PGD FIXMAP MISSING, it should be setup in head.S!\n");
		return;
	}

	p4d_page = p4d_offset(pgd, 0);
	set_pte_vaddr_p4d(p4d_page, vaddr, pteval);
}

pmd_t * __init populate_extra_pmd(unsigned long vaddr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset_k(vaddr);
	p4d = fill_p4d(pgd, vaddr);
	pud = fill_pud(p4d, vaddr);
	return fill_pmd(pud, vaddr);
}

pte_t * __init populate_extra_pte(unsigned long vaddr)
{
	pmd_t *pmd;

	pmd = populate_extra_pmd(vaddr);
	return fill_pte(pmd, vaddr);
}

/*
 * Create large page table mappings for a range of physical addresses.
 */
static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
					enum page_cache_mode cache)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pgprot_t prot;

	pgprot_val(prot) = pgprot_val(PAGE_KERNEL_LARGE) |
		protval_4k_2_large(cachemode2protval(cache));
	BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
	for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
		pgd = pgd_offset_k((unsigned long)__va(phys));
		if (pgd_none(*pgd)) {
			p4d = (p4d_t *) spp_getpage();
			set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE |
						_PAGE_USER));
		}
		p4d = p4d_offset(pgd, (unsigned long)__va(phys));
		if (p4d_none(*p4d)) {
			pud = (pud_t *) spp_getpage();
			set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE |
						_PAGE_USER));
		}
		pud = pud_offset(p4d, (unsigned long)__va(phys));
		if (pud_none(*pud)) {
			pmd = (pmd_t *) spp_getpage();
			set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
						_PAGE_USER));
		}
		pmd = pmd_offset(pud, phys);
		BUG_ON(!pmd_none(*pmd));
		set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
	}
}

void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
{
	__init_extra_mapping(phys, size, _PAGE_CACHE_MODE_WB);
}

void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
{
	__init_extra_mapping(phys, size, _PAGE_CACHE_MODE_UC);
}

/*
 * The head.S code sets up the kernel high mapping:
 *
 *   from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
 *
 * phys_base holds the negative offset to the kernel, which is added
 * to the compile time generated pmds. This results in invalid pmds up
 * to the point where we hit the physaddr 0 mapping.
 *
 * We limit the mappings to the region from _text to _brk_end.  _brk_end
 * is rounded up to the 2MB boundary. This catches the invalid pmds as
 * well, as they are located before _text:
 */
void __init cleanup_highmap(void)
{
	unsigned long vaddr = __START_KERNEL_map;
	unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE;
	unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
	pmd_t *pmd = level2_kernel_pgt;

	/*
	 * Native path, max_pfn_mapped is not set yet.
	 * Xen has valid max_pfn_mapped set in
	 *	arch/x86/xen/mmu.c:xen_setup_kernel_pagetable().
	 */
	if (max_pfn_mapped)
		vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);

	for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
		if (pmd_none(*pmd))
			continue;
		if (vaddr < (unsigned long) _text || vaddr > end)
			set_pmd(pmd, __pmd(0));
	}
}

/*
 * Create PTE level page table mapping for physical addresses.
 * It returns the last physical address mapped.
 */
static unsigned long __meminit
phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
	      pgprot_t prot, bool init)
{
	unsigned long pages = 0, paddr_next;
	unsigned long paddr_last = paddr_end;
	pte_t *pte;
	int i;

	pte = pte_page + pte_index(paddr);
	i = pte_index(paddr);

	for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) {
		paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE;
		if (paddr >= paddr_end) {
			if (!after_bootmem &&
			    !e820__mapped_any(paddr & PAGE_MASK, paddr_next,
					     E820_TYPE_RAM) &&
			    !e820__mapped_any(paddr & PAGE_MASK, paddr_next,
					     E820_TYPE_RESERVED_KERN))
				set_pte_init(pte, __pte(0), init);
			continue;
		}

		/*
		 * We will reuse the existing mapping.
		 * Xen, for example, has some special requirements, like
		 * mapping pagetable pages as RO.  So assume that whoever
		 * pre-set up these mappings knew what they were doing.
		 */
		if (!pte_none(*pte)) {
			if (!after_bootmem)
				pages++;
			continue;
		}

		if (0)
			pr_info("   pte=%p addr=%lx pte=%016lx\n", pte, paddr,
				pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
		pages++;
		set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
		paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
	}

	update_page_count(PG_LEVEL_4K, pages);

	return paddr_last;
}

/*
 * Create PMD level page table mappings for physical addresses. The virtual
 * and physical addresses have to be aligned at this level.
 * It returns the last physical address mapped.
 */
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
	      unsigned long page_size_mask, pgprot_t prot, bool init)
{
	unsigned long pages = 0, paddr_next;
	unsigned long paddr_last = paddr_end;

	int i = pmd_index(paddr);

	for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) {
		pmd_t *pmd = pmd_page + pmd_index(paddr);
		pte_t *pte;
		pgprot_t new_prot = prot;

		paddr_next = (paddr & PMD_MASK) + PMD_SIZE;
		if (paddr >= paddr_end) {
			if (!after_bootmem &&
			    !e820__mapped_any(paddr & PMD_MASK, paddr_next,
					     E820_TYPE_RAM) &&
			    !e820__mapped_any(paddr & PMD_MASK, paddr_next,
					     E820_TYPE_RESERVED_KERN))
				set_pmd_init(pmd, __pmd(0), init);
			continue;
		}

		if (!pmd_none(*pmd)) {
			if (!pmd_large(*pmd)) {
				spin_lock(&init_mm.page_table_lock);
				pte = (pte_t *)pmd_page_vaddr(*pmd);
				paddr_last = phys_pte_init(pte, paddr,
							   paddr_end, prot,
							   init);
				spin_unlock(&init_mm.page_table_lock);
				continue;
			}
			/*
			 * If we are OK with a PG_LEVEL_2M mapping, then we
			 * will use the existing mapping.
			 *
			 * Otherwise, we will split the large page mapping but
			 * use the same existing protection bits, except for
			 * the large-page bit, so that we don't violate Intel's
			 * TLB Application note (317080), which says that while
			 * changing the page sizes, new and old translations
			 * should not differ with respect to page frame and
			 * attributes.
			 */
			if (page_size_mask & (1 << PG_LEVEL_2M)) {
				if (!after_bootmem)
					pages++;
				paddr_last = paddr_next;
				continue;
			}
			new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
		}

		if (page_size_mask & (1<<PG_LEVEL_2M)) {
			pages++;
			spin_lock(&init_mm.page_table_lock);
			set_pte_init((pte_t *)pmd,
				     pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
					     __pgprot(pgprot_val(prot) | _PAGE_PSE)),
				     init);
			spin_unlock(&init_mm.page_table_lock);
			paddr_last = paddr_next;
			continue;
		}

		pte = alloc_low_page();
		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);

		spin_lock(&init_mm.page_table_lock);
		pmd_populate_kernel_init(&init_mm, pmd, pte, init);
		spin_unlock(&init_mm.page_table_lock);
	}
	update_page_count(PG_LEVEL_2M, pages);
	return paddr_last;
}

/*
 * Create PUD level page table mappings for physical addresses. The virtual
 * and physical addresses do not have to be aligned at this level. KASLR can
 * randomize virtual addresses up to this level.
 * It returns the last physical address mapped.
 */
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
	      unsigned long page_size_mask, pgprot_t _prot, bool init)
{
	unsigned long pages = 0, paddr_next;
	unsigned long paddr_last = paddr_end;
	unsigned long vaddr = (unsigned long)__va(paddr);
	int i = pud_index(vaddr);

	for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) {
		pud_t *pud;
		pmd_t *pmd;
		pgprot_t prot = _prot;

		vaddr = (unsigned long)__va(paddr);
		pud = pud_page + pud_index(vaddr);
		paddr_next = (paddr & PUD_MASK) + PUD_SIZE;

		if (paddr >= paddr_end) {
			if (!after_bootmem &&
			    !e820__mapped_any(paddr & PUD_MASK, paddr_next,
					     E820_TYPE_RAM) &&
			    !e820__mapped_any(paddr & PUD_MASK, paddr_next,
					     E820_TYPE_RESERVED_KERN))
				set_pud_init(pud, __pud(0), init);
			continue;
		}

		if (!pud_none(*pud)) {
			if (!pud_large(*pud)) {
				pmd = pmd_offset(pud, 0);
				paddr_last = phys_pmd_init(pmd, paddr,
							   paddr_end,
							   page_size_mask,
							   prot, init);
				continue;
			}
			/*
			 * If we are OK with a PG_LEVEL_1G mapping, then we
			 * will use the existing mapping.
			 *
			 * Otherwise, we will split the gbpage mapping but use
			 * the same existing protection bits, except for the
			 * large-page bit, so that we don't violate Intel's TLB
			 * Application note (317080), which says that while
			 * changing the page sizes, new and old translations
			 * should not differ with respect to page frame and
			 * attributes.
			 */
			if (page_size_mask & (1 << PG_LEVEL_1G)) {
				if (!after_bootmem)
					pages++;
				paddr_last = paddr_next;
				continue;
			}
			prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
		}

		if (page_size_mask & (1<<PG_LEVEL_1G)) {
			pages++;
			spin_lock(&init_mm.page_table_lock);

			prot = __pgprot(pgprot_val(prot) | __PAGE_KERNEL_LARGE);

			set_pte_init((pte_t *)pud,
				     pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
					     prot),
				     init);
			spin_unlock(&init_mm.page_table_lock);
			paddr_last = paddr_next;
			continue;
		}

		pmd = alloc_low_page();
		paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
					   page_size_mask, prot, init);

		spin_lock(&init_mm.page_table_lock);
		pud_populate_init(&init_mm, pud, pmd, init);
		spin_unlock(&init_mm.page_table_lock);
	}

	update_page_count(PG_LEVEL_1G, pages);

	return paddr_last;
}

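/*
 * Create P4D level page table mappings for physical addresses.  With
 * 4-level paging the p4d is folded and the whole range is simply handed
 * down to phys_pud_init().  It returns the last physical address mapped.
 */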
static unsigned long __meminit
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
	      unsigned long page_size_mask, pgprot_t prot, bool init)
{
	unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;

	paddr_last = paddr_end;
	vaddr = (unsigned long)__va(paddr);
	vaddr_end = (unsigned long)__va(paddr_end);

	if (!pgtable_l5_enabled())
		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
				     page_size_mask, prot, init);

	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
		p4d_t *p4d = p4d_page + p4d_index(vaddr);
		pud_t *pud;

		vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
		paddr = __pa(vaddr);

		if (paddr >= paddr_end) {
			paddr_next = __pa(vaddr_next);
			if (!after_bootmem &&
			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
					     E820_TYPE_RAM) &&
			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
					     E820_TYPE_RESERVED_KERN))
				set_p4d_init(p4d, __p4d(0), init);
			continue;
		}

		if (!p4d_none(*p4d)) {
			pud = pud_offset(p4d, 0);
			paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
					page_size_mask, prot, init);
			continue;
		}

		pud = alloc_low_page();
		paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
					   page_size_mask, prot, init);

		spin_lock(&init_mm.page_table_lock);
		p4d_populate_init(&init_mm, p4d, pud, init);
		spin_unlock(&init_mm.page_table_lock);
	}

	return paddr_last;
}

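/*
 * Worker for kernel_physical_mapping_init()/_change(): walk the direct-map
 * virtual range for [paddr_start, paddr_end), allocating any missing
 * page-table levels, and sync the kernel PGDs if a new top-level entry was
 * installed.  'init' selects the *_safe() population helpers used during
 * the initial construction of the mapping.
 */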
static unsigned long __meminit
__kernel_physical_mapping_init(unsigned long paddr_start,
			       unsigned long paddr_end,
			       unsigned long page_size_mask,
			       pgprot_t prot, bool init)
{
	bool pgd_changed = false;
	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;

	paddr_last = paddr_end;
	vaddr = (unsigned long)__va(paddr_start);
	vaddr_end = (unsigned long)__va(paddr_end);
	vaddr_start = vaddr;

	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
		pgd_t *pgd = pgd_offset_k(vaddr);
		p4d_t *p4d;

		vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;

		if (pgd_val(*pgd)) {
			p4d = (p4d_t *)pgd_page_vaddr(*pgd);
			paddr_last = phys_p4d_init(p4d, __pa(vaddr),
						   __pa(vaddr_end),
						   page_size_mask,
						   prot, init);
			continue;
		}

		p4d = alloc_low_page();
		paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
					   page_size_mask, prot, init);

		spin_lock(&init_mm.page_table_lock);
		if (pgtable_l5_enabled())
			pgd_populate_init(&init_mm, pgd, p4d, init);
		else
			p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
					  (pud_t *) p4d, init);

		spin_unlock(&init_mm.page_table_lock);
		pgd_changed = true;
	}

	if (pgd_changed)
		sync_global_pgds(vaddr_start, vaddr_end - 1);

	return paddr_last;
}


^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776)  * Create page table mapping for the physical memory for specific physical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777)  * addresses. Note that it can only be used to populate non-present entries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778)  * The virtual and physical addresses have to be aligned on PMD level
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779)  * down. It returns the last physical address mapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) unsigned long __meminit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) kernel_physical_mapping_init(unsigned long paddr_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 			     unsigned long paddr_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 			     unsigned long page_size_mask, pgprot_t prot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	return __kernel_physical_mapping_init(paddr_start, paddr_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 					      page_size_mask, prot, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791)  * This function is similar to kernel_physical_mapping_init() above with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792)  * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793)  * when updating the mapping. The caller is responsible to flush the TLBs after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794)  * the function returns.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) unsigned long __meminit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) kernel_physical_mapping_change(unsigned long paddr_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 			       unsigned long paddr_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 			       unsigned long page_size_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	return __kernel_physical_mapping_init(paddr_start, paddr_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 					      page_size_mask, PAGE_KERNEL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 					      false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) #ifndef CONFIG_NUMA
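/* With NUMA disabled, hand all of boot memory to node 0. */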
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) void __init initmem_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 	memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) void __init paging_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 	sparse_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	 * Clear the default memory state for node 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	 * Note: don't use nodes_clear() here; when NUMA support is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 	 *	 compiled in, that really clears the state and a later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 	 *	 node_set_state() will not set it back.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	node_clear_state(0, N_MEMORY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 	node_clear_state(0, N_NORMAL_MEMORY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	zone_sizes_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830)  * Memory hotplug specific functions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) #ifdef CONFIG_MEMORY_HOTPLUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834)  * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835)  * updating.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) static void update_end_of_memory_vars(u64 start, u64 size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 	unsigned long end_pfn = PFN_UP(start + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 	if (end_pfn > max_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 		max_pfn = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 		max_low_pfn = end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 
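/*
 * Hand hot-added page frames to the core hotplug code and, if the new
 * range extends past the current end of memory, advance max_pfn,
 * max_low_pfn and high_memory accordingly.
 */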
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	      struct mhp_params *params)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 	ret = __add_pages(nid, start_pfn, nr_pages, params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	WARN_ON_ONCE(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 	/* update max_pfn, max_low_pfn and high_memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 				  nr_pages << PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 
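/*
 * Arch hook for memory hotplug: create the direct mapping for the new
 * physical range, then add its pages to the given node.
 */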
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) int arch_add_memory(int nid, u64 start, u64 size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 		    struct mhp_params *params)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	unsigned long start_pfn = start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	unsigned long nr_pages = size >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	init_memory_mapping(start, start + size, params->pgprot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	return add_pages(nid, start_pfn, nr_pages, params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) #define PAGE_INUSE 0xFD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 
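/*
 * Free one page-table page of the given order.  Bootmem pages (marked
 * PageReserved) are released via put_page_bootmem() or free_reserved_page()
 * depending on their bootmem magic; normally allocated pages simply go
 * back to the page allocator.
 */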
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) static void __meminit free_pagetable(struct page *page, int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	unsigned long magic;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	unsigned int nr_pages = 1 << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	/* bootmem page has reserved flag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	if (PageReserved(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 		__ClearPageReserved(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		magic = (unsigned long)page->freelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 			while (nr_pages--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 				put_page_bootmem(page++);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 			while (nr_pages--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 				free_reserved_page(page++);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 		free_pages((unsigned long)page_address(page), order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 
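/*
 * Return a PMD-sized page either to the altmap reservation it came from
 * or, when there is no altmap, to free_pagetable().
 */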
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) static void __meminit free_hugepage_table(struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		struct vmem_altmap *altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	if (altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 		vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 		free_pagetable(page, get_order(PMD_SIZE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 
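/*
 * If every entry in this PTE page is already cleared, free the page and
 * clear the PMD entry that pointed to it.  free_pmd_table() and
 * free_pud_table() below do the same one and two levels up.
 */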
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	pte_t *pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	for (i = 0; i < PTRS_PER_PTE; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 		pte = pte_start + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 		if (!pte_none(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 	/* free a pte table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 	free_pagetable(pmd_page(*pmd), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	spin_lock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	pmd_clear(pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 	spin_unlock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 	pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 	for (i = 0; i < PTRS_PER_PMD; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 		pmd = pmd_start + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 		if (!pmd_none(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	/* free a pmd table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 	free_pagetable(pud_page(*pud), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	spin_lock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 	pud_clear(pud);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	spin_unlock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 	pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	for (i = 0; i < PTRS_PER_PUD; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 		pud = pud_start + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 		if (!pud_none(*pud))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 	/* free a pud table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 	free_pagetable(p4d_page(*p4d), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 	spin_lock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 	p4d_clear(p4d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 	spin_unlock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 
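/*
 * Unmap the PTE-mapped range [addr, end).  Direct-mapping pages are only
 * unmapped (their backing memory was dealt with at offline time); vmemmap
 * pages are freed, with partially used pages poisoned with PAGE_INUSE and
 * released once the whole page is poisoned.
 */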
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) static void __meminit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 		 bool direct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 	unsigned long next, pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	pte_t *pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 	void *page_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 	phys_addr_t phys_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	pte = pte_start + pte_index(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 	for (; addr < end; addr = next, pte++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 		next = (addr + PAGE_SIZE) & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 		if (next > end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 			next = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 		if (!pte_present(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 		 * We mapped [0,1G) memory as identity mapping when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 		 * initializing, in arch/x86/kernel/head_64.S. These
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 		 * pagetables cannot be removed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 		phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 		if (phys_addr < (phys_addr_t)0x40000000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 		if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 			 * Do not free direct mapping pages since they were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 			 * freed when offlining, or simply not in use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 			if (!direct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 				free_pagetable(pte_page(*pte), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 			spin_lock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 			pte_clear(&init_mm, addr, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 			spin_unlock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 			/* For non-direct mappings, the pages counter means nothing. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 			pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 			 * If we are here, we are freeing vmemmap pages since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 			 * direct mapped memory ranges to be freed are aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 			 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 			 * If we are not removing the whole page, it means
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 			 * other page structs in this page are being used and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 			 * we cannot remove them. So fill the unused page structs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 			 * with 0xFD, and remove the page when it is wholly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 			 * filled with 0xFD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 			memset((void *)addr, PAGE_INUSE, next - addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 			page_addr = page_address(pte_page(*pte));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 			if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 				free_pagetable(pte_page(*pte), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 				spin_lock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 				pte_clear(&init_mm, addr, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 				spin_unlock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	/* Call free_pte_table() in remove_pmd_table(). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 	flush_tlb_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	if (direct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 		update_page_count(PG_LEVEL_4K, -pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) static void __meminit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 		 bool direct, struct vmem_altmap *altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	unsigned long next, pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 	pte_t *pte_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 	pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 	void *page_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	pmd = pmd_start + pmd_index(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	for (; addr < end; addr = next, pmd++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 		next = pmd_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		if (!pmd_present(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 		if (pmd_large(*pmd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 			if (IS_ALIGNED(addr, PMD_SIZE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 			    IS_ALIGNED(next, PMD_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 				if (!direct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 					free_hugepage_table(pmd_page(*pmd),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 							    altmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 				spin_lock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 				pmd_clear(pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 				spin_unlock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 				pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 				/* If here, we are freeing vmemmap pages. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 				memset((void *)addr, PAGE_INUSE, next - addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 				page_addr = page_address(pmd_page(*pmd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 				if (!memchr_inv(page_addr, PAGE_INUSE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 						PMD_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 					free_hugepage_table(pmd_page(*pmd),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 							    altmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 					spin_lock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 					pmd_clear(pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 					spin_unlock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 		remove_pte_table(pte_base, addr, next, direct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 		free_pte_table(pte_base, pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 	/* Call free_pmd_table() in remove_pud_table(). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 	if (direct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 		update_page_count(PG_LEVEL_2M, -pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) static void __meminit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 		 struct vmem_altmap *altmap, bool direct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	unsigned long next, pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	pmd_t *pmd_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	void *page_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	pud = pud_start + pud_index(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	for (; addr < end; addr = next, pud++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 		next = pud_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 		if (!pud_present(*pud))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 		if (pud_large(*pud)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 			if (IS_ALIGNED(addr, PUD_SIZE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 			    IS_ALIGNED(next, PUD_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 				if (!direct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 					free_pagetable(pud_page(*pud),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 						       get_order(PUD_SIZE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 				spin_lock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 				pud_clear(pud);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 				spin_unlock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 				pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 				/* If here, we are freeing vmemmap pages. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 				memset((void *)addr, PAGE_INUSE, next - addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 				page_addr = page_address(pud_page(*pud));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 				if (!memchr_inv(page_addr, PAGE_INUSE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 						PUD_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 					free_pagetable(pud_page(*pud),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 						       get_order(PUD_SIZE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 					spin_lock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 					pud_clear(pud);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 					spin_unlock(&init_mm.page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 		pmd_base = pmd_offset(pud, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 		remove_pmd_table(pmd_base, addr, next, direct, altmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 		free_pmd_table(pmd_base, pud);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	if (direct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 		update_page_count(PG_LEVEL_1G, -pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) static void __meminit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 		 struct vmem_altmap *altmap, bool direct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	unsigned long next, pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	pud_t *pud_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	p4d = p4d_start + p4d_index(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 	for (; addr < end; addr = next, p4d++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 		next = p4d_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 		if (!p4d_present(*p4d))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 		BUILD_BUG_ON(p4d_large(*p4d));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 		pud_base = pud_offset(p4d, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 		remove_pud_table(pud_base, addr, next, altmap, direct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 		 * For 4-level page tables we do not want to free PUDs, but in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 		 * 5-level case we should free them. This code will have to change
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 		 * to adapt for boot-time switching between 4 and 5 level page tables.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 		if (pgtable_l5_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 			free_pud_table(pud_base, p4d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	if (direct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 		update_page_count(PG_LEVEL_512G, -pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) /* start and end are both virtual addresses. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) static void __meminit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) remove_pagetable(unsigned long start, unsigned long end, bool direct,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 		struct vmem_altmap *altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 	pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 	p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	for (addr = start; addr < end; addr = next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 		next = pgd_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 		pgd = pgd_offset_k(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 		if (!pgd_present(*pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 		p4d = p4d_offset(pgd, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		remove_p4d_table(p4d, addr, next, altmap, direct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	flush_tlb_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) void __ref vmemmap_free(unsigned long start, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 		struct vmem_altmap *altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	remove_pagetable(start, end, false, altmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 
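/*
 * Tear down the direct mapping of a physical range: convert the bounds to
 * virtual addresses and remove the page tables covering them.
 */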
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) static void __meminit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) kernel_physical_mapping_remove(unsigned long start, unsigned long end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	start = (unsigned long)__va(start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	end = (unsigned long)__va(end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	remove_pagetable(start, end, true, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) void __ref arch_remove_memory(int nid, u64 start, u64 size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 			      struct vmem_altmap *altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	unsigned long start_pfn = start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	unsigned long nr_pages = size >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 	__remove_pages(start_pfn, nr_pages, altmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	kernel_physical_mapping_remove(start, start + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) #endif /* CONFIG_MEMORY_HOTPLUG */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) static struct kcore_list kcore_vsyscall;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 
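/* With NUMA, register bootmem info for each online node. */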
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) static void __init register_page_bootmem_info(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	for_each_online_node(i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 		register_page_bootmem_info_node(NODE_DATA(i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)  * Pre-allocates page-table pages for the vmalloc area in the kernel page-table.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237)  * Only the level which needs to be synchronized between all page-tables is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238)  * allocated because the synchronization can be expensive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) static void __init preallocate_vmalloc_pages(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 	const char *lvl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 		pgd_t *pgd = pgd_offset_k(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 		p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 		pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 		lvl = "p4d";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 		p4d = p4d_alloc(&init_mm, pgd, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 		if (!p4d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 			goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 		if (pgtable_l5_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 		 * The goal here is to allocate all possibly required
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 		 * hardware page tables pointed to by the top hardware
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 		 * level.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 		 * On 4-level systems, the P4D layer is folded away and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 		 * the above code does no preallocation.  Below, go down
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 		 * to the pud _software_ level to ensure the second
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 		 * hardware level is allocated on 4-level systems too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 		lvl = "pud";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 		pud = pud_alloc(&init_mm, p4d, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 		if (!pud)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 			goto failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) failed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	 * The pages have to be there now or they will be missing in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	 * process page-tables later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) void __init mem_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	pci_iommu_alloc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 	/* clear_bss() already cleared the empty_zero_page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 	/* this will put all memory onto the freelists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 	memblock_free_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 	after_bootmem = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 	x86_init.hyper.init_after_bootmem();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	 * Must be done after boot memory is put on freelist, because here we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 	 * might set fields in deferred struct pages that have not yet been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 	 * initialized, and memblock_free_all() initializes all the reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 	 * deferred pages for us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 	register_page_bootmem_info();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 	/* Register memory areas for /proc/kcore */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 	if (get_gate_vma(&init_mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 		kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	preallocate_vmalloc_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 	mem_init_print_info(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 	 * More CPUs always led to greater speedups on tested systems, up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 	 * all the nodes' CPUs.  Use all since the system is otherwise idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 	 * now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 	return max_t(int, cpumask_weight(node_cpumask), 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) int kernel_set_to_readonly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) void mark_rodata_ro(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	unsigned long start = PFN_ALIGN(_text);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 	unsigned long rodata_start = PFN_ALIGN(__start_rodata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 	unsigned long end = (unsigned long)__end_rodata_hpage_align;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	unsigned long text_end = PFN_ALIGN(_etext);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	unsigned long rodata_end = PFN_ALIGN(__end_rodata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 	unsigned long all_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	       (end - start) >> 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	kernel_set_to_readonly = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 	 * The rodata/data/bss/brk section (but not the kernel text!)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	 * should also be non-executable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 	 * We align all_end to PMD_SIZE because the existing mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 	 * is a full PMD. If we aligned _brk_end to PAGE_SIZE instead, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 	 * would split the PMD and the remainder between _brk_end and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 	 * end of the PMD would remain mapped executable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	 * Any PMD which was setup after the one which covers _brk_end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 	 * has been zapped already via cleanup_highmem().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 	set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	set_ftrace_ops_ro();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) #ifdef CONFIG_CPA_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 	set_memory_rw(start, (end-start) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 	printk(KERN_INFO "Testing CPA: again\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	set_memory_ro(start, (end-start) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 	free_kernel_image_pages("unused kernel image (text/rodata gap)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 				(void *)text_end, (void *)rodata_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	free_kernel_image_pages("unused kernel image (rodata/data gap)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 				(void *)rodata_end, (void *)_sdata);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 	debug_checkwx();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 
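/*
 * Walk the kernel page tables for addr and report whether it is mapped to
 * a valid pfn, stopping at a huge PUD or PMD when one is found.
 */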
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) int kern_addr_valid(unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 	pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 	p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 	pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 	pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 	pte_t *pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 	if (above != 0 && above != -1UL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 	pgd = pgd_offset_k(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 	if (pgd_none(*pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 	p4d = p4d_offset(pgd, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 	if (!p4d_present(*p4d))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 	pud = pud_offset(p4d, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 	if (!pud_present(*pud))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 	if (pud_large(*pud))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 		return pfn_valid(pud_pfn(*pud));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 	pmd = pmd_offset(pud, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 	if (!pmd_present(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	if (pmd_large(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 		return pfn_valid(pmd_pfn(*pmd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	pte = pte_offset_kernel(pmd, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 	if (pte_none(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 	return pfn_valid(pte_pfn(*pte));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417)  * Block size is the minimum amount of memory which can be hotplugged or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418)  * hotremoved. It must be a power of two and must be equal to or larger
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)  * than MIN_MEMORY_BLOCK_SIZE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) #define MAX_BLOCK_SIZE (2UL << 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) /* Amount of RAM needed to start using large blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) #define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) /* Adjustable memory block size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) static unsigned long set_memory_block_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) int __init set_memory_block_size_order(unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 	unsigned long size = 1UL << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 	if (size > MEM_SIZE_FOR_LARGE_BLOCK || size < MIN_MEMORY_BLOCK_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 	set_memory_block_size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 
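/*
 * Pick the memory block size: an explicit override set via
 * set_memory_block_size_order() wins; systems with less RAM than
 * MEM_SIZE_FOR_LARGE_BLOCK use MIN_MEMORY_BLOCK_SIZE; bare metal uses
 * MAX_BLOCK_SIZE; otherwise take the largest block size that the end of
 * boot memory is aligned to.
 */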
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) static unsigned long probe_memory_block_size(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 	unsigned long boot_mem_end = max_pfn << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 	unsigned long bz;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 	/* If memory block size has been set, then use it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	bz = set_memory_block_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 	if (bz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 	/* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 	if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 		bz = MIN_MEMORY_BLOCK_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 	 * Use max block size to minimize overhead on bare metal, where
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 	 * alignment for memory hotplug isn't a concern.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 		bz = MAX_BLOCK_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 	/* Find the largest allowed block size that aligns to memory end */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 	for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 		if (IS_ALIGNED(boot_mem_end, bz))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 	pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 	return bz;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) static unsigned long memory_block_size_probed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) unsigned long memory_block_size_bytes(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 	if (!memory_block_size_probed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 		memory_block_size_probed = probe_memory_block_size();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 	return memory_block_size_probed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) #ifdef CONFIG_SPARSEMEM_VMEMMAP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486)  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) static long __meminitdata addr_start, addr_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) static void __meminitdata *p_start, *p_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) static int __meminitdata node_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 
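/*
 * Map the vmemmap range with 2M pages where possible, tracking contiguous
 * allocations in addr_start/p_start for the debug printout; ranges that
 * cannot get a 2M page fall back to vmemmap_populate_basepages().
 */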
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) static int __meminit vmemmap_populate_hugepages(unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 		unsigned long end, int node, struct vmem_altmap *altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 	unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 	unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 	pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 	p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 	pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 	for (addr = start; addr < end; addr = next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 		next = pmd_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 		pgd = vmemmap_pgd_populate(addr, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 		if (!pgd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 		p4d = vmemmap_p4d_populate(pgd, addr, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 		if (!p4d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 		pud = vmemmap_pud_populate(p4d, addr, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 		if (!pud)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 		pmd = pmd_offset(pud, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 		if (pmd_none(*pmd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 			void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 			if (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 				pte_t entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 				entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 						PAGE_KERNEL_LARGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 				set_pmd(pmd, __pmd(pte_val(entry)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 				/* check to see if we have contiguous blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 				if (p_end != p || node_start != node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 					if (p_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 						pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 						       addr_start, addr_end-1, p_start, p_end-1, node_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 					addr_start = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 					node_start = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 					p_start = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 				addr_end = addr + PMD_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 				p_end = p + PMD_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 			} else if (altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 				return -ENOMEM; /* no fallback */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 		} else if (pmd_large(*pmd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 			vmemmap_verify((pte_t *)pmd, node, addr, next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 		if (vmemmap_populate_basepages(addr, next, node, NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 
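/*
 * Populate the vmemmap for [start, end): use PMD huge pages when the CPU
 * has PSE and the range covers at least a full section's worth of struct
 * pages, otherwise fall back to base pages (altmap allocations are only
 * supported with PSE), then sync the kernel PGDs.
 */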
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 		struct vmem_altmap *altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 	if (end - start < PAGES_PER_SECTION * sizeof(struct page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 		err = vmemmap_populate_basepages(start, end, node, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 	else if (boot_cpu_has(X86_FEATURE_PSE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 		err = vmemmap_populate_hugepages(start, end, node, altmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 	else if (altmap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 		pr_err_once("%s: no cpu support for altmap allocations\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 				__func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 		err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 		err = vmemmap_populate_basepages(start, end, node, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 	if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 		sync_global_pgds(start, end - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) }
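/*
 * Illustrative sketch only: the helper name below is hypothetical and not
 * part of this file. It shows how a caller could turn a pfn range into the
 * virtual vmemmap range that vmemmap_populate() expects, mirroring the kind
 * of start/end computation done by the generic sparse-vmemmap code.
 */
static int __meminit example_populate_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
	unsigned long start = (unsigned long)pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);

	/* Hand the [start, end) slice of the vmemmap to the arch hook. */
	return vmemmap_populate(start, end, nid, altmap);
}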
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) void register_page_bootmem_memmap(unsigned long section_nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 				  struct page *start_page, unsigned long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 	unsigned long addr = (unsigned long)start_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 	unsigned long end = (unsigned long)(start_page + nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 	unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 	pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 	p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 	pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 	pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	unsigned int nr_pmd_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 	for (; addr < end; addr = next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 		pte_t *pte = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 		pgd = pgd_offset_k(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 		if (pgd_none(*pgd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 			next = (addr + PAGE_SIZE) & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 		get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 		p4d = p4d_offset(pgd, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 		if (p4d_none(*p4d)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 			next = (addr + PAGE_SIZE) & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 		get_page_bootmem(section_nr, p4d_page(*p4d), MIX_SECTION_INFO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 		pud = pud_offset(p4d, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 		if (pud_none(*pud)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 			next = (addr + PAGE_SIZE) & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 		get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 		if (!boot_cpu_has(X86_FEATURE_PSE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 			next = (addr + PAGE_SIZE) & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 			pmd = pmd_offset(pud, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 			if (pmd_none(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 			get_page_bootmem(section_nr, pmd_page(*pmd),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 					 MIX_SECTION_INFO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 			pte = pte_offset_kernel(pmd, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 			if (pte_none(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 			get_page_bootmem(section_nr, pte_page(*pte),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 					 SECTION_INFO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 			next = pmd_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 			pmd = pmd_offset(pud, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 			if (pmd_none(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 
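			/*
			 * The vmemmap here is PMD-mapped: one 2 MiB block is
			 * backed by 1 << get_order(PMD_SIZE) base pages
			 * (512 on x86-64 with 4 KiB pages), and each of them
			 * is registered below.
			 */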
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 			nr_pmd_pages = 1 << get_order(PMD_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 			page = pmd_page(*pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 			while (nr_pmd_pages--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 				get_page_bootmem(section_nr, page++,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 						 SECTION_INFO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) void __meminit vmemmap_populate_print_last(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 	if (p_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 		pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 			addr_start, addr_end-1, p_start, p_end-1, node_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 		p_start = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 		p_end = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 		node_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) #endif