^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 1995 Linus Torvalds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/ptrace.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/mman.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/hugetlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/swap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/smp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/highmem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <linux/pci.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <linux/pfn.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <linux/poison.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include <linux/memblock.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include <linux/proc_fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <linux/memory_hotplug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include <linux/initrd.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #include <linux/cpumask.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #include <linux/gfp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #include <asm/asm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #include <asm/bios_ebda.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #include <asm/processor.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #include <linux/uaccess.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #include <asm/dma.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #include <asm/fixmap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #include <asm/e820/api.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #include <asm/apic.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #include <asm/bugs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #include <asm/tlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #include <asm/tlbflush.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) #include <asm/olpc_ofw.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) #include <asm/pgalloc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #include <asm/sections.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #include <asm/paravirt.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) #include <asm/setup.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #include <asm/set_memory.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) #include <asm/page_types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) #include <asm/cpu_entry_area.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) #include <asm/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) #include <asm/pgtable_areas.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) #include <asm/numa.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) #include "mm_internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) unsigned long highstart_pfn, highend_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) bool __read_mostly __vmalloc_start_set = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) * Creates a middle page table and puts a pointer to it in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * given global directory entry. This only returns the gd entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * in non-PAE compilation mode, since the middle layer is folded.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) static pmd_t * __init one_md_table_init(pgd_t *pgd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) pmd_t *pmd_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) #ifdef CONFIG_X86_PAE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) pmd_table = (pmd_t *)alloc_low_page();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) p4d = p4d_offset(pgd, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) pud = pud_offset(p4d, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) BUG_ON(pmd_table != pmd_offset(pud, 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) return pmd_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) p4d = p4d_offset(pgd, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) pud = pud_offset(p4d, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) pmd_table = pmd_offset(pud, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) return pmd_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) * Create a page table and place a pointer to it in a middle page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) * directory entry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) static pte_t * __init one_page_table_init(pmd_t *pmd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) pte_t *page_table = (pte_t *)alloc_low_page();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) BUG_ON(page_table != pte_offset_kernel(pmd, 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) return pte_offset_kernel(pmd, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) pmd_t * __init populate_extra_pmd(unsigned long vaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) int pgd_idx = pgd_index(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) int pmd_idx = pmd_index(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) pte_t * __init populate_extra_pte(unsigned long vaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) int pte_idx = pte_index(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) pmd = populate_extra_pmd(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) return one_page_table_init(pmd) + pte_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) static unsigned long __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) page_table_range_init_count(unsigned long start, unsigned long end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) unsigned long count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) int pgd_idx, pmd_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) unsigned long vaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) if (pmd_idx_kmap_begin == pmd_idx_kmap_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) vaddr = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) pgd_idx = pgd_index(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) pmd_idx = pmd_index(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) pmd_idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) vaddr += PMD_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) pmd_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) unsigned long vaddr, pte_t *lastpte,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) void **adr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * Something (early fixmap) may already have put a pte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) * page here, which causes the page table allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) * to become nonlinear. Attempt to fix it, and if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) * is still nonlinear then we have to bug.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) if (pmd_idx_kmap_begin != pmd_idx_kmap_end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) pte_t *newpte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) BUG_ON(after_bootmem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) newpte = *adr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) for (i = 0; i < PTRS_PER_PTE; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) set_pte(newpte + i, pte[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) BUG_ON(newpte != pte_offset_kernel(pmd, 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) __flush_tlb_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) pte = newpte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) && vaddr > fix_to_virt(FIX_KMAP_END)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) && lastpte && lastpte + PTRS_PER_PTE != pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) return pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) * This function initializes a certain range of kernel virtual memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) * with new bootmem page tables, everywhere page tables are missing in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) * the given range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) * NOTE: The pagetables are allocated contiguous on the physical space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) * so we can cache the place of the first one and move around without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) * checking the pgd every time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) static void __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) int pgd_idx, pmd_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) unsigned long vaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) pte_t *pte = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) unsigned long count = page_table_range_init_count(start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) void *adr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) if (count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) adr = alloc_low_pages(count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) vaddr = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) pgd_idx = pgd_index(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) pmd_idx = pmd_index(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) pgd = pgd_base + pgd_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) pmd = one_md_table_init(pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) pmd = pmd + pmd_index(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) pmd++, pmd_idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) pte = page_table_kmap_check(one_page_table_init(pmd),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) pmd, vaddr, pte, &adr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) vaddr += PMD_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) pmd_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) * The <linux/kallsyms.h> already defines is_kernel_text,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) * using '__' prefix not to get in conflict.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) static inline int __is_kernel_text(unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) * This maps the physical memory to kernel virtual address space, a total
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) * of max_low_pfn pages, by creating page tables starting from address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) * PAGE_OFFSET:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) unsigned long __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) kernel_physical_mapping_init(unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) unsigned long page_size_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) pgprot_t prot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) unsigned long last_map_addr = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) unsigned long start_pfn, end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) pgd_t *pgd_base = swapper_pg_dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) int pgd_idx, pmd_idx, pte_ofs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) unsigned long pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) pte_t *pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) unsigned pages_2m, pages_4k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) int mapping_iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) start_pfn = start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) end_pfn = end >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) * First iteration will setup identity mapping using large/small pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) * based on use_pse, with other attributes same as set by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) * the early code in head_32.S
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) * Second iteration will setup the appropriate attributes (NX, GLOBAL..)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) * as desired for the kernel identity mapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) * This two pass mechanism conforms to the TLB app note which says:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) * "Software should not write to a paging-structure entry in a way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) * that would change, for any linear address, both the page size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) * and either the page frame or attributes."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) mapping_iter = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) if (!boot_cpu_has(X86_FEATURE_PSE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) use_pse = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) pages_2m = pages_4k = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) pfn = start_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) pgd = pgd_base + pgd_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) pmd = one_md_table_init(pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) if (pfn >= end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) #ifdef CONFIG_X86_PAE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) pmd += pmd_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) pmd_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) pmd++, pmd_idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) * Map with big pages if possible, otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) * create normal page tables:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) if (use_pse) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) unsigned int addr2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) pgprot_t prot = PAGE_KERNEL_LARGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) * first pass will use the same initial
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) * identity mapping attribute + _PAGE_PSE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) pgprot_t init_prot =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) __pgprot(PTE_IDENT_ATTR |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) _PAGE_PSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) pfn &= PMD_MASK >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) PAGE_OFFSET + PAGE_SIZE-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) if (__is_kernel_text(addr) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) __is_kernel_text(addr2))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) prot = PAGE_KERNEL_LARGE_EXEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) pages_2m++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) if (mapping_iter == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) set_pmd(pmd, pfn_pmd(pfn, init_prot));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) set_pmd(pmd, pfn_pmd(pfn, prot));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) pfn += PTRS_PER_PTE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) pte = one_page_table_init(pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) pte += pte_ofs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) pgprot_t prot = PAGE_KERNEL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) * first pass will use the same initial
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) * identity mapping attribute.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) if (__is_kernel_text(addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) prot = PAGE_KERNEL_EXEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) pages_4k++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) if (mapping_iter == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) set_pte(pte, pfn_pte(pfn, init_prot));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) set_pte(pte, pfn_pte(pfn, prot));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) if (mapping_iter == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) * update direct mapping page count only in the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) * iteration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) update_page_count(PG_LEVEL_2M, pages_2m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) update_page_count(PG_LEVEL_4K, pages_4k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) * local global flush tlb, which will flush the previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) * mappings present in both small and large page TLB's.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) __flush_tlb_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) * Second iteration will set the actual desired PTE attributes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) mapping_iter = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) return last_map_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) pte_t *kmap_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) static void __init kmap_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) unsigned long kmap_vstart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) * Cache the first kmap pte:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) kmap_pte = virt_to_kpte(kmap_vstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) static void __init permanent_kmaps_init(pgd_t *pgd_base)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) unsigned long vaddr = PKMAP_BASE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) pkmap_page_table = virt_to_kpte(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) void __init add_highpages_with_active_regions(int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) unsigned long start_pfn, unsigned long end_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) phys_addr_t start, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) u64 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) start_pfn, end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) start_pfn, end_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) for ( ; pfn < e_pfn; pfn++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) if (pfn_valid(pfn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) free_highmem_page(pfn_to_page(pfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) static inline void permanent_kmaps_init(pgd_t *pgd_base)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) #endif /* CONFIG_HIGHMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) void __init sync_initial_page_table(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) swapper_pg_dir + KERNEL_PGD_BOUNDARY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) KERNEL_PGD_PTRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) * sync back low identity map too. It is used for example
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) * in the 32-bit EFI stub.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) clone_pgd_range(initial_page_table,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) swapper_pg_dir + KERNEL_PGD_BOUNDARY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) void __init native_pagetable_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) unsigned long pfn, va;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) pgd_t *pgd, *base = swapper_pg_dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) pte_t *pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) * Remove any mappings which extend past the end of physical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) * memory from the boot time page table.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) * In virtual address space, we should have at least two pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) * from VMALLOC_END to pkmap or fixmap according to VMALLOC_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) * definition. And max_low_pfn is set to VMALLOC_END physical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) * address. If initial memory mapping is doing right job, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) * should have pte used near max_low_pfn or one pmd is not present.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) pgd = base + pgd_index(va);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) if (!pgd_present(*pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) p4d = p4d_offset(pgd, va);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) pud = pud_offset(p4d, va);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) pmd = pmd_offset(pud, va);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) if (!pmd_present(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) /* should not be large page here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) if (pmd_large(*pmd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) pfn, pmd, __pa(pmd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) BUG_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) pte = pte_offset_kernel(pmd, va);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) if (!pte_present(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) printk(KERN_DEBUG "clearing pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx pte: %p pte phys: %lx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) pfn, pmd, __pa(pmd), pte, __pa(pte));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) pte_clear(NULL, va, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) paging_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) * Build a proper pagetable for the kernel mappings. Up until this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) * point, we've been running on some set of pagetables constructed by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) * the boot process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) * If we're booting on native hardware, this will be a pagetable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) * constructed in arch/x86/kernel/head_32.S. The root of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) * pagetable will be swapper_pg_dir.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) * If we're booting paravirtualized under a hypervisor, then there are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) * more options: we may already be running PAE, and the pagetable may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) * or may not be based in swapper_pg_dir. In any case,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) * paravirt_pagetable_init() will set up swapper_pg_dir
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) * appropriately for the rest of the initialization to work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) * In general, pagetable_init() assumes that the pagetable may already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) * be partially populated, and so it avoids stomping on any existing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) * mappings.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) void __init early_ioremap_page_table_range_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) pgd_t *pgd_base = swapper_pg_dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) unsigned long vaddr, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) * Fixed mappings, only the page table structure has to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) * created - mappings will be set by set_fixmap():
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) page_table_range_init(vaddr, end, pgd_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) early_ioremap_reset();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) static void __init pagetable_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) pgd_t *pgd_base = swapper_pg_dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) permanent_kmaps_init(pgd_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) #define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) /* Bits supported by the hardware: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) /* Bits allowed in normal kernel mappings: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) EXPORT_SYMBOL_GPL(__supported_pte_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) EXPORT_SYMBOL(__default_kernel_pte_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) /* user-defined highmem size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) static unsigned int highmem_pages = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) * highmem=size forces highmem to be exactly 'size' bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) * This works even on boxes that have no highmem otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) * This also works to reduce highmem size on bigger boxes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) static int __init parse_highmem(char *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) if (!arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) early_param("highmem", parse_highmem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) #define MSG_HIGHMEM_TOO_BIG \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) "highmem size (%luMB) is bigger than pages available (%luMB)!\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) #define MSG_LOWMEM_TOO_SMALL \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) * All of RAM fits into lowmem - but if user wants highmem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) * artificially via the highmem=x boot parameter then create
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) * it:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) static void __init lowmem_pfn_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) /* max_low_pfn is 0, we already have early_res support */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) max_low_pfn = max_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) if (highmem_pages == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) highmem_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) if (highmem_pages >= max_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) printk(KERN_ERR MSG_HIGHMEM_TOO_BIG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) highmem_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) if (highmem_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) printk(KERN_ERR MSG_LOWMEM_TOO_SMALL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) pages_to_mb(highmem_pages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) highmem_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) max_low_pfn -= highmem_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) if (highmem_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) #define MSG_HIGHMEM_TOO_SMALL \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) "only %luMB highmem pages available, ignoring highmem size of %luMB!\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) #define MSG_HIGHMEM_TRIMMED \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) * We have more RAM than fits into lowmem - we try to put it into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) * highmem, also taking the highmem=x boot parameter into account:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) static void __init highmem_pfn_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) max_low_pfn = MAXMEM_PFN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) if (highmem_pages == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) highmem_pages = max_pfn - MAXMEM_PFN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) if (highmem_pages + MAXMEM_PFN < max_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) max_pfn = MAXMEM_PFN + highmem_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) if (highmem_pages + MAXMEM_PFN > max_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) pages_to_mb(max_pfn - MAXMEM_PFN),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) pages_to_mb(highmem_pages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) highmem_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) #ifndef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) /* Maximum memory usable is what is directly addressable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) if (max_pfn > MAX_NONPAE_PFN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) max_pfn = MAXMEM_PFN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) #else /* !CONFIG_HIGHMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) #ifndef CONFIG_HIGHMEM64G
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) if (max_pfn > MAX_NONPAE_PFN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) max_pfn = MAX_NONPAE_PFN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) #endif /* !CONFIG_HIGHMEM64G */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) #endif /* !CONFIG_HIGHMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) * Determine low and high memory ranges:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) void __init find_low_pfn_range(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) /* it could update max_pfn */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) if (max_pfn <= MAXMEM_PFN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) lowmem_pfn_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) highmem_pfn_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) #ifndef CONFIG_NEED_MULTIPLE_NODES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) void __init initmem_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) highstart_pfn = highend_pfn = max_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) if (max_pfn > max_low_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) highstart_pfn = max_low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) pages_to_mb(highend_pfn - highstart_pfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) #ifdef CONFIG_FLATMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) __vmalloc_start_set = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) pages_to_mb(max_low_pfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) setup_bootmem_allocator();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) #endif /* !CONFIG_NEED_MULTIPLE_NODES */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) void __init setup_bootmem_allocator(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) max_pfn_mapped<<PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) * paging_init() sets up the page tables - note that the first 8MB are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) * already mapped by head.S.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) * This routines also unmaps the page at virtual kernel address 0, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) * that we can trap those pesky NULL-reference errors in the kernel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) void __init paging_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) pagetable_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) __flush_tlb_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) kmap_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) * NOTE: at this point the bootmem allocator is fully available.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) olpc_dt_build_devicetree();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) sparse_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) zone_sizes_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) * Test if the WP bit works in supervisor mode. It isn't supported on 386's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) * and also on some strange 486's. All 586+'s are OK. This used to involve
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) * black magic jumps to work around some nasty CPU bugs, but fortunately the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) * switch to using exceptions got rid of all that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) static void __init test_wp_bit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) char z = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) printk(KERN_INFO "Checking if this processor honours the WP bit even in supervisor mode...");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) __set_fixmap(FIX_WP_TEST, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) if (copy_to_kernel_nofault((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) clear_fixmap(FIX_WP_TEST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) printk(KERN_CONT "Ok.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) printk(KERN_CONT "No.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) panic("Linux doesn't support CPUs with broken WP.");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) void __init mem_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) pci_iommu_alloc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) #ifdef CONFIG_FLATMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) BUG_ON(!mem_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) * be done before memblock_free_all(). Memblock use free low memory for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) * temporary data (see find_range_array()) and for this purpose can use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) * pages that was already passed to the buddy allocator, hence marked as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) * not accessible in the page tables when compiled with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) * important here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) set_highmem_pages_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) /* this will put all low memory onto the freelists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) memblock_free_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) after_bootmem = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) x86_init.hyper.init_after_bootmem();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) mem_init_print_info(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) * Check boundaries twice: Some fundamental inconsistencies can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) * be detected at build time already.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) #define __FIXADDR_TOP (-PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) #define high_memory (-128UL << 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) #undef high_memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) #undef __FIXADDR_TOP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) #ifdef CONFIG_HIGHMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) BUG_ON(VMALLOC_END > PKMAP_BASE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) BUG_ON(VMALLOC_START >= VMALLOC_END);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) BUG_ON((unsigned long)high_memory > VMALLOC_START);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) test_wp_bit();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) #ifdef CONFIG_MEMORY_HOTPLUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) int arch_add_memory(int nid, u64 start, u64 size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) struct mhp_params *params)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) unsigned long start_pfn = start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) unsigned long nr_pages = size >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) * The page tables were already mapped at boot so if the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) * requests a different mapping type then we must change all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) * pages with __set_memory_prot().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) if (params->pgprot.pgprot != PAGE_KERNEL.pgprot) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) ret = __set_memory_prot(start, nr_pages, params->pgprot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) return __add_pages(nid, start_pfn, nr_pages, params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) void arch_remove_memory(int nid, u64 start, u64 size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) struct vmem_altmap *altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) unsigned long start_pfn = start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) unsigned long nr_pages = size >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) __remove_pages(start_pfn, nr_pages, altmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) int kernel_set_to_readonly __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) static void mark_nxdata_nx(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) * When this called, init has already been executed and released,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) * so everything past _etext should be NX.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) unsigned long start = PFN_ALIGN(_etext);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) * This comes from __is_kernel_text upper limit. Also HPAGE where used:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) if (__supported_pte_mask & _PAGE_NX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) set_memory_nx(start, size >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) void mark_rodata_ro(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) unsigned long start = PFN_ALIGN(_text);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) unsigned long size = (unsigned long)__end_rodata - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) pr_info("Write protecting kernel text and read-only data: %luk\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) size >> 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) kernel_set_to_readonly = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) #ifdef CONFIG_CPA_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) pr_info("Testing CPA: Reverting %lx-%lx\n", start, start + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) pr_info("Testing CPA: write protecting again\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) mark_nxdata_nx();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) if (__supported_pte_mask & _PAGE_NX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) debug_checkwx();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) }