^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * prepare to run common code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7)
/*
 * Configuration macros that must be defined ahead of the #include block
 * below (presumably consumed by the headers; nothing in the visible part
 * of this file tests them directly).
 */
#define DISABLE_BRANCH_PROFILING

/* cpu_feature_enabled() cannot be used this early */
#define USE_EARLY_PGTABLE_L5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/percpu.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/start_kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/io.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/memblock.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/mem_encrypt.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/pgtable.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <asm/processor.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <asm/proto.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include <asm/smp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include <asm/setup.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <asm/desc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include <asm/tlbflush.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #include <asm/sections.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #include <asm/kdebug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #include <asm/e820/api.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #include <asm/bios_ebda.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #include <asm/bootparam_utils.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #include <asm/microcode.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #include <asm/kasan.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #include <asm/fixmap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #include <asm/realmode.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #include <asm/desc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #include <asm/extable.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #include <asm/trapnr.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #include <asm/sev-es.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * Manage page tables very early on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) static unsigned int __initdata next_early_pgt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
#if defined(CONFIG_X86_5LEVEL)
/*
 * Paging geometry. The initial values below are the ones that stay in
 * effect when CR4.LA57 is clear; check_la57_support() overwrites them
 * with 1, 48 and 512 respectively when LA57 is found enabled.
 */
unsigned int __pgtable_l5_enabled __ro_after_init;

unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);

unsigned int ptrs_per_p4d __ro_after_init = 1;
EXPORT_SYMBOL(ptrs_per_p4d);
#endif /* CONFIG_X86_5LEVEL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
#if defined(CONFIG_DYNAMIC_MEMORY_LAYOUT)
/*
 * Bases of the direct map, vmalloc and vmemmap regions. They start out
 * with the *_L4 constants; check_la57_support() replaces them with the
 * *_L5 constants when CR4.LA57 is set.
 */
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);

unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
EXPORT_SYMBOL(vmalloc_base);

unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
EXPORT_SYMBOL(vmemmap_base);
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) * GDT used on the boot CPU before switching to virtual addresses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) static struct desc_struct startup_gdt[GDT_ENTRIES] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * Address needs to be set at runtime because it references the startup_gdt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) * while the kernel still uses a direct mapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) static struct desc_ptr startup_gdt_descr = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) .size = sizeof(startup_gdt),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) .address = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86)
/* Place the annotated function in the ".head.text" section. */
#define __head	__section(".head.text")
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) return ptr - (void *)_text + (void *)physaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) return fixup_pointer(ptr, physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) #ifdef CONFIG_X86_5LEVEL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) return fixup_pointer(ptr, physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) static bool __head check_la57_support(unsigned long physaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) * 5-level paging is detected and enabled at kernel decomression
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) * stage. Only check if it has been enabled there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) if (!(native_read_cr4() & X86_CR4_LA57))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) *fixup_int(&__pgtable_l5_enabled, physaddr) = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) *fixup_int(&pgdir_shift, physaddr) = 48;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) *fixup_int(&ptrs_per_p4d, physaddr) = 512;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) static bool __head check_la57_support(unsigned long physaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) /* Code in __startup_64() can be relocated during execution, but the compiler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) * doesn't have to generate PC-relative relocations when accessing globals from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) * that function. Clang actually does not generate them, which leads to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) * boot-time crashes. To work around this problem, every global pointer must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) * be adjusted using fixup_pointer().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) unsigned long __head __startup_64(unsigned long physaddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) struct boot_params *bp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) unsigned long vaddr, vaddr_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) unsigned long load_delta, *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) unsigned long pgtable_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) pgdval_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) p4dval_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) pudval_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) pmdval_t *pmd, pmd_entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) pteval_t *mask_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) bool la57;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) unsigned int *next_pgt_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) la57 = check_la57_support(physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) /* Is the address too large? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) if (physaddr >> MAX_PHYSMEM_BITS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) for (;;);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) * Compute the delta between the address I am compiled to run at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) * and the address I am actually running at.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) /* Is the address not 2M aligned? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) if (load_delta & ~PMD_PAGE_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) for (;;);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) /* Activate Secure Memory Encryption (SME) if supported and enabled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) sme_enable(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) /* Include the SME encryption mask in the fixup value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) load_delta += sme_get_me_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) /* Fixup the physical addresses in the page table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) pgd = fixup_pointer(&early_top_pgt, physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) p = pgd + pgd_index(__START_KERNEL_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) if (la57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) *p = (unsigned long)level4_kernel_pgt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) *p = (unsigned long)level3_kernel_pgt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) *p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) if (la57) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) p4d[511] += load_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) pud = fixup_pointer(&level3_kernel_pgt, physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) pud[510] += load_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) pud[511] += load_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) pmd[i] += load_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) * Set up the identity mapping for the switchover. These
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) * entries should *NOT* have the global bit set! This also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) * creates a bunch of nonsense entries but that is fine --
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) * it avoids problems around wraparound.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) if (la57) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) i = physaddr >> P4D_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) i = physaddr >> PUD_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) /* Filter out unsupported __PAGE_KERNEL_* bits: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) pmd_entry &= *mask_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) pmd_entry += sme_get_me_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) pmd_entry += physaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) int idx = i + (physaddr >> PMD_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) * Fixup the kernel text+data virtual addresses. Note that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) * we might write invalid pmds, when the kernel is relocated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) * cleanup_highmap() fixes this up along with the mappings
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) * beyond _end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) * Only the region occupied by the kernel image has so far
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) * been checked against the table of usable memory regions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) * provided by the firmware, so invalidate pages outside that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) * region. A page table entry that maps to a reserved area of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) * memory would allow processor speculation into that area,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) * and on some hardware (particularly the UV platform) even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) * speculative access to some reserved areas is caught as an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) * error, causing the BIOS to halt the system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) pmd = fixup_pointer(level2_kernel_pgt, physaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) /* invalidate pages before the kernel image */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) for (i = 0; i < pmd_index((unsigned long)_text); i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) pmd[i] &= ~_PAGE_PRESENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) /* fixup pages that are part of the kernel image */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) for (; i <= pmd_index((unsigned long)_end); i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) if (pmd[i] & _PAGE_PRESENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) pmd[i] += load_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) /* invalidate pages after the kernel image */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) for (; i < PTRS_PER_PMD; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) pmd[i] &= ~_PAGE_PRESENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) * Fixup phys_base - remove the memory encryption mask to obtain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) * the true physical address.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) /* Encrypt the kernel and related (if SME is active) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) sme_encrypt_kernel(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) * Clear the memory encryption mask from the .bss..decrypted section.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) * The bss section will be memset to zero later in the initialization so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) * there is no need to zero it after changing the memory encryption
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) * attribute.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) if (mem_encrypt_active()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) vaddr = (unsigned long)__start_bss_decrypted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) vaddr_end = (unsigned long)__end_bss_decrypted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) i = pmd_index(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) pmd[i] -= sme_get_me_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) * Return the SME encryption mask (if SME is active) to be used as a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) * modifier for the initial pgdir entry programmed into CR3.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) return sme_get_me_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304)
/*
 * Returns the SME encryption mask (if SME is active) to be used as a
 * modifier for the initial pgdir entry programmed into CR3.
 */
unsigned long __startup_secondary_64(void)
{
	unsigned long cr3_modifier = sme_get_me_mask();

	return cr3_modifier;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) /* Wipe all early page tables except for the kernel symbol map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) static void __init reset_early_page_tables(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) next_early_pgt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) write_cr3(__sme_pa_nodebug(early_top_pgt));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) /* Create a new PMD entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) unsigned long physaddr = address - __PAGE_OFFSET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) pgdval_t pgd, *pgd_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) p4dval_t p4d, *p4d_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) pudval_t pud, *pud_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) pmdval_t *pmd_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) /* Invalid address or early pgt is done ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) pgd_p = &early_top_pgt[pgd_index(address)].pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) pgd = *pgd_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) * critical -- __PAGE_OFFSET would point us back into the dynamic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) * range and we might end up looping forever...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) if (!pgtable_l5_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) p4d_p = pgd_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) else if (pgd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) reset_early_page_tables();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) p4d_p = (p4dval_t *)early_dynamic_pgts[next_early_pgt++];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) *pgd_p = (pgdval_t)p4d_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) p4d_p += p4d_index(address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) p4d = *p4d_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) if (p4d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) pud_p = (pudval_t *)((p4d & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) reset_early_page_tables();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) *p4d_p = (p4dval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) pud_p += pud_index(address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) pud = *pud_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) if (pud)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) reset_early_page_tables();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) pmd_p[pmd_index(address)] = pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) static bool __init early_make_pgtable(unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) unsigned long physaddr = address - __PAGE_OFFSET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) pmdval_t pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) pmd = (physaddr & PMD_MASK) + early_pmd_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) return __early_make_pgtable(address, pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) void __init do_early_exception(struct pt_regs *regs, int trapnr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) if (trapnr == X86_TRAP_PF &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) early_make_pgtable(native_read_cr2()))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) trapnr == X86_TRAP_VC && handle_vc_boot_ghcb(regs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) early_fixup_exception(regs, trapnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) /* Don't add a printk in there. printk relies on the PDA which is not initialized
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) yet. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) static void __init clear_bss(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) memset(__bss_start, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) (unsigned long) __bss_stop - (unsigned long) __bss_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) static unsigned long get_cmd_line_ptr(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) return cmd_line_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) static void __init copy_bootdata(char *real_mode_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) char * command_line;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) unsigned long cmd_line_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) * If SME is active, this will create decrypted mappings of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) * boot data in advance of the copy operations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) sme_map_bootdata(real_mode_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) memcpy(&boot_params, real_mode_data, sizeof(boot_params));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) sanitize_boot_params(&boot_params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) cmd_line_ptr = get_cmd_line_ptr();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) if (cmd_line_ptr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) command_line = __va(cmd_line_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) * The old boot data is no longer needed and won't be reserved,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) * freeing up that memory for use by the system. If SME is active,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) * we need to remove the mappings that were created so that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) * memory doesn't remain mapped as decrypted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) sme_unmap_bootdata(real_mode_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) * Build-time sanity checks on the kernel image and module
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) * area mappings. (these are purely build-time and produce no code)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) BUILD_BUG_ON(MODULES_VADDR < __START_KERNEL_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) BUILD_BUG_ON(MODULES_VADDR - __START_KERNEL_map < KERNEL_IMAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) (__START_KERNEL & PGDIR_MASK)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) cr4_init_shadow();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) /* Kill off the identity-map trampoline */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) reset_early_page_tables();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) clear_bss();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) clear_page(init_top_pgt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) * SME support may update early_pmd_flags to include the memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) * encryption mask, so it needs to be called before anything
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) * that may generate a page fault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) sme_early_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) kasan_early_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) idt_setup_early_handler();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) copy_bootdata(__va(real_mode_data));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) * Load microcode early on BSP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) load_ucode_bsp();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) /* set init_top_pgt kernel high mapping*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) init_top_pgt[511] = early_top_pgt[511];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) x86_64_start_reservations(real_mode_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) void __init x86_64_start_reservations(char *real_mode_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) /* version is always not zero if it is copied */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) if (!boot_params.hdr.version)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) copy_bootdata(__va(real_mode_data));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) x86_early_init_platform_quirks();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) switch (boot_params.hdr.hardware_subarch) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) case X86_SUBARCH_INTEL_MID:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) x86_intel_mid_early_setup();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) start_kernel();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) * Data structures and code used for IDT setup in head_64.S. The bringup-IDT is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) * used until the idt_table takes over. On the boot CPU this happens in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) * x86_64_start_kernel(), on secondary CPUs in start_secondary(). In both cases
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) * this happens in the functions called from head_64.S.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) * The idt_table can't be used that early because all the code modifying it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) * in idt.c and can be instrumented by tracing or KASAN, which both don't work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) * during early CPU bringup. Also the idt_table has the runtime vectors
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) * configured which require certain CPU state to be setup already (like TSS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) * which also hasn't happened yet in early CPU bringup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) static struct desc_ptr bringup_idt_descr = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) .size = (NUM_EXCEPTION_VECTORS * sizeof(gate_desc)) - 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) .address = 0, /* Set at runtime */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) #ifdef CONFIG_AMD_MEM_ENCRYPT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) struct idt_data data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) gate_desc desc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) init_idt_data(&data, n, handler);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) idt_init_desc(&desc, &data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) native_write_idt_entry(idt, n, &desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) /* This runs while still in the direct mapping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) static void startup_64_load_idt(unsigned long physbase)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) gate_desc *idt = fixup_pointer(bringup_idt_table, physbase);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) void *handler;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) /* VMM Communication Exception */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) handler = fixup_pointer(vc_no_ghcb, physbase);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) set_bringup_idt_handler(idt, X86_TRAP_VC, handler);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) desc->address = (unsigned long)idt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) native_load_idt(desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) /* This is used when running on kernel addresses */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) void early_setup_idt(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) /* VMM Communication Exception */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) set_bringup_idt_handler(bringup_idt_table, X86_TRAP_VC, vc_boot_ghcb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) bringup_idt_descr.address = (unsigned long)bringup_idt_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) native_load_idt(&bringup_idt_descr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) * Setup boot CPU state needed before kernel switches to virtual addresses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) void __head startup_64_setup_env(unsigned long physbase)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) /* Load GDT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) startup_gdt_descr.address = (unsigned long)fixup_pointer(startup_gdt, physbase);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) native_load_gdt(&startup_gdt_descr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) /* New GDT is live - reload data segment registers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) asm volatile("movl %%eax, %%ds\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) "movl %%eax, %%ss\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) startup_64_load_idt(physbase);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) }