^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * This file implements KASLR memory randomization for x86_64. It randomizes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * the virtual address space of kernel memory regions (physical memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * mapping, vmalloc & vmemmap) for x86_64. This security feature mitigates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * exploits relying on predictable kernel addresses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Entropy is generated using the KASLR early boot functions now shared in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * the lib directory (originally written by Kees Cook). Randomization is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * done on PGD & P4D/PUD page table levels to increase possible addresses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * The physical memory mapping code was adapted to support P4D/PUD level
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * virtual addresses. This implementation on the best configuration provides
 * 30,000 possible virtual addresses on average for each memory region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * An additional low memory page is used to ensure each CPU can start with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * a PGD aligned virtual address (for realmode).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * The order of each memory region is not changed. The feature looks at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * the available space for the regions based on different configuration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * options and randomizes the base and space between each. The size of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * physical memory mapping is the available physical memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <linux/random.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <linux/memblock.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include <linux/pgtable.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <asm/setup.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include <asm/kaslr.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #include "mm_internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #define TB_SHIFT 40
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * The end address could depend on more configuration options to make the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * highest amount of space for randomization available, but that's too hard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * to keep straight and caused issues already.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) * Memory regions randomized by KASLR (except modules that use a separate logic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * earlier during boot). The list is ordered based on virtual addresses. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * order is kept after randomization.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) static __initdata struct kaslr_memory_region {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) unsigned long *base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) unsigned long size_tb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) } kaslr_regions[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) { &page_offset_base, 0 },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) { &vmalloc_base, 0 },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) { &vmemmap_base, 0 },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) /* Get size in bytes used by the memory region */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) static inline unsigned long get_padding(struct kaslr_memory_region *region)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) return (region->size_tb << TB_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
/*
 * Initialize base and padding for each memory region randomized with KASLR.
 *
 * Randomizes page_offset_base, vmalloc_base and vmemmap_base (in that
 * order) within [vaddr_start, vaddr_end), distributing the leftover
 * address space as entropy between the regions while keeping every base
 * PUD-aligned.
 */
void __init kernel_randomize_memory(void)
{
	size_t i;
	unsigned long vaddr_start, vaddr;
	unsigned long rand, memory_tb;
	struct rnd_state rand_state;
	unsigned long remain_entropy;
	unsigned long vmemmap_size;

	/* Lowest candidate base: start of the direct mapping (4- or 5-level). */
	vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
	vaddr = vaddr_start;

	/*
	 * These BUILD_BUG_ON checks ensure the memory layout is consistent
	 * with the vaddr_start/vaddr_end variables. These checks are very
	 * limited....
	 */
	BUILD_BUG_ON(vaddr_start >= vaddr_end);
	BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
	BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);

	if (!kaslr_memory_enabled())
		return;

	/* Maximum sizes, in TB, of the physical mapping and vmalloc regions. */
	kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT);
	kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;

	/*
	 * Update Physical memory mapping to available and
	 * add padding if needed (especially for memory hotplug support).
	 */
	BUG_ON(kaslr_regions[0].base != &page_offset_base);
	memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
		CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;

	/* Adapt physical memory region size based on available memory */
	if (memory_tb < kaslr_regions[0].size_tb)
		kaslr_regions[0].size_tb = memory_tb;

	/*
	 * Calculate the vmemmap region size in TBs, aligned to a TB
	 * boundary.
	 */
	vmemmap_size = (kaslr_regions[0].size_tb << (TB_SHIFT - PAGE_SHIFT)) *
		sizeof(struct page);
	kaslr_regions[2].size_tb = DIV_ROUND_UP(vmemmap_size, 1UL << TB_SHIFT);

	/* Calculate entropy available between regions */
	remain_entropy = vaddr_end - vaddr_start;
	for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
		remain_entropy -= get_padding(&kaslr_regions[i]);

	/* Seed a deterministic PRNG from the early-boot KASLR entropy source. */
	prandom_seed_state(&rand_state, kaslr_get_random_long("Memory"));

	for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) {
		unsigned long entropy;

		/*
		 * Select a random virtual address using the extra entropy
		 * available. Each remaining region gets an equal share of
		 * what is left, truncated to a PUD boundary by PUD_MASK.
		 */
		entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
		prandom_bytes_state(&rand_state, &rand, sizeof(rand));
		entropy = (rand % (entropy + 1)) & PUD_MASK;
		vaddr += entropy;
		*kaslr_regions[i].base = vaddr;

		/*
		 * Jump the region and add a minimum padding based on
		 * randomization alignment.
		 */
		vaddr += get_padding(&kaslr_regions[i]);
		vaddr = round_up(vaddr + 1, PUD_SIZE);
		remain_entropy -= entropy;
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
/*
 * Build the page tables used by the real-mode trampoline so the low 1MB
 * stays reachable via a 1:1 mapping even though KASLR has randomized the
 * direct mapping.
 *
 * A freshly allocated low page holds a PUD table whose entry for physical
 * address 0 is copied from the kernel's own direct mapping; that table is
 * then installed into trampoline_pgd_entry, through an extra P4D level
 * when 5-level paging is enabled.
 */
void __meminit init_trampoline_kaslr(void)
{
	pud_t *pud_page_tramp, *pud, *pud_tramp;
	p4d_t *p4d_page_tramp, *p4d, *p4d_tramp;
	unsigned long paddr, vaddr;
	pgd_t *pgd;

	pud_page_tramp = alloc_low_page();

	/*
	 * There are two mappings for the low 1MB area, the direct mapping
	 * and the 1:1 mapping for the real mode trampoline:
	 *
	 * Direct mapping: virt_addr = phys_addr + PAGE_OFFSET
	 * 1:1 mapping: virt_addr = phys_addr
	 */
	paddr = 0;
	vaddr = (unsigned long)__va(paddr);
	pgd = pgd_offset_k(vaddr);

	/* Walk the kernel page tables down to the PUD covering paddr 0. */
	p4d = p4d_offset(pgd, vaddr);
	pud = pud_offset(p4d, vaddr);

	/* Reuse the direct mapping's PUD entry for the trampoline's table. */
	pud_tramp = pud_page_tramp + pud_index(paddr);
	*pud_tramp = *pud;

	if (pgtable_l5_enabled()) {
		/* 5-level paging: interpose a P4D table below the PGD entry. */
		p4d_page_tramp = alloc_low_page();

		p4d_tramp = p4d_page_tramp + p4d_index(paddr);

		set_p4d(p4d_tramp,
			__p4d(_KERNPG_TABLE | __pa(pud_page_tramp)));

		set_pgd(&trampoline_pgd_entry,
			__pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
	} else {
		set_pgd(&trampoline_pgd_entry,
			__pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
	}
}