author: Alexandre Ghiti <alex@ghiti.fr> 2021-04-11 12:41:44 -0400
committer: Joshua Yeong <joshua.yeong@starfivetech.com> 2023-03-02 10:17:25 +0800
commit: 73202cf7f3c92c6eaa46c382411b78f92635d481
parent: 252672d290721aec93773f25ce1b0829cb5a7996
Commit Summary:
Diffstat:
12 files changed, 182 insertions, 36 deletions
diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
index 47a5003c2e28..62d94696a19c 100644
--- a/arch/riscv/boot/loader.lds.S
+++ b/arch/riscv/boot/loader.lds.S
@@ -1,13 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/page.h>
+#include <asm/pgtable.h>
OUTPUT_ARCH(riscv)
ENTRY(_start)
SECTIONS
{
- . = PAGE_OFFSET;
+ . = KERNEL_LINK_ADDR;
.payload : {
*(.payload)
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index dbc1b48144b0..8fd3c7ede9dd 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -90,15 +90,37 @@ typedef struct page *pgtable_t;
#ifdef CONFIG_MMU
extern unsigned long va_pa_offset;
+#ifdef CONFIG_64BIT
+extern unsigned long va_kernel_pa_offset;
+#endif
extern unsigned long pfn_base;
#define ARCH_PFN_OFFSET (pfn_base)
#else
#define va_pa_offset 0
+#ifdef CONFIG_64BIT
+#define va_kernel_pa_offset 0
+#endif
#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
#endif /* CONFIG_MMU */
-#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
-#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
+#ifdef CONFIG_64BIT
+extern unsigned long kernel_virt_addr;
+
+#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_pa_offset))
+#define kernel_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_kernel_pa_offset))
+#define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x)
+
+#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
+#define kernel_mapping_va_to_pa(x) ((unsigned long)(x) - va_kernel_pa_offset)
+#define __va_to_pa_nodebug(x) ({ \
+ unsigned long _x = x; \
+ (_x < kernel_virt_addr) ? \
+ linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x); \
+ })
+#else
+#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
+#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
+#endif
#ifdef CONFIG_DEBUG_VIRTUAL
extern phys_addr_t __virt_to_phys(unsigned long x);
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index b16304fdf448..9d407dc7f2bf 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -11,23 +11,38 @@
#include <asm/pgtable-bits.h>
-#ifndef __ASSEMBLY__
+#ifndef CONFIG_MMU
+#define KERNEL_LINK_ADDR PAGE_OFFSET
+#else
-/* Page Upper Directory not used in RISC-V */
-#include <asm-generic/pgtable-nopud.h>
-#include <asm/page.h>
-#include <asm/tlbflush.h>
-#include <linux/mm_types.h>
+#define ADDRESS_SPACE_END (UL(-1))
-#ifdef CONFIG_MMU
+#ifdef CONFIG_64BIT
+/* Leave 2GB for kernel and BPF at the end of the address space */
+#define KERNEL_LINK_ADDR (ADDRESS_SPACE_END - SZ_2G + 1)
+#else
+#define KERNEL_LINK_ADDR PAGE_OFFSET
+#endif
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END (PAGE_OFFSET - 1)
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
#define BPF_JIT_REGION_SIZE (SZ_128M)
+#ifdef CONFIG_64BIT
+/* KASLR should leave at least 128MB for BPF after the kernel */
+#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end)
+#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
+#else
#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
#define BPF_JIT_REGION_END (VMALLOC_END)
+#endif
+
+/* Modules always live before the kernel */
+#ifdef CONFIG_64BIT
+#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
+#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
+#endif
/*
* Roughly size the vmemmap space to be large enough to fit enough
@@ -57,9 +72,16 @@
#define FIXADDR_SIZE PGDIR_SIZE
#endif
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
-
#endif
+#ifndef __ASSEMBLY__
+
+/* Page Upper Directory not used in RISC-V */
+#include <asm-generic/pgtable-nopud.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include <linux/mm_types.h>
+
#ifdef CONFIG_64BIT
#include <asm/pgtable-64.h>
#else
@@ -466,6 +488,7 @@ static inline void __kernel_map_pages(struct page *page, int numpages, int enabl
#define kern_addr_valid(addr) (1) /* FIXME */
+extern char _start[];
extern void *dtb_early_va;
extern uintptr_t dtb_early_pa;
void setup_bootmem(void);
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 9fa510707012..03df05bc639a 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -17,6 +17,7 @@ int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
int set_memory_rw_nx(unsigned long addr, int numpages);
void protect_kernel_text_data(void);
+void protect_kernel_linear_mapping_text_rodata(void);
#else
static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 481fea4b0cd0..f518aef80f36 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -72,7 +72,8 @@ pe_head_start:
.global relocate_enable_mmu
relocate_enable_mmu:
/* Relocate return address */
- li a1, PAGE_OFFSET
+ la a1, kernel_virt_addr
+ REG_L a1, 0(a1)
la a2, _start
sub a1, a1, a2
add ra, ra, a1
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 104fba889cf7..ce153771e5e9 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -408,12 +408,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
}
#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
-#define VMALLOC_MODULE_START \
- max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START)
void *module_alloc(unsigned long size)
{
- return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START,
- VMALLOC_END, GFP_KERNEL,
+ return __vmalloc_node_range(size, 1, MODULES_VADDR,
+ MODULES_END, GFP_KERNEL,
PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index dfe6838edb14..4c7ecd667792 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -258,8 +258,13 @@ void __init setup_arch(char **cmdline_p)
if (IS_ENABLED(CONFIG_RISCV_SBI))
sbi_init();
- if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
protect_kernel_text_data();
+#if defined(CONFIG_64BIT) && defined(CONFIG_MMU)
+ protect_kernel_linear_mapping_text_rodata();
+#endif
+ }
+
#ifdef CONFIG_SWIOTLB
swiotlb_init(1);
#endif
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index de03cb22d0e9..0726c05e0336 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -4,7 +4,8 @@
* Copyright (C) 2017 SiFive
*/
-#define LOAD_OFFSET PAGE_OFFSET
+#include <asm/pgtable.h>
+#define LOAD_OFFSET KERNEL_LINK_ADDR
#include <asm/vmlinux.lds.h>
#include <asm/page.h>
#include <asm/cache.h>
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 3c8b9e433c67..dc6c4c321112 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -216,6 +216,19 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
return;
}
+#ifdef CONFIG_64BIT
+ /*
+ * Modules in 64bit kernels lie in their own virtual region which is not
+ * in the vmalloc region, but dealing with page faults in this region
+ * or the vmalloc region amounts to doing the same thing: checking that
+ * the mapping exists in init_mm.pgd and updating user page table, so
+ * just use vmalloc_fault.
+ */
+ if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) {
+ vmalloc_fault(regs, code, addr);
+ return;
+ }
+#endif
/* Enable interrupts if they were enabled in the parent context. */
if (likely(regs->status & SR_PIE))
local_irq_enable();
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 8dc3b08eac38..9e71bc37f40d 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -24,6 +24,9 @@
#include "../kernel/head.h"
+unsigned long kernel_virt_addr = KERNEL_LINK_ADDR;
+EXPORT_SYMBOL(kernel_virt_addr);
+
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
@@ -88,6 +91,10 @@ static void print_vm_layout(void)
(unsigned long)VMALLOC_END);
print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
(unsigned long)high_memory);
+#ifdef CONFIG_64BIT
+ print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
+ (unsigned long)ADDRESS_SPACE_END);
+#endif
}
#else
static void print_vm_layout(void) { }
@@ -165,8 +172,13 @@ void __init setup_bootmem(void)
/* The maximal physical memory size is -PAGE_OFFSET. */
memblock_enforce_memory_limit(-PAGE_OFFSET);
- /* Reserve from the start of the kernel to the end of the kernel */
- memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
+ /*
+ * Reserve from the start of the kernel to the end of the kernel
+ * and make sure we align the reservation on PMD_SIZE since we will
+ * map the kernel in the linear mapping as read-only: we do not want
+ * any allocation to happen between _end and the next pmd aligned page.
+ */
+ memblock_reserve(vmlinux_start, (vmlinux_end - vmlinux_start + PMD_SIZE - 1) & PMD_MASK);
/*
* memblock allocator is not aware of the fact that last 4K bytes of
@@ -202,8 +214,14 @@ void __init setup_bootmem(void)
#ifdef CONFIG_MMU
static struct pt_alloc_ops pt_ops;
+/* Offset between linear mapping virtual address and kernel load address */
unsigned long va_pa_offset;
EXPORT_SYMBOL(va_pa_offset);
+#ifdef CONFIG_64BIT
+/* Offset between kernel mapping virtual address and kernel load address */
+unsigned long va_kernel_pa_offset;
+EXPORT_SYMBOL(va_kernel_pa_offset);
+#endif
unsigned long pfn_base;
EXPORT_SYMBOL(pfn_base);
@@ -307,7 +325,7 @@ static pmd_t *get_pmd_virt_late(phys_addr_t pa)
static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
- BUG_ON((va - PAGE_OFFSET) >> PGDIR_SHIFT);
+ BUG_ON((va - kernel_virt_addr) >> PGDIR_SHIFT);
return (uintptr_t)early_pmd;
}
@@ -422,17 +440,34 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif
+uintptr_t load_pa, load_sz;
+
+static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size)
+{
+ uintptr_t va, end_va;
+
+ end_va = kernel_virt_addr + load_sz;
+ for (va = kernel_virt_addr; va < end_va; va += map_size)
+ create_pgd_mapping(pgdir, va,
+ load_pa + (va - kernel_virt_addr),
+ map_size, PAGE_KERNEL_EXEC);
+}
+
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
- uintptr_t va, pa, end_va;
- uintptr_t load_pa = (uintptr_t)(&_start);
- uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
+ uintptr_t pa;
uintptr_t map_size;
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t fix_bmap_spmd, fix_bmap_epmd;
#endif
+ load_pa = (uintptr_t)(&_start);
+ load_sz = (uintptr_t)(&_end) - load_pa;
va_pa_offset = PAGE_OFFSET - load_pa;
+#ifdef CONFIG_64BIT
+ va_kernel_pa_offset = kernel_virt_addr - load_pa;
+#endif
+
pfn_base = PFN_DOWN(load_pa);
/*
@@ -460,31 +495,28 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
create_pmd_mapping(fixmap_pmd, FIXADDR_START,
(uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
/* Setup trampoline PGD and PMD */
- create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
+ create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
(uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
- create_pmd_mapping(trampoline_pmd, PAGE_OFFSET,
+ create_pmd_mapping(trampoline_pmd, kernel_virt_addr,
load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
#else
/* Setup trampoline PGD */
- create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
+ create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC);
#endif
/*
- * Setup early PGD covering entire kernel which will allows
+ * Setup early PGD covering entire kernel which will allow
* us to reach paging_init(). We map all memory banks later
* in setup_vm_final() below.
*/
- end_va = PAGE_OFFSET + load_sz;
- for (va = PAGE_OFFSET; va < end_va; va += map_size)
- create_pgd_mapping(early_pg_dir, va,
- load_pa + (va - PAGE_OFFSET),
- map_size, PAGE_KERNEL_EXEC);
+ create_kernel_page_table(early_pg_dir, map_size);
#ifndef __PAGETABLE_PMD_FOLDED
/* Setup early PMD for DTB */
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
(uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE);
+#ifndef CONFIG_BUILTIN_DTB
/* Create two consecutive PMD mappings for FDT early scan */
pa = dtb_pa & ~(PMD_SIZE - 1);
create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
@@ -492,7 +524,20 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+#else /* CONFIG_BUILTIN_DTB */
+#ifdef CONFIG_64BIT
+ /*
+ * __va can't be used since it would return a linear mapping address
+ * whereas dtb_early_va will be used before setup_vm_final installs
+ * the linear mapping.
+ */
+ dtb_early_va = kernel_mapping_pa_to_va(dtb_pa);
#else
+ dtb_early_va = __va(dtb_pa);
+#endif /* CONFIG_64BIT */
+#endif /* CONFIG_BUILTIN_DTB */
+#else
+#ifndef CONFIG_BUILTIN_DTB
/* Create two consecutive PGD mappings for FDT early scan */
pa = dtb_pa & ~(PGDIR_SIZE - 1);
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
@@ -500,6 +545,13 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
+#else /* CONFIG_BUILTIN_DTB */
+#ifdef CONFIG_64BIT
+ dtb_early_va = kernel_mapping_pa_to_va(dtb_pa);
+#else
+ dtb_early_va = __va(dtb_pa);
+#endif /* CONFIG_64BIT */
+#endif /* CONFIG_BUILTIN_DTB */
#endif
dtb_early_pa = dtb_pa;
@@ -534,6 +586,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
#endif
}
+#ifdef CONFIG_64BIT
+void protect_kernel_linear_mapping_text_rodata(void)
+{
+ unsigned long text_start = (unsigned long)lm_alias(_start);
+ unsigned long init_text_start = (unsigned long)lm_alias(__init_text_begin);
+ unsigned long rodata_start = (unsigned long)lm_alias(__start_rodata);
+ unsigned long data_start = (unsigned long)lm_alias(_data);
+
+ set_memory_ro(text_start, (init_text_start - text_start) >> PAGE_SHIFT);
+ set_memory_nx(text_start, (init_text_start - text_start) >> PAGE_SHIFT);
+
+ set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+ set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+}
+#endif
+
static void __init setup_vm_final(void)
{
uintptr_t va, map_size;
@@ -555,7 +623,7 @@ static void __init setup_vm_final(void)
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
- /* Map all memory banks */
+ /* Map all memory banks in the linear mapping */
for_each_mem_range(i, &start, &end) {
if (start >= end)
break;
@@ -568,10 +636,22 @@ static void __init setup_vm_final(void)
for (pa = start; pa < end; pa += map_size) {
va = (uintptr_t)__va(pa);
create_pgd_mapping(swapper_pg_dir, va, pa,
- map_size, PAGE_KERNEL_EXEC);
+ map_size,
+#ifdef CONFIG_64BIT
+ PAGE_KERNEL
+#else
+ PAGE_KERNEL_EXEC
+#endif
+ );
+
}
}
+#ifdef CONFIG_64BIT
+ /* Map the kernel */
+ create_kernel_page_table(swapper_pg_dir, PMD_SIZE);
+#endif
+
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 883c3be43ea9..df0fc4485d60 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -91,10 +91,15 @@ void __init kasan_init(void)
phys_addr_t _start, _end;
u64 i;
+ /*
+ * Populate all kernel virtual address space with kasan_early_shadow_page
+ * except for the linear mapping and the modules/kernel/BPF mapping.
+ */
kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
(void *)kasan_mem_to_shadow((void *)
VMALLOC_END));
+ /* Populate the linear mapping */
for_each_mem_range(i, &_start, &_end) {
void *start = (void *)__va(_start);
void *end = (void *)__va(_end);
@@ -105,6 +110,10 @@ void __init kasan_init(void)
populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
};
+ /* Populate kernel, BPF, modules mapping */
+ kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR),
+ kasan_mem_to_shadow((const void *)BPF_JIT_REGION_END));
+
for (i = 0; i < PTRS_PER_PTE; i++)
set_pte(&kasan_early_shadow_pte[i],
mk_pte(virt_to_page(kasan_early_shadow_page),
diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
index e8e4dcd39fed..35703d5ef5fd 100644
--- a/arch/riscv/mm/physaddr.c
+++ b/arch/riscv/mm/physaddr.c
@@ -23,7 +23,7 @@ EXPORT_SYMBOL(__virt_to_phys);
phys_addr_t __phys_addr_symbol(unsigned long x)
{
- unsigned long kernel_start = (unsigned long)PAGE_OFFSET;
+ unsigned long kernel_start = (unsigned long)kernel_virt_addr;
unsigned long kernel_end = (unsigned long)_end;
/*