^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) #include <linux/sched/debug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) #include <linux/init_task.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/uaccess.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <asm/processor.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <asm/desc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <asm/traps.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
/*
 * True when x lies strictly between PAGE_OFFSET and PAGE_OFFSET + MAXMEM.
 * The argument is evaluated twice, so it must be free of side effects.
 */
#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)

/* Read field x of this CPU's cpu_tss_rw.x86_tss. */
#define TSS(x) this_cpu_read(cpu_tss_rw.x86_tss.x)

/* Forward declaration: doublefault_shim() calls this before its definition. */
static void set_df_gdt_entry(unsigned int cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * Called by double_fault with CR0.TS and EFLAGS.NT cleared. The CPU thinks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * we're running the doublefault task. Cannot return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) asmlinkage noinstr void __noreturn doublefault_shim(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) unsigned long cr2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) struct pt_regs regs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) BUILD_BUG_ON(sizeof(struct doublefault_stack) != PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) cr2 = native_read_cr2();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) /* Reset back to the normal kernel task. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) force_reload_TR();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) set_df_gdt_entry(smp_processor_id());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) trace_hardirqs_off();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * Fill in pt_regs. A downside of doing this in C is that the unwinder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * won't see it (no ENCODE_FRAME_POINTER), so a nested stack dump
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * won't successfully unwind to the source of the double fault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * The main dump from exc_double_fault() is fine, though, since it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * uses these regs directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * If anyone ever cares, this could be moved to asm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) regs.ss = TSS(ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) regs.__ssh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) regs.sp = TSS(sp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) regs.flags = TSS(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) regs.cs = TSS(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) /* We won't go through the entry asm, so we can leave __csh as 0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) regs.__csh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) regs.ip = TSS(ip);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) regs.orig_ax = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) regs.gs = TSS(gs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) regs.__gsh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) regs.fs = TSS(fs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) regs.__fsh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) regs.es = TSS(es);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) regs.__esh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) regs.ds = TSS(ds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) regs.__dsh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) regs.ax = TSS(ax);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) regs.bp = TSS(bp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) regs.di = TSS(di);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) regs.si = TSS(si);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) regs.dx = TSS(dx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) regs.cx = TSS(cx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) regs.bx = TSS(bx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) exc_double_fault(®s, 0, cr2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) * x86_32 does not save the original CR3 anywhere on a task switch.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) * This means that, even if we wanted to return, we would need to find
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * some way to reconstruct CR3. We could make a credible guess based
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * on cpu_tlbstate, but that would be racy and would not account for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * PTI.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) * Instead, don't bother. We can return through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) * rewind_stack_do_exit() instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) panic("cannot return from double fault\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) .tss = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * No sp0 or ss0 -- we never run CPL != 0 with this TSS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) * active. sp is filled in later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) .ldt = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) .ip = (unsigned long) asm_exc_double_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) .flags = X86_EFLAGS_FIXED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) .es = __USER_DS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) .cs = __KERNEL_CS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) .ss = __KERNEL_DS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) .ds = __USER_DS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) .fs = __KERNEL_PERCPU,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) #ifndef CONFIG_X86_32_LAZY_GS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) .gs = __KERNEL_STACK_CANARY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) .__cr3 = __pa_nodebug(swapper_pg_dir),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) static void set_df_gdt_entry(unsigned int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) /* Set up doublefault TSS pointer in the GDT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) &get_cpu_entry_area(cpu)->doublefault_stack.tss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) void doublefault_init_cpu_tss(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) unsigned int cpu = smp_processor_id();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) * The linker isn't smart enough to initialize percpu variables that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) * point to other places in percpu space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) this_cpu_write(doublefault_stack.tss.sp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) (unsigned long)&cea->doublefault_stack.stack +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) sizeof(doublefault_stack.stack));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) set_df_gdt_entry(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) }