// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/syscalls.h>

#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
#include <asm/resctrl.h>
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
#ifdef CONFIG_IA32_EMULATION
/* Not included via unistd.h */
#include <asm/unistd_32_ia32.h>
#endif

#include "process.h"

/* Also prints some state that isn't saved in pt_regs. */
void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
		 const char *log_lvl)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, es;

	show_iret_regs(regs, log_lvl);

	if (regs->orig_ax != -1)
		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
	else
		pr_cont("\n");

	printk("%sRAX: %016lx RBX: %016lx RCX: %016lx\n",
	       log_lvl, regs->ax, regs->bx, regs->cx);
	printk("%sRDX: %016lx RSI: %016lx RDI: %016lx\n",
	       log_lvl, regs->dx, regs->si, regs->di);
	printk("%sRBP: %016lx R08: %016lx R09: %016lx\n",
	       log_lvl, regs->bp, regs->r8, regs->r9);
	printk("%sR10: %016lx R11: %016lx R12: %016lx\n",
	       log_lvl, regs->r10, regs->r11, regs->r12);
	printk("%sR13: %016lx R14: %016lx R15: %016lx\n",
	       log_lvl, regs->r13, regs->r14, regs->r15);

	if (mode == SHOW_REGS_SHORT)
		return;

	if (mode == SHOW_REGS_USER) {
		rdmsrl(MSR_FS_BASE, fs);
		rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
		printk("%sFS: %016lx GS: %016lx\n",
		       log_lvl, fs, shadowgs);
		return;
	}

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = __read_cr3();
	cr4 = __read_cr4();

	printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       log_lvl, fs, fsindex, gs, gsindex, shadowgs);
	printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
	       log_lvl, regs->cs, ds, es, cr0);
	printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
	       log_lvl, cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);

	/* Only print out debug registers if they are in their non-default state. */
	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
	      (d6 == DR6_RESERVED) && (d7 == 0x400))) {
		printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n",
		       log_lvl, d0, d1, d2);
		printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n",
		       log_lvl, d3, d6, d7);
	}

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		printk("%sPKRU: %08x\n", log_lvl, read_pkru());
}

void release_thread(struct task_struct *dead_task)
{
	WARN_ON(dead_task->mm);
}

enum which_selector {
	FS,
	GS
};

/*
 * Out of line to be protected from kprobes and tracing. If this were
 * traced or probed, then any access to a per-CPU variable would happen
 * with the wrong GS.
 *
 * It is not used on Xen paravirt. When paravirt support is needed, it
 * needs to be renamed with a native_ prefix.
 */
static noinstr unsigned long __rdgsbase_inactive(void)
{
	unsigned long gsbase;

	lockdep_assert_irqs_disabled();

	if (!static_cpu_has(X86_FEATURE_XENPV)) {
		native_swapgs();
		gsbase = rdgsbase();
		native_swapgs();
	} else {
		instrumentation_begin();
		rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
		instrumentation_end();
	}

	return gsbase;
}

/*
 * Out of line to be protected from kprobes and tracing. If this were
 * traced or probed, then any access to a per-CPU variable would happen
 * with the wrong GS.
 *
 * It is not used on Xen paravirt. When paravirt support is needed, it
 * needs to be renamed with a native_ prefix.
 */
static noinstr void __wrgsbase_inactive(unsigned long gsbase)
{
	lockdep_assert_irqs_disabled();

	if (!static_cpu_has(X86_FEATURE_XENPV)) {
		native_swapgs();
		wrgsbase(gsbase);
		native_swapgs();
	} else {
		instrumentation_begin();
		wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
		instrumentation_end();
	}
}
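
/*
 * For illustration: SWAPGS exchanges MSR_GS_BASE with MSR_KERNEL_GS_BASE,
 * which is why the two helpers above reach the *inactive* GS base. The
 * non-Xen path is, in effect (a sketch, not additional kernel code):
 *
 *	native_swapgs();	// the inactive base becomes the active one
 *	gsbase = rdgsbase();	// read it with the cheap FSGSBASE instruction
 *	native_swapgs();	// swap back, restoring the kernel's GS base
 *
 * i.e. it is equivalent to reading or writing MSR_KERNEL_GS_BASE directly,
 * just without the cost of an RDMSR/WRMSR when FSGSBASE is usable.
 */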

/*
 * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
 * not available. The goal is to be reasonably fast on non-FSGSBASE systems.
 * It's forcibly inlined because it'll generate better code and this function
 * is hot.
 */
static __always_inline void save_base_legacy(struct task_struct *prev_p,
					     unsigned short selector,
					     enum which_selector which)
{
	if (likely(selector == 0)) {
		/*
		 * On Intel (without X86_BUG_NULL_SEG), the segment base could
		 * be the pre-existing saved base or it could be zero. On AMD
		 * (with X86_BUG_NULL_SEG), the segment base could be almost
		 * anything.
		 *
		 * This branch is very hot (it's hit twice on almost every
		 * context switch between 64-bit programs), and avoiding
		 * the RDMSR helps a lot, so we just assume that whatever
		 * value is already saved is correct. This matches historical
		 * Linux behavior, so it won't break existing applications.
		 *
		 * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
		 * report that the base is zero, it needs to actually be zero:
		 * see the corresponding logic in load_seg_legacy.
		 */
	} else {
		/*
		 * If the selector is 1, 2, or 3, then the base is zero on
		 * !X86_BUG_NULL_SEG CPUs and could be anything on
		 * X86_BUG_NULL_SEG CPUs. In the latter case, Linux
		 * has never attempted to preserve the base across context
		 * switches.
		 *
		 * If selector > 3, then it refers to a real segment, and
		 * saving the base isn't necessary.
		 */
		if (which == FS)
			prev_p->thread.fsbase = 0;
		else
			prev_p->thread.gsbase = 0;
	}
}
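
/*
 * A concrete reading of the cases above (illustrative values only):
 *
 *   selector == 0	the usual state of an unused FS/GS in a 64-bit task;
 *			keep whatever base is already saved and skip the RDMSR.
 *   selector 1..3	the GDT NULL entry with a nonzero RPL; record the base
 *			as 0, which is what !X86_BUG_NULL_SEG hardware
 *			guarantees anyway.
 *   selector  > 3	a real descriptor, e.g. a TLS selector installed via
 *			set_thread_area(); the base lives in the descriptor
 *			table, so nothing needs to be saved here.
 */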

static __always_inline void save_fsgs(struct task_struct *task)
{
	savesegment(fs, task->thread.fsindex);
	savesegment(gs, task->thread.gsindex);
	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
		/*
		 * If FSGSBASE is enabled, we can't make any useful guesses
		 * about the base, and user code expects us to save the current
		 * value. Fortunately, reading the base directly is efficient.
		 */
		task->thread.fsbase = rdfsbase();
		task->thread.gsbase = __rdgsbase_inactive();
	} else {
		save_base_legacy(task, task->thread.fsindex, FS);
		save_base_legacy(task, task->thread.gsindex, GS);
	}
}

/*
 * While a process is running, current->thread.fsbase and
 * current->thread.gsbase may not match the corresponding CPU registers
 * (see save_base_legacy()).
 */
void current_save_fsgs(void)
{
	unsigned long flags;

	/* Interrupts need to be off for FSGSBASE */
	local_irq_save(flags);
	save_fsgs(current);
	local_irq_restore(flags);
}
#if IS_ENABLED(CONFIG_KVM)
EXPORT_SYMBOL_GPL(current_save_fsgs);
#endif

static __always_inline void loadseg(enum which_selector which,
				    unsigned short sel)
{
	if (which == FS)
		loadsegment(fs, sel);
	else
		load_gs_index(sel);
}

static __always_inline void load_seg_legacy(unsigned short prev_index,
					    unsigned long prev_base,
					    unsigned short next_index,
					    unsigned long next_base,
					    enum which_selector which)
{
	if (likely(next_index <= 3)) {
		/*
		 * The next task is using 64-bit TLS, is not using this
		 * segment at all, or is having fun with arcane CPU features.
		 */
		if (next_base == 0) {
			/*
			 * Nasty case: on AMD CPUs, we need to forcibly zero
			 * the base.
			 */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				loadseg(which, __USER_DS);
				loadseg(which, next_index);
			} else {
				/*
				 * We could try to exhaustively detect cases
				 * under which we can skip the segment load,
				 * but there's really only one case that matters
				 * for performance: if both the previous and
				 * next states are fully zeroed, we can skip
				 * the load.
				 *
				 * (This assumes that prev_base == 0 has no
				 * false positives. This is the case on
				 * Intel-style CPUs.)
				 */
				if (likely(prev_index | next_index | prev_base))
					loadseg(which, next_index);
			}
		} else {
			if (prev_index != next_index)
				loadseg(which, next_index);
			wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
			       next_base);
		}
	} else {
		/*
		 * The next task is using a real segment. Loading the selector
		 * is sufficient.
		 */
		loadseg(which, next_index);
	}
}

static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
					      struct thread_struct *next)
{
	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
		/* Update the FS and GS selectors if they could have changed. */
		if (unlikely(prev->fsindex || next->fsindex))
			loadseg(FS, next->fsindex);
		if (unlikely(prev->gsindex || next->gsindex))
			loadseg(GS, next->gsindex);

		/* Update the bases. */
		wrfsbase(next->fsbase);
		__wrgsbase_inactive(next->gsbase);
	} else {
		load_seg_legacy(prev->fsindex, prev->fsbase,
				next->fsindex, next->fsbase, FS);
		load_seg_legacy(prev->gsindex, prev->gsbase,
				next->gsindex, next->gsbase, GS);
	}
}

unsigned long x86_fsgsbase_read_task(struct task_struct *task,
				     unsigned short selector)
{
	unsigned short idx = selector >> 3;
	unsigned long base;

	if (likely((selector & SEGMENT_TI_MASK) == 0)) {
		if (unlikely(idx >= GDT_ENTRIES))
			return 0;

		/*
		 * There are no user segments in the GDT with nonzero bases
		 * other than the TLS segments.
		 */
		if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
			return 0;

		idx -= GDT_ENTRY_TLS_MIN;
		base = get_desc_base(&task->thread.tls_array[idx]);
	} else {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		struct ldt_struct *ldt;

		/*
		 * If performance here mattered, we could protect the LDT
		 * with RCU. This is a slow path, though, so we can just
		 * take the mutex.
		 */
		mutex_lock(&task->mm->context.lock);
		ldt = task->mm->context.ldt;
		if (unlikely(!ldt || idx >= ldt->nr_entries))
			base = 0;
		else
			base = get_desc_base(ldt->entries + idx);
		mutex_unlock(&task->mm->context.lock);
#else
		base = 0;
#endif
	}

	return base;
}
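
/*
 * Worked example for the lookup above (illustrative only): a selector of
 * 0x63, which is roughly what a 32-bit task's first set_thread_area() slot
 * yields, decomposes as
 *
 *	index = 0x63 >> 3  = 12
 *	TI    = 0x63 & 0x4 = 0	(GDT, not LDT)
 *	RPL   = 0x63 & 0x3 = 3	(user privilege)
 *
 * Assuming GDT_ENTRY_TLS_MIN is 12, as on x86-64, that index falls in the
 * TLS window, so the base comes from task->thread.tls_array[0]; any other
 * GDT index reports a base of zero, as the comment in the function explains.
 */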

unsigned long x86_gsbase_read_cpu_inactive(void)
{
	unsigned long gsbase;

	if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
		unsigned long flags;

		local_irq_save(flags);
		gsbase = __rdgsbase_inactive();
		local_irq_restore(flags);
	} else {
		rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
	}

	return gsbase;
}

void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
{
	if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
		unsigned long flags;

		local_irq_save(flags);
		__wrgsbase_inactive(gsbase);
		local_irq_restore(flags);
	} else {
		wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
	}
}

unsigned long x86_fsbase_read_task(struct task_struct *task)
{
	unsigned long fsbase;

	if (task == current)
		fsbase = x86_fsbase_read_cpu();
	else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
		 (task->thread.fsindex == 0))
		fsbase = task->thread.fsbase;
	else
		fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex);

	return fsbase;
}

unsigned long x86_gsbase_read_task(struct task_struct *task)
{
	unsigned long gsbase;

	if (task == current)
		gsbase = x86_gsbase_read_cpu_inactive();
	else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
		 (task->thread.gsindex == 0))
		gsbase = task->thread.gsbase;
	else
		gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex);

	return gsbase;
}

void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
{
	WARN_ON_ONCE(task == current);

	task->thread.fsbase = fsbase;
}

void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
{
	WARN_ON_ONCE(task == current);

	task->thread.gsbase = gsbase;
}

static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	WARN_ON_ONCE(regs != current_pt_regs());

	if (static_cpu_has(X86_BUG_NULL_SEG)) {
		/* Loading zero below won't clear the base. */
		loadsegment(fs, __USER_DS);
		load_gs_index(__USER_DS);
	}

	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);

	regs->ip = new_ip;
	regs->sp = new_sp;
	regs->cs = _cs;
	regs->ss = _ss;
	regs->flags = X86_EFLAGS_IF;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}
EXPORT_SYMBOL_GPL(start_thread);

#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes are not supported here. Set the probe on schedule instead.
 * The function graph tracer is not supported either.
 */
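/*
 * Ordering notes for __switch_to(), summarizing the constraints that the
 * comments in the body spell out (no new behavior, just a map for readers):
 * save_fsgs() must run before load_TLS() because load_TLS() may clobber
 * %fs/%gs; load_TLS() must run before any user segment reloads so that the
 * selectors reference fresh GDT entries; and arch_end_context_switch() sits
 * between the two so any lazy hypercalls are flushed first.
 */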
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();

	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
		     this_cpu_read(irq_count) != -1);

	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
		switch_fpu_prepare(prev_p, cpu);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	save_fsgs(prev_p);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here. This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them.
	 */
	arch_end_context_switch(next_p);

	/* Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT. The
	 * LDT for next is loaded in switch_mm, and the GDT is loaded
	 * above.
	 *
	 * We therefore need to write new values to the segment
	 * registers on every context switch unless both the new and old
	 * values are zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as
	 * those are saved and restored as part of pt_regs.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);
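
	/*
	 * (Aside, for readers: the "next->X | prev->X" tests above are a
	 * branch-free way of saying "reload unless both the old and the new
	 * selector are zero", zero being the common case for 64-bit tasks.)
	 */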

	x86_fsgsbase_load(prev, next);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	this_cpu_write(current_task, next_p);
	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));

	switch_fpu_finish(next_p);

	/* Reload sp0. */
	update_task_stack(next_p);

	switch_to_extra(prev_p, next_p);

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
		 * does not update the cached descriptor. As a result, if we
		 * do SYSRET while SS is NULL, we'll end up in user mode with
		 * SS apparently equal to __USER_DS but actually unusable.
		 *
		 * The straightforward workaround would be to fix it up just
		 * before SYSRET, but that would slow down the system call
		 * fast paths. Instead, we ensure that SS is never NULL in
		 * system call context. We do this by replacing NULL SS
		 * selectors at every context switch. SYSCALL sets up a valid
		 * SS, so the only way to get NULL is to re-enter the kernel
		 * from CPL 3 through an interrupt. Since that can't happen
		 * in the same task as a running syscall, we are guaranteed to
		 * context switch between every interrupt vector entry and a
		 * subsequent SYSRET.
		 *
		 * We read SS first because SS reads are much faster than
		 * writes. Out of caution, we force SS to __KERNEL_DS even if
		 * it previously had a different non-NULL value.
		 */
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	/* Load the Intel cache allocation PQR MSR. */
	resctrl_sched_in();

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);
	/* Pretend that this comes from a 64bit execve */
	task_pt_regs(current)->orig_ax = __NR_execve;
	current_thread_info()->status &= ~TS_COMPAT;

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

static void __set_personality_x32(void)
{
#ifdef CONFIG_X86_X32
	clear_thread_flag(TIF_IA32);
	set_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_X32;
	current->personality &= ~READ_IMPLIES_EXEC;
	/*
	 * in_32bit_syscall() uses the presence of the x32 syscall bit
	 * to determine compat status. The x86 mmap() code relies on
	 * the syscall bitness, so set the x32 syscall bit right here
	 * to make in_32bit_syscall() work during exec().
	 *
	 * Pretend to come from an x32 execve.
	 */
	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
	current_thread_info()->status &= ~TS_COMPAT;
#endif
}

static void __set_personality_ia32(void)
{
#ifdef CONFIG_IA32_EMULATION
	set_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_IA32;
	current->personality |= force_personality32;
	/* Prepare the first "return" to user space */
	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
	current_thread_info()->status |= TS_COMPAT;
#endif
}

void set_personality_ia32(bool x32)
{
	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	if (x32)
		__set_personality_x32();
	else
		__set_personality_ia32();
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int ret;

	ret = map_vdso_once(image, addr);
	if (ret)
		return ret;

	return (long)image->size;
}
#endif

long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
	int ret = 0;

	switch (option) {
	case ARCH_SET_GS: {
		if (unlikely(arg2 >= TASK_SIZE_MAX))
			return -EPERM;

		preempt_disable();
		/*
		 * ARCH_SET_GS has always overwritten the index
		 * and the base. Zero is the most sensible value
		 * to put in the index, and is the only value that
		 * makes any sense if FSGSBASE is unavailable.
		 */
		if (task == current) {
			loadseg(GS, 0);
			x86_gsbase_write_cpu_inactive(arg2);

			/*
			 * On non-FSGSBASE systems, save_base_legacy() expects
			 * that we also fill in thread.gsbase.
			 */
			task->thread.gsbase = arg2;

		} else {
			task->thread.gsindex = 0;
			x86_gsbase_write_task(task, arg2);
		}
		preempt_enable();
		break;
	}
	case ARCH_SET_FS: {
		/*
		 * Not strictly needed for %fs, but do it for symmetry
		 * with %gs
		 */
		if (unlikely(arg2 >= TASK_SIZE_MAX))
			return -EPERM;

		preempt_disable();
		/*
		 * Set the selector to 0 for the same reason
		 * as %gs above.
		 */
		if (task == current) {
			loadseg(FS, 0);
			x86_fsbase_write_cpu(arg2);

			/*
			 * On non-FSGSBASE systems, save_base_legacy() expects
			 * that we also fill in thread.fsbase.
			 */
			task->thread.fsbase = arg2;
		} else {
			task->thread.fsindex = 0;
			x86_fsbase_write_task(task, arg2);
		}
		preempt_enable();
		break;
	}
	case ARCH_GET_FS: {
		unsigned long base = x86_fsbase_read_task(task);

		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base = x86_gsbase_read_task(task);

		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
	case ARCH_MAP_VDSO_X32:
		return prctl_map_vdso(&vdso_image_x32, arg2);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	case ARCH_MAP_VDSO_32:
		return prctl_map_vdso(&vdso_image_32, arg2);
# endif
	case ARCH_MAP_VDSO_64:
		return prctl_map_vdso(&vdso_image_64, arg2);
#endif

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
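
/*
 * Userspace view of the FS/GS prctls handled above, for illustration only
 * (a hypothetical snippet, not part of the kernel):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base = 0;
 *
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, 0x7f1234560000UL);
 *	syscall(SYS_arch_prctl, ARCH_GET_GS, &base);
 *
 * base now reads back as 0x7f1234560000; passing an address at or above
 * TASK_SIZE_MAX would have made ARCH_SET_GS fail with -EPERM.
 */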

SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	long ret;

	ret = do_arch_prctl_64(current, option, arg2);
	if (ret == -EINVAL)
		ret = do_arch_prctl_common(current, option, arg2);

	return ret;
}

#ifdef CONFIG_IA32_EMULATION
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	return do_arch_prctl_common(current, option, arg2);
}
#endif

unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}