Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 sources for the Orange Pi 5 / 5B / 5 Plus boards. The listing below appears to be arch/x86/kernel/process_64.c from this tree, the x86-64 side of process and context-switch handling.

// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/syscalls.h>

#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
#include <asm/resctrl.h>
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
#ifdef CONFIG_IA32_EMULATION
/* Not included via unistd.h */
#include <asm/unistd_32_ia32.h>
#endif

#include "process.h"

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
		 const char *log_lvl)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, es;

	show_iret_regs(regs, log_lvl);

	if (regs->orig_ax != -1)
		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
	else
		pr_cont("\n");

	printk("%sRAX: %016lx RBX: %016lx RCX: %016lx\n",
	       log_lvl, regs->ax, regs->bx, regs->cx);
	printk("%sRDX: %016lx RSI: %016lx RDI: %016lx\n",
	       log_lvl, regs->dx, regs->si, regs->di);
	printk("%sRBP: %016lx R08: %016lx R09: %016lx\n",
	       log_lvl, regs->bp, regs->r8, regs->r9);
	printk("%sR10: %016lx R11: %016lx R12: %016lx\n",
	       log_lvl, regs->r10, regs->r11, regs->r12);
	printk("%sR13: %016lx R14: %016lx R15: %016lx\n",
	       log_lvl, regs->r13, regs->r14, regs->r15);

	if (mode == SHOW_REGS_SHORT)
		return;

	if (mode == SHOW_REGS_USER) {
		rdmsrl(MSR_FS_BASE, fs);
		rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
		printk("%sFS:  %016lx GS:  %016lx\n",
		       log_lvl, fs, shadowgs);
		return;
	}

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = __read_cr3();
	cr4 = __read_cr4();

	printk("%sFS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       log_lvl, fs, fsindex, gs, gsindex, shadowgs);
	printk("%sCS:  %04lx DS: %04x ES: %04x CR0: %016lx\n",
		log_lvl, regs->cs, ds, es, cr0);
	printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
		log_lvl, cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);

	/* Only print out debug registers if they are in their non-default state. */
	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
	    (d6 == DR6_RESERVED) && (d7 == 0x400))) {
		printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n",
		       log_lvl, d0, d1, d2);
		printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n",
		       log_lvl, d3, d6, d7);
	}

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		printk("%sPKRU: %08x\n", log_lvl, read_pkru());
}

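/*
 * Final per-architecture cleanup when a dead task's thread is released.
 * There is nothing to free on x86-64; just sanity-check that the dead
 * task no longer owns an mm.
 */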
void release_thread(struct task_struct *dead_task)
{
	WARN_ON(dead_task->mm);
}

enum which_selector {
	FS,
	GS
};

/*
 * Out of line to be protected from kprobes and tracing. If this would be
 * traced or probed then any access to a per CPU variable happens with
 * the wrong GS.
 *
 * It is not used on Xen paravirt. When paravirt support is needed, it
 * needs to be renamed with native_ prefix.
 */
static noinstr unsigned long __rdgsbase_inactive(void)
{
	unsigned long gsbase;

	lockdep_assert_irqs_disabled();

	if (!static_cpu_has(X86_FEATURE_XENPV)) {
		native_swapgs();
		gsbase = rdgsbase();
		native_swapgs();
	} else {
		instrumentation_begin();
		rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
		instrumentation_end();
	}

	return gsbase;
}

/*
 * Out of line to be protected from kprobes and tracing. If this would be
 * traced or probed then any access to a per CPU variable happens with
 * the wrong GS.
 *
 * It is not used on Xen paravirt. When paravirt support is needed, it
 * needs to be renamed with native_ prefix.
 */
static noinstr void __wrgsbase_inactive(unsigned long gsbase)
{
	lockdep_assert_irqs_disabled();

	if (!static_cpu_has(X86_FEATURE_XENPV)) {
		native_swapgs();
		wrgsbase(gsbase);
		native_swapgs();
	} else {
		instrumentation_begin();
		wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
		instrumentation_end();
	}
}

/*
 * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
 * not available.  The goal is to be reasonably fast on non-FSGSBASE systems.
 * It's forcibly inlined because it'll generate better code and this function
 * is hot.
 */
static __always_inline void save_base_legacy(struct task_struct *prev_p,
					     unsigned short selector,
					     enum which_selector which)
{
	if (likely(selector == 0)) {
		/*
		 * On Intel (without X86_BUG_NULL_SEG), the segment base could
		 * be the pre-existing saved base or it could be zero.  On AMD
		 * (with X86_BUG_NULL_SEG), the segment base could be almost
		 * anything.
		 *
		 * This branch is very hot (it's hit twice on almost every
		 * context switch between 64-bit programs), and avoiding
		 * the RDMSR helps a lot, so we just assume that whatever
		 * value is already saved is correct.  This matches historical
		 * Linux behavior, so it won't break existing applications.
		 *
		 * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
		 * report that the base is zero, it needs to actually be zero:
		 * see the corresponding logic in load_seg_legacy.
		 */
	} else {
		/*
		 * If the selector is 1, 2, or 3, then the base is zero on
		 * !X86_BUG_NULL_SEG CPUs and could be anything on
		 * X86_BUG_NULL_SEG CPUs.  In the latter case, Linux
		 * has never attempted to preserve the base across context
		 * switches.
		 *
		 * If selector > 3, then it refers to a real segment, and
		 * saving the base isn't necessary.
		 */
		if (which == FS)
			prev_p->thread.fsbase = 0;
		else
			prev_p->thread.gsbase = 0;
	}
}

static __always_inline void save_fsgs(struct task_struct *task)
{
	savesegment(fs, task->thread.fsindex);
	savesegment(gs, task->thread.gsindex);
	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
		/*
		 * If FSGSBASE is enabled, we can't make any useful guesses
		 * about the base, and user code expects us to save the current
		 * value.  Fortunately, reading the base directly is efficient.
		 */
		task->thread.fsbase = rdfsbase();
		task->thread.gsbase = __rdgsbase_inactive();
	} else {
		save_base_legacy(task, task->thread.fsindex, FS);
		save_base_legacy(task, task->thread.gsindex, GS);
	}
}

/*
 * While a process is running, current->thread.fsbase and current->thread.gsbase
 * may not match the corresponding CPU registers (see save_base_legacy()).
 */
void current_save_fsgs(void)
{
	unsigned long flags;

	/* Interrupts need to be off for FSGSBASE */
	local_irq_save(flags);
	save_fsgs(current);
	local_irq_restore(flags);
}
#if IS_ENABLED(CONFIG_KVM)
EXPORT_SYMBOL_GPL(current_save_fsgs);
#endif

static __always_inline void loadseg(enum which_selector which,
				    unsigned short sel)
{
	if (which == FS)
		loadsegment(fs, sel);
	else
		load_gs_index(sel);
}

static __always_inline void load_seg_legacy(unsigned short prev_index,
					    unsigned long prev_base,
					    unsigned short next_index,
					    unsigned long next_base,
					    enum which_selector which)
{
	if (likely(next_index <= 3)) {
		/*
		 * The next task is using 64-bit TLS, is not using this
		 * segment at all, or is having fun with arcane CPU features.
		 */
		if (next_base == 0) {
			/*
			 * Nasty case: on AMD CPUs, we need to forcibly zero
			 * the base.
			 */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				loadseg(which, __USER_DS);
				loadseg(which, next_index);
			} else {
				/*
				 * We could try to exhaustively detect cases
				 * under which we can skip the segment load,
				 * but there's really only one case that matters
				 * for performance: if both the previous and
				 * next states are fully zeroed, we can skip
				 * the load.
				 *
				 * (This assumes that prev_base == 0 has no
				 * false positives.  This is the case on
				 * Intel-style CPUs.)
				 */
				if (likely(prev_index | next_index | prev_base))
					loadseg(which, next_index);
			}
		} else {
			if (prev_index != next_index)
				loadseg(which, next_index);
			wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
			       next_base);
		}
	} else {
		/*
		 * The next task is using a real segment.  Loading the selector
		 * is sufficient.
		 */
		loadseg(which, next_index);
	}
}

static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
					      struct thread_struct *next)
{
	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
		/* Update the FS and GS selectors if they could have changed. */
		if (unlikely(prev->fsindex || next->fsindex))
			loadseg(FS, next->fsindex);
		if (unlikely(prev->gsindex || next->gsindex))
			loadseg(GS, next->gsindex);

		/* Update the bases. */
		wrfsbase(next->fsbase);
		__wrgsbase_inactive(next->gsbase);
	} else {
		load_seg_legacy(prev->fsindex, prev->fsbase,
				next->fsindex, next->fsbase, FS);
		load_seg_legacy(prev->gsindex, prev->gsbase,
				next->gsindex, next->gsbase, GS);
	}
}

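/*
 * Look up the base address that a nonzero FS or GS selector refers to by
 * walking the task's TLS slots in the GDT or its LDT, without touching
 * the live segment registers.
 */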
unsigned long x86_fsgsbase_read_task(struct task_struct *task,
				     unsigned short selector)
{
	unsigned short idx = selector >> 3;
	unsigned long base;

	if (likely((selector & SEGMENT_TI_MASK) == 0)) {
		if (unlikely(idx >= GDT_ENTRIES))
			return 0;

		/*
		 * There are no user segments in the GDT with nonzero bases
		 * other than the TLS segments.
		 */
		if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
			return 0;

		idx -= GDT_ENTRY_TLS_MIN;
		base = get_desc_base(&task->thread.tls_array[idx]);
	} else {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		struct ldt_struct *ldt;

		/*
		 * If performance here mattered, we could protect the LDT
		 * with RCU.  This is a slow path, though, so we can just
		 * take the mutex.
		 */
		mutex_lock(&task->mm->context.lock);
		ldt = task->mm->context.ldt;
		if (unlikely(!ldt || idx >= ldt->nr_entries))
			base = 0;
		else
			base = get_desc_base(ldt->entries + idx);
		mutex_unlock(&task->mm->context.lock);
#else
		base = 0;
#endif
	}

	return base;
}

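/*
 * Read this CPU's inactive (user) GS base: via RDGSBASE around SWAPGS when
 * FSGSBASE is available, otherwise from MSR_KERNEL_GS_BASE.
 */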
unsigned long x86_gsbase_read_cpu_inactive(void)
{
	unsigned long gsbase;

	if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
		unsigned long flags;

		local_irq_save(flags);
		gsbase = __rdgsbase_inactive();
		local_irq_restore(flags);
	} else {
		rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
	}

	return gsbase;
}

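/* Counterpart of the above: write this CPU's inactive (user) GS base. */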
void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
{
	if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
		unsigned long flags;

		local_irq_save(flags);
		__wrgsbase_inactive(gsbase);
		local_irq_restore(flags);
	} else {
		wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
	}
}

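/*
 * x86_fsbase_read_task()/x86_gsbase_read_task() return a task's FS/GS base:
 * straight from the hardware for the current task, from the saved thread
 * state when FSGSBASE is available or the selector is zero, and from the
 * descriptor tables otherwise (see ARCH_GET_FS/ARCH_GET_GS below).
 */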
unsigned long x86_fsbase_read_task(struct task_struct *task)
{
	unsigned long fsbase;

	if (task == current)
		fsbase = x86_fsbase_read_cpu();
	else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
		 (task->thread.fsindex == 0))
		fsbase = task->thread.fsbase;
	else
		fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex);

	return fsbase;
}

unsigned long x86_gsbase_read_task(struct task_struct *task)
{
	unsigned long gsbase;

	if (task == current)
		gsbase = x86_gsbase_read_cpu_inactive();
	else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
		 (task->thread.gsindex == 0))
		gsbase = task->thread.gsbase;
	else
		gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex);

	return gsbase;
}

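/*
 * x86_fsbase_write_task()/x86_gsbase_write_task() update only the saved
 * thread state of a task that is not currently running; the new base takes
 * effect when that task is next scheduled in.
 */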
void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
{
	WARN_ON_ONCE(task == current);

	task->thread.fsbase = fsbase;
}

void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
{
	WARN_ON_ONCE(task == current);

	task->thread.gsbase = gsbase;
}

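/*
 * Set up the register state for the first return to user space after exec:
 * clear FS/GS, load the data segment selectors and point the saved pt_regs
 * at the new instruction pointer, stack and code/stack selectors.
 */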
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	WARN_ON_ONCE(regs != current_pt_regs());

	if (static_cpu_has(X86_BUG_NULL_SEG)) {
		/* Loading zero below won't clear the base. */
		loadsegment(fs, __USER_DS);
		load_gs_index(__USER_DS);
	}

	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);

	regs->ip		= new_ip;
	regs->sp		= new_sp;
	regs->cs		= _cs;
	regs->ss		= _ss;
	regs->flags		= X86_EFLAGS_IF;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}
EXPORT_SYMBOL_GPL(start_thread);

#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer is not supported either.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();

	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
		     this_cpu_read(irq_count) != -1);

	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
		switch_fpu_prepare(prev_p, cpu);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	save_fsgs(prev_p);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.  This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them.
	 */
	arch_end_context_switch(next_p);

	/* Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT.  The
	 * LDT for next is loaded in switch_mm, and the GDT is loaded
	 * above.
	 *
	 * We therefore need to write new values to the segment
	 * registers on every context switch unless both the new and old
	 * values are zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as
	 * those are saved and restored as part of pt_regs.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	x86_fsgsbase_load(prev, next);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	this_cpu_write(current_task, next_p);
	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));

	switch_fpu_finish(next_p);

	/* Reload sp0. */
	update_task_stack(next_p);

	switch_to_extra(prev_p, next_p);

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
		 * does not update the cached descriptor.  As a result, if we
		 * do SYSRET while SS is NULL, we'll end up in user mode with
		 * SS apparently equal to __USER_DS but actually unusable.
		 *
		 * The straightforward workaround would be to fix it up just
		 * before SYSRET, but that would slow down the system call
		 * fast paths.  Instead, we ensure that SS is never NULL in
		 * system call context.  We do this by replacing NULL SS
		 * selectors at every context switch.  SYSCALL sets up a valid
		 * SS, so the only way to get NULL is to re-enter the kernel
		 * from CPL 3 through an interrupt.  Since that can't happen
		 * in the same task as a running syscall, we are guaranteed to
		 * context switch between every interrupt vector entry and a
		 * subsequent SYSRET.
		 *
		 * We read SS first because SS reads are much faster than
		 * writes.  Out of caution, we force SS to __KERNEL_DS even if
		 * it previously had a different non-NULL value.
		 */
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	/* Load the Intel cache allocation PQR MSR. */
	resctrl_sched_in();

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);
	/* Pretend that this comes from a 64bit execve */
	task_pt_regs(current)->orig_ax = __NR_execve;
	current_thread_info()->status &= ~TS_COMPAT;

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

static void __set_personality_x32(void)
{
#ifdef CONFIG_X86_X32
	clear_thread_flag(TIF_IA32);
	set_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_X32;
	current->personality &= ~READ_IMPLIES_EXEC;
	/*
	 * in_32bit_syscall() uses the presence of the x32 syscall bit
	 * flag to determine compat status.  The x86 mmap() code relies on
	 * the syscall bitness so set x32 syscall bit right here to make
	 * in_32bit_syscall() work during exec().
	 *
	 * Pretend to come from a x32 execve.
	 */
	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
	current_thread_info()->status &= ~TS_COMPAT;
#endif
}

static void __set_personality_ia32(void)
{
#ifdef CONFIG_IA32_EMULATION
	set_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_IA32;
	current->personality |= force_personality32;
	/* Prepare the first "return" to user space */
	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
	current_thread_info()->status |= TS_COMPAT;
#endif
}

void set_personality_ia32(bool x32)
{
	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	if (x32)
		__set_personality_x32();
	else
		__set_personality_ia32();
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int ret;

	ret = map_vdso_once(image, addr);
	if (ret)
		return ret;

	return (long)image->size;
}
#endif

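/*
 * 64-bit arch_prctl() options: ARCH_SET_FS/ARCH_SET_GS change a task's
 * FS/GS base (and zero the selector), ARCH_GET_FS/ARCH_GET_GS read it back
 * to user space, and the ARCH_MAP_VDSO_* options map a vDSO image for
 * checkpoint/restore.
 */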
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
	int ret = 0;

	switch (option) {
	case ARCH_SET_GS: {
		if (unlikely(arg2 >= TASK_SIZE_MAX))
			return -EPERM;

		preempt_disable();
		/*
		 * ARCH_SET_GS has always overwritten the index
		 * and the base. Zero is the most sensible value
		 * to put in the index, and is the only value that
		 * makes any sense if FSGSBASE is unavailable.
		 */
		if (task == current) {
			loadseg(GS, 0);
			x86_gsbase_write_cpu_inactive(arg2);

			/*
			 * On non-FSGSBASE systems, save_base_legacy() expects
			 * that we also fill in thread.gsbase.
			 */
			task->thread.gsbase = arg2;

		} else {
			task->thread.gsindex = 0;
			x86_gsbase_write_task(task, arg2);
		}
		preempt_enable();
		break;
	}
	case ARCH_SET_FS: {
		/*
		 * Not strictly needed for %fs, but do it for symmetry
		 * with %gs
		 */
		if (unlikely(arg2 >= TASK_SIZE_MAX))
			return -EPERM;

		preempt_disable();
		/*
		 * Set the selector to 0 for the same reason
		 * as %gs above.
		 */
		if (task == current) {
			loadseg(FS, 0);
			x86_fsbase_write_cpu(arg2);

			/*
			 * On non-FSGSBASE systems, save_base_legacy() expects
			 * that we also fill in thread.fsbase.
			 */
			task->thread.fsbase = arg2;
		} else {
			task->thread.fsindex = 0;
			x86_fsbase_write_task(task, arg2);
		}
		preempt_enable();
		break;
	}
	case ARCH_GET_FS: {
		unsigned long base = x86_fsbase_read_task(task);

		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base = x86_gsbase_read_task(task);

		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
	case ARCH_MAP_VDSO_X32:
		return prctl_map_vdso(&vdso_image_x32, arg2);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	case ARCH_MAP_VDSO_32:
		return prctl_map_vdso(&vdso_image_32, arg2);
# endif
	case ARCH_MAP_VDSO_64:
		return prctl_map_vdso(&vdso_image_64, arg2);
#endif

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

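/*
 * arch_prctl(2) entry point: try the 64-bit-only options first, then fall
 * back to the options shared with the 32-bit path in do_arch_prctl_common().
 */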
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	long ret;

	ret = do_arch_prctl_64(current, option, arg2);
	if (ret == -EINVAL)
		ret = do_arch_prctl_common(current, option, arg2);

	return ret;
}

#ifdef CONFIG_IA32_EMULATION
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	return do_arch_prctl_common(current, option, arg2);
}
#endif

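/* Report a task's user-space stack pointer, as recorded in its pt_regs. */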
unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}
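
/*
 * Illustrative sketch only (not part of the kernel source): from user space,
 * the ARCH_GET_FS/ARCH_SET_GS options handled above are reached through the
 * arch_prctl(2) syscall, roughly like this (base and new_base are
 * hypothetical variables; new_base must be a user address below
 * TASK_SIZE_MAX):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);    // read current FS base
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, new_base); // point GS at new_base
 */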