Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  *  linux/arch/x86_64/entry.S
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  *  Copyright (C) 1991, 1992  Linus Torvalds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9)  * entry.S contains the system-call and fault low-level handling routines.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11)  * Some of this is documented in Documentation/x86/entry_64.rst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13)  * A note on terminology:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14)  * - iret frame:	Architecture defined interrupt frame from SS to RIP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15)  *			at the top of the kernel process stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17)  * Some macro usage:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18)  * - SYM_FUNC_START/END: Define functions in the symbol table.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19)  * - idtentry:		Define exception entry points.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) #include <asm/segment.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) #include <asm/cache.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) #include <asm/errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) #include <asm/asm-offsets.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26) #include <asm/msr.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) #include <asm/unistd.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28) #include <asm/thread_info.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) #include <asm/hw_irq.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) #include <asm/page_types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31) #include <asm/irqflags.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32) #include <asm/paravirt.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33) #include <asm/percpu.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34) #include <asm/asm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35) #include <asm/smap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36) #include <asm/pgtable_types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38) #include <asm/frame.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39) #include <asm/trapnr.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40) #include <asm/nospec-branch.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41) #include <asm/fsgsbase.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42) #include <linux/err.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44) #include "calling.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46) .code64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47) .section .entry.text, "ax"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49) #ifdef CONFIG_PARAVIRT_XXL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50) SYM_CODE_START(native_usergs_sysret64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51) 	UNWIND_HINT_EMPTY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52) 	swapgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53) 	sysretq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54) SYM_CODE_END(native_usergs_sysret64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55) #endif /* CONFIG_PARAVIRT_XXL */
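/*
 * native_usergs_sysret64 above is the native flavour behind USERGS_SYSRET64:
 * swap back to the user GS base and return with SYSRETQ.  With
 * CONFIG_PARAVIRT_XXL the USERGS_SYSRET64 site further down may instead be
 * patched to a hypervisor-specific variant (e.g. under Xen PV).
 */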
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58)  * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60)  * This is the only entry point used for 64-bit system calls.  The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61)  * hardware interface is reasonably well designed and the register to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62)  * argument mapping Linux uses fits well with the registers that are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63)  * available when SYSCALL is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65)  * SYSCALL instructions can be found inlined in libc implementations as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66)  * well as some other programs and libraries.  There are also a handful
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67)  * of SYSCALL instructions in the vDSO used, for example, as a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68)  * clock_gettimeofday fallback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70)  * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71)  * then loads new ss, cs, and rip from previously programmed MSRs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72)  * rflags gets masked by a value from another MSR (so CLD and CLAC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73)  * are not needed). SYSCALL does not save anything on the stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74)  * and does not change rsp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76)  * Registers on entry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77)  * rax  system call number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78)  * rcx  return address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79)  * r11  saved rflags (note: r11 is callee-clobbered register in C ABI)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80)  * rdi  arg0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81)  * rsi  arg1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82)  * rdx  arg2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83)  * r10  arg3 (needs to be moved to rcx to conform to C ABI)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84)  * r8   arg4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85)  * r9   arg5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86)  * (note: r12-r15, rbp, rbx are callee-preserved in C ABI)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88)  * Only called from user space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90)  * When the user can change pt_regs->foo, always force IRET. That is because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91)  * IRET deals with non-canonical addresses better. SYSRET has trouble
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92)  * with them due to bugs in both AMD and Intel CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93)  */
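/*
 * For illustration only (a sketch, not part of the kernel): with the register
 * convention above, a userspace write(1, msg, 14) issued directly via the
 * SYSCALL instruction looks roughly like this ("msg" is a placeholder symbol):
 *
 *	movq	$1, %rax		# system call number (__NR_write)
 *	movq	$1, %rdi		# arg0: fd = stdout
 *	leaq	msg(%rip), %rsi		# arg1: buffer
 *	movq	$14, %rdx		# arg2: byte count
 *	syscall				# rcx := return RIP, r11 := RFLAGS
 *					# on return, rax = result or -errno
 */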
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) SYM_CODE_START(entry_SYSCALL_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) 	UNWIND_HINT_EMPTY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) 	swapgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) 	/* tss.sp2 is scratch space. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) 	movq	%rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 	/* Construct struct pt_regs on stack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) 	pushq	$__USER_DS				/* pt_regs->ss */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 	pushq	PER_CPU_VAR(cpu_tss_rw + TSS_sp2)	/* pt_regs->sp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 	pushq	%r11					/* pt_regs->flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) 	pushq	$__USER_CS				/* pt_regs->cs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 	pushq	%rcx					/* pt_regs->ip */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) 	pushq	%rax					/* pt_regs->orig_ax */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) 	PUSH_AND_CLEAR_REGS rax=$-ENOSYS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) 	/* IRQs are off. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) 	movq	%rax, %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) 	movq	%rsp, %rsi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 	call	do_syscall_64		/* returns with IRQs disabled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 	 * Try to use SYSRET instead of IRET if we're returning to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 	 * a completely clean 64-bit userspace context.  If we're not,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) 	 * go to the slow exit path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) 	movq	RCX(%rsp), %rcx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) 	movq	RIP(%rsp), %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) 	cmpq	%rcx, %r11	/* SYSRET requires RCX == RIP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) 	jne	swapgs_restore_regs_and_return_to_usermode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) 	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) 	 * in kernel space.  This essentially lets the user take over
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) 	 * the kernel, since userspace controls RSP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) 	 * If width of "canonical tail" ever becomes variable, this will need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) 	 * to be updated to remain correct on both old and new CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) 	 * Change top bits to match most significant bit (47th or 56th bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 	 * depending on paging mode) in the address.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 	 */
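	/*
	 * Worked example, 4-level paging (__VIRTUAL_MASK_SHIFT == 47): a
	 * canonical user RIP such as 0x00007fffffffe000 has bit 47 clear and
	 * survives the "shl $16; sar $16" pair unchanged, whereas a bogus
	 * 0x0000800000000000 becomes 0xffff800000000000 and the cmpq below
	 * sends us down the IRET path.
	 */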
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) #ifdef CONFIG_X86_5LEVEL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 	ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 		"shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 	shl	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) 	sar	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 	/* If this changed %rcx, it was not canonical */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 	cmpq	%rcx, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 	jne	swapgs_restore_regs_and_return_to_usermode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) 	cmpq	$__USER_CS, CS(%rsp)		/* CS must match SYSRET */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) 	jne	swapgs_restore_regs_and_return_to_usermode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) 	movq	R11(%rsp), %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) 	cmpq	%r11, EFLAGS(%rsp)		/* R11 == RFLAGS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) 	jne	swapgs_restore_regs_and_return_to_usermode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) 	 * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) 	 * restore RF properly. If the slowpath sets it for whatever reason, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 	 * need to restore it correctly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) 	 * SYSRET can restore TF, but unlike IRET, restoring TF results in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) 	 * trap from userspace immediately after SYSRET.  This would cause an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) 	 * infinite loop whenever #DB happens with register state that satisfies
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) 	 * the opportunistic SYSRET conditions.  For example, single-stepping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) 	 * this user code:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) 	 *           movq	$stuck_here, %rcx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) 	 *           pushfq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 	 *           popq %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) 	 *   stuck_here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) 	 * would never get past 'stuck_here'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) 	testq	$(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 	jnz	swapgs_restore_regs_and_return_to_usermode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 	/* nothing to check for RSP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 	cmpq	$__USER_DS, SS(%rsp)		/* SS must match SYSRET */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 	jne	swapgs_restore_regs_and_return_to_usermode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 	 * We win! This label is here just for ease of understanding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 	 * perf profiles. Nothing jumps here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) syscall_return_via_sysret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) 	/* rcx and r11 are already restored (see code above) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) 	POP_REGS pop_rdi=0 skip_r11rcx=1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) 	 * Now all regs are restored except RSP and RDI.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) 	 * Save old stack pointer and switch to trampoline stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) 	movq	%rsp, %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) 	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) 	UNWIND_HINT_EMPTY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 	pushq	RSP-RDI(%rdi)	/* RSP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 	pushq	(%rdi)		/* RDI */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) 	 * We are on the trampoline stack.  All regs except RDI are live.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 	 * We can do future final exit work right here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 	STACKLEAK_ERASE_NOCLOBBER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 	popq	%rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 	popq	%rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 	USERGS_SYSRET64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) SYM_CODE_END(entry_SYSCALL_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222)  * %rdi: prev task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223)  * %rsi: next task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) .pushsection .text, "ax"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) SYM_FUNC_START(__switch_to_asm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) 	 * Save callee-saved registers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) 	 * This must match the order in inactive_task_frame
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) 	pushq	%rbp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) 	pushq	%rbx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 	pushq	%r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 	pushq	%r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 	pushq	%r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 	pushq	%r15
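	/*
	 * From the new RSP upwards the frame now reads r15, r14, r13, r12,
	 * rbx, rbp and finally the return address pushed by our caller,
	 * mirroring struct inactive_task_frame.
	 */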
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 	/* switch stack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 	movq	%rsp, TASK_threadsp(%rdi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 	movq	TASK_threadsp(%rsi), %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) #ifdef CONFIG_STACKPROTECTOR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	movq	TASK_stack_canary(%rsi), %rbx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 	movq	%rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) #ifdef CONFIG_RETPOLINE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 	 * When switching from a shallower to a deeper call stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) 	 * the RSB may either underflow or use entries populated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 	 * with userspace addresses. On CPUs where those concerns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) 	 * exist, overwrite the RSB with entries which capture
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 	 * speculative execution to prevent attack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 	/* restore callee-saved registers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 	popq	%r15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 	popq	%r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 	popq	%r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 	popq	%r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 	popq	%rbx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 	popq	%rbp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 	jmp	__switch_to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) SYM_FUNC_END(__switch_to_asm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) .popsection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271)  * A newly forked process directly context switches into this address.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273)  * rax: prev task we switched from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274)  * rbx: kernel thread func (NULL for user thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275)  * r12: kernel thread arg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) .pushsection .text, "ax"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) SYM_CODE_START(ret_from_fork)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 	UNWIND_HINT_EMPTY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 	movq	%rax, %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	call	schedule_tail			/* rdi: 'prev' task parameter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 	testq	%rbx, %rbx			/* from kernel_thread? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 	jnz	1f				/* kernel threads are uncommon */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 	UNWIND_HINT_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 	movq	%rsp, %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 	call	syscall_exit_to_user_mode	/* returns with IRQs disabled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 	jmp	swapgs_restore_regs_and_return_to_usermode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 	/* kernel thread */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 	UNWIND_HINT_EMPTY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 	movq	%r12, %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 	CALL_NOSPEC rbx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 	 * A kernel thread is allowed to return here after successfully
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 	 * calling kernel_execve().  Exit to userspace to complete the execve()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 	 * syscall.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 	movq	$0, RAX(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 	jmp	2b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) SYM_CODE_END(ret_from_fork)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) .popsection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) #ifdef CONFIG_DEBUG_ENTRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 	pushq %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 	SAVE_FLAGS(CLBR_RAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 	testl $X86_EFLAGS_IF, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 	jz .Lokay_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 	ud2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) .Lokay_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 	popq %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320)  * idtentry_body - Macro to emit code calling the C function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321)  * @cfunc:		C function to be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322)  * @has_error_code:	Hardware pushed error code on stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) .macro idtentry_body cfunc has_error_code:req
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) 	call	error_entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 	UNWIND_HINT_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) 	movq	%rsp, %rdi			/* pt_regs pointer into 1st argument*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) 	.if \has_error_code == 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) 		movq	ORIG_RAX(%rsp), %rsi	/* get error code into 2nd argument*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 		movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 	.endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 	call	\cfunc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 	jmp	error_return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) .endm
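/*
 * idtentry_body is the common tail of the stubs generated below: error_entry
 * completes the register save, pt_regs (and, if present, the hardware error
 * code) are handed to the C handler, and everything funnels into error_return.
 */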
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342)  * idtentry - Macro to generate entry stubs for simple IDT entries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343)  * @vector:		Vector number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344)  * @asmsym:		ASM symbol for the entry point
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345)  * @cfunc:		C function to be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346)  * @has_error_code:	Hardware pushed error code on stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348)  * The macro emits code to set up the kernel context for straightforward
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349)  * and simple IDT entries. No IST stack, no paranoid entry checks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) .macro idtentry vector asmsym cfunc has_error_code:req
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) SYM_CODE_START(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 	ASM_CLAC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 	.if \has_error_code == 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 		pushq	$-1			/* ORIG_RAX: no syscall to restart */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 	.endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 	.if \vector == X86_TRAP_BP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 		 * If coming from kernel space, create a 6-word gap to allow the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 		 * int3 handler to emulate a call instruction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 		testb	$3, CS-ORIG_RAX(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 		jnz	.Lfrom_usermode_no_gap_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 		.rept	6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 		pushq	5*8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 		.endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 		UNWIND_HINT_IRET_REGS offset=8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) .Lfrom_usermode_no_gap_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 	.endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) 	idtentry_body \cfunc \has_error_code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) _ASM_NOKPROBE(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) SYM_CODE_END(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) .endm
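/*
 * Usage sketch (assuming the wiring in asm/idtentry.h): an instantiation such
 * as DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error) expands, roughly, to
 *
 *	idtentry X86_TRAP_DE asm_exc_divide_error exc_divide_error has_error_code=0
 *
 * which emits asm_exc_divide_error as the IDT stub wrapping exc_divide_error().
 */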
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381)  * Interrupt entry/exit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383)  * The interrupt stubs push (vector) onto the stack, which is the error_code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384)  * position of idtentry exceptions, and jump to one of the two idtentry points
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385)  * (common/spurious).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387)  * common_interrupt is a hotpath, align it to a cache line
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) .macro idtentry_irq vector cfunc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 	.p2align CONFIG_X86_L1_CACHE_SHIFT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 	idtentry \vector asm_\cfunc \cfunc has_error_code=1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395)  * System vectors which invoke their handlers directly and are not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396)  * going through the regular common device interrupt handling code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) .macro idtentry_sysvec vector cfunc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 	idtentry \vector asm_\cfunc \cfunc has_error_code=0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) .endm
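/*
 * As above, a sketch of a typical instantiation (via DECLARE_IDTENTRY_SYSVEC()
 * in asm/idtentry.h), here for the local APIC timer:
 *
 *	idtentry_sysvec LOCAL_TIMER_VECTOR sysvec_apic_timer_interrupt
 */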
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403)  * idtentry_mce_db - Macro to generate entry stubs for #MC and #DB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404)  * @vector:		Vector number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405)  * @asmsym:		ASM symbol for the entry point
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406)  * @cfunc:		C function to be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408)  * The macro emits code to set up the kernel context for #MC and #DB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410)  * If the entry comes from user space it uses the normal entry path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411)  * including the return to user space work and preemption checks on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412)  * exit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414)  * If it hits in kernel mode then it needs to go through the paranoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415)  * entry as the exception can hit any random state. No preemption
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416)  * check on exit to keep the paranoid path simple.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) .macro idtentry_mce_db vector asmsym cfunc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) SYM_CODE_START(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 	UNWIND_HINT_IRET_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 	ASM_CLAC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 	pushq	$-1			/* ORIG_RAX: no syscall to restart */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 	 * If the entry is from userspace, switch stacks and treat it as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 	 * a normal entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 	testb	$3, CS-ORIG_RAX(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 	jnz	.Lfrom_usermode_switch_stack_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 	/* paranoid_entry returns GS information for paranoid_exit in EBX. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 	call	paranoid_entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 	UNWIND_HINT_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) 	movq	%rsp, %rdi		/* pt_regs pointer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) 	call	\cfunc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	jmp	paranoid_exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 	/* Switch to the regular task stack and use the noist entry point */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) .Lfrom_usermode_switch_stack_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 	idtentry_body noist_\cfunc, has_error_code=0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) _ASM_NOKPROBE(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) SYM_CODE_END(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) #ifdef CONFIG_AMD_MEM_ENCRYPT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453)  * idtentry_vc - Macro to generate entry stub for #VC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454)  * @vector:		Vector number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455)  * @asmsym:		ASM symbol for the entry point
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456)  * @cfunc:		C function to be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458)  * The macro emits code to set up the kernel context for #VC. The #VC handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459)  * runs on an IST stack and needs to be able to cause nested #VC exceptions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461)  * To make this work the #VC entry code tries its best to pretend it doesn't use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462)  * an IST stack by switching to the task stack if coming from user-space (which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463)  * includes early SYSCALL entry path) or back to the stack in the IRET frame if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464)  * entered from kernel-mode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466)  * If entered from kernel-mode the return stack is validated first, and if it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467)  * not safe to use (e.g. because it points to the entry stack) the #VC handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468)  * will switch to a fall-back stack (VC2) and call a special handler function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470)  * The macro is only used for one vector, but it is planned to be extended in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471)  * the future for the #HV exception.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) .macro idtentry_vc vector asmsym cfunc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) SYM_CODE_START(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 	UNWIND_HINT_IRET_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 	ASM_CLAC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 	 * If the entry is from userspace, switch stacks and treat it as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 	 * a normal entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 	testb	$3, CS-ORIG_RAX(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 	jnz	.Lfrom_usermode_switch_stack_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 	 * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 	 * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 	call	paranoid_entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 	UNWIND_HINT_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 	 * Switch off the IST stack to make it free for nested exceptions. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 	 * vc_switch_off_ist() function will switch back to the interrupted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 	 * stack if it is safe to do so. If not it switches to the VC fall-back
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 	 * stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 	movq	%rsp, %rdi		/* pt_regs pointer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 	call	vc_switch_off_ist
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 	movq	%rax, %rsp		/* Switch to new stack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 	UNWIND_HINT_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 	/* Update pt_regs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) 	movq	ORIG_RAX(%rsp), %rsi	/* get error code into 2nd argument*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 	movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 	movq	%rsp, %rdi		/* pt_regs pointer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 	call	kernel_\cfunc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 	 * No need to switch back to the IST stack. The current stack is either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 	 * identical to the stack in the IRET frame or the VC fall-back stack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 	 * so it is definitely mapped even with PTI enabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 	jmp	paranoid_exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 	/* Switch to the regular task stack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) .Lfrom_usermode_switch_stack_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 	idtentry_body user_\cfunc, has_error_code=1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) _ASM_NOKPROBE(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) SYM_CODE_END(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530)  * Double fault entry. Straight paranoid. No checks on which context this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531)  * came from, because for the espfix-induced #DF that would do the wrong
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532)  * thing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) .macro idtentry_df vector asmsym cfunc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) SYM_CODE_START(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 	UNWIND_HINT_IRET_REGS offset=8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 	ASM_CLAC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 	/* paranoid_entry returns GS information for paranoid_exit in EBX. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 	call	paranoid_entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 	UNWIND_HINT_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 	movq	%rsp, %rdi		/* pt_regs pointer into first argument */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 	movq	ORIG_RAX(%rsp), %rsi	/* get error code into 2nd argument*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 	movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 	call	\cfunc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 	jmp	paranoid_exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) _ASM_NOKPROBE(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) SYM_CODE_END(\asmsym)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) .endm
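/*
 * Sketch of the single expected instantiation, via DECLARE_IDTENTRY_DF() in
 * asm/idtentry.h:
 *
 *	idtentry_df X86_TRAP_DF asm_exc_double_fault exc_double_fault
 */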
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555)  * Include the defines which emit the idt entries which are shared
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556)  * between 32 and 64 bit and emit the __irqentry_text_* markers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557)  * so the stacktrace boundary checks work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 	.align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 	.globl __irqentry_text_start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) __irqentry_text_start:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) #include <asm/idtentry.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 	.align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 	.globl __irqentry_text_end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) __irqentry_text_end:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) SYM_CODE_START_LOCAL(common_interrupt_return)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) #ifdef CONFIG_DEBUG_ENTRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 	/* Assert that pt_regs indicates user mode. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 	testb	$3, CS(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 	jnz	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 	ud2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) #ifdef CONFIG_XEN_PV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 	ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 	POP_REGS pop_rdi=0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 	 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 	 * Save old stack pointer and switch to trampoline stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 	movq	%rsp, %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 	UNWIND_HINT_EMPTY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 	/* Copy the IRET frame to the trampoline stack. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 	pushq	6*8(%rdi)	/* SS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 	pushq	5*8(%rdi)	/* RSP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 	pushq	4*8(%rdi)	/* EFLAGS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 	pushq	3*8(%rdi)	/* CS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 	pushq	2*8(%rdi)	/* RIP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 	/* Push user RDI on the trampoline stack. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 	pushq	(%rdi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 	 * We are on the trampoline stack.  All regs except RDI are live.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 	 * We can do future final exit work right here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 	STACKLEAK_ERASE_NOCLOBBER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 	/* Restore RDI. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 	popq	%rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 	SWAPGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 	INTERRUPT_RETURN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) #ifdef CONFIG_DEBUG_ENTRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 	/* Assert that pt_regs indicates kernel mode. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 	testb	$3, CS(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 	jz	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 	ud2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 	POP_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 	addq	$8, %rsp	/* skip regs->orig_ax */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 	 * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on IRET core serialization
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 	 * when returning from an IPI handler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 	INTERRUPT_RETURN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 	UNWIND_HINT_IRET_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 	 * Are we returning to a stack segment from the LDT?  Note: in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 	 * 64-bit mode SS:RSP on the exception stack is always valid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) #ifdef CONFIG_X86_ESPFIX64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 	testb	$4, (SS-RIP)(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 	jnz	native_irq_return_ldt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 	 * This may fault.  Non-paranoid faults on return to userspace are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 	 * handled by fixup_bad_iret.  These include #SS, #GP, and #NP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 	 * Double-faults due to espfix64 are handled in exc_double_fault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 	 * Other faults here are fatal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 	iretq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) #ifdef CONFIG_X86_ESPFIX64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) native_irq_return_ldt:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 	 * We are running with user GSBASE.  All GPRs contain their user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 	 * values.  We have a percpu ESPFIX stack that is eight slots
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 	 * long (see ESPFIX_STACK_SIZE).  espfix_waddr points to the bottom
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 	 * of the ESPFIX stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 	 * We clobber RAX and RDI in this code.  We stash RDI on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 	 * normal stack and RAX on the ESPFIX stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 	 * The ESPFIX stack layout we set up looks like this:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 	 * --- top of ESPFIX stack ---
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 	 * SS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 	 * RSP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 	 * RFLAGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 	 * CS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 	 * RIP  <-- RSP points here when we're done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 	 * RAX  <-- espfix_waddr points here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 	 * --- bottom of ESPFIX stack ---
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 	pushq	%rdi				/* Stash user RDI */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 	swapgs					/* to kernel GS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 	movq	PER_CPU_VAR(espfix_waddr), %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 	movq	%rax, (0*8)(%rdi)		/* user RAX */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 	movq	(1*8)(%rsp), %rax		/* user RIP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 	movq	%rax, (1*8)(%rdi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 	movq	(2*8)(%rsp), %rax		/* user CS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 	movq	%rax, (2*8)(%rdi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 	movq	(3*8)(%rsp), %rax		/* user RFLAGS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 	movq	%rax, (3*8)(%rdi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 	movq	(5*8)(%rsp), %rax		/* user SS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 	movq	%rax, (5*8)(%rdi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 	movq	(4*8)(%rsp), %rax		/* user RSP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 	movq	%rax, (4*8)(%rdi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 	/* Now RAX == RSP. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 	andl	$0xffff0000, %eax		/* RAX = (RSP & 0xffff0000) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 	 * espfix_stack[31:16] == 0.  The page tables are set up such that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 	 * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 	 * espfix_waddr for any X.  That is, there are 65536 RO aliases of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 	 * the same page.  Set up RSP so that RSP[31:16] contains the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 	 * respective 16 bits of the /userspace/ RSP and RSP nonetheless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) 	 * still points to an RO alias of the ESPFIX stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 	 */
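	/*
	 * Concretely (a sketch): if the user RSP was 0x00007ffd1234beef, the
	 * andl above leaves RAX == 0x0000000012340000, so the orq below picks
	 * the read-only alias whose bits [31:16] are the user's 0x1234 while
	 * all remaining bits still come from espfix_stack.
	 */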
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 	orq	PER_CPU_VAR(espfix_stack), %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 	swapgs					/* to user GS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 	popq	%rdi				/* Restore user RDI */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 	movq	%rax, %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 	UNWIND_HINT_IRET_REGS offset=8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 	 * At this point, we cannot write to the stack any more, but we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 	 * still read.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 	popq	%rax				/* Restore user RAX */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 	 * RSP now points to an ordinary IRET frame, except that the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 	 * is read-only and RSP[31:16] are preloaded with the userspace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 	 * values.  We can now IRET back to userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 	jmp	native_irq_return_iret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) SYM_CODE_END(common_interrupt_return)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) _ASM_NOKPROBE(common_interrupt_return)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729)  * Reload gs selector with exception handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730)  * edi:  new selector
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732)  * Is in entry.text as it shouldn't be instrumented.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) SYM_FUNC_START(asm_load_gs_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 	swapgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) .Lgs_change:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 	movl	%edi, %gs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 2:	ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 	swapgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) SYM_FUNC_END(asm_load_gs_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) EXPORT_SYMBOL(asm_load_gs_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	_ASM_EXTABLE(.Lgs_change, .Lbad_gs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	.section .fixup, "ax"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 	/* running with kernelgs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 	swapgs					/* switch back to user gs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) .macro ZAP_GS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 	/* This can't be a string because the preprocessor needs to see it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 	movl $__USER_DS, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	movl %eax, %gs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	xorl	%eax, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 	movl	%eax, %gs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	jmp	2b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) SYM_CODE_END(.Lbad_gs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	.previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764)  * rdi: New stack pointer points to the top word of the stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765)  * rsi: Function pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766)  * rdx: Function argument (can be NULL if none)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) SYM_FUNC_START(asm_call_on_stack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) SYM_INNER_LABEL(asm_call_sysvec_on_stack, SYM_L_GLOBAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) SYM_INNER_LABEL(asm_call_irq_on_stack, SYM_L_GLOBAL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 	 * Save the frame pointer unconditionally. This allows the ORC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 	 * unwinder to handle the stack switch.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 	pushq		%rbp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 	mov		%rsp, %rbp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 	 * The unwinder relies on the word at the top of the new stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 	 * page linking back to the previous RSP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 	mov		%rsp, (%rdi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 	mov		%rdi, %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	/* Move the argument to the right place */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	mov		%rdx, %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	.pushsection .discard.instr_begin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	.long 1b - .
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 	.popsection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	CALL_NOSPEC	rsi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 	.pushsection .discard.instr_end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 	.long 2b - .
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 	.popsection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 	/* Restore the previous stack pointer from RBP. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 	leaveq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) SYM_FUNC_END(asm_call_on_stack)
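
/*
 * Illustrative only (not part of the original source): the register
 * contract of asm_call_on_stack() expressed as a pseudo-C sketch.  The
 * helper name run_on_new_stack() and its parameters are hypothetical:
 *
 *	// new_stack_top must point at the topmost word of the new stack;
 *	// that word is overwritten with the old RSP so the ORC unwinder
 *	// can link the two stacks together.
 *	static void run_on_new_stack(void *new_stack_top,
 *				     void (*func)(void *arg), void *arg)
 *	{
 *		asm_call_on_stack(new_stack_top, func, arg);
 *	}
 *
 * In practice the kernel reaches this routine through the irq_stack
 * helpers rather than a direct C call, so treat the prototype above purely
 * as a sketch of the rdi/rsi/rdx usage.
 */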
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) #ifdef CONFIG_XEN_PV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806)  * A note on the "critical region" in our callback handler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807)  * We want to avoid stacking callback handlers due to events occurring
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808)  * during handling of the last event. To do this, we keep events disabled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809)  * until we've done all processing. HOWEVER, we must enable events before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810)  * popping the stack frame (can't be done atomically) and so it would still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811)  * be possible to get enough handler activations to overflow the stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812)  * Although unlikely, bugs of that kind are hard to track down, so we'd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813)  * like to avoid the possibility.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814)  * So, on entry to the handler we detect whether we interrupted an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815)  * existing activation in its critical region -- if so, we pop the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816)  * activation and restart the handler using the previous one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818)  * C calling convention: exc_xen_hypervisor_callback(struct pt_regs *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823)  * Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824)  * see the correct pointer to the pt_regs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	UNWIND_HINT_FUNC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	movq	%rdi, %rsp			/* we don't return, adjust the stack frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	UNWIND_HINT_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 	call	xen_pv_evtchn_do_upcall
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 	jmp	error_return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) SYM_CODE_END(exc_xen_hypervisor_callback)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836)  * Hypervisor uses this for application faults while it executes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837)  * We get here for two reasons:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838)  *  1. Fault while reloading DS, ES, FS or GS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839)  *  2. Fault while executing IRET
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840)  * Category 1 we do not need to fix up as Xen has already reloaded all segment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841)  * registers that could be reloaded and zeroed the others.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842)  * Category 2 we fix up by killing the current process. We cannot use the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843)  * normal Linux return path in this case because if we use the IRET hypercall
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844)  * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845)  * We distinguish between categories by comparing each saved segment register
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846)  * with its current contents: any discrepancy means we are in category 1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) SYM_CODE_START(xen_failsafe_callback)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	UNWIND_HINT_EMPTY
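	/*
	 * Stack layout as implied by the offsets used below (illustrative
	 * summary only, no new state):
	 *
	 *	0x00(%rsp)	RCX
	 *	0x08(%rsp)	R11
	 *	0x10(%rsp)	saved DS
	 *	0x18(%rsp)	saved ES
	 *	0x20(%rsp)	saved FS
	 *	0x28(%rsp)	saved GS
	 *	0x30(%rsp)	iret frame (RIP, CS, RFLAGS, RSP, SS)
	 *
	 * which is why both exit paths below pop 0x30 bytes before building
	 * their own frame.
	 */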
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 	movl	%ds, %ecx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 	cmpw	%cx, 0x10(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 	jne	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 	movl	%es, %ecx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	cmpw	%cx, 0x18(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	jne	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 	movl	%fs, %ecx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	cmpw	%cx, 0x20(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 	jne	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 	movl	%gs, %ecx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	cmpw	%cx, 0x28(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 	jne	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	/* All segments match their saved values => Category 2 (Bad IRET). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	movq	(%rsp), %rcx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	movq	8(%rsp), %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	addq	$0x30, %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	pushq	$0				/* RIP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	UNWIND_HINT_IRET_REGS offset=8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	jmp	asm_exc_general_protection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	movq	(%rsp), %rcx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	movq	8(%rsp), %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 	addq	$0x30, %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 	UNWIND_HINT_IRET_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	pushq	$-1 /* orig_ax = -1 => not a system call */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	PUSH_AND_CLEAR_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 	ENCODE_FRAME_POINTER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	jmp	error_return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) SYM_CODE_END(xen_failsafe_callback)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) #endif /* CONFIG_XEN_PV */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882)  * Save all registers in pt_regs. Return GSBASE related information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883)  * in EBX depending on the availability of the FSGSBASE instructions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885)  * FSGSBASE	R/EBX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886)  *     N        0 -> SWAPGS on exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887)  *              1 -> no SWAPGS on exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889)  *     Y        GSBASE value at entry, must be restored in paranoid_exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) SYM_CODE_START_LOCAL(paranoid_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	UNWIND_HINT_FUNC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	cld
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 	PUSH_AND_CLEAR_REGS save_ret=1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 	ENCODE_FRAME_POINTER 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 	 * Always stash CR3 in %r14.  This value will be restored,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	 * verbatim, at exit.  Needed if paranoid_entry interrupted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	 * another entry that already switched to the user CR3 value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	 * but has not yet returned to userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	 * This is also why CS (stashed in the "iret frame" by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	 * hardware at entry) can not be used: this may be a return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	 * to kernel code, but with a user CR3 value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	 * Switching CR3 does not depend on kernel GSBASE so it can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	 * be done before switching to the kernel GSBASE. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	 * required for FSGSBASE because the kernel GSBASE has to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	 * be retrieved from a kernel internal table.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	 * Handling GSBASE depends on the availability of FSGSBASE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 	 * Without FSGSBASE the kernel enforces that negative GSBASE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	 * values indicate kernel GSBASE. With FSGSBASE no assumptions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	 * can be made about the GSBASE value when entering from user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 	 * space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 	ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 	 * Read the current GSBASE and store it in %rbx unconditionally,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	 * retrieve and set the current CPU's kernel GSBASE. The stored value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	 * has to be restored in paranoid_exit unconditionally.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	 * The unconditional write to GS base below ensures that no subsequent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	 * loads based on a mispredicted GS base can happen, therefore no LFENCE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	 * is needed here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 	SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) .Lparanoid_entry_checkgs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 	/* EBX = 1 -> kernel GSBASE active, no restore required */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	movl	$1, %ebx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 	 * The kernel-enforced convention is that a negative GSBASE indicates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 	 * a kernel value. No SWAPGS needed on entry and exit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	movl	$MSR_GS_BASE, %ecx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 	rdmsr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	testl	%edx, %edx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	js	.Lparanoid_kernel_gsbase
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 	/* EBX = 0 -> SWAPGS required on exit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 	xorl	%ebx, %ebx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 	swapgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) .Lparanoid_kernel_gsbase:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 	FENCE_SWAPGS_KERNEL_ENTRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) SYM_CODE_END(paranoid_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959)  * "Paranoid" exit path from exception stack.  This is invoked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960)  * only on return from non-NMI IST interrupts that came
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961)  * from kernel space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963)  * We may be returning to very strange contexts (e.g. very early
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964)  * in syscall entry), so checking for preemption here would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965)  * be complicated.  Fortunately, there's no good reason to try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966)  * to handle preemption here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968)  * R/EBX contains the GSBASE related information depending on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969)  * availability of the FSGSBASE instructions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971)  * FSGSBASE	R/EBX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972)  *     N        0 -> SWAPGS on exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973)  *              1 -> no SWAPGS on exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975)  *     Y        User space GSBASE, must be restored unconditionally
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) SYM_CODE_START_LOCAL(paranoid_exit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 	UNWIND_HINT_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	 * The order of operations is important. RESTORE_CR3 requires
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	 * kernel GSBASE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 	 * NB to anyone tempted to optimize this code: this code does
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 	 * not execute at all for exceptions from user mode. Those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 	 * exceptions go through error_exit instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	RESTORE_CR3	scratch_reg=%rax save_reg=%r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	/* Handle the three GSBASE cases */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 	ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 	/* With FSGSBASE enabled, unconditionally restore GSBASE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 	wrgsbase	%rbx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 	jmp		restore_regs_and_return_to_kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) .Lparanoid_exit_checkgs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 	/* On non-FSGSBASE systems, conditionally do SWAPGS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	testl		%ebx, %ebx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 	jnz		restore_regs_and_return_to_kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	/* We are returning to a context with user GSBASE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 	swapgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 	jmp		restore_regs_and_return_to_kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) SYM_CODE_END(paranoid_exit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007)  * Save all registers in pt_regs, and switch GS if needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) SYM_CODE_START_LOCAL(error_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 	UNWIND_HINT_FUNC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	cld
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	PUSH_AND_CLEAR_REGS save_ret=1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	ENCODE_FRAME_POINTER 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	testb	$3, CS+8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 	jz	.Lerror_kernelspace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 	 * We entered from user mode or we're pretending to have entered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 	 * from user mode due to an IRET fault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	SWAPGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	FENCE_SWAPGS_USER_ENTRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	/* We have user CR3.  Change to kernel CR3. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) .Lerror_entry_from_usermode_after_swapgs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 	/* Put us onto the real thread stack. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 	popq	%r12				/* save return addr in %r12 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 	movq	%rsp, %rdi			/* arg0 = pt_regs pointer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 	call	sync_regs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 	movq	%rax, %rsp			/* switch stack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 	ENCODE_FRAME_POINTER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 	pushq	%r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 	 * There are two places in the kernel that can potentially fault with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	 * usergs. Handle them here.  B stepping K8s sometimes report a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	 * truncated RIP for IRET exceptions returning to compat mode. Check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	 * for these here too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) .Lerror_kernelspace:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 	leaq	native_irq_return_iret(%rip), %rcx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	cmpq	%rcx, RIP+8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	je	.Lerror_bad_iret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	movl	%ecx, %eax			/* zero extend */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 	cmpq	%rax, RIP+8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	je	.Lbstep_iret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 	cmpq	$.Lgs_change, RIP+8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	jne	.Lerror_entry_done_lfence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 	 * hack: .Lgs_change can fail with user gsbase.  If this happens, fix up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	 * gsbase and proceed.  We'll fix up the exception and land in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 	 * .Lgs_change's error handler with kernel gsbase.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 	SWAPGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 	 * Issue an LFENCE to prevent GS speculation, regardless of whether it is a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 	 * kernel or user gsbase.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) .Lerror_entry_done_lfence:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	FENCE_SWAPGS_KERNEL_ENTRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) .Lbstep_iret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	/* Fix truncated RIP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	movq	%rcx, RIP+8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	/* fall through */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) .Lerror_bad_iret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	 * We came from an IRET to user mode, so we have user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	 * gsbase and CR3.  Switch to kernel gsbase and CR3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	SWAPGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	FENCE_SWAPGS_USER_ENTRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 	 * Pretend that the exception came from user mode: set up pt_regs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	 * as if we faulted immediately after IRET.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 	mov	%rsp, %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 	call	fixup_bad_iret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 	mov	%rax, %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 	jmp	.Lerror_entry_from_usermode_after_swapgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) SYM_CODE_END(error_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) SYM_CODE_START_LOCAL(error_return)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	UNWIND_HINT_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	DEBUG_ENTRY_ASSERT_IRQS_OFF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 	testb	$3, CS(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	jz	restore_regs_and_return_to_kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	jmp	swapgs_restore_regs_and_return_to_usermode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) SYM_CODE_END(error_return)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100)  * Runs on exception stack.  Xen PV does not go through this path at all,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101)  * so we can use real assembly here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)  * Registers:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)  *	%r14: Used to save/restore the CR3 of the interrupted context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105)  *	      when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) SYM_CODE_START(asm_exc_nmi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 	UNWIND_HINT_IRET_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	 * the iretq it performs will take us out of NMI context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	 * This means that we can have nested NMIs where the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	 * NMI is using the top of the stack of the previous NMI. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 	 * can't let it execute because the nested NMI will corrupt the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	 * stack of the previous NMI. NMI handlers are not re-entrant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	 * anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	 * To handle this case we do the following:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	 *  Check a special location on the stack that contains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	 *  a variable that is set when NMIs are executing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	 *  The interrupted task's stack is also checked to see if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	 *  is an NMI stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 	 *  If the variable is not set and the stack is not the NMI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 	 *  stack then:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 	 *    o Set the special variable on the stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 	 *    o Copy the interrupt frame into an "outermost" location on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	 *      stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	 *    o Copy the interrupt frame into an "iret" location on the stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 	 *    o Continue processing the NMI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	 *  If the variable is set or the previous stack is the NMI stack:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	 *    o Modify the "iret" location to jump to the repeat_nmi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	 *    o return back to the first NMI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	 * Now on exit of the first NMI, we first clear the stack variable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	 * The NMI stack will tell any nested NMIs at that point that it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	 * nested. Then we pop the stack normally with iret, and if there was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	 * a nested NMI that updated the copied interrupt stack frame, a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	 * jump will be made to the repeat_nmi code that will handle the second
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	 * NMI.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	 * However, espfix prevents us from directly returning to userspace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	 * with a single IRET instruction.  Similarly, IRET to user mode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	 * can fault.  We therefore handle NMIs from user space like
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	 * other IST entries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	ASM_CLAC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 	/* Use %rdx as our temp variable throughout */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	pushq	%rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 	testb	$3, CS-RIP+8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 	jz	.Lnmi_from_kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 	 * NMI from user mode.  We need to run on the thread stack, but we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	 * can't go through the normal entry paths: NMIs are masked, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 	 * we don't want to enable interrupts, because then we'll end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 	 * up in an awkward situation in which IRQs are on but NMIs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	 * are off.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	 * We also must not push anything to the stack before switching
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 	 * stacks lest we corrupt the "NMI executing" variable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	swapgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	cld
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	FENCE_SWAPGS_USER_ENTRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	movq	%rsp, %rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	pushq	5*8(%rdx)	/* pt_regs->ss */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	pushq	4*8(%rdx)	/* pt_regs->rsp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	pushq	3*8(%rdx)	/* pt_regs->flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	pushq	2*8(%rdx)	/* pt_regs->cs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	pushq	1*8(%rdx)	/* pt_regs->rip */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	UNWIND_HINT_IRET_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 	pushq   $-1		/* pt_regs->orig_ax */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 	PUSH_AND_CLEAR_REGS rdx=(%rdx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	ENCODE_FRAME_POINTER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	 * At this point we no longer need to worry about stack damage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	 * due to nesting -- we're on the normal thread stack and we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	 * done with the NMI stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 	movq	%rsp, %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 	movq	$-1, %rsi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	call	exc_nmi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	 * Return back to user mode.  We must *not* do the normal exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 	 * work, because we don't want to enable interrupts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	jmp	swapgs_restore_regs_and_return_to_usermode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) .Lnmi_from_kernel:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	 * Here's what our stack frame will look like:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	 * +---------------------------------------------------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	 * | original SS                                             |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	 * | original Return RSP                                     |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	 * | original RFLAGS                                         |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	 * | original CS                                             |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 	 * | original RIP                                            |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	 * +---------------------------------------------------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	 * | temp storage for rdx                                    |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	 * +---------------------------------------------------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 	 * | "NMI executing" variable                                |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 	 * +---------------------------------------------------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	 * | iret SS          } Copied from "outermost" frame        |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	 * | iret Return RSP  } on each loop iteration; overwritten  |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	 * | iret RFLAGS      } by a nested NMI to force another     |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	 * | iret CS          } iteration if needed.                 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 	 * | iret RIP         }                                      |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	 * +---------------------------------------------------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 	 * | outermost SS          } initialized in first_nmi;       |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 	 * | outermost Return RSP  } will not be changed before      |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	 * | outermost RFLAGS      } NMI processing is done.         |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 	 * | outermost CS          } Copied to "iret" frame on each  |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 	 * | outermost RIP         } iteration.                      |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 	 * +---------------------------------------------------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 	 * | pt_regs                                                 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 	 * +---------------------------------------------------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 	 * The "original" frame is used by hardware.  Before re-enabling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	 * NMIs, we need to be done with it, and we need to leave enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	 * space for the asm code here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	 * We return by executing IRET while RSP points to the "iret" frame.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	 * That will either return for real or it will loop back into NMI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	 * processing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	 * The "outermost" frame is copied to the "iret" frame on each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 	 * iteration of the loop, so each iteration starts with the "iret"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 	 * frame pointing to the final return target.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 	 * Determine whether we're a nested NMI.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	 * If we interrupted kernel code between repeat_nmi and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	 * end_repeat_nmi, then we are a nested NMI.  We must not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	 * modify the "iret" frame because it's being written by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 	 * the outer NMI.  That's okay; the outer NMI handler is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 	 * about to call exc_nmi() anyway, so we can just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 	 * resume the outer NMI.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 
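	/*
	 * Concretely: %rdx was pushed on top of the hardware frame, so the
	 * interrupted RIP sits at 8(%rsp).  The two unsigned compares below
	 * implement "repeat_nmi <= RIP < end_repeat_nmi"; only in that case
	 * do we take the nested_nmi_out exit.
	 */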
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 	movq	$repeat_nmi, %rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 	cmpq	8(%rsp), %rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 	ja	1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	movq	$end_repeat_nmi, %rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 	cmpq	8(%rsp), %rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 	ja	nested_nmi_out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	 * Now check "NMI executing".  If it's set, then we're nested.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	 * This will not detect if we interrupted an outer NMI just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	 * before IRET.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 	cmpl	$1, -8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 	je	nested_nmi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 	 * Now test if the previous stack was an NMI stack.  This covers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 	 * the case where we interrupt an outer NMI after it clears
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	 * "NMI executing" but before IRET.  We need to be careful, though:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	 * there is one case in which RSP could point to the NMI stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 	 * despite there being no NMI active: naughty userspace controls
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	 * RSP at the very beginning of the SYSCALL targets.  We can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	 * pull a fast one on naughty userspace, though: we program
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	 * SYSCALL to mask DF, so userspace cannot cause DF to be set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	 * if it controls the kernel's RSP.  We set DF before we clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	 * "NMI executing".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	lea	6*8(%rsp), %rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	cmpq	%rdx, 4*8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	/* If the stack pointer is above the NMI stack, this is a normal NMI */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 	ja	first_nmi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	subq	$EXCEPTION_STKSZ, %rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	cmpq	%rdx, 4*8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 	/* If it is below the NMI stack, it is a normal NMI */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	jb	first_nmi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 	/* Ah, it is within the NMI stack. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 	testb	$(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 	jz	first_nmi	/* RSP was user controlled. */
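	/*
	 * Worked out: the interrupted context's RFLAGS sits at 3*8(%rsp)
	 * (saved rdx, RIP and CS are below it), and DF is bit 10
	 * (X86_EFLAGS_DF == 0x0400).  Byte (3*8 + 1)(%rsp) therefore holds
	 * RFLAGS bits 8-15, and the mask tested above is
	 * X86_EFLAGS_DF >> 8 == 0x04.  SYSCALL entry masks DF, so finding
	 * it clear here means the in-range RSP was user-controlled rather
	 * than a real outer NMI.
	 */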
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	/* This is a nested NMI. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) nested_nmi:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	 * Modify the "iret" frame to point to repeat_nmi, forcing another
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 	 * iteration of NMI handling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 	subq	$8, %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 	leaq	-10*8(%rsp), %rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 	pushq	$__KERNEL_DS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 	pushq	%rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	pushfq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 	pushq	$__KERNEL_CS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 	pushq	$repeat_nmi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 	/* Put stack back */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 	addq	$(6*8), %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) nested_nmi_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 	popq	%rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 	/* We are returning to kernel mode, so this cannot result in a fault. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 	iretq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) first_nmi:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 	/* Restore rdx. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 	movq	(%rsp), %rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 	/* Make room for "NMI executing". */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 	pushq	$0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 	/* Leave room for the "iret" frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	subq	$(5*8), %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 	/* Copy the "original" frame to the "outermost" frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	.rept 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	pushq	11*8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 	.endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 	UNWIND_HINT_IRET_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	/* Everything up to here is safe from nested NMIs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) #ifdef CONFIG_DEBUG_ENTRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 	 * For ease of testing, unmask NMIs right away.  Disabled by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 	 * default because IRET is very expensive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	pushq	$0		/* SS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	pushq	%rsp		/* RSP (minus 8 because of the previous push) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 	addq	$8, (%rsp)	/* Fix up RSP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 	pushfq			/* RFLAGS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 	pushq	$__KERNEL_CS	/* CS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 	pushq	$1f		/* RIP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 	iretq			/* continues at repeat_nmi below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	UNWIND_HINT_IRET_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) repeat_nmi:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	 * If there was a nested NMI, the first NMI's iret will return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	 * here. But NMIs are still enabled and we can take another
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 	 * nested NMI. The nested NMI checks the interrupted RIP to see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 	 * if it is between repeat_nmi and end_repeat_nmi, and if so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 	 * it will just return, as we are about to repeat an NMI anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 	 * This makes it safe to copy to the stack frame that a nested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 	 * NMI will update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 	 * RSP is pointing to "outermost RIP".  gsbase is unknown, but, if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 	 * we're repeating an NMI, gsbase has the same value that it had on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 	 * the first iteration.  paranoid_entry will load the kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 	 * gsbase if needed before we call exc_nmi().  "NMI executing"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	 * is zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	movq	$1, 10*8(%rsp)		/* Set "NMI executing". */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 	 * Copy the "outermost" frame to the "iret" frame.  NMIs that nest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 	 * here must not modify the "iret" frame while we're writing to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 	 * it or it will end up containing garbage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 	addq	$(10*8), %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 	.rept 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 	pushq	-6*8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 	.endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 	subq	$(5*8), %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) end_repeat_nmi:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 	 * Everything below this point can be preempted by a nested NMI.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 	 * If this happens, then the inner NMI will change the "iret"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 	 * frame to point back to repeat_nmi.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 	 * as we should not be calling schedule in NMI context, even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 	 * with normal interrupts enabled. An NMI should not be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 	 * setting NEED_RESCHED or anything that normal interrupts and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 	 * exceptions might do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 	call	paranoid_entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 	UNWIND_HINT_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 	movq	%rsp, %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 	movq	$-1, %rsi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 	call	exc_nmi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	/* Always restore stashed CR3 value (see paranoid_entry) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 	 * The above invocation of paranoid_entry stored the GSBASE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 	 * related information in R/EBX depending on the availability
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 	 * of FSGSBASE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	 * If FSGSBASE is enabled, restore the saved GSBASE value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 	 * unconditionally, otherwise take the conditional SWAPGS path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 	ALTERNATIVE "jmp nmi_no_fsgsbase", "", X86_FEATURE_FSGSBASE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 	wrgsbase	%rbx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 	jmp	nmi_restore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) nmi_no_fsgsbase:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 	/* EBX == 0 -> invoke SWAPGS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 	testl	%ebx, %ebx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 	jnz	nmi_restore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) nmi_swapgs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 	swapgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) nmi_restore:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 	POP_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 	 * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 	 * frame.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 	addq	$6*8, %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 	 * Clear "NMI executing".  Set DF first so that we can easily
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 	 * distinguish the remaining code between here and IRET from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 	 * the SYSCALL entry and exit paths.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 	 * We arguably should just inspect RIP instead, but I (Andy) wrote
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	 * this code when I had the misapprehension that Xen PV supported
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 	 * NMIs, and Xen PV would break that approach.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 	std
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 	movq	$0, 5*8(%rsp)		/* clear "NMI executing" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 	 * iretq reads the "iret" frame and exits the NMI stack in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 	 * single instruction.  We are returning to kernel mode, so this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 	 * cannot result in a fault.  Similarly, we don't need to worry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 	 * about espfix64 on the way back to kernel mode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 	iretq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) SYM_CODE_END(asm_exc_nmi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) #ifndef CONFIG_IA32_EMULATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462)  * This handles SYSCALL from 32-bit code.  There is no way to program
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463)  * MSRs to fully disable 32-bit SYSCALL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) SYM_CODE_START(ignore_sysret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 	UNWIND_HINT_EMPTY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 	mov	$-ENOSYS, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 	sysretl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) SYM_CODE_END(ignore_sysret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) .pushsection .text, "ax"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) SYM_CODE_START(rewind_stack_do_exit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 	UNWIND_HINT_FUNC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 	/* Prevent any naive code from trying to unwind to our caller. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 	xorl	%ebp, %ebp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 	leaq	-PTREGS_SIZE(%rax), %rsp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 	UNWIND_HINT_REGS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 	call	do_exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) SYM_CODE_END(rewind_stack_do_exit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) .popsection