Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for Orange Pi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * common.c - C code for kernel entry and exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  * Copyright (c) 2015 Andrew Lutomirski
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  * Based on asm and ptrace code by many authors.  The code here originated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  * in ptrace.c and signal.c.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12) #include <linux/sched/task_stack.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13) #include <linux/entry-common.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15) #include <linux/smp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) #include <linux/errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17) #include <linux/ptrace.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) #include <linux/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) #include <linux/nospec.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20) #include <linux/syscalls.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21) #include <linux/uaccess.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) #ifdef CONFIG_XEN_PV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) #include <xen/xen-ops.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) #include <xen/events.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) #include <asm/desc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) #include <asm/traps.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) #include <asm/vdso.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) #include <asm/cpufeature.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) #include <asm/fpu/api.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) #include <asm/nospec-branch.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) #include <asm/io_bitmap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) #include <asm/syscall.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) #include <asm/irq_stack.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) #ifdef CONFIG_X86_64
/*
 * 64-bit SYSCALL entry point, called from the entry asm with the syscall
 * number in @nr.  Establishes kernel context via the generic entry code,
 * dispatches through the native (and optionally x32) syscall table, then
 * runs the common syscall exit work.
 */
__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
	/* Entry work (ptrace/seccomp/tracing) may rewrite the syscall nr. */
	nr = syscall_enter_from_user_mode(regs, nr);

	instrumentation_begin();
	if (likely(nr < NR_syscalls)) {
		/* Clamp the table index under speculation (Spectre v1). */
		nr = array_index_nospec(nr, NR_syscalls);
		regs->ax = sys_call_table[nr](regs);
#ifdef CONFIG_X86_X32_ABI
	} else if (likely((nr & __X32_SYSCALL_BIT) &&
			  (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
		/* x32 syscalls set __X32_SYSCALL_BIT; mask it off to index. */
		nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
					X32_NR_syscalls);
		regs->ax = x32_sys_call_table[nr](regs);
#endif
	}
	/*
	 * Out-of-range nr: regs->ax is left as set on entry (presumably
	 * -ENOSYS from the entry asm — confirm against entry_64.S).
	 */
	instrumentation_end();
	syscall_exit_to_user_mode(regs);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 	if (IS_ENABLED(CONFIG_IA32_EMULATION))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 		current_thread_info()->status |= TS_COMPAT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 	return (unsigned int)regs->orig_ax;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 
/*
 * Invoke a 32-bit syscall.  Called with IRQs on in CONTEXT_KERNEL.
 *
 * Out-of-range syscall numbers are silently ignored here: regs->ax is
 * left as set on entry (presumably -ENOSYS from the entry asm — confirm
 * against the 32-bit/compat entry code).
 */
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
						  unsigned int nr)
{
	if (likely(nr < IA32_NR_syscalls)) {
		/* Clamp the table index under speculation (Spectre v1). */
		nr = array_index_nospec(nr, IA32_NR_syscalls);
		regs->ax = ia32_sys_call_table[nr](regs);
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 
/*
 * Handles int $0x80: the legacy 32-bit syscall entry.  Runs the generic
 * syscall entry work, dispatches the syscall, then runs the exit work.
 */
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
	/* Sets TS_COMPAT (under IA32 emulation) and reads the syscall nr. */
	unsigned int nr = syscall_32_enter(regs);

	/*
	 * Subtlety here: if ptrace pokes something larger than 2^32-1 into
	 * orig_ax, the unsigned int return value truncates it.  This may
	 * or may not be necessary, but it matches the old asm behavior.
	 */
	nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
	instrumentation_begin();

	do_syscall_32_irqs_on(regs, nr);

	instrumentation_end();
	syscall_exit_to_user_mode(regs);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 
/*
 * Common body of the SYSENTER/SYSCALL32 fast path.  Returns true when the
 * syscall went through the normal exit path (a fast return is possible),
 * false when fetching EBP from the user stack faulted and the caller must
 * return with IRET.
 */
static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
{
	unsigned int nr = syscall_32_enter(regs);
	int res;

	/*
	 * This cannot use syscall_enter_from_user_mode() as it has to
	 * fetch EBP before invoking any of the syscall entry work
	 * functions.
	 */
	syscall_enter_from_user_mode_prepare(regs);

	instrumentation_begin();
	/* Fetch EBP from where the vDSO stashed it. */
	if (IS_ENABLED(CONFIG_X86_64)) {
		/*
		 * Micro-optimization: the pointer we're following is
		 * explicitly 32 bits, so it can't be out of range.
		 */
		res = __get_user(*(u32 *)&regs->bp,
			 (u32 __user __force *)(unsigned long)(u32)regs->sp);
	} else {
		res = get_user(*(u32 *)&regs->bp,
		       (u32 __user __force *)(unsigned long)(u32)regs->sp);
	}

	if (res) {
		/* User code screwed up. */
		regs->ax = -EFAULT;

		/*
		 * The syscall entry *work* has not run yet (only the
		 * prepare step above), so leave via the plain irqentry
		 * path rather than the syscall exit path.  IRQs are on
		 * at this point, hence the explicit disable.
		 */
		local_irq_disable();
		instrumentation_end();
		irqentry_exit_to_user_mode(regs);
		return false;
	}

	/* The cast truncates any ptrace induced syscall nr > 2^32 -1 */
	nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);

	/* Now this is just like a normal syscall. */
	do_syscall_32_irqs_on(regs, nr);

	instrumentation_end();
	syscall_exit_to_user_mode(regs);
	return true;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
{
	/*
	 * Called using the internal vDSO SYSENTER/SYSCALL32 calling
	 * convention.  Adjust regs so it looks like we entered using int80.
	 */
	unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
					vdso_image_32.sym_int80_landing_pad;

	/*
	 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
	 * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
	 * Fix it up.
	 */
	regs->ip = landing_pad;

	/* Invoke the syscall. If it failed, keep it simple: use IRET. */
	if (!__do_fast_syscall_32(regs))
		return 0;

#ifdef CONFIG_X86_64
	/*
	 * Opportunistic SYSRETL: if possible, try to return using SYSRETL.
	 * SYSRETL is available on all 64-bit CPUs, so we don't need to
	 * bother with SYSEXIT.
	 *
	 * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
	 * because the ECX fixup above will ensure that this is essentially
	 * never the case.
	 *
	 * The fast return is only taken when CS/SS/IP still match what was
	 * set up at entry and RF/TF are clear; anything else (e.g. a frame
	 * modified during the syscall) goes out through IRET.
	 */
	return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
		regs->ip == landing_pad &&
		(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
#else
	/*
	 * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
	 *
	 * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
	 * because the ECX fixup above will ensure that this is essentially
	 * never the case.
	 *
	 * We don't allow syscalls at all from VM86 mode, but we still
	 * need to check VM, because we might be returning from sys_vm86.
	 */
	return static_cpu_has(X86_FEATURE_SEP) &&
		regs->cs == __USER_CS && regs->ss == __USER_DS &&
		regs->ip == landing_pad &&
		(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
#endif
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) __visible noinstr long do_SYSENTER_32(struct pt_regs *regs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 	/* SYSENTER loses RSP, but the vDSO saved it in RBP. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) 	regs->sp = regs->bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) 	/* SYSENTER clobbers EFLAGS.IF.  Assume it was set in usermode. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) 	regs->flags |= X86_EFLAGS_IF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) 	return do_fast_syscall_32(regs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) 
/* Stub wired into unimplemented syscall table slots: always -ENOSYS. */
SYSCALL_DEFINE0(ni_syscall)
{
	return -ENOSYS;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) #ifdef CONFIG_XEN_PV
#ifndef CONFIG_PREEMPTION
/*
 * Some hypercalls issued by the toolstack can take many 10s of
 * seconds. Allow tasks running hypercalls via the privcmd driver to
 * be voluntarily preempted even if full kernel preemption is
 * disabled.
 *
 * Such preemptible hypercalls are bracketed by
 * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end()
 * calls.
 */
DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);

/*
 * In case of scheduling the flag must be cleared and restored after
 * returning from schedule as the task might move to a different CPU.
 */
static __always_inline bool get_and_clear_inhcall(void)
{
	/* Read-then-clear on the current CPU; returns the old value. */
	bool inhcall = __this_cpu_read(xen_in_preemptible_hcall);

	__this_cpu_write(xen_in_preemptible_hcall, false);
	return inhcall;
}

/* Re-arm the flag on whichever CPU the task is now running on. */
static __always_inline void restore_inhcall(bool inhcall)
{
	__this_cpu_write(xen_in_preemptible_hcall, inhcall);
}
#else
/* Fully preemptible kernel: cooperative hypercall preemption not needed. */
static __always_inline bool get_and_clear_inhcall(void) { return false; }
static __always_inline void restore_inhcall(bool inhcall) { }
#endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 
/*
 * Actual upcall work, run (possibly on the IRQ stack) inside an
 * irq_enter_rcu()/irq_exit_rcu() bracket with stat accounting.
 */
static void __xen_pv_evtchn_do_upcall(void)
{
	irq_enter_rcu();
	inc_irq_stat(irq_hv_callback_count);

	xen_hvm_evtchn_do_upcall();

	irq_exit_rcu();
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 
/*
 * Xen PV event channel upcall entry point (the PV equivalent of an
 * interrupt).  Handles the events, then — if the upcall interrupted a
 * preemptible hypercall — offers a voluntary reschedule before returning.
 */
__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
{
	struct pt_regs *old_regs;
	bool inhcall;
	irqentry_state_t state;

	state = irqentry_enter(regs);
	old_regs = set_irq_regs(regs);

	instrumentation_begin();
	/* Switch to the IRQ stack if not already on it. */
	run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
	instrumentation_end();

	set_irq_regs(old_regs);

	/*
	 * Clear the in-hypercall flag before a potential schedule; it is
	 * restored afterwards because the task may resume on another CPU.
	 */
	inhcall = get_and_clear_inhcall();
	if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
		/*
		 * exit_rcu set means irqentry_enter() had to re-enter RCU
		 * (per the generic entry code); rescheduling in such a
		 * context would be a bug, hence the WARN and fallthrough
		 * to the normal exit path.
		 */
		instrumentation_begin();
		irqentry_exit_cond_resched();
		instrumentation_end();
		restore_inhcall(inhcall);
	} else {
		irqentry_exit(regs, state);
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) #endif /* CONFIG_XEN_PV */