Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

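The listing below is the generic syscall and interrupt entry/exit code from this tree (kernel/entry/common.c).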
// SPDX-License-Identifier: GPL-2.0

#include <linux/context_tracking.h>
#include <linux/entry-common.h>
#include <linux/livepatch.h>
#include <linux/audit.h>

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

/**
 * enter_from_user_mode - Establish state when coming from user mode
 *
 * Syscall/interrupt entry disables interrupts, but user mode is traced as
 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 *
 * 1) Tell lockdep that interrupts are disabled
 * 2) Invoke context tracking if enabled to reactivate RCU
 * 3) Trace interrupts off state
 */
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
{
	arch_check_user_regs(regs);
	lockdep_hardirqs_off(CALLER_ADDR0);

	CT_WARN_ON(ct_state() != CONTEXT_USER);
	user_exit_irqoff();

	instrumentation_begin();
	trace_hardirqs_off_finish();
	instrumentation_end();
}

static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
{
	if (unlikely(audit_context())) {
		unsigned long args[6];

		syscall_get_arguments(current, regs, args);
		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
	}
}

static long syscall_trace_enter(struct pt_regs *regs, long syscall,
				unsigned long ti_work)
{
	long ret = 0;

	/* Handle ptrace */
	if (ti_work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
		ret = arch_syscall_enter_tracehook(regs);
		if (ret || (ti_work & _TIF_SYSCALL_EMU))
			return -1L;
	}

	/* Do seccomp after ptrace, to catch any tracer changes. */
	if (ti_work & _TIF_SECCOMP) {
		ret = __secure_computing(NULL);
		if (ret == -1L)
			return ret;
	}

	/* Either of the above might have changed the syscall number */
	syscall = syscall_get_nr(current, regs);

	if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT))
		trace_sys_enter(regs, syscall);

	syscall_enter_audit(regs, syscall);

	return ret ? : syscall;
}
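
/*
 * Return convention of syscall_trace_enter(): a negative value (from a
 * ptracer abort, TIF_SYSCALL_EMU, or a seccomp denial) tells the caller
 * to skip syscall dispatch entirely and fall through to exit work, while
 * a non-negative value is the (possibly rewritten) syscall number to
 * invoke.
 */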

static __always_inline long
__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
{
	unsigned long ti_work;

	ti_work = READ_ONCE(current_thread_info()->flags);
	if (ti_work & SYSCALL_ENTER_WORK)
		syscall = syscall_trace_enter(regs, syscall, ti_work);

	return syscall;
}

long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
{
	return __syscall_enter_from_user_work(regs, syscall);
}

noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
{
	long ret;

	enter_from_user_mode(regs);

	instrumentation_begin();
	local_irq_enable();
	ret = __syscall_enter_from_user_work(regs, syscall);
	instrumentation_end();

	return ret;
}
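
/*
 * Pattern note: syscall_enter_from_user_mode() is noinstr, so anything
 * that may legitimately be traced or probed has to sit between
 * instrumentation_begin() and instrumentation_end(), as above. A minimal
 * sketch of that idiom for a new entry helper (my_entry_helper() is a
 * hypothetical name, not part of this file):
 *
 *	noinstr void my_entry_helper(struct pt_regs *regs)
 *	{
 *		enter_from_user_mode(regs);	// low-level state setup first
 *
 *		instrumentation_begin();
 *		// instrumentable work (tracing, audit, scheduling) goes here
 *		instrumentation_end();
 *	}
 */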

noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
{
	enter_from_user_mode(regs);
	instrumentation_begin();
	local_irq_enable();
	instrumentation_end();
}

/**
 * exit_to_user_mode - Fixup state when exiting to user mode
 *
 * Syscall/interrupt exit enables interrupts, but the kernel state is
 * interrupts disabled when this is invoked. Also tell RCU about it.
 *
 * 1) Trace interrupts on state
 * 2) Invoke context tracking if enabled to adjust RCU state
 * 3) Invoke architecture specific last minute exit code, e.g. speculation
 *    mitigations, etc.
 * 4) Tell lockdep that interrupts are enabled
 */
static __always_inline void exit_to_user_mode(void)
{
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	instrumentation_end();

	user_enter_irqoff();
	arch_exit_to_user_mode();
	lockdep_hardirqs_on(CALLER_ADDR0);
}

/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal(struct pt_regs *regs) { }

static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
					    unsigned long ti_work)
{
	/*
	 * Before returning to user space ensure that all pending work
	 * items have been completed.
	 */
	while (ti_work & EXIT_TO_USER_MODE_WORK) {

		local_irq_enable_exit_to_user(ti_work);

		if (ti_work & _TIF_NEED_RESCHED)
			schedule();

		if (ti_work & _TIF_UPROBE)
			uprobe_notify_resume(regs);

		if (ti_work & _TIF_PATCH_PENDING)
			klp_update_patch_state(current);

		if (ti_work & _TIF_SIGPENDING)
			arch_do_signal(regs);

		if (ti_work & _TIF_NOTIFY_RESUME) {
			tracehook_notify_resume(regs);
			rseq_handle_notify_resume(NULL, regs);
		}

		/* Architecture specific TIF work */
		arch_exit_to_user_mode_work(regs, ti_work);

		/*
		 * Disable interrupts and reevaluate the work flags as they
		 * might have changed while interrupts and preemption were
		 * enabled above.
		 */
		local_irq_disable_exit_to_user();
		ti_work = READ_ONCE(current_thread_info()->flags);
	}

	/* Return the latest work state for arch_exit_to_user_mode() */
	return ti_work;
}

static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long ti_work = READ_ONCE(current_thread_info()->flags);

	lockdep_assert_irqs_disabled();

	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);

	arch_exit_to_user_mode_prepare(regs, ti_work);

	/* Ensure that the address limit is intact and no locks are held */
	addr_limit_user_check();
	lockdep_assert_irqs_disabled();
	lockdep_sys_exit();
}

#ifndef _TIF_SINGLESTEP
static inline bool report_single_step(unsigned long ti_work)
{
	return false;
}
#else
/*
 * If TIF_SYSCALL_EMU is set, then the only reason to report is when
 * TIF_SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP).  This syscall
 * instruction has already been reported in syscall_enter_from_user_mode().
 */
#define SYSEMU_STEP	(_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)

static inline bool report_single_step(unsigned long ti_work)
{
	return (ti_work & SYSEMU_STEP) == _TIF_SINGLESTEP;
}
#endif
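
/*
 * Worked example of the mask check above: with only _TIF_SINGLESTEP set,
 * (ti_work & SYSEMU_STEP) == _TIF_SINGLESTEP holds and the step is
 * reported. With both bits set (PTRACE_SYSEMU_SINGLESTEP), the masked
 * value equals SYSEMU_STEP, the comparison fails, and no second report
 * is issued because the stop already happened at syscall entry.
 */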

static void syscall_exit_work(struct pt_regs *regs, unsigned long ti_work)
{
	bool step;

	audit_syscall_exit(regs);

	if (ti_work & _TIF_SYSCALL_TRACEPOINT)
		trace_sys_exit(regs, syscall_get_return_value(current, regs));

	step = report_single_step(ti_work);
	if (step || ti_work & _TIF_SYSCALL_TRACE)
		arch_syscall_exit_tracehook(regs, step);
}

/*
 * Syscall specific exit to user mode preparation. Runs with interrupts
 * enabled.
 */
static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
{
	u32 cached_flags = READ_ONCE(current_thread_info()->flags);
	unsigned long nr = syscall_get_nr(current, regs);

	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);

	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
			local_irq_enable();
	}

	rseq_syscall(regs);

	/*
	 * Do one-time syscall specific work. If these work items are
	 * enabled, we want to run them exactly once per syscall exit with
	 * interrupts enabled.
	 */
	if (unlikely(cached_flags & SYSCALL_EXIT_WORK))
		syscall_exit_work(regs, cached_flags);
}

__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	syscall_exit_to_user_mode_prepare(regs);
	local_irq_disable_exit_to_user();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();
	exit_to_user_mode();
}
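
/*
 * A minimal sketch (not part of this file) of how an architecture's C
 * syscall entry point is expected to combine the helpers above.
 * arch_do_syscall(), regs->syscallno and invoke_syscall() are
 * hypothetical placeholders for the arch-specific pieces:
 *
 *	__visible noinstr void arch_do_syscall(struct pt_regs *regs)
 *	{
 *		long nr = syscall_enter_from_user_mode(regs, regs->syscallno);
 *
 *		if (nr >= 0)			// negative means "skip"
 *			invoke_syscall(regs, nr);
 *
 *		syscall_exit_to_user_mode(regs);
 *	}
 */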

noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
	enter_from_user_mode(regs);
}

noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();
	exit_to_user_mode();
}

noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
	irqentry_state_t ret = {
		.exit_rcu = false,
	};

	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		return ret;
	}

	/*
	 * If this entry hit the idle task invoke rcu_irq_enter() whether
	 * RCU is watching or not.
	 *
	 * Interrupts can nest when the first interrupt invokes softirq
	 * processing on return which enables interrupts.
	 *
	 * Scheduler ticks in the idle task can mark quiescent state and
	 * terminate a grace period, if and only if the timer interrupt is
	 * not nested into another interrupt.
	 *
	 * Checking for rcu_is_watching() here would prevent the nesting
	 * interrupt from invoking rcu_irq_enter(). If that nested interrupt
	 * is the tick then rcu_flavor_sched_clock_irq() would wrongfully
	 * assume that it is the first interrupt and eventually claim
	 * quiescent state and end grace periods prematurely.
	 *
	 * Unconditionally invoke rcu_irq_enter() so RCU state stays
	 * consistent.
	 *
	 * TINY_RCU does not support EQS, so let the compiler eliminate
	 * this part when enabled.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
		/*
		 * If RCU is not watching then the same careful
		 * sequence vs. lockdep and tracing is required
		 * as in irqentry_enter_from_user_mode().
		 */
		lockdep_hardirqs_off(CALLER_ADDR0);
		rcu_irq_enter();
		instrumentation_begin();
		trace_hardirqs_off_finish();
		instrumentation_end();

		ret.exit_rcu = true;
		return ret;
	}

	/*
	 * If RCU is watching then RCU only wants to check whether it needs
	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
	 * already contains a warning when RCU is not watching, so no point
	 * in having another one here.
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	instrumentation_begin();
	rcu_irq_enter_check_tick();
	trace_hardirqs_off_finish();
	instrumentation_end();

	return ret;
}

void irqentry_exit_cond_resched(void)
{
	if (!preempt_count()) {
		/* Sanity check RCU and thread stack */
		rcu_irq_exit_check_preempt();
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
			WARN_ON_ONCE(!on_thread_stack());
		if (need_resched())
			preempt_schedule_irq();
	}
}

noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	lockdep_assert_irqs_disabled();

	/* Check whether this returns to user mode */
	if (user_mode(regs)) {
		irqentry_exit_to_user_mode(regs);
	} else if (!regs_irqs_disabled(regs)) {
		/*
		 * If RCU was not watching on entry this needs to be done
		 * carefully and needs the same ordering of lockdep/tracing
		 * and RCU as the return to user mode path.
		 */
		if (state.exit_rcu) {
			instrumentation_begin();
			/* Tell the tracer that IRET will enable interrupts */
			trace_hardirqs_on_prepare();
			lockdep_hardirqs_on_prepare(CALLER_ADDR0);
			instrumentation_end();
			rcu_irq_exit();
			lockdep_hardirqs_on(CALLER_ADDR0);
			return;
		}

		instrumentation_begin();
		if (IS_ENABLED(CONFIG_PREEMPTION))
			irqentry_exit_cond_resched();
		/* Covers both tracing and lockdep */
		trace_hardirqs_on();
		instrumentation_end();
	} else {
		/*
		 * IRQ flags state is correct already. Just tell RCU if it
		 * was not watching on entry.
		 */
		if (state.exit_rcu)
			rcu_irq_exit();
	}
}
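
/*
 * A minimal sketch of the intended irqentry_enter()/irqentry_exit()
 * pairing for an interrupt handler; arch_handle_irq() and do_interrupt()
 * are hypothetical names:
 *
 *	__visible noinstr void arch_handle_irq(struct pt_regs *regs)
 *	{
 *		irqentry_state_t state = irqentry_enter(regs);
 *
 *		instrumentation_begin();
 *		do_interrupt(regs);		// actual interrupt dispatch
 *		instrumentation_end();
 *
 *		irqentry_exit(regs, state);
 *	}
 */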

irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
	irqentry_state_t irq_state;

	irq_state.lockdep = lockdep_hardirqs_enabled();

	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	rcu_nmi_enter();

	instrumentation_begin();
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return irq_state;
}

void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
	instrumentation_begin();
	ftrace_nmi_exit();
	if (irq_state.lockdep) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	}
	instrumentation_end();

	rcu_nmi_exit();
	lockdep_hardirq_exit();
	if (irq_state.lockdep)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}
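
/*
 * And the NMI-side pairing, again as a sketch with hypothetical
 * arch_handle_nmi()/do_nmi_work() names:
 *
 *	noinstr void arch_handle_nmi(struct pt_regs *regs)
 *	{
 *		irqentry_state_t state = irqentry_nmi_enter(regs);
 *
 *		instrumentation_begin();
 *		do_nmi_work(regs);		// arch NMI processing
 *		instrumentation_end();
 *
 *		irqentry_nmi_exit(regs, state);
 *	}
 */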