// SPDX-License-Identifier: GPL-2.0

#include <linux/context_tracking.h>
#include <linux/entry-common.h>
#include <linux/livepatch.h>
#include <linux/audit.h>

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

/**
 * enter_from_user_mode - Establish state when coming from user mode
 *
 * Syscall/interrupt entry disables interrupts, but user mode is traced as
 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 *
 * 1) Tell lockdep that interrupts are disabled
 * 2) Invoke context tracking if enabled to reactivate RCU
 * 3) Trace interrupts off state
 */
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
{
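	/* Architecture specific sanity check of the user mode register state */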
	arch_check_user_regs(regs);
	lockdep_hardirqs_off(CALLER_ADDR0);

	CT_WARN_ON(ct_state() != CONTEXT_USER);
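	/* Leave CONTEXT_USER; this makes RCU watch again if it was idle */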
	user_exit_irqoff();

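	/* RCU is watching now, so instrumentable code is safe to run */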
	instrumentation_begin();
	trace_hardirqs_off_finish();
	instrumentation_end();
}

static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
{
	if (unlikely(audit_context())) {
		unsigned long args[6];

		syscall_get_arguments(current, regs, args);
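		/* Audit records the syscall number and the first four arguments only */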
		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
	}
}

static long syscall_trace_enter(struct pt_regs *regs, long syscall,
				unsigned long ti_work)
{
	long ret = 0;

	/* Handle ptrace */
	if (ti_work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
		ret = arch_syscall_enter_tracehook(regs);
		if (ret || (ti_work & _TIF_SYSCALL_EMU))
			return -1L;
	}

	/* Do seccomp after ptrace, to catch any tracer changes. */
	if (ti_work & _TIF_SECCOMP) {
		ret = __secure_computing(NULL);
		if (ret == -1L)
			return ret;
	}

	/* Either of the above might have changed the syscall number */
	syscall = syscall_get_nr(current, regs);

	if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT))
		trace_sys_enter(regs, syscall);

	syscall_enter_audit(regs, syscall);

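	/* Return -1 when the syscall was denied, otherwise the (possibly changed) syscall number */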
	return ret ? : syscall;
}

static __always_inline long
__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
{
	unsigned long ti_work;

	ti_work = READ_ONCE(current_thread_info()->flags);
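	/* Take the slow path only when entry work bits are set */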
	if (ti_work & SYSCALL_ENTER_WORK)
		syscall = syscall_trace_enter(regs, syscall, ti_work);

	return syscall;
}

long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
{
	return __syscall_enter_from_user_work(regs, syscall);
}

noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
{
	long ret;

	enter_from_user_mode(regs);

	instrumentation_begin();
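	/* Entry state is established, so interrupts can be reenabled for the entry work */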
	local_irq_enable();
	ret = __syscall_enter_from_user_work(regs, syscall);
	instrumentation_end();

	return ret;
}

noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
{
	enter_from_user_mode(regs);
	instrumentation_begin();
	local_irq_enable();
	instrumentation_end();
}

/**
 * exit_to_user_mode - Fixup state when exiting to user mode
 *
 * Syscall/interrupt exit enables interrupts, but the kernel state is
 * interrupts disabled when this is invoked. Also tell RCU about it.
 *
 * 1) Trace interrupts on state
 * 2) Invoke context tracking if enabled to adjust RCU state
 * 3) Invoke architecture specific last minute exit code, e.g. speculation
 *    mitigations, etc.
 * 4) Tell lockdep that interrupts are enabled
 */
static __always_inline void exit_to_user_mode(void)
{
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	instrumentation_end();

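	/* After this point RCU may stop watching, so no instrumentable code */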
	user_enter_irqoff();
	arch_exit_to_user_mode();
	lockdep_hardirqs_on(CALLER_ADDR0);
}

/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal(struct pt_regs *regs) { }

static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
					    unsigned long ti_work)
{
	/*
	 * Before returning to user space ensure that all pending work
	 * items have been completed.
	 */
	while (ti_work & EXIT_TO_USER_MODE_WORK) {

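		/* The work handlers below can sleep and take locks, so run them with interrupts enabled */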
		local_irq_enable_exit_to_user(ti_work);

		if (ti_work & _TIF_NEED_RESCHED)
			schedule();

		if (ti_work & _TIF_UPROBE)
			uprobe_notify_resume(regs);

		if (ti_work & _TIF_PATCH_PENDING)
			klp_update_patch_state(current);

		if (ti_work & _TIF_SIGPENDING)
			arch_do_signal(regs);

		if (ti_work & _TIF_NOTIFY_RESUME) {
			tracehook_notify_resume(regs);
			rseq_handle_notify_resume(NULL, regs);
		}

		/* Architecture specific TIF work */
		arch_exit_to_user_mode_work(regs, ti_work);

		/*
		 * Disable interrupts and reevaluate the work flags as they
		 * might have changed while interrupts and preemption were
		 * enabled above.
		 */
		local_irq_disable_exit_to_user();
		ti_work = READ_ONCE(current_thread_info()->flags);
	}

	/* Return the latest work state for arch_exit_to_user_mode() */
	return ti_work;
}

static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long ti_work = READ_ONCE(current_thread_info()->flags);

	lockdep_assert_irqs_disabled();

	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);

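	/* Architecture specific last minute preparation with the final work state */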
	arch_exit_to_user_mode_prepare(regs, ti_work);

	/* Ensure that the address limit is intact and no locks are held */
	addr_limit_user_check();
	lockdep_assert_irqs_disabled();
	lockdep_sys_exit();
}

#ifndef _TIF_SINGLESTEP
static inline bool report_single_step(unsigned long ti_work)
{
	return false;
}
#else
/*
 * If TIF_SYSCALL_EMU is set, then the only reason to report is when
 * TIF_SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
 * instruction has already been reported in syscall_enter_from_user_mode().
 */
#define SYSEMU_STEP	(_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)

static inline bool report_single_step(unsigned long ti_work)
{
	return (ti_work & SYSEMU_STEP) == _TIF_SINGLESTEP;
}
#endif

static void syscall_exit_work(struct pt_regs *regs, unsigned long ti_work)
{
	bool step;

	audit_syscall_exit(regs);

	if (ti_work & _TIF_SYSCALL_TRACEPOINT)
		trace_sys_exit(regs, syscall_get_return_value(current, regs));

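	/* Report the exit to the ptracer, flagging single-step when required */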
	step = report_single_step(ti_work);
	if (step || ti_work & _TIF_SYSCALL_TRACE)
		arch_syscall_exit_tracehook(regs, step);
}

/*
 * Syscall specific exit to user mode preparation. Runs with interrupts
 * enabled.
 */
static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long cached_flags = READ_ONCE(current_thread_info()->flags);
	unsigned long nr = syscall_get_nr(current, regs);

	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);

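	/* Catch syscalls that return with interrupts disabled and repair the state */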
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
			local_irq_enable();
	}

	rseq_syscall(regs);

	/*
	 * Do one-time syscall specific work. If these work items are
	 * enabled, we want to run them exactly once per syscall exit with
	 * interrupts enabled.
	 */
	if (unlikely(cached_flags & SYSCALL_EXIT_WORK))
		syscall_exit_work(regs, cached_flags);
}

__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	syscall_exit_to_user_mode_prepare(regs);
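	/* The generic exit work below must run with interrupts disabled */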
	local_irq_disable_exit_to_user();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();
	exit_to_user_mode();
}

noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
	enter_from_user_mode(regs);
}

noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();
	exit_to_user_mode();
}

noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
	irqentry_state_t ret = {
		.exit_rcu = false,
	};

	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		return ret;
	}

	/*
	 * If this entry hit the idle task invoke rcu_irq_enter() whether
	 * RCU is watching or not.
	 *
	 * Interrupts can nest when the first interrupt invokes softirq
	 * processing on return which enables interrupts.
	 *
	 * Scheduler ticks in the idle task can mark quiescent state and
	 * terminate a grace period, if and only if the timer interrupt is
	 * not nested into another interrupt.
	 *
	 * Checking for rcu_is_watching() here would prevent the nesting
	 * interrupt from invoking rcu_irq_enter(). If that nested interrupt
	 * is the tick then rcu_flavor_sched_clock_irq() would wrongfully
	 * assume that it is the first interrupt and eventually claim
	 * quiescent state and end grace periods prematurely.
	 *
	 * Unconditionally invoke rcu_irq_enter() so RCU state stays
	 * consistent.
	 *
	 * TINY_RCU does not support EQS, so let the compiler eliminate
	 * this part when enabled.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
		/*
		 * If RCU is not watching then the same careful
		 * sequence vs. lockdep and tracing is required
		 * as in irqentry_enter_from_user_mode().
		 */
		lockdep_hardirqs_off(CALLER_ADDR0);
		rcu_irq_enter();
		instrumentation_begin();
		trace_hardirqs_off_finish();
		instrumentation_end();

		ret.exit_rcu = true;
		return ret;
	}

	/*
	 * If RCU is watching then RCU only wants to check whether it needs
	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
	 * already contains a warning when RCU is not watching, so no point
	 * in having another one here.
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	instrumentation_begin();
	rcu_irq_enter_check_tick();
	trace_hardirqs_off_finish();
	instrumentation_end();

	return ret;
}

void irqentry_exit_cond_resched(void)
{
	if (!preempt_count()) {
		/* Sanity check RCU and thread stack */
		rcu_irq_exit_check_preempt();
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
			WARN_ON_ONCE(!on_thread_stack());
		if (need_resched())
			preempt_schedule_irq();
	}
}

noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	lockdep_assert_irqs_disabled();

	/* Check whether this returns to user mode */
	if (user_mode(regs)) {
		irqentry_exit_to_user_mode(regs);
	} else if (!regs_irqs_disabled(regs)) {
		/*
		 * If RCU was not watching on entry this needs to be done
		 * carefully and needs the same ordering of lockdep/tracing
		 * and RCU as the return to user mode path.
		 */
		if (state.exit_rcu) {
			instrumentation_begin();
			/* Tell the tracer that IRET will enable interrupts */
			trace_hardirqs_on_prepare();
			lockdep_hardirqs_on_prepare(CALLER_ADDR0);
			instrumentation_end();
			rcu_irq_exit();
			lockdep_hardirqs_on(CALLER_ADDR0);
			return;
		}

		instrumentation_begin();
		if (IS_ENABLED(CONFIG_PREEMPTION))
			irqentry_exit_cond_resched();
		/* Covers both tracing and lockdep */
		trace_hardirqs_on();
		instrumentation_end();
	} else {
		/*
		 * IRQ flags state is correct already. Just tell RCU if it
		 * was not watching on entry.
		 */
		if (state.exit_rcu)
			rcu_irq_exit();
	}
}

irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
	irqentry_state_t irq_state;

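	/* NMIs can hit with interrupts on or off; save lockdep's view so exit can restore it */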
	irq_state.lockdep = lockdep_hardirqs_enabled();

	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	rcu_nmi_enter();

	instrumentation_begin();
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return irq_state;
}

void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
	instrumentation_begin();
	ftrace_nmi_exit();
	if (irq_state.lockdep) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	}
	instrumentation_end();

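	/* Undo the entry sequence in reverse order */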
	rcu_nmi_exit();
	lockdep_hardirq_exit();
	if (irq_state.lockdep)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}