// SPDX-License-Identifier: GPL-2.0-only
/*
 * Context tracking: Probe on high-level context boundaries, such as between
 * kernel and userspace. This includes syscall and exception entry/exit.
 *
 * This is used by RCU to remove its dependency on the timer tick while a CPU
 * runs in userspace.
 *
 * Started by Frederic Weisbecker:
 *
 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
 *
 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
 * Steven Rostedt and Peter Zijlstra for suggestions and improvements.
 *
 */

#include <linux/context_tracking.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>

#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>

DEFINE_STATIC_KEY_FALSE(context_tracking_key);
EXPORT_SYMBOL_GPL(context_tracking_key);

DEFINE_PER_CPU(struct context_tracking, context_tracking);
EXPORT_SYMBOL_GPL(context_tracking);

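/*
 * The per-CPU struct context_tracking above carries all the state used in
 * this file: ->state is the context this CPU is currently assumed to run in
 * (CONTEXT_KERNEL, CONTEXT_USER, ...), ->active tells whether transitions on
 * this CPU need to be reported to vtime and RCU at all, and ->recursion
 * protects the update paths against reentrancy.
 *
 * The two helpers below implement that recursion guard: the first caller on
 * a CPU bumps ->recursion from 0 to 1 and proceeds, any nested caller sees a
 * higher value and bails out, and a value below 1 can only come from an
 * unbalanced exit, hence the warning.
 */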
static noinstr bool context_tracking_recursion_enter(void)
{
	int recursion;

	recursion = __this_cpu_inc_return(context_tracking.recursion);
	if (recursion == 1)
		return true;

	WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
	__this_cpu_dec(context_tracking.recursion);

	return false;
}

static __always_inline void context_tracking_recursion_exit(void)
{
	__this_cpu_dec(context_tracking.recursion);
}

/**
 * context_tracking_enter - Inform the context tracking that the CPU is going
 *                          to enter user or guest space mode.
 *
 * This function must be called right before we switch from the kernel
 * to user or guest space, when it's guaranteed that the remaining kernel
 * instructions to be executed won't use any RCU read-side critical section,
 * because this function sets RCU in an extended quiescent state.
 */
void noinstr __context_tracking_enter(enum ctx_state state)
{
	/* Kernel threads aren't supposed to go to userspace */
	WARN_ON_ONCE(!current->mm);

	if (!context_tracking_recursion_enter())
		return;

	if (__this_cpu_read(context_tracking.state) != state) {
		if (__this_cpu_read(context_tracking.active)) {
			/*
			 * At this stage, only low level arch entry code remains and
			 * then we'll run in userspace. We can assume there won't be
			 * any RCU read-side critical section until the next call to
			 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
			 * on the tick.
			 */
			if (state == CONTEXT_USER) {
				instrumentation_begin();
				trace_user_enter(0);
				vtime_user_enter(current);
				instrumentation_end();
			}
			rcu_user_enter();
		}
		/*
		 * Even if context tracking is disabled on this CPU, because it's outside
		 * the full dynticks mask for example, we still have to keep track of the
		 * context transitions and states so that they stay consistent with what
		 * the other CPUs see.
		 * If a task triggers an exception in userspace, sleeps in the exception
		 * handler and then migrates to another CPU, that new CPU must know which
		 * context the exception returns to by the time we call exception_exit().
		 * That information can only be provided by the previous CPU when it called
		 * exception_enter().
		 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
		 * is false because we know that CPU is not tickless.
		 */
		__this_cpu_write(context_tracking.state, state);
	}
	context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__context_tracking_enter);

void context_tracking_enter(enum ctx_state state)
{
	unsigned long flags;

	/*
	 * Some contexts may involve an exception occurring in an irq,
	 * leading to that nesting:
	 * rcu_irq_enter() rcu_user_exit() rcu_user_enter() rcu_irq_exit()
	 * This would mess up the dynticks_nesting count though. And rcu_irq_*()
	 * helpers are enough to protect RCU uses inside the exception. So
	 * just return immediately if we detect we are in an IRQ.
	 */
	if (in_interrupt())
		return;

	local_irq_save(flags);
	__context_tracking_enter(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_enter);
EXPORT_SYMBOL_GPL(context_tracking_enter);

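/*
 * Typical usage, sketched for illustration only (the call sites are
 * architecture specific and arch_return_to_user_mode() below is just a
 * placeholder name):
 *
 *	// late on the syscall/exception return path, right before
 *	// dropping to userspace:
 *	user_enter();		// ends up in context_tracking_enter(CONTEXT_USER)
 *	arch_return_to_user_mode();
 *
 *	// first thing after re-entering the kernel from userspace:
 *	user_exit();		// ends up in context_tracking_exit(CONTEXT_USER)
 *
 * context_tracking_user_enter()/context_tracking_user_exit() below are just
 * out-of-line, kprobe-blacklisted wrappers around user_enter()/user_exit(),
 * so that architecture entry code (including assembly) has a real symbol to
 * call.
 */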
void context_tracking_user_enter(void)
{
	user_enter();
}
NOKPROBE_SYMBOL(context_tracking_user_enter);

/**
 * context_tracking_exit - Inform the context tracking that the CPU is
 *                         exiting user or guest mode and entering the kernel.
 *
 * This function must be called after we enter the kernel from user or
 * guest space and before any use of an RCU read-side critical section. This
 * potentially includes any high-level kernel code like syscalls, exceptions,
 * signal handling, etc...
 *
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
void noinstr __context_tracking_exit(enum ctx_state state)
{
	if (!context_tracking_recursion_enter())
		return;

	if (__this_cpu_read(context_tracking.state) == state) {
		if (__this_cpu_read(context_tracking.active)) {
			/*
			 * We are going to run code that may use RCU. Inform
			 * RCU core about that (ie: we may need the tick again).
			 */
			rcu_user_exit();
			if (state == CONTEXT_USER) {
				instrumentation_begin();
				vtime_user_exit(current);
				trace_user_exit(0);
				instrumentation_end();
			}
		}
		__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
	}
	context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__context_tracking_exit);

void context_tracking_exit(enum ctx_state state)
{
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);
	__context_tracking_exit(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_exit);
EXPORT_SYMBOL_GPL(context_tracking_exit);
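
/*
 * Re-entrancy in practice, for illustration only: an exception handler that
 * may be reached from either kernel or user context brackets its work with
 * the exception_enter()/exception_exit() helpers from
 * <linux/context_tracking.h>, which save the previous ctx_state and feed it
 * back here (handle_the_exception() is a placeholder name):
 *
 *	prev_state = exception_enter();	// context_tracking_exit() if needed
 *	handle_the_exception(regs);
 *	exception_exit(prev_state);	// context_tracking_enter() if needed
 *
 * If the exception interrupted the kernel, the ctx_state checks make both
 * sides no-ops, which is why handlers don't need to know whether they came
 * from userspace.
 */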

void context_tracking_user_exit(void)
{
	user_exit();
}
NOKPROBE_SYMBOL(context_tracking_user_exit);

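/*
 * context_tracking_cpu_set() marks @cpu as needing context tracking (this is
 * typically how the nohz_full= boot mask gets applied) and enables the
 * context_tracking_key static branch so that the user_enter()/user_exit()
 * probes actually do something. The one-time part at the end propagates
 * TIF_NOHZ from init to every future task on architectures that rely on that
 * flag to reach the context tracking code.
 */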
void __init context_tracking_cpu_set(int cpu)
{
	static __initdata bool initialized = false;

	if (!per_cpu(context_tracking.active, cpu)) {
		per_cpu(context_tracking.active, cpu) = true;
		static_branch_inc(&context_tracking_key);
	}

	if (initialized)
		return;

#ifdef CONFIG_HAVE_TIF_NOHZ
	/*
	 * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork.
	 * This assumes that init is the only task at this early boot stage.
	 */
	set_tsk_thread_flag(&init_task, TIF_NOHZ);
#endif
	WARN_ON_ONCE(!tasklist_empty());

	initialized = true;
}

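/*
 * With CONFIG_CONTEXT_TRACKING_FORCE, context tracking is enabled on every
 * possible CPU at boot. This is mainly meant to exercise the context
 * tracking paths on ordinary (non nohz_full) configurations rather than for
 * production use.
 */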
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
void __init context_tracking_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		context_tracking_cpu_set(cpu);
}
#endif