author: Thomas Gleixner <tglx@linutronix.de> 2012-10-26 18:50:54 +0100
committer: Minda Chen <minda.chen@starfivetech.com> 2023-11-06 19:24:49 +0800
commit: 4d49bc4d923b17bf504fb96d2382e79fb854a01d
parent: 346b840b6cf5bd676f37110d18602ceee8c09ee6
Commit Summary:
Diffstat:
12 files changed, 226 insertions, 35 deletions
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index a9a0b33160de..3da73c968211 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -175,6 +175,20 @@ extern void preempt_count_sub(int val);
#define preempt_count_inc() preempt_count_add(1)
#define preempt_count_dec() preempt_count_sub(1)
+#ifdef CONFIG_PREEMPT_LAZY
+#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
+#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
+#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
+#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
+#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
+#else
+#define add_preempt_lazy_count(val) do { } while (0)
+#define sub_preempt_lazy_count(val) do { } while (0)
+#define inc_preempt_lazy_count() do { } while (0)
+#define dec_preempt_lazy_count() do { } while (0)
+#define preempt_lazy_count() (0)
+#endif
+
#ifdef CONFIG_PREEMPT_COUNT
#define preempt_disable() \
@@ -183,6 +197,12 @@ do { \
barrier(); \
} while (0)
+#define preempt_lazy_disable() \
+do { \
+ inc_preempt_lazy_count(); \
+ barrier(); \
+} while (0)
+
#define sched_preempt_enable_no_resched() \
do { \
barrier(); \
@@ -220,6 +240,18 @@ do { \
__preempt_schedule(); \
} while (0)
+/*
+ * open code preempt_check_resched() because it is not exported to modules and
+ * used by local_unlock() or bpf_enable_instrumentation().
+ */
+#define preempt_lazy_enable() \
+do { \
+ dec_preempt_lazy_count(); \
+ barrier(); \
+ if (should_resched(0)) \
+ __preempt_schedule(); \
+} while (0)
+
#else /* !CONFIG_PREEMPTION */
#define preempt_enable() \
do { \
@@ -227,6 +259,12 @@ do { \
preempt_count_dec(); \
} while (0)
+#define preempt_lazy_enable() \
+do { \
+ dec_preempt_lazy_count(); \
+ barrier(); \
+} while (0)
+
#define preempt_enable_notrace() \
do { \
barrier(); \
@@ -268,6 +306,9 @@ do { \
#define preempt_check_resched_rt() barrier()
#define preemptible() 0
+#define preempt_lazy_disable() barrier()
+#define preempt_lazy_enable() barrier()
+
#endif /* CONFIG_PREEMPT_COUNT */
#ifdef MODULE
@@ -286,7 +327,7 @@ do { \
} while (0)
#define preempt_fold_need_resched() \
do { \
- if (tif_need_resched()) \
+ if (tif_need_resched_now()) \
set_preempt_need_resched(); \
} while (0)
@@ -402,8 +443,15 @@ extern void migrate_enable(void);
#else
-static inline void migrate_disable(void) { }
-static inline void migrate_enable(void) { }
+static inline void migrate_disable(void)
+{
+ preempt_lazy_disable();
+}
+
+static inline void migrate_enable(void)
+{
+ preempt_lazy_enable();
+}
#endif /* CONFIG_SMP */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04172b57d71d..0b57d5ffa11d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2011,6 +2011,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}
+#ifdef CONFIG_PREEMPT_LAZY
+static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
+}
+
+static inline int need_resched_lazy(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int need_resched_now(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#else
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
+static inline int need_resched_lazy(void) { return 0; }
+
+static inline int need_resched_now(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#endif
+
#ifdef CONFIG_PREEMPT_RT
static inline bool task_match_saved_state(struct task_struct *p, long match_state)
{
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 0999f6317978..7af834b7c114 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -163,7 +163,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
clear_ti_thread_flag(task_thread_info(t), TIF_##fl)
#endif /* !CONFIG_GENERIC_ENTRY */
-#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#ifdef CONFIG_PREEMPT_LAZY
+#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
+ test_thread_flag(TIF_NEED_RESCHED_LAZY))
+#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
+#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)
+
+#else
+#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_lazy() 0
+#endif
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
static inline int arch_within_stack_frames(const void * const stack,
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 3e475eeb5a99..2d1f684b355b 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -69,6 +69,7 @@ struct trace_entry {
unsigned char flags;
unsigned char preempt_count;
int pid;
+ unsigned char preempt_lazy_count;
};
#define TRACE_EVENT_TYPE_MAX \
@@ -157,9 +158,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry,
unsigned int trace_ctx)
{
entry->preempt_count = trace_ctx & 0xff;
+ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff;
entry->pid = current->pid;
entry->type = type;
- entry->flags = trace_ctx >> 16;
+ entry->flags = trace_ctx >> 24;
}
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
@@ -172,6 +174,7 @@ enum trace_flag_type {
TRACE_FLAG_SOFTIRQ = 0x10,
TRACE_FLAG_PREEMPT_RESCHED = 0x20,
TRACE_FLAG_NMI = 0x40,
+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x80,
};
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 5876e30c5740..5df0776264c2 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,5 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
+config HAVE_PREEMPT_LAZY
+ bool
+
+config PREEMPT_LAZY
+ def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT
+
choice
prompt "Preemption Model"
default PREEMPT_NONE
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c5a98237338d..e8981a05c29e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -986,6 +986,46 @@ void resched_curr(struct rq *rq)
trace_sched_wake_idle_without_ipi(cpu);
}
+#ifdef CONFIG_PREEMPT_LAZY
+
+static int tsk_is_polling(struct task_struct *p)
+{
+#ifdef TIF_POLLING_NRFLAG
+ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
+#else
+ return 0;
+#endif
+}
+
+void resched_curr_lazy(struct rq *rq)
+{
+ struct task_struct *curr = rq->curr;
+ int cpu;
+
+ if (!sched_feat(PREEMPT_LAZY)) {
+ resched_curr(rq);
+ return;
+ }
+
+ if (test_tsk_need_resched(curr))
+ return;
+
+ if (test_tsk_need_resched_lazy(curr))
+ return;
+
+ set_tsk_need_resched_lazy(curr);
+
+ cpu = cpu_of(rq);
+ if (cpu == smp_processor_id())
+ return;
+
+ /* NEED_RESCHED_LAZY must be visible before we test polling */
+ smp_mb();
+ if (!tsk_is_polling(curr))
+ smp_send_reschedule(cpu);
+}
+#endif
+
void resched_cpu(int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -2141,6 +2181,7 @@ void migrate_disable(void)
preempt_disable();
this_rq()->nr_pinned++;
p->migration_disabled = 1;
+ preempt_lazy_disable();
preempt_enable();
}
EXPORT_SYMBOL_GPL(migrate_disable);
@@ -2171,6 +2212,7 @@ void migrate_enable(void)
barrier();
p->migration_disabled = 0;
this_rq()->nr_pinned--;
+ preempt_lazy_enable();
preempt_enable();
}
EXPORT_SYMBOL_GPL(migrate_enable);
@@ -4406,6 +4448,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->on_cpu = 0;
#endif
init_task_preempt_count(p);
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+ task_thread_info(p)->preempt_lazy_count = 0;
+#endif
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
RB_CLEAR_NODE(&p->pushable_dl_tasks);
@@ -6253,6 +6298,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
next = pick_next_task(rq, prev, &rf);
clear_tsk_need_resched(prev);
+ clear_tsk_need_resched_lazy(prev);
clear_preempt_need_resched();
#ifdef CONFIG_SCHED_DEBUG
rq->last_seen_need_resched_ns = 0;
@@ -6470,6 +6516,30 @@ static void __sched notrace preempt_schedule_common(void)
} while (need_resched());
}
+#ifdef CONFIG_PREEMPT_LAZY
+/*
+ * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is
+ * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as
+ * preempt_lazy_count counter >0.
+ */
+static __always_inline int preemptible_lazy(void)
+{
+ if (test_thread_flag(TIF_NEED_RESCHED))
+ return 1;
+ if (current_thread_info()->preempt_lazy_count)
+ return 0;
+ return 1;
+}
+
+#else
+
+static inline int preemptible_lazy(void)
+{
+ return 1;
+}
+
+#endif
+
#ifdef CONFIG_PREEMPTION
/*
* This is the entry point to schedule() from in-kernel preemption
@@ -6483,7 +6553,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
*/
if (likely(!preemptible()))
return;
-
+ if (!preemptible_lazy())
+ return;
preempt_schedule_common();
}
NOKPROBE_SYMBOL(preempt_schedule);
@@ -6516,6 +6587,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
if (likely(!preemptible()))
return;
+ if (!preemptible_lazy())
+ return;
+
do {
/*
* Because the function tracer can trace preempt_count_sub()
@@ -8677,7 +8751,9 @@ void __init init_idle(struct task_struct *idle, int cpu)
/* Set the preempt count _outside_ the spinlocks! */
init_idle_preempt_count(idle, cpu);
-
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+ task_thread_info(idle)->preempt_lazy_count = 0;
+#endif
/*
* The idle tasks have their own, simple scheduling class:
*/
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 38ad84c0d874..e4867129445d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4445,7 +4445,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
if (delta_exec > ideal_runtime) {
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
/*
* The current task ran long enough, ensure it doesn't get
* re-elected due to buddy favours.
@@ -4469,7 +4469,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
return;
if (delta > ideal_runtime)
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
}
static void
@@ -4612,7 +4612,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
* validating it and just reschedule.
*/
if (queued) {
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
return;
}
/*
@@ -4752,7 +4752,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
* hierarchy can be throttled
*/
if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
}
static __always_inline
@@ -5515,7 +5515,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
if (delta < 0) {
if (task_current(rq, p))
- resched_curr(rq);
+ resched_curr_lazy(rq);
return;
}
hrtick_start(rq, delta);
@@ -7205,7 +7205,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return;
preempt:
- resched_curr(rq);
+ resched_curr_lazy(rq);
/*
* Only set the backward buddy when the current task is still
* on the rq. This can happen when a wakeup gets interleaved
@@ -11106,7 +11106,7 @@ static void task_fork_fair(struct task_struct *p)
* 'current' within the tree based on its new key value.
*/
swap(curr->vruntime, se->vruntime);
- resched_curr(rq);
+ resched_curr_lazy(rq);
}
se->vruntime -= cfs_rq->min_vruntime;
@@ -11133,7 +11133,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
*/
if (task_current(rq, p)) {
if (p->prio > oldprio)
- resched_curr(rq);
+ resched_curr_lazy(rq);
} else
check_preempt_curr(rq, p, 0);
}
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 1cf435bbcd9c..d5cee51819bf 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
#ifdef CONFIG_PREEMPT_RT
SCHED_FEAT(TTWU_QUEUE, false)
+# ifdef CONFIG_PREEMPT_LAZY
+SCHED_FEAT(PREEMPT_LAZY, true)
+# endif
#else
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3d3e5793e117..0e7b90962aec 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2317,6 +2317,15 @@ extern void reweight_task(struct task_struct *p, int prio);
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);
+#ifdef CONFIG_PREEMPT_LAZY
+extern void resched_curr_lazy(struct rq *rq);
+#else
+static inline void resched_curr_lazy(struct rq *rq)
+{
+ resched_curr(rq);
+}
+#endif
+
extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bc677cd64224..3471d5c66e46 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2629,7 +2629,13 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
trace_flags |= TRACE_FLAG_NEED_RESCHED;
if (test_preempt_need_resched())
trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
- return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+#ifdef CONFIG_PREEMPT_LAZY
+ if (need_resched_lazy())
+ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
+#endif
+
+ return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+ (preempt_lazy_count() & 0xff) << 16 |
(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}
@@ -4193,15 +4199,17 @@ unsigned long trace_total_entries(struct trace_array *tr)
static void print_lat_help_header(struct seq_file *m)
{
- seq_puts(m, "# _------=> CPU# \n"
- "# / _-----=> irqs-off \n"
- "# | / _----=> need-resched \n"
- "# || / _---=> hardirq/softirq \n"
- "# ||| / _--=> preempt-depth \n"
- "# |||| / _-=> migrate-disable \n"
- "# ||||| / delay \n"
- "# cmd pid |||||| time | caller \n"
- "# \\ / |||||| \\ | / \n");
+ seq_puts(m, "# _--------=> CPU# \n"
+ "# / _-------=> irqs-off \n"
+ "# | / _------=> need-resched \n"
+ "# || / _-----=> need-resched-lazy\n"
+ "# ||| / _----=> hardirq/softirq \n"
+ "# |||| / _---=> preempt-depth \n"
+ "# ||||| / _--=> preempt-lazy-depth\n"
+ "# |||||| / _-=> migrate-disable \n"
+ "# ||||||| / delay \n"
+ "# cmd pid |||||||| time | caller \n"
+ "# \\ / |||||||| \\ | / \n");
}
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
@@ -4235,14 +4243,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
print_event_info(buf, m);
- seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
- seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
- seq_printf(m, "# %.*s|||| / delay\n", prec, space);
- seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
- seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
+ seq_printf(m, "# %.*s _-------=> irqs-off\n", prec, space);
+ seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space);
+ seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space);
+ seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space);
+ seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space);
+ seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space);
+ seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space);
+ seq_printf(m, "# %.*s|||||| / delay\n", prec, space);
+ seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
+ seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | ");
}
void
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 830b3b9940f4..749b786cd548 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -184,6 +184,7 @@ static int trace_define_common_fields(void)
/* Holds both preempt_count and migrate_disable */
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
+ __common_field(unsigned char, preempt_lazy_count);
return ret;
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index c2ca40e8595b..be070d258c3b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -451,6 +451,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
{
char hardsoft_irq;
char need_resched;
+ char need_resched_lazy;
char irqs_off;
int hardirq;
int softirq;
@@ -481,6 +482,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
break;
}
+ need_resched_lazy =
+ (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
+
hardsoft_irq =
(nmi && hardirq) ? 'Z' :
nmi ? 'z' :
@@ -489,14 +493,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
softirq ? 's' :
'.' ;
- trace_seq_printf(s, "%c%c%c",
- irqs_off, need_resched, hardsoft_irq);
+ trace_seq_printf(s, "%c%c%c%c",
+ irqs_off, need_resched, need_resched_lazy,
+ hardsoft_irq);
if (entry->preempt_count & 0xf)
trace_seq_printf(s, "%x", entry->preempt_count & 0xf);
else
trace_seq_putc(s, '.');
+ if (entry->preempt_lazy_count)
+ trace_seq_printf(s, "%x", entry->preempt_lazy_count);
+ else
+ trace_seq_putc(s, '.');
+
if (entry->preempt_count & 0xf0)
trace_seq_printf(s, "%x", entry->preempt_count >> 4);
else