[RFC PATCH v15 1/7] sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable
From: John Stultz
Date: Wed Mar 12 2025 - 18:12:16 EST
Add a CONFIG_SCHED_PROXY_EXEC option, along with a boot argument
sched_proxy_exec= that can be used to enable or disable the feature
at boot time when CONFIG_SCHED_PROXY_EXEC is enabled. The feature
defaults to on when it is built in.
Cc: Joel Fernandes <joelagnelf@xxxxxxxxxx>
Cc: Qais Yousef <qyousef@xxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Valentin Schneider <vschneid@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Ben Segall <bsegall@xxxxxxxxxx>
Cc: Zimuzo Ezeozue <zezeozue@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Waiman Long <longman@xxxxxxxxxx>
Cc: Boqun Feng <boqun.feng@xxxxxxxxx>
Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
Cc: Metin Kaya <Metin.Kaya@xxxxxxx>
Cc: Xuewen Yan <xuewen.yan94@xxxxxxxxx>
Cc: K Prateek Nayak <kprateek.nayak@xxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Daniel Lezcano <daniel.lezcano@xxxxxxxxxx>
Cc: Suleiman Souhlal <suleiman@xxxxxxxxxx>
Cc: kernel-team@xxxxxxxxxxx
Tested-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
Signed-off-by: John Stultz <jstultz@xxxxxxxxxx>
---
v7:
* Switch to CONFIG_SCHED_PROXY_EXEC/sched_proxy_exec= as
suggested by Metin Kaya.
* Switch boot arg from =disable/enable to use kstrtobool(),
which supports =yes|no|1|0|true|false|on|off, as also
suggested by Metin Kaya, and print a message when a boot
argument is used.
v8:
* Move CONFIG_SCHED_PROXY_EXEC under Scheduler Features as
suggested by Metin
* Minor rework reordering with split sched contexts patch
v12:
* Rework for selected -> donor renaming
v14:
* Depend on !PREEMPT_RT to avoid build issues for now
v15:
* Depend on EXPERT while patch series upstreaming is
in progress.
---
.../admin-guide/kernel-parameters.txt | 5 ++++
include/linux/sched.h | 13 +++++++++
init/Kconfig | 10 +++++++
kernel/sched/core.c | 29 +++++++++++++++++++
kernel/sched/sched.h | 12 ++++++++
5 files changed, 69 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb8752b42ec85..dcc2443078d00 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6262,6 +6262,11 @@
sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.
+ sched_proxy_exec= [KNL]
+ Enables or disables "proxy execution" style
+ solution to mutex-based priority inversion.
+ Format: <bool>
+
sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.
schedstats= [KNL,X86] Enable or disable scheduled statistics.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c15365a30c08..1462f2c70aefc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1636,6 +1636,19 @@ struct task_struct {
*/
};
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static inline bool sched_proxy_exec(void)
+{
+ return static_branch_likely(&__sched_proxy_exec);
+}
+#else
+static inline bool sched_proxy_exec(void)
+{
+ return false;
+}
+#endif
+
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
diff --git a/init/Kconfig b/init/Kconfig
index d0d021b3fa3b3..b989ddc27444e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
If in doubt, use the default value.
+config SCHED_PROXY_EXEC
+ bool "Proxy Execution"
+ default n
+ # Avoid some build failures w/ PREEMPT_RT until it can be fixed
+ depends on !PREEMPT_RT
+ depends on EXPERT
+ help
+ This option enables proxy execution, a mechanism for mutex-owning
+ tasks to inherit the scheduling context of higher priority waiters.
+
endmenu
#
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67189907214d3..3968c3967ec38 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,6 +119,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static int __init setup_proxy_exec(char *str)
+{
+ bool proxy_enable;
+
+ if (kstrtobool(str, &proxy_enable)) {
+ pr_warn("Unable to parse sched_proxy_exec=\n");
+ return 0;
+ }
+
+ if (proxy_enable) {
+ pr_info("sched_proxy_exec enabled via boot arg\n");
+ static_branch_enable(&__sched_proxy_exec);
+ } else {
+ pr_info("sched_proxy_exec disabled via boot arg\n");
+ static_branch_disable(&__sched_proxy_exec);
+ }
+ return 1;
+}
+#else
+static int __init setup_proxy_exec(char *str)
+{
+ pr_warn("CONFIG_SCHED_PROXY_EXEC=n, so it cannot be enabled or disabled at boot time\n");
+ return 0;
+}
+#endif
+__setup("sched_proxy_exec=", setup_proxy_exec);
+
#ifdef CONFIG_SCHED_DEBUG
/*
* Debugging: various feature bits
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c8512a9fb0229..05d2122533619 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1155,10 +1155,15 @@ struct rq {
*/
unsigned int nr_uninterruptible;
+#ifdef CONFIG_SCHED_PROXY_EXEC
+ struct task_struct __rcu *donor; /* Scheduling context */
+ struct task_struct __rcu *curr; /* Execution context */
+#else
union {
struct task_struct __rcu *donor; /* Scheduler context */
struct task_struct __rcu *curr; /* Execution context */
};
+#endif
struct sched_dl_entity *dl_server;
struct task_struct *idle;
struct task_struct *stop;
@@ -1355,10 +1360,17 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() raw_cpu_ptr(&runqueues)
+#ifdef CONFIG_SCHED_PROXY_EXEC
+static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
+{
+ rcu_assign_pointer(rq->donor, t);
+}
+#else
static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
{
/* Do nothing */
}
+#endif
#ifdef CONFIG_SCHED_CORE
static inline struct cpumask *sched_group_span(struct sched_group *sg);
--
2.49.0.rc0.332.g42c0ae87b1-goog