[RFC][PATCH v14 1/7] sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable

From: John Stultz
Date: Mon Nov 25 2024 - 14:52:19 EST


Add a CONFIG_SCHED_PROXY_EXEC option, along with a boot argument
sched_proxy_exec= that can be used to disable the feature at boot
time if CONFIG_SCHED_PROXY_EXEC was enabled.

Cc: Joel Fernandes <joelaf@xxxxxxxxxx>
Cc: Qais Yousef <qyousef@xxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Valentin Schneider <vschneid@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Ben Segall <bsegall@xxxxxxxxxx>
Cc: Zimuzo Ezeozue <zezeozue@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Waiman Long <longman@xxxxxxxxxx>
Cc: Boqun Feng <boqun.feng@xxxxxxxxx>
Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
Cc: Metin Kaya <Metin.Kaya@xxxxxxx>
Cc: Xuewen Yan <xuewen.yan94@xxxxxxxxx>
Cc: K Prateek Nayak <kprateek.nayak@xxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Daniel Lezcano <daniel.lezcano@xxxxxxxxxx>
Cc: kernel-team@xxxxxxxxxxx
Tested-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
Signed-off-by: John Stultz <jstultz@xxxxxxxxxx>
---
v7:
* Switch to CONFIG_SCHED_PROXY_EXEC/sched_proxy_exec= as
suggested by Metin Kaya.
* Switch boot arg from =disable/enable to use kstrtobool(),
which supports =yes|no|1|0|true|false|on|off, as also
suggested by Metin Kaya, and print a message when a boot
argument is used.
v8:
* Move CONFIG_SCHED_PROXY_EXEC under Scheduler Features as
Suggested by Metin
* Minor rework reordering with split sched contexts patch
v12:
* Rework for selected -> donor renaming
v14:
* Depend on !PREEMPT_RT to avoid build issues for now
---
.../admin-guide/kernel-parameters.txt | 5 ++++
include/linux/sched.h | 13 +++++++++
init/Kconfig | 9 ++++++
kernel/sched/core.c | 29 +++++++++++++++++++
kernel/sched/sched.h | 12 ++++++++
5 files changed, 68 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a4736ee87b1aa..761e858c62b92 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5955,6 +5955,11 @@
sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.

+ sched_proxy_exec= [KNL]
+ Enables or disables "proxy execution" style
+ solution to mutex-based priority inversion.
+ Format: <bool>
+
sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.

schedstats= [KNL,X86] Enable or disable scheduled statistics.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f0e9e00d3cf52..24e338ac34d7b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1610,6 +1610,19 @@ struct task_struct {
*/
};

+#ifdef CONFIG_SCHED_PROXY_EXEC
+DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static inline bool sched_proxy_exec(void)
+{
+ return static_branch_likely(&__sched_proxy_exec);
+}
+#else
+static inline bool sched_proxy_exec(void)
+{
+ return false;
+}
+#endif
+
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)

diff --git a/init/Kconfig b/init/Kconfig
index b07f238f3badb..364bd2065b1f1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -863,6 +863,15 @@ config UCLAMP_BUCKETS_COUNT

If in doubt, use the default value.

+config SCHED_PROXY_EXEC
+ bool "Proxy Execution"
+ default n
+ # Avoid some build failures w/ PREEMPT_RT until it can be fixed
+ depends on !PREEMPT_RT
+ help
+ This option enables proxy execution, a mechanism for mutex-owning
+ tasks to inherit the scheduling context of higher priority waiters.
+
endmenu

#
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 95e40895a5190..d712e177d3b75 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,6 +119,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);

DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

+#ifdef CONFIG_SCHED_PROXY_EXEC
+DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static int __init setup_proxy_exec(char *str)
+{
+ bool proxy_enable;
+
+ if (kstrtobool(str, &proxy_enable)) {
+ pr_warn("Unable to parse sched_proxy_exec=\n");
+ return 0;
+ }
+
+ if (proxy_enable) {
+ pr_info("sched_proxy_exec enabled via boot arg\n");
+ static_branch_enable(&__sched_proxy_exec);
+ } else {
+ pr_info("sched_proxy_exec disabled via boot arg\n");
+ static_branch_disable(&__sched_proxy_exec);
+ }
+ return 1;
+}
+#else
+static int __init setup_proxy_exec(char *str)
+{
+ pr_warn("CONFIG_SCHED_PROXY_EXEC=n, so it cannot be enabled or disabled at boot time\n");
+ return 0;
+}
+#endif
+__setup("sched_proxy_exec=", setup_proxy_exec);
+
#ifdef CONFIG_SCHED_DEBUG
/*
* Debugging: various feature bits
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 76f5f53a645fc..24eae02ddc7f6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1148,10 +1148,15 @@ struct rq {
*/
unsigned int nr_uninterruptible;

+#ifdef CONFIG_SCHED_PROXY_EXEC
+ struct task_struct __rcu *donor; /* Scheduling context */
+ struct task_struct __rcu *curr; /* Execution context */
+#else
union {
struct task_struct __rcu *donor; /* Scheduler context */
struct task_struct __rcu *curr; /* Execution context */
};
+#endif
struct sched_dl_entity *dl_server;
struct task_struct *idle;
struct task_struct *stop;
@@ -1348,10 +1353,17 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() raw_cpu_ptr(&runqueues)

+#ifdef CONFIG_SCHED_PROXY_EXEC
+static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
+{
+ rcu_assign_pointer(rq->donor, t);
+}
+#else
static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
{
/* Do nothing */
}
+#endif

#ifdef CONFIG_SCHED_CORE
static inline struct cpumask *sched_group_span(struct sched_group *sg);
--
2.47.0.371.ga323438b13-goog