[PATCH 5/5] sched: Add laziest preempt model

From: Peter Zijlstra
Date: Mon Oct 07 2024 - 03:55:40 EST


Much like LAZY, except lazier still. It will not promote LAZY to full
preempt on the tick, and thus competes with None for suckage.

(do we really want this?)
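
A minimal userspace sketch of the behavioural difference (illustrative
only, not kernel code; the names mirror dynamic_preempt_promote() and
the sched_tick() hunk below): under LAZY the tick promotes a pending
TIF_NEED_RESCHED_LAZY to a full resched, under LAZIEST it does not, so
a lazy request only takes effect at the next natural preemption point.

	#include <stdbool.h>
	#include <stdio.h>

	enum preempt_model { MODEL_LAZY, MODEL_LAZIEST };

	/* Mirrors dynamic_preempt_promote(): only LAZY promotes on the tick. */
	static bool preempt_promote(enum preempt_model m)
	{
		return m == MODEL_LAZY;
	}

	/* Sketch of the sched_tick() change: a pending lazy resched is
	 * upgraded to an immediate one only when promotion is enabled. */
	static void tick(enum preempt_model m, bool need_resched_lazy)
	{
		if (preempt_promote(m) && need_resched_lazy)
			printf("resched_curr(): promote to full preempt\n");
		else
			printf("no promotion: wait for the next preemption point\n");
	}

	int main(void)
	{
		tick(MODEL_LAZY, true);		/* promotes at the tick */
		tick(MODEL_LAZIEST, true);	/* never promotes */
		return 0;
	}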

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/preempt.h |   10 +++++++++-
kernel/Kconfig.preempt  |   12 ++++++++++++
kernel/sched/core.c     |   49 ++++++++++++++++++++++++++++++++++++++++++++++++-
kernel/sched/debug.c    |    4 ++--
4 files changed, 71 insertions(+), 4 deletions(-)

--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -487,6 +487,7 @@ extern bool preempt_model_none(void);
extern bool preempt_model_voluntary(void);
extern bool preempt_model_full(void);
extern bool preempt_model_lazy(void);
+extern bool preempt_model_laziest(void);

#else

@@ -507,6 +508,10 @@ static inline bool preempt_model_lazy(vo
{
return IS_ENABLED(CONFIG_PREEMPT_LAZY);
}
+static inline bool preempt_model_laziest(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT_LAZIEST);
+}

#endif

@@ -525,7 +530,10 @@ static inline bool preempt_model_rt(void
*/
static inline bool preempt_model_preemptible(void)
{
- return preempt_model_full() || preempt_model_lazy() || preempt_model_rt();
+ return preempt_model_full() ||
+ preempt_model_lazy() ||
+ preempt_model_laziest() ||
+ preempt_model_rt();
}

#endif /* __LINUX_PREEMPT_H */
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -84,6 +84,18 @@ config PREEMPT_LAZY
reduce lock holder preemption and recover some of the performance
gains seen from using Voluntary preemption.

+config PREEMPT_LAZIEST
+ bool "Scheduler controlled preemption model"
+ depends on !ARCH_NO_PREEMPT
+ depends on ARCH_HAS_PREEMPT_LAZY
+ select PREEMPT_BUILD if !PREEMPT_DYNAMIC
+ help
+ This option provides a scheduler driven preemption model that
+ is fundamentally similar to full preemption, but is the least
+ eager to preempt SCHED_NORMAL tasks in an attempt to
+ reduce lock holder preemption and recover some of the performance
+ gains seen from using no preemption.
+
endchoice

config PREEMPT_RT
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1108,13 +1108,22 @@ void resched_curr(struct rq *rq)

#ifdef CONFIG_PREEMPT_DYNAMIC
static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_lazy);
+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_promote);
static __always_inline bool dynamic_preempt_lazy(void)
{
return static_branch_unlikely(&sk_dynamic_preempt_lazy);
}
+static __always_inline bool dynamic_preempt_promote(void)
+{
+ return static_branch_unlikely(&sk_dynamic_preempt_promote);
+}
#else
static __always_inline bool dynamic_preempt_lazy(void)
{
+ return IS_ENABLED(CONFIG_PREEMPT_LAZY) || IS_ENABLED(CONFIG_PREEMPT_LAZIEST);
+}
+static __always_inline bool dynamic_preempt_promote(void)
+{
return IS_ENABLED(CONFIG_PREEMPT_LAZY);
}
#endif
@@ -5628,7 +5637,7 @@ void sched_tick(void)
hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);

- if (dynamic_preempt_lazy() && tif_test_bit(TIF_NEED_RESCHED_LAZY))
+ if (dynamic_preempt_promote() && tif_test_bit(TIF_NEED_RESCHED_LAZY))
resched_curr(rq);

curr->sched_class->task_tick(rq, curr, 0);
@@ -7368,6 +7377,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_writ
* preempt_schedule_notrace <- NOP
* irqentry_exit_cond_resched <- NOP
* dynamic_preempt_lazy <- false
+ * dynamic_preempt_promote <- false
*
* VOLUNTARY:
* cond_resched <- __cond_resched
@@ -7376,6 +7386,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_writ
* preempt_schedule_notrace <- NOP
* irqentry_exit_cond_resched <- NOP
* dynamic_preempt_lazy <- false
+ * dynamic_preempt_promote <- false
*
* FULL:
* cond_resched <- RET0
@@ -7384,6 +7395,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_writ
* preempt_schedule_notrace <- preempt_schedule_notrace
* irqentry_exit_cond_resched <- irqentry_exit_cond_resched
* dynamic_preempt_lazy <- false
+ * dynamic_preempt_promote <- false
*
* LAZY:
* cond_resched <- RET0
@@ -7392,6 +7404,16 @@ EXPORT_SYMBOL(__cond_resched_rwlock_writ
* preempt_schedule_notrace <- preempt_schedule_notrace
* irqentry_exit_cond_resched <- irqentry_exit_cond_resched
* dynamic_preempt_lazy <- true
+ * dynamic_preempt_promote <- true
+ *
+ * LAZIEST:
+ * cond_resched <- RET0
+ * might_resched <- RET0
+ * preempt_schedule <- preempt_schedule
+ * preempt_schedule_notrace <- preempt_schedule_notrace
+ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
+ * dynamic_preempt_lazy <- true
+ * dynamic_preempt_promote <- false
*/

enum {
@@ -7400,6 +7422,7 @@ enum {
preempt_dynamic_voluntary,
preempt_dynamic_full,
preempt_dynamic_lazy,
+ preempt_dynamic_laziest,
};

int preempt_dynamic_mode = preempt_dynamic_undefined;
@@ -7420,6 +7443,9 @@ int sched_dynamic_mode(const char *str)
#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY
if (!strcmp(str, "lazy"))
return preempt_dynamic_lazy;
+
+ if (!strcmp(str, "laziest"))
+ return preempt_dynamic_laziest;
#endif

return -EINVAL;
@@ -7454,6 +7480,7 @@ static void __sched_dynamic_update(int m
preempt_dynamic_enable(preempt_schedule_notrace);
preempt_dynamic_enable(irqentry_exit_cond_resched);
preempt_dynamic_key_disable(preempt_lazy);
+ preempt_dynamic_key_disable(preempt_promote);

switch (mode) {
case preempt_dynamic_none:
@@ -7464,6 +7491,7 @@ static void __sched_dynamic_update(int m
preempt_dynamic_disable(preempt_schedule_notrace);
preempt_dynamic_disable(irqentry_exit_cond_resched);
preempt_dynamic_key_disable(preempt_lazy);
+ preempt_dynamic_key_disable(preempt_promote);
if (mode != preempt_dynamic_mode)
pr_info("Dynamic Preempt: none\n");
break;
@@ -7476,6 +7504,7 @@ static void __sched_dynamic_update(int m
preempt_dynamic_disable(preempt_schedule_notrace);
preempt_dynamic_disable(irqentry_exit_cond_resched);
preempt_dynamic_key_disable(preempt_lazy);
+ preempt_dynamic_key_disable(preempt_promote);
if (mode != preempt_dynamic_mode)
pr_info("Dynamic Preempt: voluntary\n");
break;
@@ -7488,6 +7517,7 @@ static void __sched_dynamic_update(int m
preempt_dynamic_enable(preempt_schedule_notrace);
preempt_dynamic_enable(irqentry_exit_cond_resched);
preempt_dynamic_key_disable(preempt_lazy);
+ preempt_dynamic_key_disable(preempt_promote);
if (mode != preempt_dynamic_mode)
pr_info("Dynamic Preempt: full\n");
break;
@@ -7500,9 +7530,23 @@ static void __sched_dynamic_update(int m
preempt_dynamic_enable(preempt_schedule_notrace);
preempt_dynamic_enable(irqentry_exit_cond_resched);
preempt_dynamic_key_enable(preempt_lazy);
+ preempt_dynamic_key_enable(preempt_promote);
if (mode != preempt_dynamic_mode)
pr_info("Dynamic Preempt: lazy\n");
break;
+
+ case preempt_dynamic_laziest:
+ if (!klp_override)
+ preempt_dynamic_disable(cond_resched);
+ preempt_dynamic_disable(might_resched);
+ preempt_dynamic_enable(preempt_schedule);
+ preempt_dynamic_enable(preempt_schedule_notrace);
+ preempt_dynamic_enable(irqentry_exit_cond_resched);
+ preempt_dynamic_key_enable(preempt_lazy);
+ preempt_dynamic_key_disable(preempt_promote);
+ if (mode != preempt_dynamic_mode)
+ pr_info("Dynamic Preempt: laziest\n");
+ break;
}

preempt_dynamic_mode = mode;
@@ -7567,6 +7611,8 @@ static void __init preempt_dynamic_init(
sched_dynamic_update(preempt_dynamic_voluntary);
} else if (IS_ENABLED(CONFIG_PREEMPT_LAZY)) {
sched_dynamic_update(preempt_dynamic_lazy);
+ } else if (IS_ENABLED(CONFIG_PREEMPT_LAZIEST)) {
+ sched_dynamic_update(preempt_dynamic_laziest);
} else {
/* Default static call setting, nothing to do */
WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
@@ -7588,6 +7634,7 @@ PREEMPT_MODEL_ACCESSOR(none);
PREEMPT_MODEL_ACCESSOR(voluntary);
PREEMPT_MODEL_ACCESSOR(full);
PREEMPT_MODEL_ACCESSOR(lazy);
+PREEMPT_MODEL_ACCESSOR(laziest);

#else /* !CONFIG_PREEMPT_DYNAMIC: */

--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -245,9 +245,9 @@ static ssize_t sched_dynamic_write(struc
static int sched_dynamic_show(struct seq_file *m, void *v)
{
static const char * preempt_modes[] = {
- "none", "voluntary", "full", "lazy",
+ "none", "voluntary", "full", "lazy", "laziest",
};
- int j = ARRAY_SIZE(preempt_modes) - !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
+ int j = ARRAY_SIZE(preempt_modes) - 2*!IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
int i = IS_ENABLED(CONFIG_PREEMPT_RT) * 2;

for (; i < j; i++) {
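
For reference, a worked example of the index arithmetic in
sched_dynamic_show() (illustrative userspace code, not part of the
patch): the "- 2*!IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY)" term now
has to drop both "lazy" and "laziest" from the tail of the array, while
the RT start index still skips "none" and "voluntary".

	#include <stdio.h>

	int main(void)
	{
		static const char * const preempt_modes[] = {
			"none", "voluntary", "full", "lazy", "laziest",
		};
		int arch_has_preempt_lazy = 0;	/* pretend the arch lacks LAZY */
		int preempt_rt = 0;
		int n = sizeof(preempt_modes) / sizeof(preempt_modes[0]);
		int j = n - 2 * !arch_has_preempt_lazy;	/* drops "lazy", "laziest" */
		int i = preempt_rt * 2;			/* RT skips "none", "voluntary" */

		for (; i < j; i++)
			printf("%s ", preempt_modes[i]);
		printf("\n");	/* prints: none voluntary full */
		return 0;
	}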