Re: [PATCH 5/5] sched: Add laziest preempt model
From: Ankur Arora
Date: Tue Oct 08 2024 - 02:00:13 EST
Peter Zijlstra <peterz@xxxxxxxxxxxxx> writes:
> Much like LAZY, except lazier still. It will not promote LAZY to full
> preempt on tick, and will compete with None for suckage.
Yeah, none at least has cond_resched().
I can't think of any workload that would benefit from this, though. Maybe
one with a big cache footprint?
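
To spell out how I read it, combining the sched_tick() change with the new
dynamic_preempt_promote() key (simplified sketch, not the actual code):

	/* tick path */
	if (dynamic_preempt_promote() && tif_test_bit(TIF_NEED_RESCHED_LAZY))
		resched_curr(rq);	/* lazy: upgrade to TIF_NEED_RESCHED */
					/* laziest: never taken */

With lazy, a pending lazy reschedule gets upgraded to a real one on the next
tick, so an in-kernel CPU hog is preempted within a tick. With laziest the
lazy bit just sits there until the task returns to userspace or blocks, and
since cond_resched() is patched to RET0 there is no voluntary preemption
point either. That is the part where plain None still does better.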
> (do we really want this?)
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> ---
> include/linux/preempt.h | 10 ++++++++-
> kernel/Kconfig.preempt | 12 +++++++++++
> kernel/sched/core.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-
> kernel/sched/debug.c | 4 +--
> 4 files changed, 71 insertions(+), 4 deletions(-)
>
> --- a/include/linux/preempt.h
> +++ b/include/linux/preempt.h
> @@ -487,6 +487,7 @@ extern bool preempt_model_none(void);
> extern bool preempt_model_voluntary(void);
> extern bool preempt_model_full(void);
> extern bool preempt_model_lazy(void);
> +extern bool preempt_model_laziest(void);
>
> #else
>
> @@ -507,6 +508,10 @@ static inline bool preempt_model_lazy(vo
> {
> return IS_ENABLED(CONFIG_PREEMPT_LAZY);
> }
> +static inline bool preempt_model_laziest(void)
> +{
> + return IS_ENABLED(CONFIG_PREEMPT_LAZIEST);
> +}
>
> #endif
>
> @@ -525,7 +530,10 @@ static inline bool preempt_model_rt(void
> */
> static inline bool preempt_model_preemptible(void)
> {
> - return preempt_model_full() || preempt_model_lazy() || preempt_model_rt();
> + return preempt_model_full() ||
> + preempt_model_lazy() ||
> + preempt_model_laziest() ||
> + preempt_model_rt();
> }
>
> #endif /* __LINUX_PREEMPT_H */
> --- a/kernel/Kconfig.preempt
> +++ b/kernel/Kconfig.preempt
> @@ -84,6 +84,18 @@ config PREEMPT_LAZY
> reduce lock holder preemption and recover some of the performance
> gains seen from using Voluntary preemption.
>
> +config PREEMPT_LAZIEST
> + bool "Scheduler controlled preemption model"
> + depends on !ARCH_NO_PREEMPT
> + depends on ARCH_HAS_PREEMPT_LAZY
> + select PREEMPT_BUILD if !PREEMPT_DYNAMIC
> + help
> + This option provides a scheduler driven preemption model that
> + is fundamentally similar to full preemption, but is least
> + eager to preempt SCHED_NORMAL tasks in an attempt to
> + reduce lock holder preemption and recover some of the performance
> + gains seen from using no preemption.
> +
> endchoice
>
> config PREEMPT_RT
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1108,13 +1108,22 @@ void resched_curr(struct rq *rq)
>
> #ifdef CONFIG_PREEMPT_DYNAMIC
> static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_lazy);
> +static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_promote);
> static __always_inline bool dynamic_preempt_lazy(void)
> {
> return static_branch_unlikely(&sk_dynamic_preempt_lazy);
> }
> +static __always_inline bool dynamic_preempt_promote(void)
> +{
> + return static_branch_unlikely(&sk_dynamic_preempt_promote);
> +}
> #else
> static __always_inline bool dynamic_preempt_lazy(void)
> {
> + return IS_ENABLED(CONFIG_PREEMPT_LAZY) || IS_ENABLED(CONFIG_PREEMPT_LAZIEST);
> +}
> +static __always_inline bool dynamic_preempt_promote(void)
> +{
> return IS_ENABLED(CONFIG_PREEMPT_LAZY);
> }
> #endif
> @@ -5628,7 +5637,7 @@ void sched_tick(void)
> hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
> update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
>
> - if (dynamic_preempt_lazy() && tif_test_bit(TIF_NEED_RESCHED_LAZY))
> + if (dynamic_preempt_promote() && tif_test_bit(TIF_NEED_RESCHED_LAZY))
> resched_curr(rq);
>
> curr->sched_class->task_tick(rq, curr, 0);
> @@ -7368,6 +7377,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_writ
> * preempt_schedule_notrace <- NOP
> * irqentry_exit_cond_resched <- NOP
> * dynamic_preempt_lazy <- false
> + * dynamic_preempt_promote <- false
> *
> * VOLUNTARY:
> * cond_resched <- __cond_resched
> @@ -7376,6 +7386,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_writ
> * preempt_schedule_notrace <- NOP
> * irqentry_exit_cond_resched <- NOP
> * dynamic_preempt_lazy <- false
> + * dynamic_preempt_promote <- false
> *
> * FULL:
> * cond_resched <- RET0
> @@ -7384,6 +7395,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_writ
> * preempt_schedule_notrace <- preempt_schedule_notrace
> * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
> * dynamic_preempt_lazy <- false
> + * dynamic_preempt_promote <- false
> *
> * LAZY:
> * cond_resched <- RET0
> @@ -7392,6 +7404,16 @@ EXPORT_SYMBOL(__cond_resched_rwlock_writ
> * preempt_schedule_notrace <- preempt_schedule_notrace
> * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
> * dynamic_preempt_lazy <- true
> + * dynamic_preempt_promote <- true
> + *
> + * LAZIEST:
> + * cond_resched <- RET0
> + * might_resched <- RET0
> + * preempt_schedule <- preempt_schedule
> + * preempt_schedule_notrace <- preempt_schedule_notrace
> + * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
> + * dynamic_preempt_lazy <- true
> + * dynamic_preempt_promote <- false
> */
>
> enum {
> @@ -7400,6 +7422,7 @@ enum {
> preempt_dynamic_voluntary,
> preempt_dynamic_full,
> preempt_dynamic_lazy,
> + preempt_dynamic_laziest,
> };
>
> int preempt_dynamic_mode = preempt_dynamic_undefined;
> @@ -7420,6 +7443,9 @@ int sched_dynamic_mode(const char *str)
> #ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY
> if (!strcmp(str, "lazy"))
> return preempt_dynamic_lazy;
> +
> + if (!strcmp(str, "laziest"))
> + return preempt_dynamic_laziest;
> #endif
>
> return -EINVAL;
> @@ -7454,6 +7480,7 @@ static void __sched_dynamic_update(int m
> preempt_dynamic_enable(preempt_schedule_notrace);
> preempt_dynamic_enable(irqentry_exit_cond_resched);
> preempt_dynamic_key_disable(preempt_lazy);
> + preempt_dynamic_key_disable(preempt_promote);
>
> switch (mode) {
> case preempt_dynamic_none:
> @@ -7464,6 +7491,7 @@ static void __sched_dynamic_update(int m
> preempt_dynamic_disable(preempt_schedule_notrace);
> preempt_dynamic_disable(irqentry_exit_cond_resched);
> preempt_dynamic_key_disable(preempt_lazy);
> + preempt_dynamic_key_disable(preempt_promote);
> if (mode != preempt_dynamic_mode)
> pr_info("Dynamic Preempt: none\n");
> break;
> @@ -7476,6 +7504,7 @@ static void __sched_dynamic_update(int m
> preempt_dynamic_disable(preempt_schedule_notrace);
> preempt_dynamic_disable(irqentry_exit_cond_resched);
> preempt_dynamic_key_disable(preempt_lazy);
> + preempt_dynamic_key_disable(preempt_promote);
> if (mode != preempt_dynamic_mode)
> pr_info("Dynamic Preempt: voluntary\n");
> break;
> @@ -7488,6 +7517,7 @@ static void __sched_dynamic_update(int m
> preempt_dynamic_enable(preempt_schedule_notrace);
> preempt_dynamic_enable(irqentry_exit_cond_resched);
> preempt_dynamic_key_disable(preempt_lazy);
> + preempt_dynamic_key_disable(preempt_promote);
> if (mode != preempt_dynamic_mode)
> pr_info("Dynamic Preempt: full\n");
> break;
> @@ -7500,9 +7530,23 @@ static void __sched_dynamic_update(int m
> preempt_dynamic_enable(preempt_schedule_notrace);
> preempt_dynamic_enable(irqentry_exit_cond_resched);
> preempt_dynamic_key_enable(preempt_lazy);
> + preempt_dynamic_key_enable(preempt_promote);
> if (mode != preempt_dynamic_mode)
> pr_info("Dynamic Preempt: lazy\n");
> break;
> +
> + case preempt_dynamic_laziest:
> + if (!klp_override)
> + preempt_dynamic_disable(cond_resched);
> + preempt_dynamic_disable(might_resched);
> + preempt_dynamic_enable(preempt_schedule);
> + preempt_dynamic_enable(preempt_schedule_notrace);
> + preempt_dynamic_enable(irqentry_exit_cond_resched);
> + preempt_dynamic_key_enable(preempt_lazy);
> + preempt_dynamic_key_disable(preempt_promote);
> + if (mode != preempt_dynamic_mode)
> + pr_info("Dynamic Preempt: laziest\n");
> + break;
> }
>
> preempt_dynamic_mode = mode;
> @@ -7567,6 +7611,8 @@ static void __init preempt_dynamic_init(
> sched_dynamic_update(preempt_dynamic_voluntary);
> } else if (IS_ENABLED(CONFIG_PREEMPT_LAZY)) {
> sched_dynamic_update(preempt_dynamic_lazy);
> + } else if (IS_ENABLED(CONFIG_PREEMPT_LAZIEST)) {
> + sched_dynamic_update(preempt_dynamic_laziest);
> } else {
> /* Default static call setting, nothing to do */
> WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
> @@ -7588,6 +7634,7 @@ PREEMPT_MODEL_ACCESSOR(none);
> PREEMPT_MODEL_ACCESSOR(voluntary);
> PREEMPT_MODEL_ACCESSOR(full);
> PREEMPT_MODEL_ACCESSOR(lazy);
> +PREEMPT_MODEL_ACCESSOR(laziest);
>
> #else /* !CONFIG_PREEMPT_DYNAMIC: */
>
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -245,9 +245,9 @@ static ssize_t sched_dynamic_write(struc
> static int sched_dynamic_show(struct seq_file *m, void *v)
> {
> static const char * preempt_modes[] = {
> - "none", "voluntary", "full", "lazy",
> + "none", "voluntary", "full", "lazy", "laziest",
> };
> - int j = ARRAY_SIZE(preempt_modes) - !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
> + int j = ARRAY_SIZE(preempt_modes) - 2*!IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
> int i = IS_ENABLED(CONFIG_PREEMPT_RT) * 2;
>
> for (; i < j; i++) {
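
FWIW, going by the sched_dynamic_mode() and debug.c hunks, with
PREEMPT_DYNAMIC the new model should be selectable at runtime like the
others -- presumably

	# echo laziest > /sys/kernel/debug/sched/preempt

or preempt=laziest on the kernel command line -- which should make it easy
enough to A/B against none and lazy if a workload for it does turn up.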
--
ankur