Re: [PATCH v3 2/2] sched: Remove the limitation of WF_ON_CPU on wakelist if wakee cpu is idle

From: Valentin Schneider
Date: Mon Jun 06 2022 - 06:39:16 EST


On 02/06/22 12:06, Tianchen Ding wrote:

> Signed-off-by: Tianchen Ding <dtcccc@xxxxxxxxxxxxxxxxx>

Minor nits below, otherwise:

Reviewed-by: Valentin Schneider <vschneid@xxxxxxxxxx>

> ---
> kernel/sched/core.c | 27 ++++++++++++++++-----------
> kernel/sched/sched.h | 1 -
> 2 files changed, 16 insertions(+), 12 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index a9efe134fbe5..ccb9e0fbf49b 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3808,7 +3808,7 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
> return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
> }
>
> -static inline bool ttwu_queue_cond(int cpu, int wake_flags)
> +static inline bool ttwu_queue_cond(int cpu)
> {
> /*
> * Do not complicate things with the async wake_list while the CPU is
> @@ -3824,13 +3824,21 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
> if (!cpus_share_cache(smp_processor_id(), cpu))
> return true;
>
> + if (cpu == smp_processor_id())
> + return false;
> +
> /*
> - * If the task is descheduling and the only running task on the
> - * CPU then use the wakelist to offload the task activation to
> - * the soon-to-be-idle CPU as the current CPU is likely busy.
> - * nr_running is checked to avoid unnecessary task stacking.
> + * If the wakee cpu is idle, or the task is descheduling and the
> + * only running task on the CPU, then use the wakelist to offload
> + * the task activation to the idle (or soon-to-be-idle) CPU as
> + * the current CPU is likely busy. nr_running is checked to
> + * avoid unnecessary task stacking.
> + *

> + * Note that we can only get here with (wakee) p->on_rq=0,
> + * p->on_cpu can be whatever, we've done the dequeue, so
> + * the wakee has been accounted out of ->nr_running.

If you want to include it, that comment should be added in patch 1 rather
than in patch 2.

> */
> - if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running)
> + if (!cpu_rq(cpu)->nr_running)
> return true;
>
> return false;
> @@ -3838,10 +3846,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
>
> static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
> {
> - if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
> - if (WARN_ON_ONCE(cpu == smp_processor_id()))
> - return false;
> -
> + if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu)) {
> sched_clock_cpu(cpu); /* Sync clocks across CPUs */
> __ttwu_queue_wakelist(p, cpu, wake_flags);
> return true;
> @@ -4163,7 +4168,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
> * scheduling.
> */
> if (smp_load_acquire(&p->on_cpu) &&
> - ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
> + ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
> goto unlock;
>
> /*
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 01259611beb9..1e34bb4527fd 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2039,7 +2039,6 @@ static inline int task_on_rq_migrating(struct task_struct *p)
>
> #define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
> #define WF_MIGRATED 0x20 /* Internal use, task got migrated */
> -#define WF_ON_CPU 0x40 /* Wakee is on_cpu */

There is still a reference to WF_ON_CPU in a comment in prepare_task();
maybe change that one to "smp_load_acquire(&p->on_cpu)".

>
> #ifdef CONFIG_SMP
> static_assert(WF_EXEC == SD_BALANCE_EXEC);
> --
> 2.27.0