Re: [PATCH 5/7 v5] sched/fair: Add push task mechanism for EAS

From: Vincent Guittot
Date: Tue Apr 15 2025 - 09:53:21 EST


On Tue, 25 Mar 2025 at 12:16, Christian Loehle <christian.loehle@xxxxxxx> wrote:
>
> On 3/2/25 21:05, Vincent Guittot wrote:
> > EAS is based on wakeup events to efficiently place tasks on the system, but
> > there are cases where a task no longer has wakeup events, or has them at far
> > too low a pace. For such a situation, we can take advantage of the task being
> > put back in the enqueued list to check if it should be pushed to another
> > CPU. When the task is alone on the CPU, it's never put back in the enqueued
> > list; in this special case, we use the tick to run the check.
> >
> > Wakeup events remain the main way to migrate tasks, but we now detect
> > situations where a task is stuck on a CPU by checking that its utilization
> > is larger than the max available compute capacity (max CPU capacity or
> > uclamp max setting).
> >
> > Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
> > ---
> > kernel/sched/fair.c | 220 +++++++++++++++++++++++++++++++++++++++++++
> > kernel/sched/sched.h | 2 +
> > 2 files changed, 222 insertions(+)
> >
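Note: the tick-side hook is not in the hunks quoted below. As a minimal
sketch of the idea, assuming the hook simply sits in task_tick_fair() (the
exact call site is an assumption, not something this quote shows):

	static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
	{
		...
		/*
		 * A task alone on its CPU is never put back in the
		 * enqueued list, so run the push check from the tick.
		 * check_pushable_task() bails out on its own when the
		 * CPU runs more than one task.
		 */
		check_pushable_task(curr, rq);
	}
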
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index a9b97bbc085f..c3e383b86808 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -7051,6 +7051,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > hrtick_update(rq);
> > }
> >
> > +static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
> > static void set_next_buddy(struct sched_entity *se);
> >
> > /*
> > @@ -7081,6 +7082,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> > h_nr_idle = task_has_idle_policy(p);
> > if (task_sleep || task_delayed || !se->sched_delayed)
> > h_nr_runnable = 1;
> > +
> > + fair_remove_pushable_task(rq, p);
> > } else {
> > cfs_rq = group_cfs_rq(se);
> > slice = cfs_rq_min_slice(cfs_rq);
> > @@ -8589,6 +8592,197 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> > return target;
> > }
> >
> > +static inline bool task_stuck_on_cpu(struct task_struct *p, int cpu)
> > +{
> > + unsigned long max_capa, util;
> > +
> > + max_capa = min(get_actual_cpu_capacity(cpu),
> > + uclamp_eff_value(p, UCLAMP_MAX));
> > + util = max(task_util_est(p), task_runnable(p));
> > +
> > + /*
> > + * Return true only if the task might not sleep/wakeup because of a low
> > + * compute capacity. Tasks that wake up regularly will be handled by
> > + * feec().
> > + */
> > + return (util > max_capa);
> > +}
> > +
> > +static inline bool sched_energy_push_task(struct task_struct *p, struct rq *rq)
> > +{
> > + if (p->nr_cpus_allowed == 1)
> > + return false;
> > +
> > + if (is_rd_overutilized(rq->rd))
> > + return false;
> > +
> > + if (task_stuck_on_cpu(p, cpu_of(rq)))
> > + return true;
> > +
> > + return false;
> > +}
> > +
> > +static int active_load_balance_cpu_stop(void *data);
> > +
> > +static inline void check_pushable_task(struct task_struct *p, struct rq *rq)
> > +{
> > + int new_cpu, cpu = cpu_of(rq);
> > +
> > + if (!sched_energy_enabled())
> > + return;
> > +
> > + if (WARN_ON(!p))
> > + return;
> > +
> > + if (WARN_ON(!task_current(rq, p)))
> > + return;
> > +
> > + if (is_migration_disabled(p))
> > + return;
> > +
> > + /* If there are several tasks, wait until the task is put back */
> > + if (rq->nr_running > 1)
> > + return;
> > +
> > + if (!sched_energy_push_task(p, rq))
> > + return;
> > +
> > + new_cpu = find_energy_efficient_cpu(p, cpu);
> > +
> > + if (new_cpu == cpu)
> > + return;
> > +
> > + /*
> > + * ->active_balance synchronizes accesses to
> > + * ->active_balance_work. Once set, it's cleared
> > + * only after active load balance is finished.
> > + */
> > + if (!rq->active_balance) {
> > + rq->active_balance = 1;
> > + rq->push_cpu = new_cpu;
> > + } else
> > + return;
> > +
> > + raw_spin_rq_unlock(rq);
> > + stop_one_cpu_nowait(cpu,
> > + active_load_balance_cpu_stop, rq,
> > + &rq->active_balance_work);
> > + raw_spin_rq_lock(rq);
> > +}
> > +
> > +static inline int has_pushable_tasks(struct rq *rq)
> > +{
> > + return !plist_head_empty(&rq->cfs.pushable_tasks);
> > +}
> > +
> > +static struct task_struct *pick_next_pushable_fair_task(struct rq *rq)
> > +{
> > + struct task_struct *p;
> > +
> > + if (!has_pushable_tasks(rq))
> > + return NULL;
> > +
> > + p = plist_first_entry(&rq->cfs.pushable_tasks,
> > + struct task_struct, pushable_tasks);
> > +
> > + WARN_ON_ONCE(rq->cpu != task_cpu(p));
> > + WARN_ON_ONCE(task_current(rq, p));
> > + WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
> > + WARN_ON_ONCE(!task_on_rq_queued(p));
> > +
> > + /*
> > + * Remove the task from the pushable list, as we only try once after
> > + * the task has been put back in the enqueued list.
> > + */
> > + plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> > +
> > + return p;
> > +}
> > +
> > +/*
> > + * See if the non-running fair tasks on this rq can be sent to other CPUs
> > + * that fit better with their profile.
> > + */
> > +static bool push_fair_task(struct rq *rq)
> > +{
> > + struct task_struct *next_task;
> > + int prev_cpu, new_cpu;
> > + struct rq *new_rq;
> > +
> > + next_task = pick_next_pushable_fair_task(rq);
> > + if (!next_task)
> > + return false;
> > +
> > + if (is_migration_disabled(next_task))
> > + return true;
> > +
> > + /* We might release rq lock */
> > + get_task_struct(next_task);
> > +
> > + prev_cpu = rq->cpu;
> > +
> > + new_cpu = find_energy_efficient_cpu(next_task, prev_cpu);
>
> We aren't gating this on an overutilized check at either call site of this patch

The overutilized check was already done when the task was added to the list.
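
That is the is_rd_overutilized() test in sched_energy_push_task() quoted
above. Assuming the enqueue side uses the same gate when it adds a task to
the pushable list (that hunk is not quoted here), the effect is roughly:

	/* Sketch: only queue p for pushing if the rd is not overutilized. */
	if (sched_energy_push_task(p, rq))
		plist_add(&p->pushable_tasks, &rq->cfs.pushable_tasks);

so push_fair_task() only sees tasks that passed the check at the time they
were put back in the enqueued list.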


> like the other feec() call, and testing shows that this calls feec()
> relatively often while OU.
> Why would it be OK to call feec() here when it isn't on task placement?
>
> > +
> > + if (new_cpu == prev_cpu)
> > + goto out;
> > +
> > + new_rq = cpu_rq(new_cpu);
> > +
> > + if (double_lock_balance(rq, new_rq)) {
> > + /* The task has already migrated in between */
> > + if (task_cpu(next_task) != rq->cpu) {
> > + double_unlock_balance(rq, new_rq);
> > + goto out;
> > + }
> > +
> > + deactivate_task(rq, next_task, 0);
> > + set_task_cpu(next_task, new_cpu);
> > + activate_task(new_rq, next_task, 0);
> > +
> > + resched_curr(new_rq);
> > +
> > + double_unlock_balance(rq, new_rq);
> > + }
> > +
> > +out:
> > + put_task_struct(next_task);
> > +
> > + return true;
> > +}
> > +
> > +static void push_fair_tasks(struct rq *rq)
> > +{
> > + /* push_fair_task() will return true if it moved a fair task */
>
> A bit of a nit, but this isn't technically true: push_fair_task() will
> also return true when the task it found wasn't movable.
>
> [snip]
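
For context on the comment above: the return value drives the caller's
retry loop. Assuming push_fair_tasks() mirrors push_rt_tasks() (its body
is snipped here), it is roughly:

	static void push_fair_tasks(struct rq *rq)
	{
		/* Keep going until no pushable task is left on the list. */
		while (push_fair_task(rq))
			;
	}

i.e. "true" effectively means "there may be more pushable work", which
also covers the migration-disabled case mentioned above.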