Re: [PATCH 6/7 v2] sched/fair: Add misfit case to push task callback for EAS

From: Pierre Gondois
Date: Thu Jan 16 2025 - 12:36:05 EST




On 12/17/24 17:07, Vincent Guittot wrote:
Some task misfit cases can be handled directly by the push callback
instead of triggering an idle load balance to pull the task on a better
CPU.

Aren't misfit tasks migrated using active_load_balance_cpu_stop() rather than
the push mechanism ?

Also, I don't see cases where a misfit task would not be migrated by either
the push mechanism or the misfit handling present in this patch. Is it possible
to detail a case where the misfit load balancer would still be needed ?


Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>

# Conflicts:
# kernel/sched/fair.c
---
kernel/sched/fair.c | 53 +++++++++++++++++++++++++++++----------------
1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2affc063da55..9bddb094ee21 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8541,6 +8541,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
target_stat.runnable = cpu_runnable(cpu_rq(cpu));
target_stat.capa = capacity_of(cpu);
target_stat.nr_running = cpu_rq(cpu)->cfs.h_nr_runnable;
+ if ((p->on_rq) && (!p->se.sched_delayed) && (cpu == prev_cpu))
+ target_stat.nr_running--;
/* If the target needs a lower OPP, then look up for
* the corresponding OPP and its associated cost.
@@ -8623,48 +8625,58 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
static inline bool task_misfit_cpu(struct task_struct *p, int cpu)
{
- unsigned long max_capa = get_actual_cpu_capacity(cpu);
- unsigned long util = task_util_est(p);
+ unsigned long max_capa, util;
+
+ if (p->nr_cpus_allowed == 1)
+ return false;
- max_capa = min(max_capa, uclamp_eff_value(p, UCLAMP_MAX));
- util = max(util, task_runnable(p));
+ max_capa = min(get_actual_cpu_capacity(cpu),
+ uclamp_eff_value(p, UCLAMP_MAX));
+ util = max(task_util_est(p), task_runnable(p));
/*
* Return true only if the task might not sleep/wakeup because of a low
* compute capacity. Tasks, which wake up regularly, will be handled by
* feec().
*/
- return (util > max_capa);
+ if (util > max_capa)
+ return true;
+
+ /* Return true if the task doesn't fit anymore to run on the cpu */
+ if ((arch_scale_cpu_capacity(cpu) < p->max_allowed_capacity) && !task_fits_cpu(p, cpu))
+ return true;

This logic seems to already be present in update_misfit_status(). Maybe it would be
good to factorize it to have a common criteria for misfit tasks.

+
+ return false;
}
static int active_load_balance_cpu_stop(void *data);
-static inline void migrate_misfit_task(struct task_struct *p, struct rq *rq)
+static inline bool migrate_misfit_task(struct task_struct *p, struct rq *rq)
{
int new_cpu, cpu = cpu_of(rq);
if (!sched_energy_enabled() || is_rd_overutilized(rq->rd))
- return;
+ return false;
if (WARN_ON(!p))
- return;
+ return false;
- if (WARN_ON(p != rq->curr))
- return;
+ if (WARN_ON(!task_current(rq, p)))
+ return false;
if (is_migration_disabled(p))
- return;
+ return false;
- if ((rq->nr_running > 1) || (p->nr_cpus_allowed == 1))
- return;
+ if (rq->nr_running > 1)
+ return false;

NIT: Maybe the condition (p->nr_cpus_allowed == 1) could have already been
part of task_misfit_cpu() in the previous patch.

if (!task_misfit_cpu(p, cpu))
- return;
+ return false;
new_cpu = find_energy_efficient_cpu(p, cpu);
if (new_cpu == cpu)
- return;
+ return false;
/*
* ->active_balance synchronizes accesses to
@@ -8675,13 +8687,15 @@ static inline void migrate_misfit_task(struct task_struct *p, struct rq *rq)
rq->active_balance = 1;
rq->push_cpu = new_cpu;
} else
- return;
+ return false;
raw_spin_rq_unlock(rq);
stop_one_cpu_nowait(cpu,
active_load_balance_cpu_stop, rq,
&rq->active_balance_work);
raw_spin_rq_lock(rq);
+
+ return true;
}
static inline int has_pushable_tasks(struct rq *rq)
@@ -13299,9 +13313,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
if (static_branch_unlikely(&sched_numa_balancing))
task_tick_numa(rq, curr);
- migrate_misfit_task(curr, rq);
- update_misfit_status(curr, rq);
- check_update_overutilized_status(task_rq(curr));
+ if (!migrate_misfit_task(curr, rq)) {
+ update_misfit_status(curr, rq);

If the system is not-OU, the only case I see where migrate_misfit_task() would
not detect a misfit task and update_misfit_status() would is if there is another
task on the rq. I.e. through:
migrate_misfit_task()
\-if (rq->nr_running > 1) return false;

However in this case, the push callback should migrate the misfit task. So is it still
necessary to look for misfit task through sched_balance_find_src_group() ?

+ check_update_overutilized_status(task_rq(curr));
+ }
task_tick_core(rq, curr);
}