[PATCH] sched/fair: Fix negative energy delta in find_energy_efficient_cpu()

From: Pierre . Gondois
Date: Tue Apr 20 2021 - 08:57:39 EST


From: Pierre Gondois <Pierre.Gondois@xxxxxxx>

find_energy_efficient_cpu() (feec()) searches the best energy CPU
to place a task on. To do so, compute_energy() estimates the energy
impact of placing the task on a CPU, based on CPU and task utilization
signals.

Utilization signals can be concurrently updated while evaluating a
perf_domain. In some cases, this leads to having a 'negative delta',
i.e. placing the task in the perf_domain is seen as an energy gain.
Thus, any further energy comparison is biased.

In case of a 'negative delta', return prev_cpu since:
1. a 'negative delta' happens in less than 0.5% of feec() calls,
on a Juno with 6 CPUs (4 little, 2 big)
2. it is unlikely to have two consecutive 'negative delta' for
a task, so if the first call fails, feec() will correctly
place the task in the next feec() call
3. EAS current behavior tends to select prev_cpu if the task
doesn't raise the OPP of its current perf_domain. prev_cpu
is EAS's generic decision
4. prev_cpu should be preferred to returning an error code.
In the latter case, select_idle_sibling() would do the placement,
selecting a big (and not energy efficient) CPU. As 3., the task
would potentially reside on the big CPU for a long time

The patch also:
a. groups the compute_energy() calls to lower the chances of having
concurrent updates in between the calls
b. skips the base_energy_pd computation if no CPU is available in a
perf_domain

Fixes: eb92692b2544d sched/fair: Speed-up energy-aware wake-up
Reported-by: Xuewen Yan <xuewen.yan@xxxxxxxxxx>
Suggested-by: Xuewen Yan <xuewen.yan@xxxxxxxxxx>
Signed-off-by: Pierre Gondois <Pierre.Gondois@xxxxxxx>
---
kernel/sched/fair.c | 69 +++++++++++++++++++++++++--------------------
1 file changed, 39 insertions(+), 30 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0dba0ebc3657..577482aa8919 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6594,8 +6594,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
{
unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+ int cpu, best_energy_cpu = prev_cpu, target = -1;
unsigned long cpu_cap, util, base_energy = 0;
- int cpu, best_energy_cpu = prev_cpu;
struct sched_domain *sd;
struct perf_domain *pd;

@@ -6614,19 +6614,18 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (!sd)
goto fail;

+ target = prev_cpu;
+
sync_entity_load_avg(&p->se);
if (!task_util_est(p))
- goto unlock;
+ goto fail;

for (; pd; pd = pd->next) {
unsigned long cur_delta, spare_cap, max_spare_cap = 0;
+ bool compute_prev_delta = false;
unsigned long base_energy_pd;
int max_spare_cap_cpu = -1;

- /* Compute the 'base' energy of the pd, without @p */
- base_energy_pd = compute_energy(p, -1, pd);
- base_energy += base_energy_pd;
-
for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
@@ -6647,26 +6646,41 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (!fits_capacity(util, cpu_cap))
continue;

- /* Always use prev_cpu as a candidate. */
if (cpu == prev_cpu) {
- prev_delta = compute_energy(p, prev_cpu, pd);
- prev_delta -= base_energy_pd;
- best_delta = min(best_delta, prev_delta);
- }
-
- /*
- * Find the CPU with the maximum spare capacity in
- * the performance domain
- */
- if (spare_cap > max_spare_cap) {
+ /* Always use prev_cpu as a candidate. */
+ compute_prev_delta = true;
+ } else if (spare_cap > max_spare_cap) {
+ /*
+ * Find the CPU with the maximum spare capacity
+ * in the performance domain.
+ */
max_spare_cap = spare_cap;
max_spare_cap_cpu = cpu;
}
}

+ if (max_spare_cap_cpu < 0 && !compute_prev_delta)
+ continue;
+
+ /* Compute the 'base' energy of the pd, without @p */
+ base_energy_pd = compute_energy(p, -1, pd);
+ base_energy += base_energy_pd;
+
+ if (compute_prev_delta) {
+ prev_delta = compute_energy(p, prev_cpu, pd);
+ /* Prevent negative deltas and select prev_cpu */
+ if (prev_delta < base_energy_pd)
+ goto fail;
+ prev_delta -= base_energy_pd;
+ best_delta = min(best_delta, prev_delta);
+ }
+
/* Evaluate the energy impact of using this CPU. */
- if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
+ if (max_spare_cap_cpu >= 0) {
cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
+ /* Prevent negative deltas and select prev_cpu */
+ if (cur_delta < base_energy_pd)
+ goto fail;
cur_delta -= base_energy_pd;
if (cur_delta < best_delta) {
best_delta = cur_delta;
@@ -6674,25 +6688,20 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
}
}
}
-unlock:
- rcu_read_unlock();

/*
- * Pick the best CPU if prev_cpu cannot be used, or if it saves at
- * least 6% of the energy used by prev_cpu.
+ * Pick the best CPU if:
+ * - prev_cpu cannot be used, or
+ * - it saves at least 6% of the energy used by prev_cpu
*/
- if (prev_delta == ULONG_MAX)
- return best_energy_cpu;
-
- if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
- return best_energy_cpu;
-
- return prev_cpu;
+ if ((prev_delta == ULONG_MAX) ||
+ (prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
+ target = best_energy_cpu;

fail:
rcu_read_unlock();

- return -1;
+ return target;
}

/*
--
2.17.1