[PATCH v4 09/11] sched/fair: use load instead of runnable load in wakeup path

From: Vincent Guittot
Date: Fri Oct 18 2019 - 09:27:05 EST


Runnable load was introduced to take into account the case where blocked
load biases the wake up path, which may end up selecting an overloaded
CPU with a large number of runnable tasks instead of an underutilized
CPU with a huge blocked load.

The wake up path now starts by looking for idle CPUs before comparing
runnable load, so it is worth aligning the wake up path with
load_balance and using load there as well.
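
For context, a minimal user-space sketch of the distinction this change
relies on (a toy model with made-up numbers, not the kernel's PELT
implementation): "load" also accounts for blocked tasks' contribution,
while "runnable load" only sums the tasks currently queued on the CPU.

#include <stdio.h>
#include <stdbool.h>

/* Toy sketch only: not kernel code, not PELT. */
struct toy_task {
	unsigned long load;	/* hypothetical per-task load contribution */
	bool runnable;		/* currently on the runqueue? */
};

/* "load": sums runnable and blocked contributions alike */
static unsigned long toy_cpu_load(const struct toy_task *t, int nr)
{
	unsigned long sum = 0;
	int i;

	for (i = 0; i < nr; i++)
		sum += t[i].load;
	return sum;
}

/* "runnable load": sums only tasks that are currently runnable */
static unsigned long toy_cpu_runnable_load(const struct toy_task *t, int nr)
{
	unsigned long sum = 0;
	int i;

	for (i = 0; i < nr; i++)
		if (t[i].runnable)
			sum += t[i].load;
	return sum;
}

int main(void)
{
	/* CPU A: four runnable tasks, no blocked load */
	struct toy_task a[] = {
		{ 300, true }, { 300, true }, { 300, true }, { 300, true },
	};
	/* CPU B: one runnable task, two blocked tasks with big load */
	struct toy_task b[] = {
		{ 300, true }, { 900, false }, { 900, false },
	};

	printf("CPU A: load=%lu runnable_load=%lu\n",
	       toy_cpu_load(a, 4), toy_cpu_runnable_load(a, 4));
	printf("CPU B: load=%lu runnable_load=%lu\n",
	       toy_cpu_load(b, 3), toy_cpu_runnable_load(b, 3));
	return 0;
}

In this toy example, CPU B looks almost idle under runnable load despite
its large blocked load, while under load it looks heavier than CPU A;
the latter is the view load_balance already uses, and since idle CPUs
are now scanned first, the wake up path can use it too.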

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
---
kernel/sched/fair.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 670856d..6203e71 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1475,7 +1475,12 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
}

-static unsigned long cpu_runnable_load(struct rq *rq);
+static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
+
+static unsigned long cpu_runnable_load(struct rq *rq)
+{
+ return cfs_rq_runnable_load_avg(&rq->cfs);
+}

/* Cached statistics for all CPUs within a node */
struct numa_stats {
@@ -5380,11 +5385,6 @@ static int sched_idle_cpu(int cpu)
rq->nr_running);
}

-static unsigned long cpu_runnable_load(struct rq *rq)
-{
- return cfs_rq_runnable_load_avg(&rq->cfs);
-}
-
static unsigned long cpu_load(struct rq *rq)
{
return cfs_rq_load_avg(&rq->cfs);
@@ -5485,7 +5485,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
s64 this_eff_load, prev_eff_load;
unsigned long task_load;

- this_eff_load = cpu_runnable_load(cpu_rq(this_cpu));
+ this_eff_load = cpu_load(cpu_rq(this_cpu));

if (sync) {
unsigned long current_load = task_h_load(current);
@@ -5503,7 +5503,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
this_eff_load *= 100;
this_eff_load *= capacity_of(prev_cpu);

- prev_eff_load = cpu_runnable_load(cpu_rq(prev_cpu));
+ prev_eff_load = cpu_load(cpu_rq(prev_cpu));
prev_eff_load -= task_load;
if (sched_feat(WA_BIAS))
prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
@@ -5591,7 +5591,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
max_spare_cap = 0;

for_each_cpu(i, sched_group_span(group)) {
- load = cpu_runnable_load(cpu_rq(i));
+ load = cpu_load(cpu_rq(i));
runnable_load += load;

avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
@@ -5732,7 +5732,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
continue;
}

- load = cpu_runnable_load(cpu_rq(i));
+ load = cpu_load(cpu_rq(i));
if (load < min_load) {
min_load = load;
least_loaded_cpu = i;
--
2.7.4