[PATCH 4/5] sched/fair: use load instead of runnable load

From: Vincent Guittot
Date: Fri Jul 19 2019 - 03:59:15 EST


runnable load has been introduced to take into account the case
where blocked load biases the load balance decision which was selecting
underutilized group with huge blocked load whereas other groups were
overloaded.

The load is now only used when groups are overloaded. In this case,
it's worth being conservative and taking into account the sleeping
tasks that might wakeup on the cpu.

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
---
kernel/sched/fair.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 472959df..c221713 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5371,6 +5371,11 @@ static unsigned long cpu_runnable_load(struct rq *rq)
return cfs_rq_runnable_load_avg(&rq->cfs);
}

+static unsigned long cpu_load(struct rq *rq)
+{
+ return cfs_rq_load_avg(&rq->cfs);
+}
+
static unsigned long capacity_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity;
@@ -5466,7 +5471,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
s64 this_eff_load, prev_eff_load;
unsigned long task_load;

- this_eff_load = cpu_runnable_load(cpu_rq(this_cpu));
+ this_eff_load = cpu_load(cpu_rq(this_cpu));

if (sync) {
unsigned long current_load = task_h_load(current);
@@ -5484,7 +5489,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
this_eff_load *= 100;
this_eff_load *= capacity_of(prev_cpu);

- prev_eff_load = cpu_runnable_load(cpu_rq(prev_cpu));
+ prev_eff_load = cpu_load(cpu_rq(prev_cpu));
prev_eff_load -= task_load;
if (sched_feat(WA_BIAS))
prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
@@ -5572,7 +5577,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
max_spare_cap = 0;

for_each_cpu(i, sched_group_span(group)) {
- load = cpu_runnable_load(cpu_rq(i));
+ load = cpu_load(cpu_rq(i));
runnable_load += load;

avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
@@ -5708,7 +5713,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
shallowest_idle_cpu = i;
}
} else if (shallowest_idle_cpu == -1) {
- load = cpu_runnable_load(cpu_rq(i));
+ load = cpu_load(cpu_rq(i));
if (load < min_load) {
min_load = load;
least_loaded_cpu = i;
@@ -8030,7 +8035,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false))
env->flags |= LBF_NOHZ_AGAIN;

- sgs->group_load += cpu_runnable_load(rq);
+ sgs->group_load += cpu_load(rq);
sgs->group_util += cpu_util(i);
sgs->sum_h_nr_running += rq->cfs.h_nr_running;
nr_running = rq->nr_running;
@@ -8446,7 +8451,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
init_sd_lb_stats(&sds);

/*
- * Compute the various statistics relavent for load balancing at
+ * Compute the various statistics relevant for load balancing at
* this level.
*/
update_sd_lb_stats(env, &sds);
@@ -8641,10 +8646,10 @@ static struct rq *find_busiest_queue(struct lb_env *env,
}

/*
- * When comparing with load imbalance, use weighted_cpuload()
+ * When comparing with load imbalance, use cpu_load()
* which is not scaled with the CPU capacity.
*/
- load = cpu_runnable_load(rq);
+ load = cpu_load(rq);

if (rq->nr_running == 1 && load > env->imbalance &&
!check_cpu_capacity(rq, env->sd))
--
2.7.4