[tip: sched/core] sched/fair: Use load instead of runnable load in wakeup path

From: tip-bot2 for Vincent Guittot
Date: Mon Oct 21 2019 - 05:13:22 EST


The following commit has been merged into the sched/core branch of tip:

Commit-ID: 11f10e5420f6cecac7d4823638bff040c257aba9
Gitweb: https://git.kernel.org/tip/11f10e5420f6cecac7d4823638bff040c257aba9
Author: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
AuthorDate: Fri, 18 Oct 2019 15:26:36 +02:00
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Mon, 21 Oct 2019 09:40:55 +02:00

sched/fair: Use load instead of runnable load in wakeup path

Runnable load was originally introduced to take into account the case where
blocked load biases the wake up path, which may end up selecting an overloaded
CPU with a large number of runnable tasks instead of an underutilized
CPU with a huge blocked load.

The wake up path now starts looking for idle CPUs before comparing
runnable load and it's worth aligning the wake up path with the
load_balance() logic.

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Ben Segall <bsegall@xxxxxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Mike Galbraith <efault@xxxxxx>
Cc: Morten.Rasmussen@xxxxxxx
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: hdanton@xxxxxxxx
Cc: parth@xxxxxxxxxxxxx
Cc: pauld@xxxxxxxxxx
Cc: quentin.perret@xxxxxxx
Cc: riel@xxxxxxxxxxx
Cc: srikar@xxxxxxxxxxxxxxxxxx
Cc: valentin.schneider@xxxxxxx
Link: https://lkml.kernel.org/r/1571405198-27570-10-git-send-email-vincent.guittot@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
kernel/sched/fair.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1fd6f39..b0703b4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1474,7 +1474,12 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
}

-static unsigned long cpu_runnable_load(struct rq *rq);
+static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
+
+static unsigned long cpu_runnable_load(struct rq *rq)
+{
+ return cfs_rq_runnable_load_avg(&rq->cfs);
+}

/* Cached statistics for all CPUs within a node */
struct numa_stats {
@@ -5370,11 +5375,6 @@ static int sched_idle_cpu(int cpu)
rq->nr_running);
}

-static unsigned long cpu_runnable_load(struct rq *rq)
-{
- return cfs_rq_runnable_load_avg(&rq->cfs);
-}
-
static unsigned long cpu_load(struct rq *rq)
{
return cfs_rq_load_avg(&rq->cfs);
@@ -5475,7 +5475,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
s64 this_eff_load, prev_eff_load;
unsigned long task_load;

- this_eff_load = cpu_runnable_load(cpu_rq(this_cpu));
+ this_eff_load = cpu_load(cpu_rq(this_cpu));

if (sync) {
unsigned long current_load = task_h_load(current);
@@ -5493,7 +5493,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
this_eff_load *= 100;
this_eff_load *= capacity_of(prev_cpu);

- prev_eff_load = cpu_runnable_load(cpu_rq(prev_cpu));
+ prev_eff_load = cpu_load(cpu_rq(prev_cpu));
prev_eff_load -= task_load;
if (sched_feat(WA_BIAS))
prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
@@ -5581,7 +5581,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
max_spare_cap = 0;

for_each_cpu(i, sched_group_span(group)) {
- load = cpu_runnable_load(cpu_rq(i));
+ load = cpu_load(cpu_rq(i));
runnable_load += load;

avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
@@ -5722,7 +5722,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
continue;
}

- load = cpu_runnable_load(cpu_rq(i));
+ load = cpu_load(cpu_rq(i));
if (load < min_load) {
min_load = load;
least_loaded_cpu = i;