[tip: sched/core] sched/fair: Optimize find_idlest_group()

From: tip-bot2 for Vincent Guittot
Date: Mon Oct 21 2019 - 05:13:43 EST


The following commit has been merged into the sched/core branch of tip:

Commit-ID: fc1273f4cefe6670d528715581c848abf64f391c
Gitweb: https://git.kernel.org/tip/fc1273f4cefe6670d528715581c848abf64f391c
Author: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
AuthorDate: Fri, 18 Oct 2019 15:26:37 +02:00
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Mon, 21 Oct 2019 09:40:55 +02:00

sched/fair: Optimize find_idlest_group()

find_idlest_group() now reads CPU's load_avg in two different ways.

Consolidate the function to read and use load_avg only once and simplify
the algorithm to only look for the group with lowest load_avg.

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Ben Segall <bsegall@xxxxxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Mike Galbraith <efault@xxxxxx>
Cc: Morten.Rasmussen@xxxxxxx
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: hdanton@xxxxxxxx
Cc: parth@xxxxxxxxxxxxx
Cc: pauld@xxxxxxxxxx
Cc: quentin.perret@xxxxxxx
Cc: riel@xxxxxxxxxxx
Cc: srikar@xxxxxxxxxxxxxxxxxx
Cc: valentin.schneider@xxxxxxx
Link: https://lkml.kernel.org/r/1571405198-27570-11-git-send-email-vincent.guittot@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
kernel/sched/fair.c | 50 ++++++++++++--------------------------------
1 file changed, 14 insertions(+), 36 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b0703b4..95a57c7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5550,16 +5550,14 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
{
struct sched_group *idlest = NULL, *group = sd->groups;
struct sched_group *most_spare_sg = NULL;
- unsigned long min_runnable_load = ULONG_MAX;
- unsigned long this_runnable_load = ULONG_MAX;
- unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX;
+ unsigned long min_load = ULONG_MAX, this_load = ULONG_MAX;
unsigned long most_spare = 0, this_spare = 0;
int imbalance_scale = 100 + (sd->imbalance_pct-100)/2;
unsigned long imbalance = scale_load_down(NICE_0_LOAD) *
(sd->imbalance_pct-100) / 100;

do {
- unsigned long load, avg_load, runnable_load;
+ unsigned long load;
unsigned long spare_cap, max_spare_cap;
int local_group;
int i;
@@ -5576,15 +5574,11 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
* Tally up the load of all CPUs in the group and find
* the group containing the CPU with most spare capacity.
*/
- avg_load = 0;
- runnable_load = 0;
+ load = 0;
max_spare_cap = 0;

for_each_cpu(i, sched_group_span(group)) {
- load = cpu_load(cpu_rq(i));
- runnable_load += load;
-
- avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
+ load += cpu_load(cpu_rq(i));

spare_cap = capacity_spare_without(i, p);

@@ -5593,31 +5587,15 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
}

/* Adjust by relative CPU capacity of the group */
- avg_load = (avg_load * SCHED_CAPACITY_SCALE) /
- group->sgc->capacity;
- runnable_load = (runnable_load * SCHED_CAPACITY_SCALE) /
+ load = (load * SCHED_CAPACITY_SCALE) /
group->sgc->capacity;

if (local_group) {
- this_runnable_load = runnable_load;
- this_avg_load = avg_load;
+ this_load = load;
this_spare = max_spare_cap;
} else {
- if (min_runnable_load > (runnable_load + imbalance)) {
- /*
- * The runnable load is significantly smaller
- * so we can pick this new CPU:
- */
- min_runnable_load = runnable_load;
- min_avg_load = avg_load;
- idlest = group;
- } else if ((runnable_load < (min_runnable_load + imbalance)) &&
- (100*min_avg_load > imbalance_scale*avg_load)) {
- /*
- * The runnable loads are close so take the
- * blocked load into account through avg_load:
- */
- min_avg_load = avg_load;
+ if (load < min_load) {
+ min_load = load;
idlest = group;
}

@@ -5658,18 +5636,18 @@ skip_spare:
* local domain to be very lightly loaded relative to the remote
* domains but "imbalance" skews the comparison making remote CPUs
* look much more favourable. When considering cross-domain, add
- * imbalance to the runnable load on the remote node and consider
- * staying local.
+ * imbalance to the load on the remote node and consider staying
+ * local.
*/
if ((sd->flags & SD_NUMA) &&
- min_runnable_load + imbalance >= this_runnable_load)
+ min_load + imbalance >= this_load)
return NULL;

- if (min_runnable_load > (this_runnable_load + imbalance))
+ if (min_load >= this_load + imbalance)
return NULL;

- if ((this_runnable_load < (min_runnable_load + imbalance)) &&
- (100*this_avg_load < imbalance_scale*min_avg_load))
+ if ((this_load < (min_load + imbalance)) &&
+ (100*this_load < imbalance_scale*min_load))
return NULL;

return idlest;