Re: [RFC PATCH v7] sched/fair: select idle cpu from idle cpumask for task wakeup

From: Peter Zijlstra
Date: Fri Dec 11 2020 - 12:46:50 EST


On Thu, Dec 10, 2020 at 12:58:33PM +0000, Mel Gorman wrote:
> The prerequisite patch to make that approach work was rejected though
> as on its own, it's not very helpful and Vincent didn't like that the
> load_balance_mask was abused to make it effective.

So last time I poked at all this, I found that using more masks was a
performance issue as well due to added cache misses.

Anyway, I finally found some time to look at all this, and while reading
over the whole SiS crud to refresh the brain came up with the below...

It's still naff, but at least it gets rid of a bunch of duplicate
scanning and the LoC decreases, so it should be awesome. Of course, as
always, benchmarks will totally ruin everything; we'll see — I've started
some.

It goes on top of Mel's first two patches (which is about as far as I
got)...

---
fair.c | 124 ++++++++++++++++++++++++-----------------------------------------
1 file changed, 47 insertions(+), 77 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6021,11 +6021,13 @@ static inline void set_idle_cores(int cp

static inline bool test_idle_cores(int cpu, bool def)
{
- struct sched_domain_shared *sds;
+ if (static_branch_likely(&sched_smt_present)) {
+ struct sched_domain_shared *sds;

- sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
- if (sds)
- return READ_ONCE(sds->has_idle_cores);
+ sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+ if (sds)
+ return READ_ONCE(sds->has_idle_cores);
+ }

return def;
}
@@ -6059,77 +6061,39 @@ void __update_idle_core(struct rq *rq)
rcu_read_unlock();
}

-/*
- * Scan the entire LLC domain for idle cores; this dynamically switches off if
- * there are no idle cores left in the system; tracked through
- * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
- */
-static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
+static int __select_idle_core(int core, struct cpumask *cpus, int *idle_cpu)
{
- struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
- int core, cpu;
-
- if (!static_branch_likely(&sched_smt_present))
- return -1;
-
- if (!test_idle_cores(target, false))
- return -1;
-
- cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
-
- for_each_cpu_wrap(core, cpus, target) {
- bool idle = true;
+ bool idle = true;
+ int cpu;

- for_each_cpu(cpu, cpu_smt_mask(core)) {
- if (!available_idle_cpu(cpu)) {
- idle = false;
- break;
- }
+ for_each_cpu(cpu, cpu_smt_mask(core)) {
+ if (!available_idle_cpu(cpu)) {
+ idle = false;
+ continue;
}
-
- if (idle)
- return core;
-
- cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
+ if (idle_cpu)
+ *idle_cpu = cpu;
}

- /*
- * Failed to find an idle core; stop looking for one.
- */
- set_idle_cores(target, 0);
+ if (idle)
+ return core;

+ cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
return -1;
}

-/*
- * Scan the local SMT mask for idle CPUs.
- */
-static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
-{
- int cpu;
-
- if (!static_branch_likely(&sched_smt_present))
- return -1;
-
- for_each_cpu(cpu, cpu_smt_mask(target)) {
- if (!cpumask_test_cpu(cpu, p->cpus_ptr) ||
- !cpumask_test_cpu(cpu, sched_domain_span(sd)))
- continue;
- if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
- return cpu;
- }
+#else /* CONFIG_SCHED_SMT */

- return -1;
+static inline void set_idle_cores(int cpu, int val)
+{
}

-#else /* CONFIG_SCHED_SMT */
-
-static inline int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
+static inline bool test_idle_cores(int cpu, bool def)
{
- return -1;
+ return def;
}

-static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
+static inline int __select_idle_core(int core, struct cpumask *cpus, int *idle_cpu)
{
return -1;
}
@@ -6144,10 +6108,11 @@ static inline int select_idle_smt(struct
static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
{
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+ int this = smp_processor_id();
+ bool smt = test_idle_cores(this, false);
+ int i, cpu, idle_cpu = -1, nr = INT_MAX;
struct sched_domain *this_sd;
u64 time;
- int this = smp_processor_id();
- int cpu, nr = INT_MAX;

this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
if (!this_sd)
@@ -6155,7 +6120,7 @@ static int select_idle_cpu(struct task_s

cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);

- if (sched_feat(SIS_PROP)) {
+ if (sched_feat(SIS_PROP) && !smt) {
u64 avg_cost, avg_idle, span_avg;

/*
@@ -6175,18 +6140,31 @@ static int select_idle_cpu(struct task_s
}

for_each_cpu_wrap(cpu, cpus, target) {
- if (!--nr)
- return -1;
- if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
- break;
+ if (smt) {
+ i = __select_idle_core(cpu, cpus, &idle_cpu);
+ if ((unsigned)i < nr_cpumask_bits)
+ return i;
+
+ } else {
+ if (!--nr)
+ return -1;
+
+ if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
+ idle_cpu = cpu;
+ break;
+ }
+ }
}

- if (sched_feat(SIS_PROP)) {
+ if (smt)
+ set_idle_cores(this, false);
+
+ if (sched_feat(SIS_PROP) && !smt) {
time = cpu_clock(this) - time;
update_avg(&this_sd->avg_scan_cost, time);
}

- return cpu;
+ return idle_cpu;
}

/*
@@ -6315,18 +6293,10 @@ static int select_idle_sibling(struct ta
if (!sd)
return target;

- i = select_idle_core(p, sd, target);
- if ((unsigned)i < nr_cpumask_bits)
- return i;
-
i = select_idle_cpu(p, sd, target);
if ((unsigned)i < nr_cpumask_bits)
return i;

- i = select_idle_smt(p, sd, target);
- if ((unsigned)i < nr_cpumask_bits)
- return i;
-
return target;
}