Re: [PATCH 00/14] sched/topology fixes

From: Peter Zijlstra
Date: Tue May 02 2017 - 10:44:18 EST


On Fri, Apr 28, 2017 at 03:53:39PM +0200, Peter Zijlstra wrote:
> Also, the following occurred to me:
>
> sg_span & sg_mask == sg_mask
>
> Therefore, we don't need to do the whole "sg_span &" business.
>
> Hmm?

> @@ -856,7 +857,7 @@ build_sched_groups(struct sched_domain *
> continue;
>
> group = get_group(i, sdd, &sg);
> - cpumask_setall(sched_group_mask(sg));
> + cpumask_copy(sched_group_mask(sg), sched_group_cpus(sg));
>
> for_each_cpu(j, span) {
> if (get_group(j, sdd, NULL) != group)

OK, so this explodes mightily, which in hindsight makes sense: the copy runs
before the loop below has filled in sched_group_cpus(sg), so the mask ends up
empty.

That code also hurt my brain, so I had to fix it up a little.

The below seems to boot.
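
To make the invariant explicit: sched_group_mask() is only ever built as a
subset of sched_group_cpus(), so span & mask == mask and we can iterate the
mask directly. A toy userspace sketch of that identity (plain ints standing
in for cpumasks, nothing kernel about it):

	#include <assert.h>

	int main(void)
	{
		unsigned int sg_span = 0xf0;	/* group spans CPUs 4-7 */
		unsigned int sg_mask = 0x30;	/* balance mask: CPUs 4-5, a subset */
		unsigned int online  = 0xff;	/* stand-in for env->cpus */

		/* subset => masking with the span is a no-op */
		assert((sg_span & sg_mask) == sg_mask);

		/* so walking (span & online) and testing the mask per CPU
		 * is the same as walking (mask & online) directly */
		assert(((sg_span & online) & sg_mask) == (sg_mask & online));

		return 0;
	}

which is exactly the should_we_balance() simplification in the patch below.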

---
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7996,7 +7996,7 @@ static int active_load_balance_cpu_stop(
static int should_we_balance(struct lb_env *env)
{
struct sched_group *sg = env->sd->groups;
- struct cpumask *sg_cpus, *sg_mask;
+ struct cpumask *sg_mask;
int cpu, balance_cpu = -1;

/*
@@ -8006,11 +8006,10 @@ static int should_we_balance(struct lb_e
if (env->idle == CPU_NEWLY_IDLE)
return 1;

- sg_cpus = sched_group_cpus(sg);
sg_mask = sched_group_mask(sg);
/* Try to find first idle cpu */
- for_each_cpu_and(cpu, sg_cpus, env->cpus) {
- if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu))
+ for_each_cpu_and(cpu, sg_mask, env->cpus) {
+ if (!idle_cpu(cpu))
continue;

balance_cpu = cpu;
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -85,7 +85,8 @@ static int sched_domain_debug_one(struct
group->sgc->id,
cpumask_pr_args(sched_group_cpus(group)));

- if ((sd->flags & SD_OVERLAP) && !cpumask_full(sched_group_mask(group))) {
+ if ((sd->flags & SD_OVERLAP) &&
+ !cpumask_equal(sched_group_mask(group), sched_group_cpus(group))) {
printk(KERN_CONT " mask=%*pbl",
cpumask_pr_args(sched_group_mask(group)));
}
@@ -505,7 +506,7 @@ enum s_alloc {
*/
int group_balance_cpu(struct sched_group *sg)
{
- return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+ return cpumask_first(sched_group_mask(sg));
}


@@ -833,23 +834,34 @@ build_overlap_sched_groups(struct sched_
* [*] in other words, the first group of each domain is its child domain.
*/

-static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
+static struct sched_group *get_group(int cpu, struct sd_data *sdd)
{
struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
struct sched_domain *child = sd->child;
+ struct sched_group *sg;

if (child)
cpu = cpumask_first(sched_domain_span(child));

- if (sg) {
- *sg = *per_cpu_ptr(sdd->sg, cpu);
- (*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu);
+ sg = *per_cpu_ptr(sdd->sg, cpu);
+ sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);

- /* For claim_allocations: */
- atomic_set(&(*sg)->sgc->ref, 1);
+ /* For claim_allocations: */
+ atomic_inc(&sg->ref);
+ atomic_inc(&sg->sgc->ref);
+
+ if (child) {
+ cpumask_copy(sched_group_cpus(sg), sched_domain_span(child));
+ cpumask_copy(sched_group_mask(sg), sched_group_cpus(sg));
+ } else {
+ cpumask_set_cpu(cpu, sched_group_cpus(sg));
+ cpumask_set_cpu(cpu, sched_group_mask(sg));
}

- return cpu;
+ sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_cpus(sg));
+ sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+
+ return sg;
}

/*
@@ -868,34 +880,20 @@ build_sched_groups(struct sched_domain *
struct cpumask *covered;
int i;

- get_group(cpu, sdd, &sd->groups);
- atomic_inc(&sd->groups->ref);
-
- if (cpu != cpumask_first(span))
- return 0;
-
lockdep_assert_held(&sched_domains_mutex);
covered = sched_domains_tmpmask;

cpumask_clear(covered);

- for_each_cpu(i, span) {
+ for_each_cpu_wrap(i, span, cpu) {
struct sched_group *sg;
- int group, j;

if (cpumask_test_cpu(i, covered))
continue;

- group = get_group(i, sdd, &sg);
- cpumask_setall(sched_group_mask(sg));
+ sg = get_group(i, sdd);

- for_each_cpu(j, span) {
- if (get_group(j, sdd, NULL) != group)
- continue;
-
- cpumask_set_cpu(j, covered);
- cpumask_set_cpu(j, sched_group_cpus(sg));
- }
+ cpumask_or(covered, covered, sched_group_cpus(sg));

if (!first)
first = sg;
@@ -904,6 +902,7 @@ build_sched_groups(struct sched_domain *
last = sg;
}
last->next = first;
+ sd->groups = first;

return 0;
}
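
FWIW, with get_group() now initialising the group's span, mask, refcounts and
capacity itself, the resulting build_sched_groups() loop reduces to roughly
this shape (a sketch, with the unchanged context lines filled back in):

	first = last = NULL;
	cpumask_clear(covered);

	for_each_cpu_wrap(i, span, cpu) {
		struct sched_group *sg;

		if (cpumask_test_cpu(i, covered))
			continue;

		sg = get_group(i, sdd);
		cpumask_or(covered, covered, sched_group_cpus(sg));

		if (!first)
			first = sg;
		if (last)
			last->next = sg;
		last = sg;
	}
	last->next = first;	/* close the circular group list */
	sd->groups = first;

And because for_each_cpu_wrap() starts the walk at @cpu, @cpu's own group is
always @first, which is why the explicit get_group(cpu, sdd, &sd->groups) at
the top could go.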