Re: [PATCH 00/14] sched/topology fixes

From: Lauro Venancio
Date: Tue May 02 2017 - 10:55:02 EST


On 05/02/2017 11:43 AM, Peter Zijlstra wrote:
> On Fri, Apr 28, 2017 at 03:53:39PM +0200, Peter Zijlstra wrote:
>> Also, the following occurred to me:
>>
>> sg_span & sg_mask == sg_mask
>>
>> Therefore, we don't need to do the whole "sg_span &" business.
>>
>> Hmm?
>> @@ -856,7 +857,7 @@ build_sched_groups(struct sched_domain *
>> continue;
>>
>> group = get_group(i, sdd, &sg);
>> - cpumask_setall(sched_group_mask(sg));
>> + cpumask_copy(sched_group_mask(sg), sched_group_cpus(sg));
>>
>> for_each_cpu(j, span) {
>> if (get_group(j, sdd, NULL) != group)
> OK, so this explodes mightily.
>
> That code also hurt my brain bad, so I had to fix that a little.
>
> The below seems to boot.
>
> ---
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7996,7 +7996,7 @@ static int active_load_balance_cpu_stop(
> static int should_we_balance(struct lb_env *env)
> {
> struct sched_group *sg = env->sd->groups;
> - struct cpumask *sg_cpus, *sg_mask;
> + struct cpumask *sg_mask;
> int cpu, balance_cpu = -1;
>
> /*
> @@ -8006,11 +8006,10 @@ static int should_we_balance(struct lb_e
> if (env->idle == CPU_NEWLY_IDLE)
> return 1;
>
> - sg_cpus = sched_group_cpus(sg);
> sg_mask = sched_group_mask(sg);
> /* Try to find first idle cpu */
> - for_each_cpu_and(cpu, sg_cpus, env->cpus) {
> - if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu))
> + for_each_cpu_and(cpu, sg_mask, env->cpus) {
> + if (!idle_cpu(cpu))
> continue;
>
> balance_cpu = cpu;
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -85,7 +85,8 @@ static int sched_domain_debug_one(struct
> group->sgc->id,
> cpumask_pr_args(sched_group_cpus(group)));
>
> - if ((sd->flags & SD_OVERLAP) && !cpumask_full(sched_group_mask(group))) {
> + if ((sd->flags & SD_OVERLAP) &&
> + !cpumask_equal(sched_group_mask(group), sched_group_cpus(group))) {
> printk(KERN_CONT " mask=%*pbl",
> cpumask_pr_args(sched_group_mask(group)));
> }
> @@ -505,7 +506,7 @@ enum s_alloc {
> */
> int group_balance_cpu(struct sched_group *sg)
> {
> - return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
> + return cpumask_first(sched_group_mask(sg));
> }
>
>
> @@ -833,23 +834,34 @@ build_overlap_sched_groups(struct sched_
> * [*] in other words, the first group of each domain is its child domain.
> */
>
> -static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
> +static struct sched_group *get_group(int cpu, struct sd_data *sdd)
> {
> struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
> struct sched_domain *child = sd->child;
> + struct sched_group *sg;
>
> if (child)
> cpu = cpumask_first(sched_domain_span(child));
>
> - if (sg) {
> - *sg = *per_cpu_ptr(sdd->sg, cpu);
> - (*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu);
> + sg = *per_cpu_ptr(sdd->sg, cpu);
> + sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
>
> - /* For claim_allocations: */
> - atomic_set(&(*sg)->sgc->ref, 1);
> + /* For claim_allocations: */
> + atomic_inc(&sg->ref);
> + atomic_inc(&sg->sgc->ref);
> +
> + if (child) {
> + cpumask_copy(sched_group_cpus(sg), sched_domain_span(child));
> + cpumask_copy(sched_group_mask(sg), sched_group_cpus(sg));
> + } else {
> + cpumask_set_cpu(cpu, sched_group_cpus(sg));
> + cpumask_set_cpu(cpu, sched_group_cpus(sg));
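Typo here: the second call should be cpumask_set_cpu(cpu, sched_group_mask(sg)). As written, sched_group_cpus(sg) is set twice and the mask is not being set in the else clause.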

> }
>
> - return cpu;
> + sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_cpus(sg));
> + sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
> +
> + return sg;
> }
>
> /*
> @@ -868,34 +880,20 @@ build_sched_groups(struct sched_domain *
> struct cpumask *covered;
> int i;
>
> - get_group(cpu, sdd, &sd->groups);
> - atomic_inc(&sd->groups->ref);
> -
> - if (cpu != cpumask_first(span))
> - return 0;
> -
> lockdep_assert_held(&sched_domains_mutex);
> covered = sched_domains_tmpmask;
>
> cpumask_clear(covered);
>
> - for_each_cpu(i, span) {
> + for_each_cpu_wrap(i, span, cpu) {
> struct sched_group *sg;
> - int group, j;
>
> if (cpumask_test_cpu(i, covered))
> continue;
>
> - group = get_group(i, sdd, &sg);
> - cpumask_setall(sched_group_mask(sg));
> + sg = get_group(i, sdd);
>
> - for_each_cpu(j, span) {
> - if (get_group(j, sdd, NULL) != group)
> - continue;
> -
> - cpumask_set_cpu(j, covered);
> - cpumask_set_cpu(j, sched_group_cpus(sg));
> - }
> + cpumask_or(covered, covered, sched_group_cpus(sg));
>
> if (!first)
> first = sg;
> @@ -904,6 +902,7 @@ build_sched_groups(struct sched_domain *
> last = sg;
> }
> last->next = first;
> + sd->groups = first;
>
> return 0;
> }