Re: [PATCH 3/5] sched/fair: rework load_balance

From: Peter Zijlstra
Date: Fri Jul 19 2019 - 09:13:02 EST


On Fri, Jul 19, 2019 at 09:58:23AM +0200, Vincent Guittot wrote:

> @@ -8029,17 +8063,24 @@ static inline void update_sg_lb_stats(struct lb_env *env,
> }
> }
>
> - /* Adjust by relative CPU capacity of the group */
> - sgs->group_capacity = group->sgc->capacity;
> - sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
> + /* Check if dst cpu is idle and preferred to this group */
> + if (env->sd->flags & SD_ASYM_PACKING &&
> + env->idle != CPU_NOT_IDLE &&
> + sgs->sum_h_nr_running &&
> + sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu)) {
> + sgs->group_asym_capacity = 1;
> + }
>
> - if (sgs->sum_h_nr_running)
> - sgs->load_per_task = sgs->group_load / sgs->sum_h_nr_running;
> + sgs->group_capacity = group->sgc->capacity;
>
> sgs->group_weight = group->group_weight;
>
> - sgs->group_no_capacity = group_is_overloaded(env, sgs);
> - sgs->group_type = group_classify(group, sgs);
> + sgs->group_type = group_classify(env, group, sgs);
> +
> + /* Computing avg_load makes sense only when group is overloaded */
> + if (sgs->group_type != group_overloaded)

The comment seems to suggest you meant '==' here rather than '!='.

> + sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) /
> + sgs->group_capacity;
> }
>
> /**
> @@ -8070,7 +8111,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> */
> if (sgs->group_type == group_misfit_task &&
> (!group_smaller_max_cpu_capacity(sg, sds->local) ||
> - !group_has_capacity(env, &sds->local_stat)))
> + sds->local_stat.group_type != group_has_spare))
> return false;
>
> if (sgs->group_type > busiest->group_type)
> @@ -8079,11 +8120,18 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> if (sgs->group_type < busiest->group_type)
> return false;
>
> - if (sgs->avg_load <= busiest->avg_load)
> + /* Select the overloaded group with highest avg_load */
> + if (sgs->group_type == group_overloaded &&
> + sgs->avg_load <= busiest->avg_load)

And this code suggests you meant '==' too; because with the above '!=',
avg_load is never computed for overloaded groups, so you're comparing
uninitialized data here, no?

> + return false;
> +
> + /* Prefer to move from lowest priority CPU's work */
> + if (sgs->group_type == group_asym_capacity && sds->busiest &&
> + sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
> return false;
>
> if (!(env->sd->flags & SD_ASYM_CPUCAPACITY))
> - goto asym_packing;
> + goto spare_capacity;
>
> /*
> * Candidate sg has no more than one task per CPU and