Re: [PATCH 3/5] sched/fair: rework load_balance

From: Peter Zijlstra
Date: Fri Jul 19 2019 - 09:22:20 EST


On Fri, Jul 19, 2019 at 09:58:23AM +0200, Vincent Guittot wrote:
> enum group_type {
> - group_other = 0,
> + group_has_spare = 0,
> + group_fully_busy,
> group_misfit_task,
> + group_asym_capacity,
> group_imbalanced,
> group_overloaded,
> };

The order of this group_type is important, maybe add a few words on how
this order got to be.

> static inline enum
> -group_type group_classify(struct sched_group *group,
> +group_type group_classify(struct lb_env *env,
> + struct sched_group *group,
> struct sg_lb_stats *sgs)
> {
> - if (sgs->group_no_capacity)
> + if (group_is_overloaded(env, sgs))
> return group_overloaded;
>
> if (sg_imbalanced(group))
> @@ -7953,7 +7975,13 @@ group_type group_classify(struct sched_group *group,
> if (sgs->group_misfit_task_load)
> return group_misfit_task;
>
> - return group_other;
> + if (sgs->group_asym_capacity)
> + return group_asym_capacity;
> +
> + if (group_has_capacity(env, sgs))
> + return group_has_spare;
> +
> + return group_fully_busy;
> }

OCD is annoyed that this function doesn't have the same / reverse order
of the one in the enum.

> @@ -8070,7 +8111,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> */
> if (sgs->group_type == group_misfit_task &&
> (!group_smaller_max_cpu_capacity(sg, sds->local) ||
> - !group_has_capacity(env, &sds->local_stat)))
> + sds->local_stat.group_type != group_has_spare))
> return false;
>
> if (sgs->group_type > busiest->group_type)
> @@ -8079,11 +8120,18 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> if (sgs->group_type < busiest->group_type)
> return false;

from reading the patch it wasn't obvious that at this point
sgs->group_type == busiest->group_type, and I wondered if
busiest->avg_load below was pointing to garbage, it isn't.

> - if (sgs->avg_load <= busiest->avg_load)
> + /* Select the overloaded group with highest avg_load */
> + if (sgs->group_type == group_overloaded &&
> + sgs->avg_load <= busiest->avg_load)
> + return false;
> +
> + /* Prefer to move from lowest priority CPU's work */
> + if (sgs->group_type == group_asym_capacity && sds->busiest &&
> + sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
> return false;
>
> if (!(env->sd->flags & SD_ASYM_CPUCAPACITY))
> - goto asym_packing;
> + goto spare_capacity;
>
> /*
> * Candidate sg has no more than one task per CPU and

Can we do a switch (sds->group_type) here? it seems to have most of them
listed.