Re: [PATCH 3/5] sched/fair: rework load_balance

From: Vincent Guittot
Date: Fri Jul 19 2019 - 10:13:53 EST


On Fri, 19 Jul 2019 at 15:12, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>
> On Fri, Jul 19, 2019 at 09:58:23AM +0200, Vincent Guittot wrote:
>
> > @@ -8029,17 +8063,24 @@ static inline void update_sg_lb_stats(struct lb_env *env,
> > }
> > }
> >
> > - /* Adjust by relative CPU capacity of the group */
> > - sgs->group_capacity = group->sgc->capacity;
> > - sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
> > + /* Check if dst cpu is idle and preferred to this group */
> > + if (env->sd->flags & SD_ASYM_PACKING &&
> > + env->idle != CPU_NOT_IDLE &&
> > + sgs->sum_h_nr_running &&
> > + sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu)) {
> > + sgs->group_asym_capacity = 1;
> > + }
> >
> > - if (sgs->sum_h_nr_running)
> > - sgs->load_per_task = sgs->group_load / sgs->sum_h_nr_running;
> > + sgs->group_capacity = group->sgc->capacity;
> >
> > sgs->group_weight = group->group_weight;
> >
> > - sgs->group_no_capacity = group_is_overloaded(env, sgs);
> > - sgs->group_type = group_classify(group, sgs);
> > + sgs->group_type = group_classify(env, group, sgs);
> > +
> > + /* Computing avg_load makes sense only when group is overloaded */
> > + if (sgs->group_type != group_overloaded)
>
> The comment seems to suggest you meant: ==

Yes, it looks like you're right :-( The condition should be '=='.

>
> > + sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) /
> > + sgs->group_capacity;
> > }
> >
> > /**
> > @@ -8070,7 +8111,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> > */
> > if (sgs->group_type == group_misfit_task &&
> > (!group_smaller_max_cpu_capacity(sg, sds->local) ||
> > - !group_has_capacity(env, &sds->local_stat)))
> > + sds->local_stat.group_type != group_has_spare))
> > return false;
> >
> > if (sgs->group_type > busiest->group_type)
> > @@ -8079,11 +8120,18 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> > if (sgs->group_type < busiest->group_type)
> > return false;
> >
> > - if (sgs->avg_load <= busiest->avg_load)
> > + /* Select the overloaded group with highest avg_load */
> > + if (sgs->group_type == group_overloaded &&
> > + sgs->avg_load <= busiest->avg_load)
>
> And this code does too; because with the above '!=', you're comparing
> uninitialized data here, no?

It is not uninitialized: with the '!=' above, avg_load is always 0 here,
and as a result load_balance was quite conservative when the system was
overloaded.

>
> > + return false;
> > +
> > + /* Prefer to move from lowest priority CPU's work */
> > + if (sgs->group_type == group_asym_capacity && sds->busiest &&
> > + sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
> > return false;
> >
> > if (!(env->sd->flags & SD_ASYM_CPUCAPACITY))
> > - goto asym_packing;
> > + goto spare_capacity;
> >
> > /*
> > * Candidate sg has no more than one task per CPU and