Re: [PATCH v2 3/4] sched/fair: Consider SMT in ASYM_PACKING load balance

From: Peter Zijlstra
Date: Mon May 03 2021 - 06:24:42 EST


On Mon, May 03, 2021 at 12:02:49PM +0200, Peter Zijlstra wrote:
> On Tue, Apr 13, 2021 at 07:04:35PM -0700, Ricardo Neri wrote:
> > @@ -8507,6 +8616,18 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> > if (!sgs->sum_h_nr_running)
> > return false;
> >
> > + /*
> > + * @sg may have been tentatively classified as group_asym_packing.
> > + * Now that we have sufficient information about @sds.local, reassess
> > + * if asym packing migration can be done. Reclassify @sg. The only
> > + * possible results are group_has_spare and group_fully_busy.
> > + */
> > + if (sgs->group_type == group_asym_packing &&
> > + !asym_can_pull_tasks(env->dst_cpu, sds, sgs, sg)) {
> > + sgs->group_asym_packing = 0;
> > + sgs->group_type = group_classify(env->sd->imbalance_pct, sg, sgs);
> > + }
>
> So if this really is all about not having sds.local in
> update_sd_lb_stats(), then that seems fixable. Let me haz a try.

How's this then?

---
kernel/sched/fair.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3bdc41f22909..e9dcbee5b3d9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8437,6 +8437,21 @@ group_type group_classify(unsigned int imbalance_pct,
return group_has_spare;
}

+static inline bool
+sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sched_group *group)
+{
+ /*
+ * Because sd->groups starts with the local group, anything that isn't
+ * the local group will have access to the local state.
+ */
+ if (group == sds->local)
+ return false;
+
+ /* XXX do magic here */
+
+ return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
+}
+
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @env: The load balancing environment.
@@ -8445,6 +8460,7 @@ group_type group_classify(unsigned int imbalance_pct,
* @sg_status: Holds flag indicating the status of the sched_group
*/
static inline void update_sg_lb_stats(struct lb_env *env,
+ struct sd_lb_stats *sds,
struct sched_group *group,
struct sg_lb_stats *sgs,
int *sg_status)
@@ -8453,7 +8469,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,

memset(sgs, 0, sizeof(*sgs));

- local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(group));
+ local_group = group == sds->local;

for_each_cpu_and(i, sched_group_span(group), env->cpus) {
struct rq *rq = cpu_rq(i);
@@ -8498,9 +8514,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,

/* Check if dst CPU is idle and preferred to this group */
if (env->sd->flags & SD_ASYM_PACKING &&
- env->idle != CPU_NOT_IDLE &&
- sgs->sum_h_nr_running &&
- sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu)) {
+ env->idle != CPU_NOT_IDLE && sgs->sum_h_nr_running &&
+ sched_asym(env, sds, group)) {
sgs->group_asym_packing = 1;
}

@@ -9016,7 +9031,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
update_group_capacity(env->sd, env->dst_cpu);
}

- update_sg_lb_stats(env, sg, sgs, &sg_status);
+ update_sg_lb_stats(env, sds, sg, sgs, &sg_status);

if (local_group)
goto next_group;