Re: [PATCH 10/19] sched/fair: Prioritize tasks preferring destination LLC during balancing

From: Peter Zijlstra

Date: Wed Oct 15 2025 - 11:09:16 EST


On Sat, Oct 11, 2025 at 11:24:47AM -0700, Tim Chen wrote:
> During LLC load balancing, first check for tasks that prefer the
> destination LLC and balance them to it before others.
>
> Mark source sched groups containing tasks preferring non-local LLCs
> with the group_llc_balance flag. This ensures the load balancer later
> pulls or pushes these tasks toward their preferred LLCs.
>
> Co-developed-by: Chen Yu <yu.c.chen@xxxxxxxxx>
> Signed-off-by: Chen Yu <yu.c.chen@xxxxxxxxx>
> Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
> ---

For me this patch is cut too fine; it only sets group_llc_balance, but
within this patch we don't get to see how that flag is used.

> kernel/sched/fair.c | 43 +++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 41 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index cbd1e97bca4b..af7b578eaa06 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -9822,8 +9822,7 @@ static __maybe_unused enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu
> else
> return mig_unrestricted;
>
> - return can_migrate_llc(src_cpu, dst_cpu,
> - task_util(p), to_pref);
> + return can_migrate_llc(src_cpu, dst_cpu, task_util(p), to_pref);
> }
>
> #else
> @@ -10394,6 +10393,7 @@ struct sg_lb_stats {
> enum group_type group_type;
> unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */
> unsigned int group_smt_balance; /* Task on busy SMT be moved */
> + unsigned int group_llc_balance; /* Tasks should be moved to preferred LLC */
> unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
> #ifdef CONFIG_NUMA_BALANCING
> unsigned int nr_numa_running;
> @@ -10849,11 +10849,45 @@ static void record_sg_llc_stats(struct lb_env *env,
> if (unlikely(READ_ONCE(sd_share->capacity) != sgs->group_capacity))
> WRITE_ONCE(sd_share->capacity, sgs->group_capacity);
> }
> +
> +/*
> + * Do LLC balance on a sched group that contains an LLC and has tasks
> + * preferring to run on the LLC of the idle dst_cpu.
> + */
> +static inline bool llc_balance(struct lb_env *env, struct sg_lb_stats *sgs,
> + struct sched_group *group)
> +{
> + struct sched_domain *child = env->sd->child;
> + int llc;
> +
> + if (!sched_cache_enabled())
> + return false;
> +
> + if (env->sd->flags & SD_SHARE_LLC)
> + return false;
> +
> + /* only care about task migration among LLCs */
> + if (child && !(child->flags & SD_SHARE_LLC))
> + return false;
> +
> + llc = llc_idx(env->dst_cpu);
> + if (sgs->nr_pref_llc[llc] > 0 &&

Nit: s/> 0// would be the same, right?

> + can_migrate_llc(env->src_cpu, env->dst_cpu, 0, true) == mig_llc)
> + return true;
> +
> + return false;
> +}
> #else
> static inline void record_sg_llc_stats(struct lb_env *env, struct sg_lb_stats *sgs,
> struct sched_group *group)
> {
> }
> +
> +static inline bool llc_balance(struct lb_env *env, struct sg_lb_stats *sgs,
> + struct sched_group *group)
> +{
> + return false;
> +}
> #endif
>
> /**
> @@ -10954,6 +10988,11 @@ static inline void update_sg_lb_stats(struct lb_env *env,
> sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
>
> record_sg_llc_stats(env, sgs, group);
> +
> + /* Check whether tasks in this group can be moved to their preferred LLC */
> + if (!local_group && llc_balance(env, sgs, group))
> + sgs->group_llc_balance = 1;

We now have 3 (or so) branches that start with:

if (!local_group &&

Perhaps collate those under a single !local_group check?

> +
> /* Computing avg_load makes sense only when group is overloaded */
> if (sgs->group_type == group_overloaded)
> sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
> --
> 2.32.0
>