Re: [PATCH v3 04/21] sched/cache: Make LLC id continuous

From: K Prateek Nayak

Date: Thu Feb 19 2026 - 01:13:23 EST


Hello Tim,

Thank you for the patch.

On 2/19/2026 4:51 AM, Tim Chen wrote:
> diff --git a/init/Kconfig b/init/Kconfig
> index 9848de949afa..4ddf54ab9cf7 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -987,6 +987,7 @@ config SCHED_CACHE
> bool "Cache aware load balance"
> default y
> depends on SMP
> + depends on SCHED_MC
> help
> When enabled, the scheduler will attempt to aggregate tasks from
> the same process onto a single Last Level Cache (LLC) domain when
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 48626c81ba8e..75ba4e0bfcd3 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -8474,6 +8474,8 @@ int sched_cpu_deactivate(unsigned int cpu)
> */
> synchronize_rcu();
>
> + sched_domains_free_llc_id(cpu);
> +
> sched_set_rq_offline(rq, cpu);
>
> scx_rq_deactivate(rq);
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 6cbc56e9adfc..04f42526e6f0 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -3862,6 +3862,7 @@ static inline bool sched_cache_enabled(void)
> extern void sched_cache_active_set_unlocked(void);
> #endif
> extern void init_sched_mm(struct task_struct *p);
> +void sched_domains_free_llc_id(int cpu);
>
> extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
> extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index 580fb2fbc900..5e59340ad9a9 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -18,6 +18,7 @@ void sched_domains_mutex_unlock(void)
> }
>
> /* Protected by sched_domains_mutex: */
> +static cpumask_var_t sched_domains_llc_id_allocmask;
> static cpumask_var_t sched_domains_tmpmask;
> static cpumask_var_t sched_domains_tmpmask2;
> static int tl_max_llcs;
> @@ -2590,6 +2591,57 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
> return true;
> }
>
> +static int __sched_domains_alloc_llc_id(void)
> +{
> + int lid;
> +
> + lockdep_assert_held(&sched_domains_mutex);
> +
> + lid = cpumask_first_zero(sched_domains_llc_id_allocmask);
> + if (lid >= tl_max_llcs)
> + tl_max_llcs = lid + 1;
> +
> + /*
> + * llc_id space should never grow larger than the
> + * possible number of CPUs in the system.
> + */
> + if (!unlikely(WARN_ON_ONCE(lid >= nr_cpumask_bits)))
> + cpumask_set_cpu(lid, sched_domains_llc_id_allocmask);
> + return lid;
> +}
> +
> +static void __sched_domains_free_llc_id(int cpu)
> +{
> + int i, lid;
> +
> + lockdep_assert_held(&sched_domains_mutex);
> +
> + lid = per_cpu(sd_llc_id, cpu);
> + if (lid == -1)
> + return;
> +
> + per_cpu(sd_llc_id, cpu) = -1;
> +
> + for_each_online_cpu(i) {
> + /* An online CPU owns the llc_id. */
> + if (per_cpu(sd_llc_id, i) == lid)
> + return;
> + }

We should perhaps warn and skip clearing "lid" from the cpumask if
"lid" is found to be greater than or equal to "nr_cpumask_bits".
That shouldn't happen, but it is worth guarding against as a
precaution (mirroring the check on the allocation side).

> +
> + cpumask_clear_cpu(lid, sched_domains_llc_id_allocmask);
> +
> + /* shrink max LLC size to save memory */
> + if (lid == tl_max_llcs - 1)
> + lid = tl_max_llcs--;

No need to assign the local "lid" variable here; a simple decrement
of "tl_max_llcs" should do. (As written, "lid = tl_max_llcs--" also
stores the pre-decrement value into "lid", which is never used again.)

> +}
> +
> +void sched_domains_free_llc_id(int cpu)
> +{
> + sched_domains_mutex_lock();
> + __sched_domains_free_llc_id(cpu);
> + sched_domains_mutex_unlock();
> +}
> +
> /*
> * Build sched domains for a given set of CPUs and attach the sched domains
> * to the individual CPUs
> @@ -2615,18 +2667,11 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
>
> /* Set up domains for CPUs specified by the cpu_map: */
> for_each_cpu(i, cpu_map) {
> - struct sched_domain_topology_level *tl, *tl_llc = NULL;
> + struct sched_domain_topology_level *tl;
> int lid;
>
> sd = NULL;
> for_each_sd_topology(tl) {
> - int flags = 0;
> -
> - if (tl->sd_flags)
> - flags = (*tl->sd_flags)();
> -
> - if (flags & SD_SHARE_LLC)
> - tl_llc = tl;
>
> sd = build_sched_domain(tl, cpu_map, attr, sd, i);
>
> @@ -2642,18 +2687,19 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
> if (lid == -1) {
> int j;
>
> + j = cpumask_first(cpu_coregroup_mask(i));
> /*
> * Assign the llc_id to the CPUs that do not
> * have an LLC.
> */
> - if (!tl_llc) {
> - per_cpu(sd_llc_id, i) = tl_max_llcs++;
> + if (j >= nr_cpu_ids) {
> + per_cpu(sd_llc_id, i) = __sched_domains_alloc_llc_id();
>
> continue;
> }

I don't think we need to special-case this, since:

for_each_cpu(j, cpu_coregroup_mask(i)) {
	...
}

would simply not iterate if no CPU is set in the mask (and CPU "i"
would definitely be set in it, since "i" must be online), so the
"if" after the loop would see "lid" as "-1" and do the right thing.

>
> /* try to reuse the llc_id of its siblings */
> - for_each_cpu(j, tl_llc->mask(tl_llc, i)) {
> + for (; j < nr_cpu_ids; j = cpumask_next(j, cpu_coregroup_mask(i))) {
> if (i == j)
> continue;
>
> @@ -2668,7 +2714,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
>
> /* a new LLC is detected */
> if (lid == -1)
> - per_cpu(sd_llc_id, i) = tl_max_llcs++;
> + per_cpu(sd_llc_id, i) = __sched_domains_alloc_llc_id();
> }
> }
>
> @@ -2869,6 +2915,7 @@ int __init sched_init_domains(const struct cpumask *cpu_map)
> {
> int err;
>
> + zalloc_cpumask_var(&sched_domains_llc_id_allocmask, GFP_KERNEL);
> zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
> zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);
> zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);

--
Thanks and Regards,
Prateek