Re: [PATCH v3 04/21] sched/cache: Make LLC id continuous

From: Tim Chen

Date: Thu Feb 19 2026 - 19:12:35 EST


On Thu, 2026-02-19 at 11:42 +0530, K Prateek Nayak wrote:
> Hello Tim,
>
> Thank you for the patch.
>
> On 2/19/2026 4:51 AM, Tim Chen wrote:
> > diff --git a/init/Kconfig b/init/Kconfig
> > index 9848de949afa..4ddf54ab9cf7 100644
> > --- a/init/Kconfig
> > +++ b/init/Kconfig
> > @@ -987,6 +987,7 @@ config SCHED_CACHE
> > bool "Cache aware load balance"
> > default y
> > depends on SMP
> > + depends on SCHED_MC
> > help
> > When enabled, the scheduler will attempt to aggregate tasks from
> > the same process onto a single Last Level Cache (LLC) domain when
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 48626c81ba8e..75ba4e0bfcd3 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -8474,6 +8474,8 @@ int sched_cpu_deactivate(unsigned int cpu)
> > */
> > synchronize_rcu();
> >
> > + sched_domains_free_llc_id(cpu);
> > +
> > sched_set_rq_offline(rq, cpu);
> >
> > scx_rq_deactivate(rq);
> > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> > index 6cbc56e9adfc..04f42526e6f0 100644
> > --- a/kernel/sched/sched.h
> > +++ b/kernel/sched/sched.h
> > @@ -3862,6 +3862,7 @@ static inline bool sched_cache_enabled(void)
> > extern void sched_cache_active_set_unlocked(void);
> > #endif
> > extern void init_sched_mm(struct task_struct *p);
> > +void sched_domains_free_llc_id(int cpu);
> >
> > extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
> > extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
> > diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> > index 580fb2fbc900..5e59340ad9a9 100644
> > --- a/kernel/sched/topology.c
> > +++ b/kernel/sched/topology.c
> > @@ -18,6 +18,7 @@ void sched_domains_mutex_unlock(void)
> > }
> >
> > /* Protected by sched_domains_mutex: */
> > +static cpumask_var_t sched_domains_llc_id_allocmask;
> > static cpumask_var_t sched_domains_tmpmask;
> > static cpumask_var_t sched_domains_tmpmask2;
> > static int tl_max_llcs;
> > @@ -2590,6 +2591,57 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
> > return true;
> > }
> >
> > +static int __sched_domains_alloc_llc_id(void)
> > +{
> > + int lid;
> > +
> > + lockdep_assert_held(&sched_domains_mutex);
> > +
> > + lid = cpumask_first_zero(sched_domains_llc_id_allocmask);
> > + if (lid >= tl_max_llcs)
> > + tl_max_llcs = lid + 1;
> > +
> > + /*
> > + * llc_id space should never grow larger than the
> > + * possible number of CPUs in the system.
> > + */
> > + if (!unlikely(WARN_ON_ONCE(lid >= nr_cpumask_bits)))
> > + cpumask_set_cpu(lid, sched_domains_llc_id_allocmask);
> > + return lid;
> > +}
> > +
> > +static void __sched_domains_free_llc_id(int cpu)
> > +{
> > + int i, lid;
> > +
> > + lockdep_assert_held(&sched_domains_mutex);
> > +
> > + lid = per_cpu(sd_llc_id, cpu);
> > + if (lid == -1)
> > + return;
> > +
> > + per_cpu(sd_llc_id, cpu) = -1;
> > +
> > + for_each_online_cpu(i) {
> > + /* An online CPU owns the llc_id. */
> > + if (per_cpu(sd_llc_id, i) == lid)
> > + return;
> > + }
>
> We should perhaps warn and skip clearing "lid" from the cpumask if
> "lid" is found to be greater than or equal to "nr_cpumask_bits". This
> shouldn't happen, but just as a precaution.

Will do

>
> > +
> > + cpumask_clear_cpu(lid, sched_domains_llc_id_allocmask);
> > +
> > + /* shrink max LLC size to save memory */
> > + if (lid == tl_max_llcs - 1)
> > + lid = tl_max_llcs--;
>
> No need to assign the local "lid" variable here; a simple decrement
> should do.

Good point

>
> > +}
> > +
> > +void sched_domains_free_llc_id(int cpu)
> > +{
> > + sched_domains_mutex_lock();
> > + __sched_domains_free_llc_id(cpu);
> > + sched_domains_mutex_unlock();
> > +}
> > +
> > /*
> > * Build sched domains for a given set of CPUs and attach the sched domains
> > * to the individual CPUs
> > @@ -2615,18 +2667,11 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
> >
> > /* Set up domains for CPUs specified by the cpu_map: */
> > for_each_cpu(i, cpu_map) {
> > - struct sched_domain_topology_level *tl, *tl_llc = NULL;
> > + struct sched_domain_topology_level *tl;
> > int lid;
> >
> > sd = NULL;
> > for_each_sd_topology(tl) {
> > - int flags = 0;
> > -
> > - if (tl->sd_flags)
> > - flags = (*tl->sd_flags)();
> > -
> > - if (flags & SD_SHARE_LLC)
> > - tl_llc = tl;
> >
> > sd = build_sched_domain(tl, cpu_map, attr, sd, i);
> >
> > @@ -2642,18 +2687,19 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
> > if (lid == -1) {
> > int j;
> >
> > + j = cpumask_first(cpu_coregroup_mask(i));
> > /*
> > * Assign the llc_id to the CPUs that do not
> > * have an LLC.
> > */
> > - if (!tl_llc) {
> > - per_cpu(sd_llc_id, i) = tl_max_llcs++;
> > + if (j >= nr_cpu_ids) {
> > + per_cpu(sd_llc_id, i) = __sched_domains_alloc_llc_id();
> >
> > continue;
> > }
>
> I don't think we need to special case this out since:
>
> for_each_cpu(j, cpu_coregroup_mask(i)) {
> ...
> }
>
> would bail out if no CPU is set (also CPU "i" would definitely be
> set on it since it must be online) and the "if" after the loop will
> see "lid" as "-1" and DTRT.

That's right. Will take out the unneeded code.

Also found out that cpu_coregroup_mask() is not defined for configs
built without CONFIG_SMP, so I will put the llc id assignment code
under CONFIG_SMP.

Thanks for the code reviews and suggestions.

Tim
>
> >
> > /* try to reuse the llc_id of its siblings */
> > - for_each_cpu(j, tl_llc->mask(tl_llc, i)) {
> > + for (; j < nr_cpu_ids; j = cpumask_next(j, cpu_coregroup_mask(i))) {
> > if (i == j)
> > continue;
> >
> > @@ -2668,7 +2714,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
> >
> > /* a new LLC is detected */
> > if (lid == -1)
> > - per_cpu(sd_llc_id, i) = tl_max_llcs++;
> > + per_cpu(sd_llc_id, i) = __sched_domains_alloc_llc_id();
> > }
> > }
> >
> > @@ -2869,6 +2915,7 @@ int __init sched_init_domains(const struct cpumask *cpu_map)
> > {
> > int err;
> >
> > + zalloc_cpumask_var(&sched_domains_llc_id_allocmask, GFP_KERNEL);
> > zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
> > zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);
> > zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);