Re: [PATCH v3 04/21] sched/cache: Make LLC id continuous

From: Tim Chen

Date: Fri Feb 20 2026 - 14:24:23 EST


On Sat, 2026-02-21 at 00:10 +0800, Chen, Yu C wrote:
> On 2/21/2026 12:03 AM, Peter Zijlstra wrote:
> > On Fri, Feb 20, 2026 at 11:53:31PM +0800, Chen, Yu C wrote:
> > > Hi Peter,
> > >
> > > On 2/19/2026 11:40 PM, Peter Zijlstra wrote:
> > > > On Mon, Feb 16, 2026 at 01:14:20PM +0530, K Prateek Nayak wrote:
> > >
> > > [ ... ]
> > >
> > > > > +static int __sched_domains_alloc_llc_id(void)
> > > > > +{
> > > > > + int lid;
> > > > > +
> > > > > + lockdep_assert_held(&sched_domains_mutex);
> > > > > +
> > > > > + lid = cpumask_first_zero(sched_domains_llc_id_allocmask);
> > > > > + if (lid >= tl_max_llcs)
> > > > > + tl_max_llcs++;
> > > >
> > > > Urgh,. should we not rather track the max lid?
> > > >
> > >
> > > Do you mean we should not always increment the max lid,
> > > but instead decrease it when an llc_id is released?
> > > I think Tim has adjusted the code to shrink tl_max_llcs
> > > when an llc_id is released:
> > > https://lore.kernel.org/all/acc7a5c96e8235bf11af640798ce1b60bcaa8196.camel@xxxxxxxxxxxxxxx/
> >
> > You can only shrink when the max lid is released. Since lid is an array
> > index, something like max_lid = weight(mask) would be terribly broken.
> >
> > But what I was getting at is that the code as presented there is rather
> > non-obvious. Yes, if the lid is higher, it cannot be more than one
> > higher than the previous value, but something like:
> >
> > lid = cpumask_first_zero();
> > BUG_ON(lid >= nr_cpu_ids);
> > max_lid = max(max_lid, lid);
> >
> > Is way simpler to follow since it doesn't have that hidden assumption.
> >
> > Then, if you want to allow shrinking, then the clear side could do
> > something like:
> >
> > __cpumask_clear(lid, mask);
> > if (lid == max_lid)
> > max_lid = cpumask_last(mask);
> >
> > or something like that.
> >
>
> Got it, we will adjust the code accordingly.
>

How about modifying this patch like the following:

Thanks.

Tim

---
diff --git a/init/Kconfig b/init/Kconfig
index f4b2649f8401..da405c00e9e3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -994,6 +994,7 @@ config SCHED_CACHE
bool "Cache aware load balance"
default y
depends on SMP
+ depends on SCHED_MC
help
When enabled, the scheduler will attempt to aggregate tasks from
the same process onto a single Last Level Cache (LLC) domain when
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c464e370576f..e34b5842caa4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8372,6 +8372,8 @@ int sched_cpu_deactivate(unsigned int cpu)
*/
synchronize_rcu();

+ sched_domains_free_llc_id(cpu);
+
sched_set_rq_offline(rq, cpu);

scx_rq_deactivate(rq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f4785f84b1f1..3096adc13074 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3932,6 +3932,13 @@ static inline bool sched_cache_enabled(void)

extern void sched_cache_active_set_unlocked(void);
#endif
+
+#ifdef CONFIG_SMP
+void sched_domains_free_llc_id(int cpu);
+#else /* !CONFIG_SMP: */
+static inline void sched_domains_free_llc_id(int cpu) { }
+#endif /* !CONFIG_SMP */
+
extern void init_sched_mm(struct task_struct *p);

extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index e86dea1b9e86..f3bc6636170f 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -18,6 +18,7 @@ void sched_domains_mutex_unlock(void)
}

/* Protected by sched_domains_mutex: */
+static cpumask_var_t sched_domains_llc_id_allocmask;
static cpumask_var_t sched_domains_tmpmask;
static cpumask_var_t sched_domains_tmpmask2;
static int tl_max_llcs;
@@ -2660,6 +2661,62 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
 return true;
 }
 
+#ifdef CONFIG_SMP
+/*
+ * Allocate the lowest free llc_id from sched_domains_llc_id_allocmask
+ * and grow tl_max_llcs (the size of the id space, one more than the
+ * highest id ever handed out) if needed.
+ */
+static int __sched_domains_alloc_llc_id(void)
+{
+	int lid;
+
+	lockdep_assert_held(&sched_domains_mutex);
+
+	lid = cpumask_first_zero(sched_domains_llc_id_allocmask);
+	/*
+	 * llc_id space should never grow larger than the
+	 * possible number of CPUs in the system.
+	 */
+	BUG_ON(lid >= nr_cpu_ids);
+	/* Mark the id as in use so it is not handed out twice. */
+	cpumask_set_cpu(lid, sched_domains_llc_id_allocmask);
+	/* size is one more than the max index */
+	tl_max_llcs = max(tl_max_llcs, lid + 1);
+
+	return lid;
+}
+
+static void __sched_domains_free_llc_id(int cpu)
+{
+	int i, lid;
+
+	lockdep_assert_held(&sched_domains_mutex);
+
+	lid = per_cpu(sd_llc_id, cpu);
+	if (lid == -1)
+		return;
+
+	BUG_ON(lid >= nr_cpu_ids);
+	per_cpu(sd_llc_id, cpu) = -1;
+
+	for_each_cpu(i, cpu_coregroup_mask(cpu)) {
+		/* Another CPU in this LLC still owns the llc_id. */
+		if (per_cpu(sd_llc_id, i) == lid)
+			return;
+	}
+
+	cpumask_clear_cpu(lid, sched_domains_llc_id_allocmask);
+
+	/* Shrink the id space only when the highest id was released. */
+	if (lid == tl_max_llcs - 1)
+		tl_max_llcs = cpumask_empty(sched_domains_llc_id_allocmask) ?
+			0 : cpumask_last(sched_domains_llc_id_allocmask) + 1;
+}
+
+void sched_domains_free_llc_id(int cpu)
+{
+	sched_domains_mutex_lock();
+	__sched_domains_free_llc_id(cpu);
+	sched_domains_mutex_unlock();
+}
+#endif /* CONFIG_SMP */
+
/*
* Build sched domains for a given set of CPUs and attach the sched domains
* to the individual CPUs
@@ -2685,18 +2741,11 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att

/* Set up domains for CPUs specified by the cpu_map: */
for_each_cpu(i, cpu_map) {
- struct sched_domain_topology_level *tl, *tl_llc = NULL;
+ struct sched_domain_topology_level *tl;
int lid;

sd = NULL;
for_each_sd_topology(tl) {
- int flags = 0;
-
- if (tl->sd_flags)
- flags = (*tl->sd_flags)();
-
- if (flags & SD_SHARE_LLC)
- tl_llc = tl;

sd = build_sched_domain(tl, cpu_map, attr, sd, i);

@@ -2708,22 +2757,14 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
break;
}

+#ifdef CONFIG_SMP
lid = per_cpu(sd_llc_id, i);
if (lid == -1) {
int j;

- /*
- * Assign the llc_id to the CPUs that do not
- * have an LLC.
- */
- if (!tl_llc) {
- per_cpu(sd_llc_id, i) = tl_max_llcs++;
-
- continue;
- }
-
+ j = cpumask_first(cpu_coregroup_mask(i));
/* try to reuse the llc_id of its siblings */
- for_each_cpu(j, tl_llc->mask(tl_llc, i)) {
+ for (; j < nr_cpu_ids; j = cpumask_next(j, cpu_coregroup_mask(i))) {
if (i == j)
continue;

@@ -2738,8 +2779,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att

/* a new LLC is detected */
if (lid == -1)
- per_cpu(sd_llc_id, i) = tl_max_llcs++;
+ per_cpu(sd_llc_id, i) = __sched_domains_alloc_llc_id();
}
+#endif /* CONFIG_SMP */
}

if (WARN_ON(!topology_span_sane(cpu_map)))
@@ -2939,6 +2981,7 @@ int __init sched_init_domains(const struct cpumask *cpu_map)
{
int err;

+ zalloc_cpumask_var(&sched_domains_llc_id_allocmask, GFP_KERNEL);
zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);
zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);