[Patch v4 05/22] sched/cache: Make LLC id continuous
From: Tim Chen
Date: Wed Apr 01 2026 - 17:49:51 EST
Introduce an index mapping between CPUs and their LLCs. This provides
a roughly continuous per-LLC index needed for cache-aware load balancing
in later patches.
The existing per_cpu llc_id is usually the id of the first CPU in the
LLC domain, so its values are sparse and unsuitable as array indices.
Using llc_id directly would waste memory.
With the new mapping, CPUs in the same LLC share an approximately
continuous id:
per_cpu(llc_id, CPU=0...15) = 0
per_cpu(llc_id, CPU=16...31) = 1
per_cpu(llc_id, CPU=32...47) = 2
...
Note that the LLC IDs are allocated via bitmask, so an ID may be
reused across CPU offline->online transitions.
Suggested-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Originally-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
Co-developed-by: Chen Yu <yu.c.chen@xxxxxxxxx>
Signed-off-by: Chen Yu <yu.c.chen@xxxxxxxxx>
Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
---
Notes:
v3->v4:
Leverage dynamic cpumask management infrastructure
for LLC id allocation.
(K Prateek Nayak, Peter Zijlstra)
kernel/sched/core.c | 2 +
kernel/sched/sched.h | 3 ++
kernel/sched/topology.c | 90 ++++++++++++++++++++++++++++++++++++++++-
3 files changed, 93 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index eff8695000e7..1188b5d24933 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8417,6 +8417,8 @@ int sched_cpu_deactivate(unsigned int cpu)
*/
synchronize_rcu();
+ sched_domains_free_llc_id(cpu);
+
sched_set_rq_offline(rq, cpu);
scx_rq_deactivate(rq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0a38bfc704a4..9defeeeb3e8e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -4019,6 +4019,9 @@ static inline bool sched_cache_enabled(void)
return false;
}
#endif
+
+void sched_domains_free_llc_id(int cpu);
+
extern void init_sched_mm(struct task_struct *p);
extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 32dcddaead82..edf6d7ec73ca 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -18,8 +18,10 @@ void sched_domains_mutex_unlock(void)
}
/* Protected by sched_domains_mutex: */
+static cpumask_var_t sched_domains_llc_id_allocmask;
static cpumask_var_t sched_domains_tmpmask;
static cpumask_var_t sched_domains_tmpmask2;
+int max_lid;
static int __init sched_debug_setup(char *str)
{
@@ -663,7 +665,7 @@ static void destroy_sched_domains(struct sched_domain *sd)
*/
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_size);
-DEFINE_PER_CPU(int, sd_llc_id);
+DEFINE_PER_CPU(int, sd_llc_id) = -1;
DEFINE_PER_CPU(int, sd_share_id);
DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
@@ -689,7 +691,6 @@ static void update_top_cache_domain(int cpu)
rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
per_cpu(sd_llc_size, cpu) = size;
- per_cpu(sd_llc_id, cpu) = id;
rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
sd = lowest_flag_domain(cpu, SD_CLUSTER);
@@ -1776,6 +1777,11 @@ const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu
{
return cpu_coregroup_mask(cpu);
}
+
+#define llc_mask(cpu) cpu_coregroup_mask(cpu)
+
+#else
+#define llc_mask(cpu) cpumask_of(cpu)
#endif
const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
@@ -2548,6 +2554,61 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
return true;
}
+static int __sched_domains_alloc_llc_id(void)
+{
+ int lid, max;
+
+ lockdep_assert_held(&sched_domains_mutex);
+
+ lid = cpumask_first_zero(sched_domains_llc_id_allocmask);
+ /*
+ * llc_id space should never grow larger than the
+ * possible number of CPUs in the system.
+ */
+ if (lid >= nr_cpu_ids)
+ return -1;
+
+ __cpumask_set_cpu(lid, sched_domains_llc_id_allocmask);
+ max = cpumask_last(sched_domains_llc_id_allocmask);
+ if (max > max_lid)
+ max_lid = max;
+
+ return lid;
+}
+
+static void __sched_domains_free_llc_id(int cpu)
+{
+ int i, lid, max;
+
+ lockdep_assert_held(&sched_domains_mutex);
+
+ lid = per_cpu(sd_llc_id, cpu);
+ if (lid == -1 || lid >= nr_cpu_ids)
+ return;
+
+ per_cpu(sd_llc_id, cpu) = -1;
+
+ for_each_cpu(i, llc_mask(cpu)) {
+ /* An online CPU owns the llc_id. */
+ if (per_cpu(sd_llc_id, i) == lid)
+ return;
+ }
+
+ __cpumask_clear_cpu(lid, sched_domains_llc_id_allocmask);
+
+ max = cpumask_last(sched_domains_llc_id_allocmask);
+ /* shrink max lid to save memory */
+ if (max < max_lid)
+ max_lid = max;
+}
+
+void sched_domains_free_llc_id(int cpu)
+{
+ sched_domains_mutex_lock();
+ __sched_domains_free_llc_id(cpu);
+ sched_domains_mutex_unlock();
+}
+
/*
* Build sched domains for a given set of CPUs and attach the sched domains
* to the individual CPUs
@@ -2573,6 +2634,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
/* Set up domains for CPUs specified by the cpu_map: */
for_each_cpu(i, cpu_map) {
struct sched_domain_topology_level *tl;
+ int lid;
sd = NULL;
for_each_sd_topology(tl) {
@@ -2586,6 +2648,29 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
if (cpumask_equal(cpu_map, sched_domain_span(sd)))
break;
}
+
+ lid = per_cpu(sd_llc_id, i);
+ if (lid == -1) {
+ /* try to reuse the llc_id of its siblings */
+ for (int j = cpumask_first(llc_mask(i));
+ j < nr_cpu_ids;
+ j = cpumask_next(j, llc_mask(i))) {
+ if (i == j)
+ continue;
+
+ lid = per_cpu(sd_llc_id, j);
+
+ if (lid != -1) {
+ per_cpu(sd_llc_id, i) = lid;
+
+ break;
+ }
+ }
+
+ /* a new LLC is detected */
+ if (lid == -1)
+ per_cpu(sd_llc_id, i) = __sched_domains_alloc_llc_id();
+ }
}
if (WARN_ON(!topology_span_sane(cpu_map)))
@@ -2762,6 +2847,7 @@ int __init sched_init_domains(const struct cpumask *cpu_map)
{
int err;
+ zalloc_cpumask_var(&sched_domains_llc_id_allocmask, GFP_KERNEL);
zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);
zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);
--
2.32.0