[PATCH 1/5] sched/fair: Attach sched_domain_shared to sd_asym_cpucapacity

From: Andrea Righi

Date: Thu Apr 23 2026 - 03:42:16 EST


From: K Prateek Nayak <kprateek.nayak@xxxxxxx>

On asymmetric CPU capacity systems, the wakeup path uses
select_idle_capacity(), which scans the span of sd_asym_cpucapacity
rather than sd_llc.

The has_idle_cores hint, however, lives on sd_llc->shared, so the
wakeup-time read of has_idle_cores operates on an LLC-scoped blob
while the actual scan and decision span the asym domain. nr_busy_cpus
lives in the same shared sched_domain data, but it is never used in
the asym CPU capacity scenario.
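
For illustration, the pre-patch read and scan roughly look like this
(a simplified sketch of the select_idle_sibling() flow, not the
literal kernel code):

  struct sched_domain *sd;
  struct sched_domain_shared *sds;
  bool has_idle_core;

  sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
  /* The hint is fetched at LLC scope ... */
  sds = rcu_dereference(per_cpu(sd_llc_shared, target));
  has_idle_core = sds && READ_ONCE(sds->has_idle_cores);
  /* ... while the scan walks sched_domain_span(sd), the asym span. */
  i = select_idle_capacity(p, sd, target);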

Therefore, move the sched_domain_shared object to sd_asym_cpucapacity
whenever the CPU has an SD_ASYM_CPUCAPACITY_FULL ancestor and that
ancestor is non-overlapping (i.e., not built from SD_NUMA). In that case
the scope of has_idle_cores matches the scope of the wakeup scan.
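
Readers need no change for this: the hint is already fetched through
the per-CPU sd_llc_shared pointer, which update_top_cache_domain()
below re-points at the asym-scoped object (hence the rename TODO in
the diff). For reference, the read side as it looks in current
mainline:

  static inline bool test_idle_cores(int cpu)
  {
  	struct sched_domain_shared *sds;

  	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
  	if (sds)
  		return READ_ONCE(sds->has_idle_cores);

  	return false;
  }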

Fall back to attaching the shared object to sd_llc in three cases
(condensed into the sketch after the list):

1) plain symmetric systems (no SD_ASYM_CPUCAPACITY_FULL anywhere);

2) CPUs in an exclusive cpuset that carves out a symmetric capacity
island: has_asym is system-wide but those CPUs have no
SD_ASYM_CPUCAPACITY_FULL ancestor in their hierarchy and follow
the symmetric LLC path in select_idle_sibling();

3) exotic topologies where SD_ASYM_CPUCAPACITY_FULL lands on an
SD_NUMA-built domain. init_sched_domain_shared() keys the shared
blob off cpumask_first(span), which on overlapping NUMA domains
would alias unrelated spans onto the same blob. Keep the shared
object on the LLC there; select_idle_capacity() gracefully skips
the has_idle_cores preference when sd->shared is NULL.
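
Condensed, the attach rule reads as follows; shared_attach_point() is
a name made up for this sketch, the real logic is open-coded in
build_sched_domains():

  /* Illustrative only; not a helper that exists in the tree. */
  static struct sched_domain *
  shared_attach_point(struct sched_domain *llc, struct sched_domain *asym)
  {
  	/* Cases 1 and 2: no SD_ASYM_CPUCAPACITY_FULL ancestor. */
  	if (!asym)
  		return llc;

  	/* Case 3: the asym domain is overlapping (built from SD_NUMA). */
  	if (asym->flags & SD_NUMA)
  		return llc;

  	return asym;
  }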

Co-developed-by: Andrea Righi <arighi@xxxxxxxxxx>
Signed-off-by: Andrea Righi <arighi@xxxxxxxxxx>
Signed-off-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
---
kernel/sched/fair.c | 8 ++--
kernel/sched/topology.c | 81 +++++++++++++++++++++++++++++++++++------
2 files changed, 75 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 69361c63353ad..934eb663f445e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7925,7 +7925,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
int i, cpu, idle_cpu = -1, nr = INT_MAX;

- if (sched_feat(SIS_UTIL)) {
+ if (sched_feat(SIS_UTIL) && sd->shared) {
/*
* Increment because !--nr is the condition to stop scan.
*
@@ -12840,7 +12840,8 @@ static void set_cpu_sd_state_busy(int cpu)
goto unlock;
sd->nohz_idle = 0;

- atomic_inc(&sd->shared->nr_busy_cpus);
+ if (sd->shared)
+ atomic_inc(&sd->shared->nr_busy_cpus);
unlock:
rcu_read_unlock();
}
@@ -12869,7 +12870,8 @@ static void set_cpu_sd_state_idle(int cpu)
goto unlock;
sd->nohz_idle = 1;

- atomic_dec(&sd->shared->nr_busy_cpus);
+ if (sd->shared)
+ atomic_dec(&sd->shared->nr_busy_cpus);
unlock:
rcu_read_unlock();
}
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 5847b83d9d552..dc50193b198c6 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -680,19 +680,39 @@ static void update_top_cache_domain(int cpu)
int id = cpu;
int size = 1;

+ sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL);
+ /*
+ * The shared object is attached to sd_asym_cpucapacity only when the
+ * asym domain is non-overlapping (i.e., not built from SD_NUMA).
+ * On overlapping (NUMA) asym domains we fall back to letting the
+ * SD_SHARE_LLC path own the shared object, so sd->shared may be NULL
+ * here.
+ */
+ if (sd && sd->shared)
+ sds = sd->shared;
+
+ rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
+
sd = highest_flag_domain(cpu, SD_SHARE_LLC);
if (sd) {
id = cpumask_first(sched_domain_span(sd));
size = cpumask_weight(sched_domain_span(sd));

- /* If sd_llc exists, sd_llc_shared should exist too. */
- WARN_ON_ONCE(!sd->shared);
- sds = sd->shared;
+ /*
+ * If sd_asym_cpucapacity didn't claim the shared object,
+ * sd_llc must have one linked.
+ */
+ if (!sds) {
+ WARN_ON_ONCE(!sd->shared);
+ sds = sd->shared;
+ }
}

rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
per_cpu(sd_llc_size, cpu) = size;
per_cpu(sd_llc_id, cpu) = id;
+
+ /* TODO: Rename sd_llc_shared to fit the new role. */
rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);

sd = lowest_flag_domain(cpu, SD_CLUSTER);
@@ -711,9 +731,6 @@ static void update_top_cache_domain(int cpu)

sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
-
- sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL);
- rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
}

/*
@@ -2650,6 +2667,15 @@ static void adjust_numa_imbalance(struct sched_domain *sd_llc)
}
}

+static void init_sched_domain_shared(struct s_data *d, struct sched_domain *sd)
+{
+ int sd_id = cpumask_first(sched_domain_span(sd));
+
+ sd->shared = *per_cpu_ptr(d->sds, sd_id);
+ atomic_set(&sd->shared->nr_busy_cpus, sd->span_weight);
+ atomic_inc(&sd->shared->ref);
+}
+
/*
* Build sched domains for a given set of CPUs and attach the sched domains
* to the individual CPUs
@@ -2708,20 +2734,53 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
}

for_each_cpu(i, cpu_map) {
+ struct sched_domain *sd_asym = NULL;
+ bool asym_claimed = false;
+
sd = *per_cpu_ptr(d.sd, i);
if (!sd)
continue;

+ /*
+ * In case of ASYM_CPUCAPACITY, attach sd->shared to
+ * sd_asym_cpucapacity for wakeup stat tracking.
+ *
+ * Caveats:
+ *
+ * 1) has_asym is system-wide, but a given CPU may still
+ * lack an SD_ASYM_CPUCAPACITY_FULL ancestor (e.g., an
+ * exclusive cpuset carving out a symmetric capacity island).
+ * Such CPUs must fall through to the LLC seeding path below.
+ *
+ * 2) Skip the asym attach if the asym ancestor is an
+ * overlapping domain (SD_NUMA). On those topologies let the
+ * LLC path own the shared object instead.
+ *
+ * XXX: This assumes the SD_ASYM_CPUCAPACITY_FULL domain
+ * always has more than one group; otherwise it is prone
+ * to degeneration.
+ */
+ sd_asym = sd;
+ while (sd_asym && !(sd_asym->flags & SD_ASYM_CPUCAPACITY_FULL))
+ sd_asym = sd_asym->parent;
+
+ if (sd_asym && !(sd_asym->flags & SD_NUMA)) {
+ init_sched_domain_shared(&d, sd_asym);
+ asym_claimed = true;
+ }
+
/* First, find the topmost SD_SHARE_LLC domain */
+ sd = *per_cpu_ptr(d.sd, i);
while (sd->parent && (sd->parent->flags & SD_SHARE_LLC))
sd = sd->parent;

if (sd->flags & SD_SHARE_LLC) {
- int sd_id = cpumask_first(sched_domain_span(sd));
-
- sd->shared = *per_cpu_ptr(d.sds, sd_id);
- atomic_set(&sd->shared->nr_busy_cpus, sd->span_weight);
- atomic_inc(&sd->shared->ref);
+ /*
+ * Initialize the sd->shared for SD_SHARE_LLC unless
+ * the asym path above already claimed it.
+ */
+ if (!asym_claimed)
+ init_sched_domain_shared(&d, sd);

/*
* In presence of higher domains, adjust the
--
2.54.0