Re: [PATCH v3 3/8] sched/topology: Switch to assigning "sd->shared" from s_data
From: Chen, Yu C
Date: Fri Feb 13 2026 - 22:04:33 EST
On 2/6/2026 5:38 PM, Valentin Schneider wrote:
On 06/02/26 10:50, K Prateek Nayak wrote:
Hello Valentin,
On 2/5/2026 10:23 PM, Valentin Schneider wrote:
On 20/01/26 11:32, K Prateek Nayak wrote:
[ ... ]
AIUI we currently write sd->imb_numa_nr for all SD's, but it's only useful
for the SD_NUMA ones... How about the lightly tested:
---
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index afb2c26efb4e5..03db45658f6bd 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2575,6 +2575,51 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
return true;
}
+static inline void adjust_numa_imbalance(struct sched_domain *sd_llc,
+ unsigned int *imb, unsigned int *imb_span)
+{
+ /*
+ * For a single LLC per node, allow an
+ * imbalance up to 12.5% of the node. This is
+ * arbitrary cutoff based two factors -- SMT and
+ * memory channels. For SMT-2, the intent is to
+ * avoid premature sharing of HT resources but
+ * SMT-4 or SMT-8 *may* benefit from a different
+ * cutoff. For memory channels, this is a very
+ * rough estimate of how many channels may be
+ * active and is based on recent CPUs with
+ * many cores.
+ *
+ * For multiple LLCs, allow an imbalance
+ * until multiple tasks would share an LLC
+ * on one node while LLCs on another node
+ * remain idle. This assumes that there are
+ * enough logical CPUs per LLC to avoid SMT
+ * factors and that there is a correlation
+ * between LLCs and memory channels.
+ */
+ struct sched_domain *top_p;
+ unsigned int nr_llcs;
+
+ WARN_ON(!(sd_llc->flags & SD_SHARE_LLC));
+ WARN_ON(!sd_llc->parent);
+
+ nr_llcs = sd_llc->parent->span_weight / sd_llc->span_weight;
+ if (nr_llcs == 1)
+ *imb = sd_llc->parent->span_weight >> 3;
+ else
+ *imb = nr_llcs;
+ *imb = max(1U, *imb);
+ sd_llc->parent->imb_numa_nr = *imb;
+
+ /* Set span based on the first NUMA domain. */
+ top_p = sd_llc->parent->parent;
+ while (top_p && !(top_p->flags & SD_NUMA)) {
+ top_p = top_p->parent;
+ }
+ *imb_span = top_p ? top_p->span_weight : sd_llc->parent->span_weight;
+}
+
/*
* Build sched domains for a given set of CPUs and attach the sched domains
* to the individual CPUs
@@ -2640,63 +2685,30 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
unsigned int imb = 0;
unsigned int imb_span = 1;
- for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
- struct sched_domain *parent = sd->parent;
-
- /* Topmost SD_SHARE_LLC domain. */
- if ((sd->flags & SD_SHARE_LLC) &&
- (!parent || !(parent->flags & SD_SHARE_LLC))) {
- int sd_id = cpumask_first(sched_domain_span(sd));
- struct sched_domain *top_p;
- unsigned int nr_llcs;
-
- sd->shared = *per_cpu_ptr(d.sds, sd_id);
- atomic_set(&sd->shared->nr_busy_cpus, sd->span_weight);
- atomic_inc(&sd->shared->ref);
-
- /* No SD_NUMA domains. */
- if (!parent)
- break;
-
- /*
- * For a single LLC per node, allow an
- * imbalance up to 12.5% of the node. This is
- * arbitrary cutoff based two factors -- SMT and
- * memory channels. For SMT-2, the intent is to
- * avoid premature sharing of HT resources but
- * SMT-4 or SMT-8 *may* benefit from a different
- * cutoff. For memory channels, this is a very
- * rough estimate of how many channels may be
- * active and is based on recent CPUs with
- * many cores.
- *
- * For multiple LLCs, allow an imbalance
- * until multiple tasks would share an LLC
- * on one node while LLCs on another node
- * remain idle. This assumes that there are
- * enough logical CPUs per LLC to avoid SMT
- * factors and that there is a correlation
- * between LLCs and memory channels.
- */
- nr_llcs = parent->span_weight / sd->span_weight;
- if (nr_llcs == 1)
- imb = sd->span_weight >> 3;
- else
- imb = nr_llcs;
- imb = max(1U, imb);
- sd->imb_numa_nr = imb;
-
- /* Set span based on the first NUMA domain. */
- top_p = parent;
- while (top_p && !(top_p->flags & SD_NUMA)) {
- top_p = top_p->parent;
- }
- imb_span = top_p ? top_p->span_weight : parent->span_weight;
- } else {
- int factor = max(1U, (sd->span_weight / imb_span));
+ sd = *per_cpu_ptr(d.sd, i);
this might need to be:
if (!sd)
	continue;
otherwise dereferencing sd->flags below could cause a NULL pointer exception.
thanks,
Chenyu