[PATCH v4 2/2] sched/topology: Refinement to topology_span_sane speedup

From: Steve Wahl
Date: Tue Mar 04 2025 - 11:44:14 EST


Simplify the topology_span_sane() code further, removing the need to
allocate an array and the gotos used to make sure the array gets freed.

This change is kept in a separate commit because it could return a
different sanity result than the previous code, but only in odd
circumstances that are not expected to occur in practice; for example,
when a CPU is not listed in its own mask.

Signed-off-by: Steve Wahl <steve.wahl@xxxxxxx>
---

Version 4: First appearance of this second patch.

kernel/sched/topology.c | 48 ++++++++++++++++-------------------------
1 file changed, 19 insertions(+), 29 deletions(-)
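
For reviewers who want to poke at the new check in isolation, here is a
minimal user-space sketch of the id_seen/covered logic, using plain
uint64_t bitmasks in place of struct cpumask and a toy mask[] array
standing in for tl->mask(cpu) at a single topology level. The function
name level_span_sane(), the NCPUS limit of 64 CPUs, and the example spans
are invented for illustration only; the actual kernel change is the diff
below.

	/*
	 * Illustrative sketch of the new id_seen/covered check, using plain
	 * uint64_t bitmasks in place of struct cpumask.  Assumes at most 64
	 * CPUs and a single topology level.  Not kernel code.
	 */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define NCPUS 4

	/* mask[cpu] plays the role of tl->mask(cpu) for one topology level */
	static bool level_span_sane(const uint64_t mask[NCPUS])
	{
		uint64_t covered = 0, id_seen = 0;

		for (int cpu = 0; cpu < NCPUS; cpu++) {
			uint64_t tl_cpu_mask = mask[cpu];
			int id;

			/* an empty mask cannot collide; skip it to keep the toy index valid */
			if (!tl_cpu_mask)
				continue;

			/* lowest bit set in this mask is used as a unique id */
			id = __builtin_ctzll(tl_cpu_mask);

			if (id_seen & (1ULL << id)) {
				/* id already seen: this CPU's span must match id's span */
				if (mask[id] != tl_cpu_mask)
					return false;
			} else {
				/* new id: its span must not overlap anything covered so far */
				if (covered & tl_cpu_mask)
					return false;
				covered |= tl_cpu_mask;
				id_seen |= 1ULL << id;
			}
		}
		return true;
	}

	int main(void)
	{
		/* CPUs {0,1} share one span, CPUs {2,3} another: sane */
		const uint64_t good[NCPUS] = { 0x3, 0x3, 0xc, 0xc };
		/* CPU 2's span {1,2} partially overlaps {0,1}: not sane */
		const uint64_t bad[NCPUS]  = { 0x3, 0x3, 0x6, 0xc };

		printf("good: %d, bad: %d\n", level_span_sane(good), level_span_sane(bad));
		return 0;
	}

Compiled with gcc -std=c99 and run, the "good" layout passes while the
partially overlapping "bad" layout is rejected, mirroring the
cpumask_equal() and cpumask_intersects() branches in the patch.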

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 3fb834301315..23b2012ff2af 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2361,17 +2361,12 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
static bool topology_span_sane(const struct cpumask *cpu_map)
{
struct sched_domain_topology_level *tl;
- const struct cpumask **masks;
- struct cpumask *covered;
- int cpu, id;
- bool ret = false;
+ struct cpumask *covered, *id_seen;
+ int cpu;

lockdep_assert_held(&sched_domains_mutex);
covered = sched_domains_tmpmask;
-
- masks = kmalloc_array(nr_cpu_ids, sizeof(struct cpumask *), GFP_KERNEL);
- if (!masks)
- return ret;
+ id_seen = sched_domains_tmpmask2;

for_each_sd_topology(tl) {

@@ -2380,7 +2375,7 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
continue;

cpumask_clear(covered);
- memset(masks, 0, nr_cpu_ids * sizeof(struct cpumask *));
+ cpumask_clear(id_seen);

/*
* Non-NUMA levels cannot partially overlap - they must be either
@@ -2389,36 +2384,27 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
* breaks the linking done for an earlier span.
*/
for_each_cpu(cpu, cpu_map) {
- /* lowest bit set in this mask is used as a unique id */
- id = cpumask_first(tl->mask(cpu));
+ const struct cpumask *tl_cpu_mask = tl->mask(cpu);
+ int id;

- /* zeroed masks cannot possibly collide */
- if (id >= nr_cpu_ids)
- continue;
+ /* lowest bit set in this mask is used as a unique id */
+ id = cpumask_first(tl_cpu_mask);

- /* if this mask doesn't collide with what we've already seen */
- if (!cpumask_intersects(tl->mask(cpu), covered)) {
- /* this failing would be an error in this algorithm */
- if (WARN_ON(masks[id]))
- goto notsane;
+ if (cpumask_test_cpu(id, id_seen)) {
+ /* First CPU has already been seen, ensure identical spans */
+ if (!cpumask_equal(tl->mask(id), tl_cpu_mask))
+ return false;
+ } else {
+ /* First CPU hasn't been seen before, ensure it's a completely new span */
+ if (cpumask_intersects(tl_cpu_mask, covered))
+ return false;

- /* record the mask we saw for this id */
- masks[id] = tl->mask(cpu);
- cpumask_or(covered, tl->mask(cpu), covered);
- } else if ((!masks[id]) || !cpumask_equal(masks[id], tl->mask(cpu))) {
- /*
- * a collision with covered should have exactly matched
- * a previously seen mask with the same id
- */
- goto notsane;
+ cpumask_or(covered, covered, tl_cpu_mask);
+ cpumask_set_cpu(id, id_seen);
}
}
}
- ret = true;
-
- notsane:
- kfree(masks);
- return ret;
+ return true;
}

/*
--
2.26.2