[PATCH v13 3/8] lib/group_cpus: Add group_mask_cpus_evenly()

From: Aaron Tomlin

Date: Tue May 12 2026 - 20:57:04 EST

From: Daniel Wagner <wagi@xxxxxxxxxx>

This commit introduces group_mask_cpus_evenly(), which allows callers to
distribute a specific CPU mask evenly across groups. It serves as a bounded
version of group_cpus_evenly().

While group_cpus_evenly() operates on the global cpu_possible_mask,
group_mask_cpus_evenly() confines the distribution strictly within the
boundaries of the caller-provided mask. It preserves the kernel's native
two-stage spreading logic-first prioritising CPUs that are physically
present (cpu_present_mask) to prevent I/O starvation, and then distributing
any remaining vectors to non-present CPUs to maintain hotplug safety.

Signed-off-by: Daniel Wagner <wagi@xxxxxxxxxx>
Reviewed-by: Hannes Reinecke <hare@xxxxxxx>
[atomlin:
- Added check for numgrps == 0
- Updated commit message to resolve typo
- Removed unused <linux/sched/isolation.h>
- Fix TOCTOU race by caching the provided mask
- Implemented two-stage grouping logic to prioritise physically
present CPUs, mirroring group_cpus_evenly()]
Signed-off-by: Aaron Tomlin <atomlin@xxxxxxxxxxx>
---
include/linux/group_cpus.h | 3 ++
lib/group_cpus.c | 106 +++++++++++++++++++++++++++++++++++++
2 files changed, 109 insertions(+)

diff --git a/include/linux/group_cpus.h b/include/linux/group_cpus.h
index 9d4e5ab6c314..defab4123a82 100644
--- a/include/linux/group_cpus.h
+++ b/include/linux/group_cpus.h
@@ -10,5 +10,8 @@
#include <linux/cpu.h>

struct cpumask *group_cpus_evenly(unsigned int numgrps, unsigned int *nummasks);
+struct cpumask *group_mask_cpus_evenly(unsigned int numgrps,
+ const struct cpumask *mask,
+ unsigned int *nummasks);

#endif
diff --git a/lib/group_cpus.c b/lib/group_cpus.c
index b8d54398f88a..2552ccea743e 100644
--- a/lib/group_cpus.c
+++ b/lib/group_cpus.c
@@ -563,3 +563,109 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps, unsigned int *nummasks)
return masks;
}
EXPORT_SYMBOL_GPL(group_cpus_evenly);
+
+/**
+ * group_mask_cpus_evenly - Group all CPUs evenly per NUMA/CPU locality
+ * @numgrps: number of cpumasks to create
+ * @mask: CPUs to consider for the grouping
+ * @nummasks: number of initialized cpumasks
+ *
+ * Return: cpumask array if successful, NULL otherwise. Only the CPUs
+ * marked in the mask will be considered for the grouping. And each
+ * element includes CPUs assigned to this group. nummasks contains the
+ * number of initialized masks which can be less than numgrps.
+ *
+ * Try to put close CPUs from viewpoint of CPU and NUMA locality into
+ * the same group.
+ *
+ * We guarantee in the resulting grouping that all CPUs specified in the
+ * provided mask are covered, and no same CPU is assigned to multiple
+ * groups.
+ */
+struct cpumask *group_mask_cpus_evenly(unsigned int numgrps,
+ const struct cpumask *mask,
+ unsigned int *nummasks)
+{
+ unsigned int curgrp = 0, nr_present = 0, nr_others = 0;
+ cpumask_var_t *node_to_cpumask;
+ cpumask_var_t nmsk, local_mask, npresmsk;
+ int ret = -ENOMEM;
+ struct cpumask *masks = NULL;
+
+ if (numgrps == 0)
+ return NULL;
+
+ if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
+ return NULL;
+
+ if (!zalloc_cpumask_var(&local_mask, GFP_KERNEL))
+ goto fail_nmsk;
+
+ if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
+ goto fail_local_mask;
+
+ node_to_cpumask = alloc_node_to_cpumask();
+ if (!node_to_cpumask)
+ goto fail_npresmsk;
+
+ masks = kzalloc_objs(*masks, numgrps);
+ if (!masks)
+ goto fail_node_to_cpumask;
+
+ build_node_to_cpumask(node_to_cpumask);
+
+ /*
+ * Create a stable snapshot of the mask. The grouping algorithm
+ * requires the CPU count to remain constant across its multiple
+ * passes. This prevents allocation failures if the caller passes a
+ * dynamic mask (e.g., cpu_online_mask) that changes concurrently.
+ */
+ cpumask_copy(local_mask, data_race(mask));
+
+ /*
+ * Grouping present CPUs first. We intersect the provided mask with
+ * cpu_present_mask to ensure that we prioritise physically
+ * available CPUs for the initial distribution.
+ */
+ cpumask_and(npresmsk, local_mask, data_race(cpu_present_mask));
+ ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
+ npresmsk, nmsk, masks);
+ if (ret < 0)
+ goto fail_node_to_cpumask;
+ nr_present = ret;
+
+ /*
+ * Allocate non-present CPUs starting from the next group to be
+ * handled. If the grouping of present CPUs already exhausted the
+ * group space, assign the non-present CPUs to the already
+ * allocated out groups.
+ */
+ if (nr_present >= numgrps)
+ curgrp = 0;
+ else
+ curgrp = nr_present;
+ cpumask_andnot(npresmsk, local_mask, npresmsk);
+ ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
+ npresmsk, nmsk, masks);
+ if (ret >= 0)
+ nr_others = ret;
+
+fail_node_to_cpumask:
+ free_node_to_cpumask(node_to_cpumask);
+
+fail_npresmsk:
+ free_cpumask_var(npresmsk);
+
+fail_local_mask:
+ free_cpumask_var(local_mask);
+
+fail_nmsk:
+ free_cpumask_var(nmsk);
+ if (ret < 0) {
+ kfree(masks);
+ return NULL;
+ }
+ *nummasks = min(nr_present + nr_others, numgrps);
+ return masks;
+}
+EXPORT_SYMBOL_GPL(group_mask_cpus_evenly);
--
2.51.0