[PATCH 6/8] md/raid5: allocate worker groups per NUMA node

From: Hiroshi Nishida

Date: Wed Jun 24 2026 - 12:02:26 EST


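alloc_thread_groups() previously allocated the r5worker arrays of all
groups in a single kcalloc() block, with no NUMA node specified, so the
workers serving NUMA node N were not guaranteed to live in node N's
memory. On multi-socket systems this causes remote memory traffic on
worker->work and worker->temp_inactive_list accesses from workers whose
group does not match the node the block landed on.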

Replace the single allocation with kzalloc_node(size, GFP_NOIO, i) per
group so each node's workers live in local memory. Because the workers
are now separate per-node allocations, both free sites --
free_thread_groups() and the reallocation path in
raid5_store_group_thread_cnt() -- are updated to free each group's
allocation individually instead of only group 0's.

As a side effect, this removes the original kcalloc() call, which had
its nmemb and size arguments swapped (harmless, because only their
product is used, but semantically wrong).

Assisted-by: Claude:claude-opus-4-8 [Claude Code]
Signed-off-by: Hiroshi Nishida <nishidafmly@xxxxxxxxx>
---
drivers/md/raid5.c | 39 ++++++++++++++++++++++++++-------------
1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8e9edaaca667..c8787ab7b309 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7297,8 +7297,12 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
conf->worker_groups = new_groups;
spin_unlock_irq(&conf->device_lock);

- if (old_groups)
- kfree(old_groups[0].workers);
+ if (old_groups) {
+ int node;
+
+ for (node = 0; node < num_possible_nodes(); node++)
+ kfree(old_groups[node].workers);
+ }
kfree(old_groups);
}
}
@@ -7336,7 +7340,6 @@ static int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt,
{
int i, j, k;
ssize_t size;
- struct r5worker *workers;

if (cnt == 0) {
*group_cnt = 0;
@@ -7344,24 +7347,24 @@ static int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt,
return 0;
}
*group_cnt = num_possible_nodes();
- size = sizeof(struct r5worker) * cnt;
- workers = kcalloc(size, *group_cnt, GFP_NOIO);
*worker_groups = kzalloc_objs(struct r5worker_group, *group_cnt,
GFP_NOIO);
- if (!*worker_groups || !workers) {
- kfree(workers);
- kfree(*worker_groups);
+ if (!*worker_groups)
return -ENOMEM;
- }

+ size = sizeof(struct r5worker) * cnt;
for (i = 0; i < *group_cnt; i++) {
- struct r5worker_group *group;
+ struct r5worker_group *group = &(*worker_groups)[i];
+ struct r5worker *workers;
+
+ workers = kzalloc_node(size, GFP_NOIO, i);
+ if (!workers)
+ goto out_free;

- group = &(*worker_groups)[i];
INIT_LIST_HEAD(&group->handle_list);
INIT_LIST_HEAD(&group->loprio_list);
group->conf = conf;
- group->workers = workers + i * cnt;
+ group->workers = workers;

for (j = 0; j < cnt; j++) {
struct r5worker *worker = group->workers + j;
@@ -7374,12 +7377,22 @@ static int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt,
}

return 0;
+
+out_free:
+ while (--i >= 0)
+ kfree((*worker_groups)[i].workers);
+ kfree(*worker_groups);
+ *worker_groups = NULL;
+ return -ENOMEM;
}

static void free_thread_groups(struct r5conf *conf)
{
+ int i;
+
if (conf->worker_groups)
- kfree(conf->worker_groups[0].workers);
+ for (i = 0; i < conf->group_cnt; i++)
+ kfree(conf->worker_groups[i].workers);
kfree(conf->worker_groups);
conf->worker_groups = NULL;
}
--
2.43.0