[PATCH 2/3] slab: create barns for online memoryless nodes
From: Vlastimil Babka (SUSE)
Date: Wed Mar 11 2026 - 04:29:24 EST
Ming Lei has reported [1] a performance regression due to replacing cpu
(partial) slabs with sheaves. With slub stats enabled, a large amount of
slowpath allocations were observed. The affected system has 8 online
NUMA nodes but only 2 have memory.
For sheaves to work effectively on a given cpu, its NUMA node has to have
struct node_barn allocated. Those are currently only allocated on nodes
with memory (N_MEMORY), where kmem_cache_node also exists, as the goal is
to cache only node-local objects. But in order to have good performance
on a memoryless node, we need its barn to exist and use sheaves to cache
non-local objects (as no local objects can exist anyway).
Therefore change the implementation to allocate barns on all online
nodes, tracked in a new nodemask slab_barn_nodes. Also add a cpu hotplug
callback as that's when a memoryless node can become online.
Change rcu_sheaf->node assignment to numa_node_id() so it's returned to
the barn of the local cpu's (potentially memoryless) node, and not to
the nearest node with memory anymore.
Reported-by: Ming Lei <ming.lei@xxxxxxxxxx>
Link: https://lore.kernel.org/all/aZ0SbIqaIkwoW2mB@fedora/ [1]
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@xxxxxxxxxx>
---
mm/slub.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 59 insertions(+), 4 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 609a183f8533..d8496b37e364 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -472,6 +472,12 @@ static inline struct node_barn *get_barn(struct kmem_cache *s)
*/
static nodemask_t slab_nodes;
+/*
+ * Similar to slab_nodes but for where we have node_barn allocated.
+ * Corresponds to N_ONLINE nodes.
+ */
+static nodemask_t slab_barn_nodes;
+
/*
* Workqueue used for flushing cpu and kfree_rcu sheaves.
*/
@@ -4084,6 +4090,51 @@ void flush_all_rcu_sheaves(void)
rcu_barrier();
}
+static int slub_cpu_setup(unsigned int cpu)
+{
+ int nid = cpu_to_node(cpu);
+ struct kmem_cache *s;
+ int ret = 0;
+
+ /*
+ * we never clear a nid so it's safe to do a quick check before taking
+ * the mutex, and then recheck to handle parallel cpu hotplug safely
+ */
+ if (node_isset(nid, slab_barn_nodes))
+ return 0;
+
+ mutex_lock(&slab_mutex);
+
+ if (node_isset(nid, slab_barn_nodes))
+ goto out;
+
+ list_for_each_entry(s, &slab_caches, list) {
+ struct node_barn *barn;
+
+ /*
+ * barn might already exist if a previous callback failed midway
+ */
+ if (!cache_has_sheaves(s) || get_barn_node(s, nid))
+ continue;
+
+ barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);
+
+ if (!barn) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ barn_init(barn);
+ s->per_node[nid].barn = barn;
+ }
+ node_set(nid, slab_barn_nodes);
+
+out:
+ mutex_unlock(&slab_mutex);
+
+ return ret;
+}
+
/*
* Use the cpu notifier to insure that the cpu slabs are flushed when
* necessary.
@@ -5936,7 +5987,7 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
rcu_sheaf = NULL;
} else {
pcs->rcu_free = NULL;
- rcu_sheaf->node = numa_mem_id();
+ rcu_sheaf->node = numa_node_id();
}
/*
@@ -7597,7 +7648,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
if (slab_state == DOWN || !cache_has_sheaves(s))
return 1;
- for_each_node_mask(node, slab_nodes) {
+ for_each_node_mask(node, slab_barn_nodes) {
struct node_barn *barn;
barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
@@ -8250,6 +8301,7 @@ static int slab_mem_going_online_callback(int nid)
* and barn initialized for the new node.
*/
node_set(nid, slab_nodes);
+ node_set(nid, slab_barn_nodes);
out:
mutex_unlock(&slab_mutex);
return ret;
@@ -8328,7 +8380,7 @@ static void __init bootstrap_cache_sheaves(struct kmem_cache *s)
if (!capacity)
return;
- for_each_node_mask(node, slab_nodes) {
+ for_each_node_mask(node, slab_barn_nodes) {
struct node_barn *barn;
barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
@@ -8400,6 +8452,9 @@ void __init kmem_cache_init(void)
for_each_node_state(node, N_MEMORY)
node_set(node, slab_nodes);
+ for_each_online_node(node)
+ node_set(node, slab_barn_nodes);
+
create_boot_cache(kmem_cache_node, "kmem_cache_node",
sizeof(struct kmem_cache_node),
SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
@@ -8426,7 +8481,7 @@ void __init kmem_cache_init(void)
/* Setup random freelists for each cache */
init_freelist_randomization();
- cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
+ cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", slub_cpu_setup,
slub_cpu_dead);
pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
--
2.53.0