[PATCH 3/3] slab: free remote objects to sheaves on memoryless nodes

From: Vlastimil Babka (SUSE)

Date: Wed Mar 11 2026 - 04:34:44 EST


On memoryless nodes we can now allocate from cpu sheaves and refill them
normally. But when a node is memoryless on a system without actual
CONFIG_HAVE_MEMORYLESS_NODES support, freeing always uses the slowpath
because all objects appear as remote. We could instead benefit from the
freeing fastpath, because the allocations can't obtain local objects
anyway if the node is memoryless.

Thus adapt the locality checks when freeing, and move them to an inline
function can_free_to_pcs() for a single shared implementation.

On configurations with CONFIG_HAVE_MEMORYLESS_NODES=y continue using
numa_mem_id() so the percpu sheaves and barn on a memoryless node will
contain mostly objects from the closest memory node (returned by
numa_mem_id()). No change is thus intended for such a configuration.

On systems with CONFIG_HAVE_MEMORYLESS_NODES=n use numa_node_id() (the
cpu's node) since numa_mem_id() just aliases it anyway. But if we are
freeing on a memoryless node, allow the freeing to use percpu sheaves
for objects from any node, since they are all remote anyway.

This way we avoid the slowpath and get more performant freeing. The
potential downside is that allocations will obtain objects with a larger
average distance. If we kept bypassing the sheaves on freeing, a refill
of sheaves from slabs would tend to get closer objects thanks to the
ordering of the zonelist. Architectures that allow de-facto memoryless
nodes without proper CONFIG_HAVE_MEMORYLESS_NODES support should perhaps
consider adding such support.

Signed-off-by: Vlastimil Babka (SUSE) <vbabka@xxxxxxxxxx>
---
mm/slub.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 55 insertions(+), 12 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index d8496b37e364..2e095ce76dd0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6009,6 +6009,56 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
return false;
}

+static __always_inline bool can_free_to_pcs(struct slab *slab)
+{
+ int slab_node;
+ int numa_node;
+
+ if (!IS_ENABLED(CONFIG_NUMA))
+ goto check_pfmemalloc;
+
+ slab_node = slab_nid(slab);
+
+#ifdef CONFIG_HAVE_MEMORYLESS_NODES
+ /*
+ * numa_mem_id() points to the closest node with memory so only allow
+ * objects from that node to the percpu sheaves
+ */
+ numa_node = numa_mem_id();
+
+ if (likely(slab_node == numa_node))
+ goto check_pfmemalloc;
+#else
+
+ /*
+ * numa_mem_id() is only a wrapper to numa_node_id() which is where this
+ * cpu belongs to, but it might be a memoryless node anyway. We don't
+ * know what the closest node is.
+ */
+ numa_node = numa_node_id();
+
+ /* freed object is from this cpu's node, proceed */
+ if (likely(slab_node == numa_node))
+ goto check_pfmemalloc;
+
+ /*
+ * Freed object isn't from this cpu's node, but that node is memoryless.
+ * Proceed as it's better to cache remote objects than falling back to
+ * the slowpath for everything. The allocation side can never obtain
+ * a local object anyway, if none exist. We don't have numa_mem_id() to
+ * point to the closest node as we would on a proper memoryless node
+ * setup.
+ */
+ if (unlikely(!node_isset(numa_node, slab_nodes)))
+ goto check_pfmemalloc;
+#endif
+
+ return false;
+
+check_pfmemalloc:
+ return likely(!slab_test_pfmemalloc(slab));
+}
+
/*
* Bulk free objects to the percpu sheaves.
* Unlike free_to_pcs() this includes the calls to all necessary hooks
@@ -6023,7 +6073,6 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
struct node_barn *barn;
void *remote_objects[PCS_BATCH_MAX];
unsigned int remote_nr = 0;
- int node = numa_mem_id();

next_remote_batch:
while (i < size) {
@@ -6037,8 +6086,7 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
continue;
}

- if (unlikely((IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node)
- || slab_test_pfmemalloc(slab))) {
+ if (unlikely(!can_free_to_pcs(slab))) {
remote_objects[remote_nr] = p[i];
p[i] = p[--size];
if (++remote_nr >= PCS_BATCH_MAX)
@@ -6214,11 +6262,8 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false)))
return;

- if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())
- && likely(!slab_test_pfmemalloc(slab))) {
- if (likely(free_to_pcs(s, object, true)))
- return;
- }
+ if (likely(can_free_to_pcs(slab)) && likely(free_to_pcs(s, object, true)))
+ return;

__slab_free(s, slab, object, object, 1, addr);
stat(s, FREE_SLOWPATH);
@@ -6589,10 +6634,8 @@ void kfree_nolock(const void *object)
*/
kasan_slab_free(s, x, false, false, /* skip quarantine */true);

- if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())) {
- if (likely(free_to_pcs(s, x, false)))
- return;
- }
+ if (likely(can_free_to_pcs(slab)) && likely(free_to_pcs(s, x, false)))
+ return;

/*
* __slab_free() can locklessly cmpxchg16 into a slab, but then it might

--
2.53.0