[PATCH for-next v3 7/9] mm/slab: introduce kfree_rcu_nolock()

From: Harry Yoo (Oracle)

Date: Mon Jun 15 2026 - 07:07:36 EST

Currently, k[v]free_rcu() cannot be called from an unknown context, since
re-entering it in the middle of another k[v]free_rcu() call could lead to
a deadlock.

Make users' lives easier by introducing the kfree_rcu_nolock() variant,
now that kfree_rcu_sheaf() is available on PREEMPT_RT and
__kfree_rcu_sheaf() handles unknown contexts.

Unlike k[v]free_rcu(), kfree_rcu_nolock() does not fall back to
kvfree_rcu batching when the sheaves path fails; it falls back to
defer_kfree_rcu() instead, which queues the object on a per-CPU lockless
list and hands it to call_rcu() from irq_work. In most cases the sheaves
path is expected to succeed, so it is not worth adding complexity to the
existing kvfree_rcu batching.

Since defer_kfree_rcu() can be called on caches without sheaves, move
deferred_work_barrier() and rcu_barrier() out of the conditional branch in
kvfree_rcu_barrier_on_cache() so that they always run.

Signed-off-by: Harry Yoo (Oracle) <harry@xxxxxxxxxx>
---
include/linux/rcupdate.h | 12 ++++++++++++
mm/slab.h | 1 +
mm/slab_common.c | 22 ++++++++++++++++++++--
mm/slub.c | 23 ++++++++++++++++++++++-
4 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5e95acc33989..3025249bfcb5 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1099,6 +1099,7 @@ static inline void rcu_read_unlock_migrate(void)
* In mm/slab_common.c, no suitable header to include here.
*/
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
+void kfree_call_rcu_nolock(struct rcu_head *head, void *ptr);

/*
* The BUILD_BUG_ON() makes sure the rcu_head offset can be handled. See the
@@ -1122,6 +1123,17 @@ do { \
kvfree_call_rcu(NULL, (void *) (___p)); \
} while (0)

+/* kfree_rcu_nolock(): 2-arg form only (object pointer, name of its rcu_head member); a NULL pointer is a no-op */
+#define kfree_rcu_nolock(ptr, krhf) \
+do { \
+ typeof (ptr) ___p = (ptr); \
+ \
+ if (___p) { \
+ BUILD_BUG_ON(offsetof(typeof(*(ptr)), krhf) >= 4096); \
+ kfree_call_rcu_nolock(&((___p)->krhf), (void *) (___p));\
+ } \
+} while (0)
+
/*
* Place this after a lock-acquisition primitive to guarantee that
* an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies
diff --git a/mm/slab.h b/mm/slab.h
index 961581e35ec8..a493c5201e96 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -745,6 +745,7 @@ void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user);

void deferred_work_barrier(void);
+void defer_kfree_rcu(struct rcu_head *head);

static inline bool slub_debug_orig_size(struct kmem_cache *s)
{
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 807924a94fb0..5a39e6225160 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1263,6 +1263,23 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);

+void kfree_call_rcu_nolock(struct rcu_head *head, void *ptr) /* backend of kfree_rcu_nolock(): @head is the rcu_head embedded in @ptr */
+{
+ struct slab *slab;
+ struct kmem_cache *s;
+
+ VM_WARN_ON_ONCE(is_vmalloc_addr(ptr) || !virt_to_slab(ptr)); /* only slab-backed objects are expected here */
+
+ slab = virt_to_slab(ptr); /* NOTE(review): may be NULL in the case warned about above */
+ s = slab->slab_cache; /* NOTE(review): unchecked dereference - confirm callers never pass non-slab objects */
+
+ if (__kfree_rcu_sheaf(s, ptr, /* allow_spin = */ false)) /* try the sheaves path first, with spinning disallowed */
+ return; /* the sheaves path took the object */
+
+ defer_kfree_rcu(head); /* fallback: queue @head for the deferred irq_work handler */
+}
+EXPORT_SYMBOL_GPL(kfree_call_rcu_nolock);
+
#ifndef CONFIG_KVFREE_RCU_BATCHED

void kvfree_call_rcu(struct rcu_head *head, void *ptr)
@@ -2120,10 +2137,11 @@ void kvfree_rcu_barrier_on_cache(struct kmem_cache *s)
cpus_read_lock();
flush_rcu_sheaves_on_cache(s);
cpus_read_unlock();
- deferred_work_barrier();
- rcu_barrier();
}

+ /* kfree_rcu_nolock() might have deferred frees even without sheaves */
+ deferred_work_barrier();
+ rcu_barrier();
__kvfree_rcu_barrier();
}
EXPORT_SYMBOL_GPL(kvfree_rcu_barrier_on_cache);
diff --git a/mm/slub.c b/mm/slub.c
index 4850629774b2..19018a979445 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4075,6 +4075,7 @@ static void flush_all(struct kmem_cache *s)

struct deferred_percpu_work {
struct llist_head objects;
+ struct llist_head objects_by_rcu; /* rcu_heads queued by defer_kfree_rcu() */
struct llist_head rcu_sheaves;
struct irq_work work;
};
@@ -4083,6 +4084,7 @@ static void deferred_percpu_work_fn(struct irq_work *work);

static DEFINE_PER_CPU(struct deferred_percpu_work, deferred_percpu_work) = {
.objects = LLIST_HEAD_INIT(objects),
+ .objects_by_rcu = LLIST_HEAD_INIT(objects_by_rcu), /* drained by deferred_percpu_work_fn() */
.rcu_sheaves = LLIST_HEAD_INIT(rcu_sheaves),
.work = IRQ_WORK_INIT(deferred_percpu_work_fn),
};
@@ -6392,12 +6394,13 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
static void deferred_percpu_work_fn(struct irq_work *work)
{
struct deferred_percpu_work *dpw;
- struct llist_head *objs, *rcu_sheaves;
+ struct llist_head *objs, *objs_by_rcu, *rcu_sheaves;
struct llist_node *llnode, *pos, *t;

dpw = container_of(work, struct deferred_percpu_work, work);
rcu_sheaves = &dpw->rcu_sheaves;
objs = &dpw->objects;
+ objs_by_rcu = &dpw->objects_by_rcu;

llnode = llist_del_all(objs);
llist_for_each_safe(pos, t, llnode) {
@@ -6428,6 +6431,13 @@ static void deferred_percpu_work_fn(struct irq_work *work)

call_rcu(&rcu_sheaf->rcu_head, rcu_free_sheaf);
}
+
+ llnode = llist_del_all(objs_by_rcu);
+ llist_for_each_safe(pos, t, llnode) {
+ struct rcu_head *head = (struct rcu_head *)pos;
+
+ call_rcu(head, kvfree_rcu_cb);
+ }
}

static void defer_free(struct kmem_cache *s, void *head)
@@ -6443,6 +6453,17 @@ static void defer_free(struct kmem_cache *s, void *head)
irq_work_queue(&dpw->work);
}

+void defer_kfree_rcu(struct rcu_head *head) /* queue @head so deferred_percpu_work_fn() can pass it to call_rcu() */
+{
+ struct deferred_percpu_work *dpw;
+
+ guard(preempt)(); /* NOTE(review): presumably keeps us on this CPU until return - confirm */
+
+ dpw = this_cpu_ptr(&deferred_percpu_work);
+ if (llist_add((struct llist_node *)head, &dpw->objects_by_rcu)) /* the rcu_head storage is reused as the llist node */
+ irq_work_queue(&dpw->work); /* queued only when llist_add() returns true - presumably "list was empty before" */
+}
+
void deferred_work_barrier(void)
{
int cpu;

--
2.53.0