[PATCH for-next v3 6/9] mm/slab: allow kfree_rcu_sheaf() on PREEMPT_RT

From: Harry Yoo (Oracle)

Date: Mon Jun 15 2026 - 07:07:11 EST


As suggested by Vlastimil Babka, kfree_rcu_sheaf() can be used
on PREEMPT_RT if we always assume spinning is not allowed on PREEMPT_RT.
This is because local_trylock and spinlock_t are safe to use with their
trylock variants as long as the kernel does not spin and the context is
neither NMI nor hardirq.

Now that __kfree_rcu_sheaf() knows how to handle allow_spin = false,
relax the limitation and try the sheaves path on PREEMPT_RT as well.

Keep the lockdep map on non-RT kernels. On PREEMPT_RT, however, do not
use the lockdep map, to avoid suppressing valid lockdep warnings.

Link: https://lore.kernel.org/linux-mm/6811cc17-8ee4-48c8-8cbf-6bf4d9f98162@xxxxxxxxxx
Suggested-by: Vlastimil Babka (SUSE) <vbabka@xxxxxxxxxx>
Signed-off-by: Harry Yoo (Oracle) <harry@xxxxxxxxxx>
---
mm/slab_common.c | 11 +++++++++--
mm/slub.c | 17 ++++++++++-------
2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 55546b8385ff..807924a94fb0 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1595,6 +1595,13 @@ static bool kfree_rcu_sheaf(void *obj)
{
struct kmem_cache *s;
struct slab *slab;
+ bool allow_spin;
+
+ /*
+ * It is not safe to spin on PREEMPT_RT because the kernel might be
+ * holding a raw spinlock and slab acquires sleeping locks.
+ */
+ allow_spin = !IS_ENABLED(CONFIG_PREEMPT_RT);

if (is_vmalloc_addr(obj))
return false;
@@ -1605,7 +1612,7 @@ static bool kfree_rcu_sheaf(void *obj)

s = slab->slab_cache;
if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()))
- return __kfree_rcu_sheaf(s, obj, /* allow_spin = */ true);
+ return __kfree_rcu_sheaf(s, obj, allow_spin);

return false;
}
@@ -1954,7 +1961,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
if (!head)
might_sleep();

- if (!IS_ENABLED(CONFIG_PREEMPT_RT) && kfree_rcu_sheaf(ptr))
+ if (kfree_rcu_sheaf(ptr))
return;

// Queue the object but don't yet schedule the batch.
diff --git a/mm/slub.c b/mm/slub.c
index ba593c1c53d5..4850629774b2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6082,12 +6082,13 @@ static void rcu_free_sheaf(struct rcu_head *head)
* kvfree_call_rcu() can be called while holding a raw_spinlock_t. Since
* __kfree_rcu_sheaf() may acquire a spinlock_t (sleeping lock on PREEMPT_RT),
* this would violate lock nesting rules. Therefore, kvfree_call_rcu() avoids
- * this problem by bypassing the sheaves layer entirely on PREEMPT_RT.
+ * this problem by passing allow_spin = false on PREEMPT_RT.
*
* However, lockdep still complains that it is invalid to acquire spinlock_t
* while holding raw_spinlock_t, even on !PREEMPT_RT where spinlock_t is a
* spinning lock. Tell lockdep that acquiring spinlock_t is valid here
- * by temporarily raising the wait-type to LD_WAIT_CONFIG.
+ * by temporarily raising the wait-type to LD_WAIT_CONFIG. Skip the lockdep map
+ * on PREEMPT_RT to avoid suppressing valid lockdep warnings.
*/
static DEFINE_WAIT_OVERRIDE_MAP(kfree_rcu_sheaf_map, LD_WAIT_CONFIG);

@@ -6096,10 +6097,10 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin)
struct slub_percpu_sheaves *pcs;
struct slab_sheaf *rcu_sheaf;

- if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
- return false;
+ VM_WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT) && allow_spin);

- lock_map_acquire_try(&kfree_rcu_sheaf_map);
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ lock_map_acquire_try(&kfree_rcu_sheaf_map);

if (!local_trylock(&s->cpu_sheaves->lock))
goto fail;
@@ -6199,12 +6200,14 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin)
local_unlock(&s->cpu_sheaves->lock);

stat(s, FREE_RCU_SHEAF);
- lock_map_release(&kfree_rcu_sheaf_map);
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ lock_map_release(&kfree_rcu_sheaf_map);
return true;

fail:
stat(s, FREE_RCU_SHEAF_FAIL);
- lock_map_release(&kfree_rcu_sheaf_map);
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ lock_map_release(&kfree_rcu_sheaf_map);
return false;
}


--
2.53.0