[PATCH] fs: move mntput_no_expire() slowpath into a dedicated routine

From: Mateusz Guzik

Date: Fri Nov 14 2025 - 15:18:08 EST


In the stock variant the compiler spills several registers on the stack
and employs stack smashing protection, adding even more code plus a
branch on exit.

With the slowpath split out, the actual fast path is small enough that
the compiler inlines it into all callers -- the symbol is no longer
emitted.

Forcing noinline on it just for code-measurement purposes shows the fast
path dropping from 111 to 39 bytes.

Signed-off-by: Mateusz Guzik <mjguzik@xxxxxxxxx>
---

fast path prior:
call ffffffff81374630 <__fentry__>
push %r15
push %r14
push %r13
push %r12
push %rbp
push %rbx
sub $0x18,%rsp
mov %gs:0x2deef5d(%rip),%rbx # ffffffff8454f008 <__stack_chk_guard>

mov %rbx,0x10(%rsp)
mov %rdi,%rbx
mov %rsp,(%rsp)
mov %rsp,0x8(%rsp)
call ffffffff814615f0 <__rcu_read_lock>
mov 0xe8(%rbx),%rax
test %rax,%rax
je ffffffff817600ff <mntput_no_expire+0x6f>
mov 0x58(%rbx),%rax
decl %gs:(%rax)
call ffffffff81466810 <__rcu_read_unlock>
mov 0x10(%rsp),%rax
sub %gs:0x2deef22(%rip),%rax # ffffffff8454f008 <__stack_chk_guard>

jne ffffffff8176030b <mntput_no_expire+0x27b>
add $0x18,%rsp
pop %rbx
pop %rbp
pop %r12
pop %r13
pop %r14
pop %r15
jmp ffffffff823091f0 <__pi___x86_return_thunk>

after (when forced to be out-of-line):
call ffffffff81374630 <__fentry__>
push %rbx
mov %rdi,%rbx
call ffffffff814615f0 <__rcu_read_lock>
mov 0xe8(%rbx),%rax
test %rax,%rax
je ffffffff81760347 <mntput_no_expire+0x27>
mov 0x58(%rbx),%rax
decl %gs:(%rax)
pop %rbx
jmp ffffffff81466810 <__rcu_read_unlock>

fs/namespace.c | 38 ++++++++++++++++++++++----------------
1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index e8f1fe4bca06..6af6b082043c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1346,26 +1346,12 @@ static void delayed_mntput(struct work_struct *unused)
}
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);

-static void mntput_no_expire(struct mount *mnt)
+static void noinline mntput_no_expire_slowpath(struct mount *mnt)
{
LIST_HEAD(list);
int count;

- rcu_read_lock();
- if (likely(READ_ONCE(mnt->mnt_ns))) {
- /*
- * Since we don't do lock_mount_hash() here,
- * ->mnt_ns can change under us. However, if it's
- * non-NULL, then there's a reference that won't
- * be dropped until after an RCU delay done after
- * turning ->mnt_ns NULL. So if we observe it
- * non-NULL under rcu_read_lock(), the reference
- * we are dropping is not the final one.
- */
- mnt_add_count(mnt, -1);
- rcu_read_unlock();
- return;
- }
+ VFS_BUG_ON(mnt->mnt_ns);
lock_mount_hash();
/*
* make sure that if __legitimize_mnt() has not seen us grab
@@ -1416,6 +1402,26 @@ static void mntput_no_expire(struct mount *mnt)
cleanup_mnt(mnt);
}

+static void mntput_no_expire(struct mount *mnt)
+{
+ rcu_read_lock();
+ if (likely(READ_ONCE(mnt->mnt_ns))) {
+ /*
+ * Since we don't do lock_mount_hash() here,
+ * ->mnt_ns can change under us. However, if it's
+ * non-NULL, then there's a reference that won't
+ * be dropped until after an RCU delay done after
+ * turning ->mnt_ns NULL. So if we observe it
+ * non-NULL under rcu_read_lock(), the reference
+ * we are dropping is not the final one.
+ */
+ mnt_add_count(mnt, -1);
+ rcu_read_unlock();
+ return;
+ }
+ mntput_no_expire_slowpath(mnt);
+}
+
void mntput(struct vfsmount *mnt)
{
if (mnt) {
--
2.48.1