[PATCH v4] fs/namespace: defer RCU sync for MNT_DETACH umount
From: Eric Chanudet
Date: Tue Apr 08 2025 - 17:31:20 EST
Defer releasing the detached file-system when calling namespace_unlock()
during a lazy umount to return faster.
When requesting MNT_DETACH, the caller does not expect the file-system
to be shut down upon returning from the syscall. Calling
synchronize_rcu_expedited() has a significant cost on RT kernel that
defaults to rcupdate.rcu_normal_after_boot=1. Queue the detached struct
mount in a separate list and put it on a workqueue to run post RCU
grace-period.
w/o patch, 6.15-rc1 PREEMPT_RT:
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount mnt
0.02455 +- 0.00107 seconds time elapsed ( +- 4.36% )
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount -l mnt
0.02555 +- 0.00114 seconds time elapsed ( +- 4.46% )
w/ patch, 6.15-rc1 PREEMPT_RT:
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount mnt
0.026311 +- 0.000869 seconds time elapsed ( +- 3.30% )
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount -l mnt
0.003194 +- 0.000160 seconds time elapsed ( +- 5.01% )
Signed-off-by: Alexander Larsson <alexl@xxxxxxxxxx>
Signed-off-by: Lucas Karpinski <lkarpins@xxxxxxxxxx>
Signed-off-by: Eric Chanudet <echanude@xxxxxxxxxx>
---
Attempt to re-spin this series based on the feedback received in v3 that
pointed out the need to wait the grace-period in namespace_unlock()
before calling the deferred mntput().
v4:
- Use queue_rcu_work() to defer free_mounts() for lazy umounts
- Drop lazy_unlock global and refactor using a helper
v3: https://lore.kernel.org/all/20240626201129.272750-2-lkarpins@xxxxxxxxxx/
- Removed unneeded code for lazy umount case.
- Don't block within interrupt context.
v2: https://lore.kernel.org/all/20240426195429.28547-1-lkarpins@xxxxxxxxxx/
- Only defer releasing umount'ed filesystems for lazy umounts
v1: https://lore.kernel.org/all/20230119205521.497401-1-echanude@xxxxxxxxxx/
fs/namespace.c | 52 +++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 45 insertions(+), 7 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 14935a0500a2..e5b0b920dd97 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -45,6 +45,11 @@ static unsigned int m_hash_shift __ro_after_init;
static unsigned int mp_hash_mask __ro_after_init;
static unsigned int mp_hash_shift __ro_after_init;
+struct deferred_free_mounts {
+ struct rcu_work rwork;
+ struct hlist_head release_list;
+};
+
static __initdata unsigned long mhash_entries;
static int __init set_mhash_entries(char *str)
{
@@ -1789,11 +1794,29 @@ static bool need_notify_mnt_list(void)
}
#endif
-static void namespace_unlock(void)
+static void free_mounts(struct hlist_head *mount_list)
{
- struct hlist_head head;
struct hlist_node *p;
struct mount *m;
+
+ hlist_for_each_entry_safe(m, p, mount_list, mnt_umount) {
+ hlist_del(&m->mnt_umount);
+ mntput(&m->mnt);
+ }
+}
+
+static void defer_free_mounts(struct work_struct *work)
+{
+ struct deferred_free_mounts *d = container_of(
+ to_rcu_work(work), struct deferred_free_mounts, rwork);
+
+ free_mounts(&d->release_list);
+ kfree(d);
+}
+
+static void __namespace_unlock(bool lazy)
+{
+ HLIST_HEAD(head);
LIST_HEAD(list);
hlist_move_list(&unmounted, &head);
@@ -1817,12 +1840,27 @@ static void namespace_unlock(void)
if (likely(hlist_empty(&head)))
return;
- synchronize_rcu_expedited();
+ if (lazy) {
+ struct deferred_free_mounts *d =
+ kmalloc(sizeof(*d), GFP_KERNEL);
- hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
- hlist_del(&m->mnt_umount);
- mntput(&m->mnt);
+ if (unlikely(!d))
+ goto out;
+
+ hlist_move_list(&head, &d->release_list);
+ INIT_RCU_WORK(&d->rwork, defer_free_mounts);
+ queue_rcu_work(system_wq, &d->rwork);
+ return;
}
+
+out:
+ synchronize_rcu_expedited();
+ free_mounts(&head);
+}
+
+static inline void namespace_unlock(void)
+{
+ __namespace_unlock(false);
}
static inline void namespace_lock(void)
@@ -2056,7 +2094,7 @@ static int do_umount(struct mount *mnt, int flags)
}
out:
unlock_mount_hash();
- namespace_unlock();
+ __namespace_unlock(flags & MNT_DETACH);
return retval;
}
--
2.49.0