linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4] fs/namespace: defer RCU sync for MNT_DETACH umount
@ 2025-04-08 20:58 Eric Chanudet
  2025-04-09 10:37 ` Christian Brauner
                   ` (3 more replies)
  0 siblings, 4 replies; 47+ messages in thread
From: Eric Chanudet @ 2025-04-08 20:58 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara,
	Sebastian Andrzej Siewior, Clark Williams, Steven Rostedt,
	Ian Kent
  Cc: linux-fsdevel, linux-kernel, linux-rt-devel, Eric Chanudet,
	Alexander Larsson, Lucas Karpinski

Defer releasing the detached file-system when namespace_unlock() is
called during a lazy umount, so that the syscall returns faster.

When requesting MNT_DETACH, the caller does not expect the file-system
to be shut down upon returning from the syscall. Calling
synchronize_rcu_expedited() has a significant cost on RT kernels, which
default to rcupdate.rcu_normal_after_boot=1. Queue the detached struct
mounts on a separate list and hand that list to a workqueue that runs
after the RCU grace period.

w/o patch, 6.15-rc1 PREEMPT_RT:
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount mnt
    0.02455 +- 0.00107 seconds time elapsed  ( +-  4.36% )
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount -l mnt
    0.02555 +- 0.00114 seconds time elapsed  ( +-  4.46% )

w/ patch, 6.15-rc1 PREEMPT_RT:
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount mnt
    0.026311 +- 0.000869 seconds time elapsed  ( +-  3.30% )
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount -l mnt
    0.003194 +- 0.000160 seconds time elapsed  ( +-  5.01% )

Signed-off-by: Alexander Larsson <alexl@redhat.com>
Signed-off-by: Lucas Karpinski <lkarpins@redhat.com>
Signed-off-by: Eric Chanudet <echanude@redhat.com>
---

This is an attempt to re-spin this series based on the feedback received
on v3, which pointed out the need to wait for the RCU grace period in
namespace_unlock() before calling the deferred mntput().

v4:
- Use queue_rcu_work() to defer free_mounts() for lazy umounts
- Drop lazy_unlock global and refactor using a helper
v3: https://lore.kernel.org/all/20240626201129.272750-2-lkarpins@redhat.com/
- Removed unneeded code for lazy umount case.
- Don't block within interrupt context.
v2: https://lore.kernel.org/all/20240426195429.28547-1-lkarpins@redhat.com/
- Only defer releasing umount'ed filesystems for lazy umounts
v1: https://lore.kernel.org/all/20230119205521.497401-1-echanude@redhat.com/

 fs/namespace.c | 52 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 14935a0500a2..e5b0b920dd97 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -45,6 +45,11 @@ static unsigned int m_hash_shift __ro_after_init;
 static unsigned int mp_hash_mask __ro_after_init;
 static unsigned int mp_hash_shift __ro_after_init;
 
+struct deferred_free_mounts {
+	struct rcu_work rwork;
+	struct hlist_head release_list;
+};
+
 static __initdata unsigned long mhash_entries;
 static int __init set_mhash_entries(char *str)
 {
@@ -1789,11 +1794,29 @@ static bool need_notify_mnt_list(void)
 }
 #endif
 
-static void namespace_unlock(void)
+static void free_mounts(struct hlist_head *mount_list)
 {
-	struct hlist_head head;
 	struct hlist_node *p;
 	struct mount *m;
+
+	hlist_for_each_entry_safe(m, p, mount_list, mnt_umount) {
+		hlist_del(&m->mnt_umount);
+		mntput(&m->mnt);
+	}
+}
+
+static void defer_free_mounts(struct work_struct *work)
+{
+	struct deferred_free_mounts *d = container_of(
+		to_rcu_work(work), struct deferred_free_mounts, rwork);
+
+	free_mounts(&d->release_list);
+	kfree(d);
+}
+
+static void __namespace_unlock(bool lazy)
+{
+	HLIST_HEAD(head);
 	LIST_HEAD(list);
 
 	hlist_move_list(&unmounted, &head);
@@ -1817,12 +1840,27 @@ static void namespace_unlock(void)
 	if (likely(hlist_empty(&head)))
 		return;
 
-	synchronize_rcu_expedited();
+	if (lazy) {
+		struct deferred_free_mounts *d =
+			kmalloc(sizeof(*d), GFP_KERNEL);
 
-	hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
-		hlist_del(&m->mnt_umount);
-		mntput(&m->mnt);
+		if (unlikely(!d))
+			goto out;
+
+		hlist_move_list(&head, &d->release_list);
+		INIT_RCU_WORK(&d->rwork, defer_free_mounts);
+		queue_rcu_work(system_wq, &d->rwork);
+		return;
 	}
+
+out:
+	synchronize_rcu_expedited();
+	free_mounts(&head);
+}
+
+static inline void namespace_unlock(void)
+{
+	__namespace_unlock(false);
 }
 
 static inline void namespace_lock(void)
@@ -2056,7 +2094,7 @@ static int do_umount(struct mount *mnt, int flags)
 	}
 out:
 	unlock_mount_hash();
-	namespace_unlock();
+	__namespace_unlock(flags & MNT_DETACH);
 	return retval;
 }
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2025-04-23 15:04 UTC | newest]

Thread overview: 47+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-04-08 20:58 [PATCH v4] fs/namespace: defer RCU sync for MNT_DETACH umount Eric Chanudet
2025-04-09 10:37 ` Christian Brauner
2025-04-09 13:14   ` Sebastian Andrzej Siewior
2025-04-09 14:02     ` Mateusz Guzik
2025-04-09 14:25       ` Sebastian Andrzej Siewior
2025-04-09 16:04         ` Christian Brauner
2025-04-10  3:04           ` Ian Kent
2025-04-10  8:28           ` Sebastian Andrzej Siewior
2025-04-10 10:48             ` Christian Brauner
2025-04-10 13:58           ` Ian Kent
2025-04-11  2:36             ` Ian Kent
2025-04-09 16:08         ` Eric Chanudet
2025-04-11 15:17           ` Christian Brauner
2025-04-11 18:30             ` Eric Chanudet
2025-04-09 16:09     ` Christian Brauner
2025-04-10  1:17   ` Ian Kent
2025-04-09 13:04 ` Mateusz Guzik
2025-04-09 16:41   ` Eric Chanudet
2025-04-16 22:11 ` Mark Brown
2025-04-17  9:01   ` Christian Brauner
2025-04-17 10:17     ` Ian Kent
2025-04-17 11:31       ` Christian Brauner
2025-04-17 11:49         ` Mark Brown
2025-04-17 15:12         ` Christian Brauner
2025-04-17 15:28           ` Christian Brauner
2025-04-17 15:31             ` Sebastian Andrzej Siewior
2025-04-17 16:28               ` Christian Brauner
2025-04-17 22:33                 ` Eric Chanudet
2025-04-18  1:13                 ` Ian Kent
2025-04-18  1:20                   ` Ian Kent
2025-04-18  8:47                     ` Christian Brauner
2025-04-18 12:55                       ` Christian Brauner
2025-04-18 19:59                       ` Christian Brauner
2025-04-18 21:20                         ` Eric Chanudet
2025-04-19  1:24                       ` Ian Kent
2025-04-19 10:44                         ` Christian Brauner
2025-04-19 13:26                           ` Christian Brauner
2025-04-21  0:12                             ` Ian Kent
2025-04-21  0:44                               ` Al Viro
2025-04-18  0:31           ` Ian Kent
2025-04-18  8:59             ` Christian Brauner
2025-04-19  1:14               ` Ian Kent
2025-04-20  4:24           ` Al Viro
2025-04-20  5:54 ` Al Viro
2025-04-22 19:53   ` Eric Chanudet
2025-04-23  2:15     ` Al Viro
2025-04-23 15:04       ` Eric Chanudet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).