All of lore.kernel.org
 help / color / mirror / Atom feed
From: Al Viro <viro@zeniv.linux.org.uk>
To: linux-fsdevel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,
	"Lai, Yi" <yi1.lai@linux.intel.com>,
	Tycho Andersen <tycho@tycho.pizza>,
	Andrei Vagin <avagin@google.com>,
	Pavel Tikhomirov <snorcht@gmail.com>
Subject: Re: [PATCHES][RFC][CFT] mount fixes
Date: Sat, 16 Aug 2025 16:58:32 +0100	[thread overview]
Message-ID: <20250816155832.GT222315@ZenIV> (raw)
In-Reply-To: <20250815233316.GS222315@ZenIV>

On Sat, Aug 16, 2025 at 12:33:16AM +0100, Al Viro wrote:

> 4) change_mnt_propagation() slowdown in some cases.  On umount we want all
> victims out of propagation graph and propagation between the surviving mounts
> to be unchanged.  So if victim used to have slaves, they need to be transferred
> to its peer (if any) or master.  In case when victim had many peers, all
> taken out by that umount(), that ended up with all its slaves being gradually
> transferred between all peers until we finally ran out of those.  It can
> easily lead to quadratic time.  The patch in -rc1 switched that to "just
> find where they'll end up upfront, and move them once", which eliminated
> that... except that I hadn't noticed that after the rework of change_mnt_propagation()
> we ended up calculating the place where they'd be transferred in cases
> when there had been nothing to transfer.  With obvious effects when there
> had been a large peer group entirely taken out, with not a single slave between
> them.  The minimal fix ("call propagation_source() only if we are going to
> use its return value") is enough to recover in all cases.
> Longer term we should kick all victims out of propagation graph at once
> and I have that plotted out, but that's for the next merge window; for
> now the minimal obvious fix is good enough.

FWIW, the proposed longer-term fix (on top of this series, completely
untested) would be the patch below.  Basically, calculate where the slaves
end up for all mounts to be removed, taking the mounts themselves out
of propagation graph, then do all transfers; duplicate work on finding
destinations is avoided that way, since if we run into a mount that
already had destination found, we don't need to trace the rest of the way.
That's guaranteed O(removed mounts) for finding destinations and removing
from propagation graph and O(surviving mounts that have master removed)
for transfers.

diff --git a/fs/namespace.c b/fs/namespace.c
index 88db58061919..5c68a05f9679 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1842,6 +1842,8 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 	if (how & UMOUNT_PROPAGATE)
 		propagate_umount(&tmp_list);
 
+	bulk_make_private(&tmp_list);
+
 	while (!list_empty(&tmp_list)) {
 		struct mnt_namespace *ns;
 		bool disconnect;
@@ -1866,7 +1868,6 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 				umount_mnt(p);
 			}
 		}
-		change_mnt_propagation(p, MS_PRIVATE);
 		if (disconnect)
 			hlist_add_head(&p->mnt_umount, &unmounted);
 
diff --git a/fs/pnode.c b/fs/pnode.c
index 6f7d02f3fa98..9fe2ddaf52db 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -70,19 +70,6 @@ static inline bool will_be_unmounted(struct mount *m)
 	return m->mnt.mnt_flags & MNT_UMOUNT;
 }
 
-static struct mount *propagation_source(struct mount *mnt)
-{
-	do {
-		struct mount *m;
-		for (m = next_peer(mnt); m != mnt; m = next_peer(m)) {
-			if (!will_be_unmounted(m))
-				return m;
-		}
-		mnt = mnt->mnt_master;
-	} while (mnt && will_be_unmounted(mnt));
-	return mnt;
-}
-
 static void transfer_propagation(struct mount *mnt, struct mount *to)
 {
 	struct hlist_node *p = NULL, *n;
@@ -111,11 +98,10 @@ void change_mnt_propagation(struct mount *mnt, int type)
 		return;
 	}
 	if (IS_MNT_SHARED(mnt)) {
-		if (type == MS_SLAVE || !hlist_empty(&mnt->mnt_slave_list))
-			m = propagation_source(mnt);
 		if (list_empty(&mnt->mnt_share)) {
 			mnt_release_group_id(mnt);
 		} else {
+			m = next_peer(mnt);
 			list_del_init(&mnt->mnt_share);
 			mnt->mnt_group_id = 0;
 		}
@@ -136,6 +122,57 @@ void change_mnt_propagation(struct mount *mnt, int type)
 	}
 }
 
+static struct mount *trace_transfers(struct mount *m)
+{
+	while (1) {
+		struct mount *next = next_peer(m);
+
+		if (next != m) {
+			list_del_init(&m->mnt_share);
+			m->mnt_group_id = 0;
+			m->mnt_master = next;
+		} else {
+			if (IS_MNT_SHARED(m))
+				mnt_release_group_id(m);
+			next = m->mnt_master;
+		}
+		hlist_del_init(&m->mnt_slave);
+		CLEAR_MNT_SHARED(m);
+		SET_MNT_MARK(m);
+
+		if (!next || !will_be_unmounted(next))
+			return next;
+		if (IS_MNT_MARKED(next))
+			return next->mnt_master;
+		m = next;
+	}
+}
+
+static void set_destinations(struct mount *m, struct mount *master)
+{
+	struct mount *next;
+
+	while ((next = m->mnt_master) != master) {
+		m->mnt_master = master;
+		m = next;
+	}
+}
+
+void bulk_make_private(struct list_head *set)
+{
+	struct mount *m;
+
+	list_for_each_entry(m, set, mnt_list)
+		if (!IS_MNT_MARKED(m))
+			set_destinations(m, trace_transfers(m));
+
+	list_for_each_entry(m, set, mnt_list) {
+		transfer_propagation(m, m->mnt_master);
+		m->mnt_master = NULL;
+		CLEAR_MNT_MARK(m);
+	}
+}
+
 static struct mount *__propagation_next(struct mount *m,
 					 struct mount *origin)
 {
diff --git a/fs/pnode.h b/fs/pnode.h
index 00ab153e3e9d..b029db225f33 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -42,6 +42,7 @@ static inline bool peers(const struct mount *m1, const struct mount *m2)
 }
 
 void change_mnt_propagation(struct mount *, int);
+void bulk_make_private(struct list_head *);
 int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
 		struct hlist_head *);
 void propagate_umount(struct list_head *);

  parent reply	other threads:[~2025-08-16 15:58 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-15 23:33 [PATCHES][RFC][CFT] mount fixes Al Viro
2025-08-15 23:34 ` [PATCH 1/4] fix the softlockups in attach_recursive_mnt() Al Viro
2025-08-19 10:18   ` Christian Brauner
2025-08-15 23:34 ` [PATCH 2/4] propagate_umount(): only surviving overmounts should be remounted Al Viro
2025-08-19 10:19   ` Christian Brauner
2025-08-15 23:35 ` [PATCH 3/4] use uniform permission checks for all mount propagation changes Al Viro
2025-08-16 18:28   ` Andrei Vagin
2025-08-19  4:44   ` Pavel Tikhomirov
2025-08-19 10:20   ` Christian Brauner
2025-08-15 23:36 ` [PATCH 4/4] change_mnt_propagation(): calculate propagation source only if we'll need it Al Viro
2025-08-19 10:20   ` Christian Brauner
2025-08-16 15:58 ` Al Viro [this message]
2025-08-19 16:12 ` [git pull] mount fixes Al Viro
2025-08-19 17:31   ` Linus Torvalds
2025-08-19 17:33   ` pr-tracker-bot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250816155832.GT222315@ZenIV \
    --to=viro@zeniv.linux.org.uk \
    --cc=avagin@google.com \
    --cc=brauner@kernel.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=snorcht@gmail.com \
    --cc=torvalds@linux-foundation.org \
    --cc=tycho@tycho.pizza \
    --cc=yi1.lai@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.