From: Al Viro <viro@zeniv.linux.org.uk>
To: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: linux-fsdevel@vger.kernel.org,
Linus Torvalds <torvalds@linux-foundation.org>,
Christian Brauner <brauner@kernel.org>
Subject: Re: [BUG] propagate_umount() breakage
Date: Thu, 15 May 2025 12:41:50 +0100 [thread overview]
Message-ID: <20250515114150.GA3221059@ZenIV> (raw)
In-Reply-To: <20250513035622.GE2023217@ZenIV>
On Tue, May 13, 2025 at 04:56:22AM +0100, Al Viro wrote:
> And yes, it's going to be posted along with the proof of correctness -
> I've had it with the amount of subtle corner cases in that area ;-/
FWIW, it seems to survive testing; it's _not_ final - I'm still editing
the promised proof, but by this point it's stylistic work. I hadn't
touched that kind of formal writing for quite a while, so the text is clumsy.
The code changes are pretty much in the final shape. Current diff of
code (all in fs/pnode.*) relative to mainline:
diff --git a/fs/pnode.c b/fs/pnode.c
index fb77427df39e..32ecebbc1d19 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -24,11 +24,6 @@ static inline struct mount *first_slave(struct mount *p)
return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
}
-static inline struct mount *last_slave(struct mount *p)
-{
- return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
-}
-
static inline struct mount *next_slave(struct mount *p)
{
return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
@@ -136,6 +131,23 @@ void change_mnt_propagation(struct mount *mnt, int type)
}
}
+static struct mount *__propagation_next(struct mount *m,
+ struct mount *origin)
+{
+ while (1) {
+ struct mount *master = m->mnt_master;
+
+ if (master == origin->mnt_master) {
+ struct mount *next = next_peer(m);
+ return (next == origin) ? NULL : next;
+ } else if (m->mnt_slave.next != &master->mnt_slave_list)
+ return next_slave(m);
+
+ /* back at master */
+ m = master;
+ }
+}
+
/*
* get the next mount in the propagation tree.
* @m: the mount seen last
@@ -153,31 +165,26 @@ static struct mount *propagation_next(struct mount *m,
if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
return first_slave(m);
- while (1) {
- struct mount *master = m->mnt_master;
-
- if (master == origin->mnt_master) {
- struct mount *next = next_peer(m);
- return (next == origin) ? NULL : next;
- } else if (m->mnt_slave.next != &master->mnt_slave_list)
- return next_slave(m);
+ return __propagation_next(m, origin);
+}
- /* back at master */
- m = master;
- }
+static inline bool peers(const struct mount *m1, const struct mount *m2)
+{
+ return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
}
static struct mount *skip_propagation_subtree(struct mount *m,
struct mount *origin)
{
/*
- * Advance m such that propagation_next will not return
- * the slaves of m.
+ * Advance m past everything that gets propagation from it.
*/
- if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
- m = last_slave(m);
+ struct mount *p = __propagation_next(m, origin);
- return m;
+ while (p && peers(m, p))
+ p = __propagation_next(p, origin);
+
+ return p;
}
static struct mount *next_group(struct mount *m, struct mount *origin)
@@ -216,11 +223,6 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
static struct mount *last_dest, *first_source, *last_source, *dest_master;
static struct hlist_head *list;
-static inline bool peers(const struct mount *m1, const struct mount *m2)
-{
- return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
-}
-
static int propagate_one(struct mount *m, struct mountpoint *dest_mp)
{
struct mount *child;
@@ -463,109 +465,144 @@ void propagate_mount_unlock(struct mount *mnt)
}
}
-static void umount_one(struct mount *mnt, struct list_head *to_umount)
+static LIST_HEAD(to_umount);
+static LIST_HEAD(candidates);
+
+static inline struct mount *first_candidate(void)
{
- CLEAR_MNT_MARK(mnt);
- mnt->mnt.mnt_flags |= MNT_UMOUNT;
- list_del_init(&mnt->mnt_child);
- list_del_init(&mnt->mnt_umounting);
- move_from_ns(mnt, to_umount);
+ if (list_empty(&candidates))
+ return NULL;
+ return list_first_entry(&candidates, struct mount, mnt_umounting);
}
-/*
- * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
- * parent propagates to.
- */
-static bool __propagate_umount(struct mount *mnt,
- struct list_head *to_umount,
- struct list_head *to_restore)
+static inline bool is_candidate(struct mount *m)
{
- bool progress = false;
- struct mount *child;
-
- /*
- * The state of the parent won't change if this mount is
- * already unmounted or marked as without children.
- */
- if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
- goto out;
+ return !list_empty(&m->mnt_umounting);
+}
- /* Verify topper is the only grandchild that has not been
- * speculatively unmounted.
- */
- list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
- if (child->mnt_mountpoint == mnt->mnt.mnt_root)
- continue;
- if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
- continue;
- /* Found a mounted child */
- goto children;
- }
+static inline bool will_be_unmounted(struct mount *m)
+{
+ return m->mnt.mnt_flags & MNT_UMOUNT;
+}
- /* Mark mounts that can be unmounted if not locked */
- SET_MNT_MARK(mnt);
- progress = true;
+static void umount_one(struct mount *m)
+{
+ m->mnt.mnt_flags |= MNT_UMOUNT;
+ list_del_init(&m->mnt_child);
+ move_from_ns(m, &to_umount);
+}
- /* If a mount is without children and not locked umount it. */
- if (!IS_MNT_LOCKED(mnt)) {
- umount_one(mnt, to_umount);
- } else {
-children:
- list_move_tail(&mnt->mnt_umounting, to_restore);
- }
-out:
- return progress;
+static void remove_candidate(struct mount *m)
+{
+ CLEAR_MNT_MARK(m);
+ list_del_init(&m->mnt_umounting);
}
-static void umount_list(struct list_head *to_umount,
- struct list_head *to_restore)
+static void gather_candidates(struct list_head *set)
{
- struct mount *mnt, *child, *tmp;
- list_for_each_entry(mnt, to_umount, mnt_list) {
- list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
- /* topper? */
- if (child->mnt_mountpoint == mnt->mnt.mnt_root)
- list_move_tail(&child->mnt_umounting, to_restore);
- else
- umount_one(child, to_umount);
+ LIST_HEAD(visited);
+ struct mount *m, *p, *q;
+
+ list_for_each_entry(m, set, mnt_list) {
+ if (is_candidate(m))
+ continue;
+ list_add(&m->mnt_umounting, &visited);
+ p = m->mnt_parent;
+ q = propagation_next(p, p);
+ while (q) {
+ struct mount *child = __lookup_mnt(&q->mnt,
+ m->mnt_mountpoint);
+ if (child) {
+ struct list_head *head;
+
+ if (is_candidate(child)) {
+ q = skip_propagation_subtree(q, p);
+ continue;
+ }
+ if (will_be_unmounted(child))
+ head = &visited;
+ else
+ head = &candidates;
+ list_add(&child->mnt_umounting, head);
+ }
+ q = propagation_next(q, p);
}
}
+ while (!list_empty(&visited)) // empty visited
+ list_del_init(visited.next);
}
-static void restore_mounts(struct list_head *to_restore)
+static struct mount *trim_one(struct mount *m)
{
- /* Restore mounts to a clean working state */
- while (!list_empty(to_restore)) {
- struct mount *mnt, *parent;
- struct mountpoint *mp;
-
- mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
- CLEAR_MNT_MARK(mnt);
- list_del_init(&mnt->mnt_umounting);
-
- /* Should this mount be reparented? */
- mp = mnt->mnt_mp;
- parent = mnt->mnt_parent;
- while (parent->mnt.mnt_flags & MNT_UMOUNT) {
- mp = parent->mnt_mp;
- parent = parent->mnt_parent;
+ bool remove_this = false, found = false, umount_this = false;
+ struct mount *n;
+ struct list_head *next;
+
+ list_for_each_entry(n, &m->mnt_mounts, mnt_child) {
+ if (!is_candidate(n)) {
+ found = true;
+ if (n->mnt_mountpoint != m->mnt.mnt_root) {
+ remove_this = true;
+ break;
+ }
}
- if (parent != mnt->mnt_parent) {
- mnt_change_mountpoint(parent, mp, mnt);
- mnt_notify_add(mnt);
+ }
+ if (found) {
+ struct mount *this = m;
+ for (struct mount *p = this->mnt_parent;
+ !IS_MNT_MARKED(this) && is_candidate(p);
+ this = p, p = p->mnt_parent) {
+ SET_MNT_MARK(this);
+ if (this->mnt_mountpoint != p->mnt.mnt_root)
+ remove_candidate(p);
}
+ } else if (!IS_MNT_LOCKED(m) && list_empty(&m->mnt_mounts)) {
+ remove_this = true;
+ umount_this = true;
}
+ next = m->mnt_umounting.next;
+ if (remove_this) {
+ remove_candidate(m);
+ if (umount_this)
+ umount_one(m);
+ }
+ if (next == &candidates)
+ return NULL;
+
+ return list_entry(next, struct mount, mnt_umounting);
}
-static void cleanup_umount_visitations(struct list_head *visited)
+static void handle_locked(struct mount *m)
{
- while (!list_empty(visited)) {
- struct mount *mnt =
- list_first_entry(visited, struct mount, mnt_umounting);
- list_del_init(&mnt->mnt_umounting);
+ struct mount *cutoff = m, *p;
+
+ for (p = m; is_candidate(p); p = p->mnt_parent) {
+ remove_candidate(p);
+ if (!IS_MNT_LOCKED(p))
+ cutoff = p->mnt_parent;
+ }
+ if (will_be_unmounted(p))
+ cutoff = p;
+ while (m != cutoff) {
+ umount_one(m);
+ m = m->mnt_parent;
}
}
+static void reparent(struct mount *m)
+{
+ struct mount *p = m;
+ struct mountpoint *mp;
+
+ do {
+ mp = p->mnt_mp;
+ p = p->mnt_parent;
+ } while (will_be_unmounted(p));
+
+ mnt_change_mountpoint(p, mp, m);
+ mnt_notify_add(m);
+}
+
/*
* collect all mounts that receive propagation from the mount in @list,
* and return these additional mounts in the same list.
@@ -573,71 +610,27 @@ static void cleanup_umount_visitations(struct list_head *visited)
*
* vfsmount lock must be held for write
*/
-int propagate_umount(struct list_head *list)
+void propagate_umount(struct list_head *set)
{
- struct mount *mnt;
- LIST_HEAD(to_restore);
- LIST_HEAD(to_umount);
- LIST_HEAD(visited);
+ struct mount *m;
- /* Find candidates for unmounting */
- list_for_each_entry_reverse(mnt, list, mnt_list) {
- struct mount *parent = mnt->mnt_parent;
- struct mount *m;
+ gather_candidates(set);
- /*
- * If this mount has already been visited it is known that it's
- * entire peer group and all of their slaves in the propagation
- * tree for the mountpoint has already been visited and there is
- * no need to visit them again.
- */
- if (!list_empty(&mnt->mnt_umounting))
- continue;
+ // make it non-shifting
+ for (m = first_candidate(); m; m = trim_one(m))
+ ;
- list_add_tail(&mnt->mnt_umounting, &visited);
- for (m = propagation_next(parent, parent); m;
- m = propagation_next(m, parent)) {
- struct mount *child = __lookup_mnt(&m->mnt,
- mnt->mnt_mountpoint);
- if (!child)
- continue;
-
- if (!list_empty(&child->mnt_umounting)) {
- /*
- * If the child has already been visited it is
- * know that it's entire peer group and all of
- * their slaves in the propgation tree for the
- * mountpoint has already been visited and there
- * is no need to visit this subtree again.
- */
- m = skip_propagation_subtree(m, parent);
- continue;
- } else if (child->mnt.mnt_flags & MNT_UMOUNT) {
- /*
- * We have come across a partially unmounted
- * mount in a list that has not been visited
- * yet. Remember it has been visited and
- * continue about our merry way.
- */
- list_add_tail(&child->mnt_umounting, &visited);
- continue;
- }
+ // ... and non-revealing
+ while (!list_empty(&candidates))
+ handle_locked(first_candidate());
- /* Check the child and parents while progress is made */
- while (__propagate_umount(child,
- &to_umount, &to_restore)) {
- /* Is the parent a umount candidate? */
- child = child->mnt_parent;
- if (list_empty(&child->mnt_umounting))
- break;
- }
- }
+ // now to_umount consists of all non-rejected candidates
+ // deal with reparenting of remaining overmounts on those
+ list_for_each_entry(m, &to_umount, mnt_list) {
+ while (!list_empty(&m->mnt_mounts)) // should be at most one
+ reparent(list_first_entry(&m->mnt_mounts,
+ struct mount, mnt_child));
}
- umount_list(&to_umount, &to_restore);
- restore_mounts(&to_restore);
- cleanup_umount_visitations(&visited);
- list_splice_tail(&to_umount, list);
-
- return 0;
+ list_splice_tail_init(&to_umount, set);
}
diff --git a/fs/pnode.h b/fs/pnode.h
index 34b6247af01d..d84a397bfd43 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -39,7 +39,7 @@ static inline void set_mnt_shared(struct mount *mnt)
void change_mnt_propagation(struct mount *, int);
int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
struct hlist_head *);
-int propagate_umount(struct list_head *);
+void propagate_umount(struct list_head *);
int propagate_mount_busy(struct mount *, int);
void propagate_mount_unlock(struct mount *);
void mnt_release_group_id(struct mount *);
next prev parent reply other threads:[~2025-05-15 11:41 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-11 23:27 [BUG] propagate_umount() breakage Al Viro
2025-05-12 4:50 ` Eric W. Biederman
2025-05-13 3:56 ` Al Viro
2025-05-15 11:41 ` Al Viro [this message]
2025-05-15 11:47 ` Al Viro
2025-05-16 5:21 ` [RFC][CFT][PATCH] Rewrite of propagate_umount() (was Re: [BUG] propagate_umount() breakage) Al Viro
2025-05-19 18:11 ` Linus Torvalds
2025-05-19 21:35 ` Al Viro
2025-05-19 22:08 ` Linus Torvalds
2025-05-19 22:26 ` Linus Torvalds
2025-05-20 22:27 ` Eric W. Biederman
2025-05-20 23:08 ` Al Viro
2025-05-23 2:10 ` [RFC][CFT][PATCH v2] Rewrite of propagate_umount() Al Viro
[not found] ` <20250520075317.GB2023217@ZenIV>
[not found] ` <87y0uqlvxs.fsf@email.froward.int.ebiederm.org>
[not found] ` <20250520231854.GF2023217@ZenIV>
[not found] ` <20250521023219.GA1309405@ZenIV>
[not found] ` <20250617041754.GA1591763@ZenIV>
2025-06-17 21:18 ` [PATCH][RFC] replace collect_mounts()/drop_collected_mounts() with safer variant Al Viro
2025-05-20 11:10 ` [RFC][CFT][PATCH] Rewrite of propagate_umount() (was Re: [BUG] propagate_umount() breakage) Christian Brauner
2025-05-21 2:11 ` Al Viro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250515114150.GA3221059@ZenIV \
--to=viro@zeniv.linux.org.uk \
--cc=brauner@kernel.org \
--cc=ebiederm@xmission.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).