From: Christian Brauner <brauner@kernel.org>
To: Jan Kara <jack@suse.cz>, Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>,
linux-fsdevel@vger.kernel.org,
Christian Brauner <brauner@kernel.org>
Subject: [PATCH v3 4/4] super: wait until we passed kill super
Date: Fri, 18 Aug 2023 16:00:51 +0200 [thread overview]
Message-ID: <20230818-vfs-super-fixes-v3-v3-4-9f0b1876e46b@kernel.org> (raw)
In-Reply-To: <20230818-vfs-super-fixes-v3-v3-0-9f0b1876e46b@kernel.org>
Recent rework moved block device closing out of sb->put_super() and into
sb->kill_sb() to avoid deadlocks as s_umount is held in put_super() and
blkdev_put() can end up taking s_umount again.
That means we need to move the removal of the superblock from @fs_supers
out of generic_shutdown_super() and into deactivate_locked_super() to
ensure that concurrent mounters don't fail to open block devices that
are still in use because blkdev_put() in sb->kill_sb() hasn't been
called yet.
We can now do this as we can make iterators through @fs_supers and
@super_blocks wait without holding s_umount. Concurrent mounts will wait
until a dying superblock is fully dead, i.e., until sb->kill_sb() has been
called and SB_DEAD has been set. Concurrent iterators can already discard
any SB_DYING superblock.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/super.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++------
include/linux/fs.h | 1 +
2 files changed, 65 insertions(+), 7 deletions(-)
diff --git a/fs/super.c b/fs/super.c
index 896f05f34377..015e428818ce 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -153,7 +153,7 @@ static inline bool super_lock_excl(struct super_block *sb)
}
/* wake waiters */
-#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING)
+#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD)
static void super_wake(struct super_block *sb, unsigned int flag)
{
WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS));
@@ -457,6 +457,25 @@ void deactivate_locked_super(struct super_block *s)
list_lru_destroy(&s->s_dentry_lru);
list_lru_destroy(&s->s_inode_lru);
+ /*
+ * Remove it from @fs_supers so it isn't found by new
+ * sget{_fc}() walkers anymore. Any concurrent mounter still
+ * managing to grab a temporary reference is guaranteed to
+ * already see SB_DYING and will wait until we notify them about
+ * SB_DEAD.
+ */
+ spin_lock(&sb_lock);
+ hlist_del_init(&s->s_instances);
+ spin_unlock(&sb_lock);
+
+ /*
+ * Let concurrent mounts know that this thing is really dead.
+ * We don't need @sb->s_umount here as every concurrent caller
+ * will see SB_DYING and either discard the superblock or wait
+ * for SB_DEAD.
+ */
+ super_wake(s, SB_DEAD);
+
put_filesystem(fs);
put_super(s);
} else {
@@ -513,6 +532,45 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
return 0;
}
+static inline bool wait_dead(struct super_block *sb)
+{
+ unsigned int flags;
+
+ /*
+ * Acquire load: pairs with the memory barrier in super_wake() and
+ * ensures that we see SB_DEAD once we've been woken.
+ */
+ flags = smp_load_acquire(&sb->s_flags);
+ return flags & SB_DEAD;
+}
+
+/**
+ * grab_super_dead - acquire an active reference to a superblock
+ * @sb: superblock to acquire
+ *
+ * Acquire a temporary reference on a superblock and try to trade it for
+ * an active reference. This is used in sget{_fc}() to wait for a
+ * superblock to either become SB_BORN or for it to pass through
+ * sb->kill_sb() and be marked as SB_DEAD.
+ *
+ * Return: This returns true if an active reference could be acquired,
+ * false if not.
+ */
+static bool grab_super_dead(struct super_block *sb)
+{
+
+ sb->s_count++;
+ if (grab_super(sb)) {
+ put_super(sb);
+ lockdep_assert_held(&sb->s_umount);
+ return true;
+ }
+ wait_var_event(&sb->s_flags, wait_dead(sb));
+ put_super(sb);
+ lockdep_assert_not_held(&sb->s_umount);
+ return false;
+}
+
/*
* super_trylock_shared - try to grab ->s_umount shared
* @sb: reference we are trying to grab
@@ -639,15 +697,14 @@ void generic_shutdown_super(struct super_block *sb)
spin_unlock(&sb->s_inode_list_lock);
}
}
- spin_lock(&sb_lock);
- /* should be initialized for __put_super_and_need_restart() */
- hlist_del_init(&sb->s_instances);
- spin_unlock(&sb_lock);
/*
* Broadcast to everyone that grabbed a temporary reference to this
* superblock before we removed it from @fs_supers that the superblock
* is dying. Every walker of @fs_supers outside of sget{_fc}() will now
* discard this superblock and treat it as dead.
+ *
+ * We leave the superblock on @fs_supers so it can be found by
+ * sget{_fc}() until we passed sb->kill_sb().
*/
super_wake(sb, SB_DYING);
super_unlock_excl(sb);
@@ -742,7 +799,7 @@ struct super_block *sget_fc(struct fs_context *fc,
destroy_unused_super(s);
return ERR_PTR(-EBUSY);
}
- if (!grab_super(old))
+ if (!grab_super_dead(old))
goto retry;
destroy_unused_super(s);
return old;
@@ -786,7 +843,7 @@ struct super_block *sget(struct file_system_type *type,
destroy_unused_super(s);
return ERR_PTR(-EBUSY);
}
- if (!grab_super(old))
+ if (!grab_super_dead(old))
goto retry;
destroy_unused_super(s);
return old;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 173672645156..a63da68305e9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1095,6 +1095,7 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */
+#define SB_DEAD BIT(21)
#define SB_DYING BIT(24)
#define SB_SUBMOUNT BIT(26)
#define SB_FORCE BIT(27)
--
2.34.1
next prev parent reply other threads:[~2023-08-18 14:02 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-18 14:00 [PATCH v3 0/4] super: allow waiting without s_umount held Christian Brauner
2023-08-18 14:00 ` [PATCH v3 1/4] super: use locking helpers Christian Brauner
2023-08-18 14:00 ` [PATCH v3 2/4] super: make locking naming consistent Christian Brauner
2023-08-18 14:00 ` [PATCH v3 3/4] super: wait for nascent superblocks Christian Brauner
2023-08-18 14:57 ` Matthew Wilcox
2023-08-18 15:21 ` Christian Brauner
2023-08-21 15:52 ` Jan Kara
2023-08-21 15:56 ` Christian Brauner
2023-08-21 16:04 ` Jan Kara
2023-08-21 16:02 ` Jan Kara
2023-08-21 16:08 ` Christian Brauner
2023-08-18 14:00 ` Christian Brauner [this message]
2023-08-21 15:57 ` [PATCH v3 4/4] super: wait until we passed kill super Jan Kara
2023-08-22 12:36 ` [PATCH v3 0/4] super: allow waiting without s_umount held Christian Brauner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230818-vfs-super-fixes-v3-v3-4-9f0b1876e46b@kernel.org \
--to=brauner@kernel.org \
--cc=hch@infradead.org \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).