linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christian Brauner <brauner@kernel.org>
To: Jan Kara <jack@suse.cz>, Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>,
	linux-fsdevel@vger.kernel.org,
	Christian Brauner <brauner@kernel.org>
Subject: [PATCH v2 4/4] super: wait until we passed kill super
Date: Fri, 18 Aug 2023 12:54:18 +0200	[thread overview]
Message-ID: <20230818-vfs-super-fixes-v3-v2-4-cdab45934983@kernel.org> (raw)
In-Reply-To: <20230818-vfs-super-fixes-v3-v2-0-cdab45934983@kernel.org>

Recent rework moved block device closing out of sb->put_super() and into
sb->kill_sb() to avoid deadlocks as s_umount is held in put_super() and
blkdev_put() can end up taking s_umount again.

That means we need to move the removal of the superblock from @fs_supers
out of generic_shutdown_super() and into deactivate_locked_super() to
ensure that concurrent mounters don't fail to open block devices that
are still in use because blkdev_put() in sb->kill_sb() hasn't been
called yet.

We can now do this as we can make iterators through @fs_super and
@super_blocks wait without holding s_umount. Concurrent mounts will wait
until a dying superblock is fully dead so until sb->kill_sb() has been
called and SB_DEAD been set. Concurrent iterators can already discard
any SB_DYING superblock.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/super.c         | 100 +++++++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/fs.h |   1 +
 2 files changed, 94 insertions(+), 7 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 52e7b4153035..045d8611c44b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -93,6 +93,18 @@ static inline bool wait_born(struct super_block *sb)
 	return flags & (SB_BORN | SB_DYING);
 }
 
+static inline bool wait_dead(struct super_block *sb)
+{
+	unsigned int flags;
+
+	/*
+	 * Pairs with smp_store_release() in super_wake() and ensures
+	 * that we see SB_DEAD after we're woken.
+	 */
+	flags = smp_load_acquire(&sb->s_flags);
+	return flags & SB_DEAD;
+}
+
 /**
  * super_lock - wait for superblock to become ready
  * @sb: superblock to wait for
@@ -140,6 +152,33 @@ static bool super_lock(struct super_block *sb, bool excl)
 	goto relock;
 }
 
+/**
+ * super_lock_dead - wait for superblock to become ready or fully dead
+ * @sb: superblock to wait for
+ *
+ * Wait for a superblock to be SB_BORN or to be SB_DEAD. In other words,
+ * don't just wait for the superblock to be shutdown in
+ * generic_shutdown_super() but actually wait until sb->kill_sb() has
+ * finished.
+ *
+ * The caller must have acquired a temporary reference on @sb->s_count.
+ *
+ * Return: This returns true if SB_BORN was set, false if SB_DEAD was
+ *         set. The function acquires s_umount and returns with it held.
+ */
+static bool super_lock_dead(struct super_block *sb)
+{
+	if (super_lock(sb, true))
+		return true;
+
+	lockdep_assert_held(&sb->s_umount);
+	super_unlock_excl(sb);
+	/* If superblock is dying, wait for everything to be shutdown. */
+	wait_var_event(&sb->s_flags, wait_dead(sb));
+	__super_lock_excl(sb);
+	return false;
+}
+
 /* wait and acquire read-side of @sb->s_umount */
 static inline bool super_lock_shared(struct super_block *sb)
 {
@@ -153,7 +192,7 @@ static inline bool super_lock_excl(struct super_block *sb)
 }
 
 /* wake waiters */
-#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING)
+#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD)
 static void super_wake(struct super_block *sb, unsigned int flag)
 {
 	unsigned int flags = sb->s_flags;
@@ -169,6 +208,35 @@ static void super_wake(struct super_block *sb, unsigned int flag)
 	wake_up_var(&sb->s_flags);
 }
 
+/**
+ * grab_super_dead - acquire an active reference to a superblock
+ * @sb: superblock to acquire
+ *
+ * Acquire a temporary reference on a superblock and try to trade it for
+ * an active reference. This is used in sget{_fc}() to wait for a
+ * superblock to either become SB_BORN or for it to pass through
+ * sb->kill() and be marked as SB_DEAD.
+ *
+ * Return: This returns true if an active reference could be acquired,
+ *         false if not. The function acquires s_umount and returns with
+ *         it held.
+ */
+static bool grab_super_dead(struct super_block *s) __releases(sb_lock)
+{
+	bool born;
+
+	s->s_count++;
+	spin_unlock(&sb_lock);
+	born = super_lock_dead(s);
+	if (born && atomic_inc_not_zero(&s->s_active)) {
+		put_super(s);
+		return true;
+	}
+	up_write(&s->s_umount);
+	put_super(s);
+	return false;
+}
+
 /*
  * One thing we have to be careful of with a per-sb shrinker is that we don't
  * drop the last active reference to the superblock from within the shrinker.
@@ -456,6 +524,25 @@ void deactivate_locked_super(struct super_block *s)
 		list_lru_destroy(&s->s_dentry_lru);
 		list_lru_destroy(&s->s_inode_lru);
 
+		/*
+		 * Remove it from @fs_supers so it isn't found by new
+		 * sget{_fc}() walkers anymore. Any concurrent mounter still
+		 * managing to grab a temporary reference is guaranteed to
+		 * already see SB_DYING and will wait until we notify them about
+		 * SB_DEAD.
+		 */
+		spin_lock(&sb_lock);
+		hlist_del_init(&s->s_instances);
+		spin_unlock(&sb_lock);
+
+		/*
+		 * Let concurrent mounts know that this thing is really dead.
+		 * We don't need @sb->s_umount here as every concurrent caller
+		 * will see SB_DYING and either discard the superblock or wait
+		 * for SB_DEAD.
+		 */
+		super_wake(s, SB_DEAD);
+
 		put_filesystem(fs);
 		put_super(s);
 	} else {
@@ -638,15 +725,14 @@ void generic_shutdown_super(struct super_block *sb)
 			spin_unlock(&sb->s_inode_list_lock);
 		}
 	}
-	spin_lock(&sb_lock);
-	/* should be initialized for __put_super_and_need_restart() */
-	hlist_del_init(&sb->s_instances);
-	spin_unlock(&sb_lock);
 	/*
 	 * Broadcast to everyone that grabbed a temporary reference to this
 	 * superblock before we removed it from @fs_supers that the superblock
 	 * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
 	 * discard this superblock and treat it as dead.
+	 *
+	 * We leave the superblock on @fs_supers so it can be found by
+	 * sget{_fc}() until we passed sb->kill_sb().
 	 */
 	super_wake(sb, SB_DYING);
 	super_unlock_excl(sb);
@@ -741,7 +827,7 @@ struct super_block *sget_fc(struct fs_context *fc,
 		destroy_unused_super(s);
 		return ERR_PTR(-EBUSY);
 	}
-	if (!grab_super(old))
+	if (!grab_super_dead(old))
 		goto retry;
 	destroy_unused_super(s);
 	return old;
@@ -785,7 +871,7 @@ struct super_block *sget(struct file_system_type *type,
 				destroy_unused_super(s);
 				return ERR_PTR(-EBUSY);
 			}
-			if (!grab_super(old))
+			if (!grab_super_dead(old))
 				goto retry;
 			destroy_unused_super(s);
 			return old;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 173672645156..a63da68305e9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1095,6 +1095,7 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_LAZYTIME     BIT(25)	/* Update the on-disk [acm]times lazily */
 
 /* These sb flags are internal to the kernel */
+#define SB_DEAD         BIT(21)
 #define SB_DYING        BIT(24)
 #define SB_SUBMOUNT     BIT(26)
 #define SB_FORCE        BIT(27)

-- 
2.34.1


  parent reply	other threads:[~2023-08-18 10:55 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-18 10:54 [PATCH v2 0/4] super: allow waiting without s_umount held Christian Brauner
2023-08-18 10:54 ` [PATCH v2 1/4] super: use locking helpers Christian Brauner
2023-08-18 11:08   ` Jan Kara
2023-08-18 10:54 ` [PATCH v2 2/4] super: make locking naming consistent Christian Brauner
2023-08-18 11:08   ` Jan Kara
2023-08-18 10:54 ` [PATCH v2 3/4] super: wait for nascent superblocks Christian Brauner
2023-08-18 12:02   ` Jan Kara
2023-08-18 12:46     ` Christian Brauner
2023-08-18 13:06       ` Jan Kara
2023-08-18 10:54 ` Christian Brauner [this message]
2023-08-18 12:26   ` [PATCH v2 4/4] super: wait until we passed kill super Jan Kara
2023-08-18 12:48     ` Christian Brauner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230818-vfs-super-fixes-v3-v2-4-cdab45934983@kernel.org \
    --to=brauner@kernel.org \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).