From: Christian Brauner <brauner@kernel.org>
To: Jan Kara <jack@suse.cz>, Christoph Hellwig <hch@lst.de>
Cc: linux-fsdevel@vger.kernel.org,
"Darrick J. Wong" <djwong@kernel.org>,
Christian Brauner <brauner@kernel.org>
Subject: [PATCH 3/7] bdev: implement freeze and thaw holder operations
Date: Wed, 27 Sep 2023 15:21:16 +0200 [thread overview]
Message-ID: <20230927-vfs-super-freeze-v1-3-ecc36d9ab4d9@kernel.org> (raw)
In-Reply-To: <20230927-vfs-super-freeze-v1-0-ecc36d9ab4d9@kernel.org>
The old method of implementing block device freeze and thaw operations
required us to rely on get_active_super() to walk the list of all
superblocks on the system to find any superblock that might use the
block device. This is wasteful and not very pleasant overall.
Now that we can finally go straight from block device to owning
superblock things become way simpler. In fact, we can even get rid of
bd_fsfreeze_mutex and rely on the holder lock for freezing.
We let fs_bdev_thaw() grab its own active reference so we can hold
bd_holder_lock across freeze and thaw without risk of deadlocks. That in
turn allows us to get rid of bd_fsfreeze_mutex. This means
fs_bdev_thaw() might put the last reference to the superblock instead of
thaw_super(). This shouldn't be an issue. If it turns out to be one we
can reshuffle the code to simply hold s_umount when thaw_super() returns.
Thanks to Jan and Christoph who pointed out that bd_fsfreeze_mutex is
unnecessary now.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
block/bdev.c | 74 +++++++++++++++++--------------------
fs/super.c | 94 ++++++++++++++++++++++++++++++++++++++++++-----
include/linux/blk_types.h | 2 +-
3 files changed, 119 insertions(+), 51 deletions(-)
diff --git a/block/bdev.c b/block/bdev.c
index 0d27db3e69e7..3deccd0ffcf2 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -222,32 +222,23 @@ EXPORT_SYMBOL(sync_blockdev_range);
*/
int bdev_freeze(struct block_device *bdev)
{
- struct super_block *sb;
int error = 0;
- mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (++bdev->bd_fsfreeze_count > 1)
- goto done;
+ mutex_lock(&bdev->bd_holder_lock);
- sb = get_active_super(bdev);
- if (!sb)
- goto sync;
- if (sb->s_op->freeze_super)
- error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
- else
- error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
- deactivate_super(sb);
+ if (atomic_inc_return(&bdev->bd_fsfreeze_count) > 1) {
+ mutex_unlock(&bdev->bd_holder_lock);
+ return 0;
+ }
- if (error) {
- bdev->bd_fsfreeze_count--;
- goto done;
+ if (bdev->bd_holder_ops && bdev->bd_holder_ops->freeze) {
+ error = bdev->bd_holder_ops->freeze(bdev);
+ lockdep_assert_not_held(&bdev->bd_holder_lock);
+ } else {
+ sync_blockdev(bdev);
+ mutex_unlock(&bdev->bd_holder_lock);
}
- bdev->bd_fsfreeze_sb = sb;
-sync:
- sync_blockdev(bdev);
-done:
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
return error;
}
EXPORT_SYMBOL(bdev_freeze);
@@ -262,31 +253,32 @@ EXPORT_SYMBOL(bdev_freeze);
*/
int bdev_thaw(struct block_device *bdev)
{
- struct super_block *sb;
- int error = -EINVAL;
+ int error = 0, nr_freeze;
- mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (!bdev->bd_fsfreeze_count)
- goto out;
+ mutex_lock(&bdev->bd_holder_lock);
- error = 0;
- if (--bdev->bd_fsfreeze_count > 0)
- goto out;
+ /*
+ * If this returns < 0 it means that @bd_fsfreeze_count was
+ * already 0 and no decrement was performed.
+ */
+ nr_freeze = atomic_dec_if_positive(&bdev->bd_fsfreeze_count);
+ if (nr_freeze < 0) {
+ mutex_unlock(&bdev->bd_holder_lock);
+ return -EINVAL;
+ }
- sb = bdev->bd_fsfreeze_sb;
- if (!sb)
- goto out;
+ if (nr_freeze > 0) {
+ mutex_unlock(&bdev->bd_holder_lock);
+ return 0;
+ }
+
+ if (bdev->bd_holder_ops && bdev->bd_holder_ops->thaw) {
+ error = bdev->bd_holder_ops->thaw(bdev);
+ lockdep_assert_not_held(&bdev->bd_holder_lock);
+ } else {
+ mutex_unlock(&bdev->bd_holder_lock);
+ }
- if (sb->s_op->thaw_super)
- error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
- else
- error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
- if (error)
- bdev->bd_fsfreeze_count++;
- else
- bdev->bd_fsfreeze_sb = NULL;
-out:
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
return error;
}
EXPORT_SYMBOL(bdev_thaw);
diff --git a/fs/super.c b/fs/super.c
index e54866345dc7..672f1837fbef 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1469,9 +1469,91 @@ static void fs_bdev_sync(struct block_device *bdev)
super_unlock_shared(sb);
}
+static struct super_block *get_bdev_super(const struct block_device *bdev)
+{
+ struct super_block *sb_bdev = bdev->bd_holder, *sb = NULL;
+
+ if (!sb_bdev)
+ return NULL;
+ if (super_lock_excl(sb_bdev) && atomic_inc_not_zero(&sb_bdev->s_active))
+ sb = sb_bdev;
+ super_unlock_excl(sb_bdev);
+ return sb;
+}
+
+static int fs_bdev_freeze(struct block_device *bdev)
+ __releases(&bdev->bd_holder_lock)
+{
+ struct super_block *sb;
+ int error = 0;
+
+ lockdep_assert_held(&bdev->bd_holder_lock);
+
+ sb = get_bdev_super(bdev);
+ if (sb) {
+ if (sb->s_op->freeze_super)
+ error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+ else
+ error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+ if (error)
+ atomic_dec(&bdev->bd_fsfreeze_count);
+ }
+
+ /*
+ * We have grabbed an active reference which means that the
+ * superblock and the block device cannot go away. But we might
+ * end up holding the last reference and so end up shutting the
+ * superblock down. So drop @bdev->bd_holder_lock to avoid
+ * deadlocks with blkdev_put().
+ */
+ mutex_unlock(&bdev->bd_holder_lock);
+
+ if (sb)
+ deactivate_super(sb);
+
+ if (!error)
+ sync_blockdev(bdev);
+
+ return error;
+}
+
+static int fs_bdev_thaw(struct block_device *bdev)
+ __releases(&bdev->bd_holder_lock)
+{
+ struct super_block *sb;
+ int error;
+
+ lockdep_assert_held(&bdev->bd_holder_lock);
+
+ sb = get_bdev_super(bdev);
+ if (WARN_ON_ONCE(!sb))
+ return -EINVAL;
+
+ if (sb->s_op->thaw_super)
+ error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+ else
+ error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+ if (error)
+ atomic_inc(&bdev->bd_fsfreeze_count);
+
+ /*
+ * We have grabbed an active reference which means that the
+ * superblock and the block device cannot go away. But we might
+ * end up holding the last reference and so end up shutting the
+ * superblock down. So drop @bdev->bd_holder_lock to avoid
+ * deadlocks with blkdev_put().
+ */
+ mutex_unlock(&bdev->bd_holder_lock);
+ deactivate_super(sb);
+
+ return error;
+}
+
const struct blk_holder_ops fs_holder_ops = {
.mark_dead = fs_bdev_mark_dead,
.sync = fs_bdev_sync,
+ .freeze = fs_bdev_freeze,
+ .thaw = fs_bdev_thaw,
};
EXPORT_SYMBOL_GPL(fs_holder_ops);
@@ -1499,15 +1581,10 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
}
/*
- * Until SB_BORN flag is set, there can be no active superblock
- * references and thus no filesystem freezing. get_active_super() will
- * just loop waiting for SB_BORN so even bdev_freeze() cannot proceed.
- *
- * It is enough to check bdev was not frozen before we set s_bdev.
+ * It is enough to check bdev was not frozen before we set
+ * s_bdev as freezing will wait until SB_BORN is set.
*/
- mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (bdev->bd_fsfreeze_count > 0) {
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
if (fc)
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
blkdev_put(bdev, sb);
@@ -1519,7 +1596,6 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
if (bdev_stable_writes(bdev))
sb->s_iflags |= SB_I_STABLE_WRITES;
spin_unlock(&sb_lock);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d5c5e59ddbd2..88e1848b0869 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -57,7 +57,7 @@ struct block_device {
const struct blk_holder_ops *bd_holder_ops;
struct mutex bd_holder_lock;
/* The counter of freeze processes */
- int bd_fsfreeze_count;
+ atomic_t bd_fsfreeze_count;
int bd_holders;
struct kobject *bd_holder_dir;
--
2.34.1
next prev parent reply other threads:[~2023-09-27 13:21 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-27 13:21 [PATCH 0/7] Implement freeze and thaw as holder operations Christian Brauner
2023-09-27 13:21 ` [PATCH 1/7] bdev: rename freeze and thaw helpers Christian Brauner
2023-09-27 14:35 ` Darrick J. Wong
2023-10-02 6:51 ` Christoph Hellwig
2023-10-02 11:28 ` Jan Kara
2023-09-27 13:21 ` [PATCH 2/7] bdev: add freeze and thaw holder operations Christian Brauner
2023-09-27 14:38 ` Darrick J. Wong
2023-10-02 6:52 ` Christoph Hellwig
2023-10-02 16:32 ` Jan Kara
2023-09-27 13:21 ` Christian Brauner [this message]
2023-09-27 14:53 ` [PATCH 3/7] bdev: implement " Darrick J. Wong
2023-09-27 15:15 ` Christian Brauner
2023-09-27 16:01 ` Darrick J. Wong
2023-10-02 6:54 ` Christoph Hellwig
2023-10-02 7:10 ` Christoph Hellwig
2023-09-27 13:21 ` [PATCH 4/7] fs: remove get_active_super() Christian Brauner
2023-09-27 14:54 ` Darrick J. Wong
2023-10-02 7:10 ` Christoph Hellwig
2023-10-02 16:22 ` Jan Kara
2023-09-27 13:21 ` [PATCH 5/7] super: remove bd_fsfreeze_{mutex,sb} Christian Brauner
2023-09-27 15:11 ` Darrick J. Wong
2023-09-27 15:18 ` Christian Brauner
2023-10-02 7:12 ` Christoph Hellwig
2023-10-02 16:24 ` Jan Kara
2023-09-27 13:21 ` [PATCH 6/7] fs: remove unused helper Christian Brauner
2023-09-27 15:12 ` Darrick J. Wong
2023-10-02 7:12 ` Christoph Hellwig
2023-10-02 16:26 ` Jan Kara
2023-09-27 13:21 ` [PATCH 7/7] porting: document block device freeze and thaw changes Christian Brauner
2023-09-27 15:19 ` Darrick J. Wong
2023-10-02 16:45 ` Jan Kara
2023-10-05 6:48 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230927-vfs-super-freeze-v1-3-ecc36d9ab4d9@kernel.org \
--to=brauner@kernel.org \
--cc=djwong@kernel.org \
--cc=hch@lst.de \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).