From: "Darrick J. Wong" <djwong@kernel.org>
To: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>, Christoph Hellwig <hch@lst.de>,
linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH v2 05/10] bdev: implement freeze and thaw holder operations
Date: Tue, 24 Oct 2023 08:21:22 -0700 [thread overview]
Message-ID: <20231024152122.GH11424@frogsfrogsfrogs> (raw)
In-Reply-To: <20231024-vfs-super-freeze-v2-5-599c19f4faac@kernel.org>
On Tue, Oct 24, 2023 at 03:01:11PM +0200, Christian Brauner wrote:
> The old method of implementing block device freeze and thaw operations
> required us to rely on get_active_super() to walk the list of all
> superblocks on the system to find any superblock that might use the
> block device. This is wasteful and not very pleasant overall.
>
> Now that we can finally go straight from block device to owning
> superblock, things become way simpler.
>
> Link: https://lore.kernel.org/r/20230927-vfs-super-freeze-v1-3-ecc36d9ab4d9@kernel.org
> Signed-off-by: Christian Brauner <brauner@kernel.org>
Looks good now,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
--D
> ---
> block/bdev.c | 62 +++++++++++------------
> fs/super.c | 124 ++++++++++++++++++++++++++++++++++++----------
> include/linux/blk_types.h | 2 +-
> 3 files changed, 128 insertions(+), 60 deletions(-)
>
> diff --git a/block/bdev.c b/block/bdev.c
> index a3e2af580a73..9deacd346192 100644
> --- a/block/bdev.c
> +++ b/block/bdev.c
> @@ -222,31 +222,24 @@ EXPORT_SYMBOL(sync_blockdev_range);
> */
> int bdev_freeze(struct block_device *bdev)
> {
> - struct super_block *sb;
> int error = 0;
>
> mutex_lock(&bdev->bd_fsfreeze_mutex);
> - if (++bdev->bd_fsfreeze_count > 1)
> - goto done;
> -
> - sb = get_active_super(bdev);
> - if (!sb)
> - goto sync;
> - if (sb->s_op->freeze_super)
> - error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
> - else
> - error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
> - deactivate_super(sb);
>
> - if (error) {
> - bdev->bd_fsfreeze_count--;
> - goto done;
> + if (atomic_inc_return(&bdev->bd_fsfreeze_count) > 1) {
> + mutex_unlock(&bdev->bd_fsfreeze_mutex);
> + return 0;
> + }
> +
> + mutex_lock(&bdev->bd_holder_lock);
> + if (bdev->bd_holder_ops && bdev->bd_holder_ops->freeze) {
> + error = bdev->bd_holder_ops->freeze(bdev);
> + lockdep_assert_not_held(&bdev->bd_holder_lock);
> + } else {
> + mutex_unlock(&bdev->bd_holder_lock);
> + error = sync_blockdev(bdev);
> }
> - bdev->bd_fsfreeze_sb = sb;
>
> -sync:
> - error = sync_blockdev(bdev);
> -done:
> mutex_unlock(&bdev->bd_fsfreeze_mutex);
> return error;
> }
> @@ -262,29 +255,30 @@ EXPORT_SYMBOL(bdev_freeze);
> */
> int bdev_thaw(struct block_device *bdev)
> {
> - struct super_block *sb;
> - int error = -EINVAL;
> + int error = -EINVAL, nr_freeze;
>
> mutex_lock(&bdev->bd_fsfreeze_mutex);
> - if (!bdev->bd_fsfreeze_count)
> +
> + /*
> + * If this returns < 0 it means that @bd_fsfreeze_count was
> + * already 0 and no decrement was performed.
> + */
> + nr_freeze = atomic_dec_if_positive(&bdev->bd_fsfreeze_count);
> + if (nr_freeze < 0)
> goto out;
>
> error = 0;
> - if (--bdev->bd_fsfreeze_count > 0)
> + if (nr_freeze > 0)
> goto out;
>
> - sb = bdev->bd_fsfreeze_sb;
> - if (!sb)
> - goto out;
> + mutex_lock(&bdev->bd_holder_lock);
> + if (bdev->bd_holder_ops && bdev->bd_holder_ops->thaw) {
> + error = bdev->bd_holder_ops->thaw(bdev);
> + lockdep_assert_not_held(&bdev->bd_holder_lock);
> + } else {
> + mutex_unlock(&bdev->bd_holder_lock);
> + }
>
> - if (sb->s_op->thaw_super)
> - error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
> - else
> - error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
> - if (error)
> - bdev->bd_fsfreeze_count++;
> - else
> - bdev->bd_fsfreeze_sb = NULL;
> out:
> mutex_unlock(&bdev->bd_fsfreeze_mutex);
> return error;
> diff --git a/fs/super.c b/fs/super.c
> index b224182f2440..ee0795ce09c7 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -1430,14 +1430,8 @@ struct super_block *sget_dev(struct fs_context *fc, dev_t dev)
> EXPORT_SYMBOL(sget_dev);
>
> #ifdef CONFIG_BLOCK
> -/*
> - * Lock the superblock that is holder of the bdev. Returns the superblock
> - * pointer if we successfully locked the superblock and it is alive. Otherwise
> - * we return NULL and just unlock bdev->bd_holder_lock.
> - *
> - * The function must be called with bdev->bd_holder_lock and releases it.
> - */
> -static struct super_block *bdev_super_lock_shared(struct block_device *bdev)
> +
> +static struct super_block *bdev_super_lock(struct block_device *bdev, bool excl)
> __releases(&bdev->bd_holder_lock)
> {
> struct super_block *sb = bdev->bd_holder;
> @@ -1451,18 +1445,37 @@ static struct super_block *bdev_super_lock_shared(struct block_device *bdev)
> spin_lock(&sb_lock);
> sb->s_count++;
> spin_unlock(&sb_lock);
> +
> mutex_unlock(&bdev->bd_holder_lock);
>
> - locked = super_lock_shared(sb);
> - if (!locked || !sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
> - put_super(sb);
> + locked = super_lock(sb, excl);
> + put_super(sb);
> + if (!locked)
> + return NULL;
> +
> + return sb;
> +}
> +
> +/*
> + * Lock the superblock that is holder of the bdev. Returns the superblock
> + * pointer if we successfully locked the superblock and it is alive. Otherwise
> + * we return NULL and just unlock bdev->bd_holder_lock.
> + *
> + * The function must be called with bdev->bd_holder_lock and releases it.
> + */
> +static struct super_block *bdev_super_lock_shared(struct block_device *bdev)
> +{
> + struct super_block *sb;
> +
> + sb = bdev_super_lock(bdev, false);
> + if (!sb)
> + return NULL;
> +
> + if (!sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
> + super_unlock_shared(sb);
> return NULL;
> }
> - /*
> - * The superblock is active and we hold s_umount, we can drop our
> - * temporary reference now.
> - */
> - put_super(sb);
> +
> return sb;
> }
>
> @@ -1495,9 +1508,76 @@ static void fs_bdev_sync(struct block_device *bdev)
> super_unlock_shared(sb);
> }
>
> +static struct super_block *get_bdev_super(struct block_device *bdev)
> +{
> + bool active = false;
> + struct super_block *sb;
> +
> + sb = bdev_super_lock(bdev, true);
> + if (sb) {
> + active = atomic_inc_not_zero(&sb->s_active);
> + super_unlock_excl(sb);
> + }
> + if (!active)
> + return NULL;
> + return sb;
> +}
> +
> +static int fs_bdev_freeze(struct block_device *bdev)
> +{
> + struct super_block *sb;
> + int error = 0;
> +
> + lockdep_assert_held(&bdev->bd_fsfreeze_mutex);
> +
> + if (WARN_ON_ONCE(unlikely(!bdev->bd_holder)))
> + return -EINVAL;
> +
> + sb = get_bdev_super(bdev);
> + if (!sb)
> + return -EINVAL;
> +
> + if (sb->s_op->freeze_super)
> + error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
> + else
> + error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
> + if (error)
> + atomic_dec(&bdev->bd_fsfreeze_count);
> + if (!error)
> + error = sync_blockdev(bdev);
> + deactivate_super(sb);
> + return error;
> +}
> +
> +static int fs_bdev_thaw(struct block_device *bdev)
> +{
> + struct super_block *sb;
> + int error;
> +
> + lockdep_assert_held(&bdev->bd_fsfreeze_mutex);
> +
> + if (WARN_ON_ONCE(unlikely(!bdev->bd_holder)))
> + return -EINVAL;
> +
> + sb = get_bdev_super(bdev);
> + if (WARN_ON_ONCE(!sb))
> + return -EINVAL;
> +
> + if (sb->s_op->thaw_super)
> + error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
> + else
> + error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
> + if (error)
> + atomic_inc(&bdev->bd_fsfreeze_count);
> + deactivate_super(sb);
> + return error;
> +}
> +
> const struct blk_holder_ops fs_holder_ops = {
> .mark_dead = fs_bdev_mark_dead,
> .sync = fs_bdev_sync,
> + .freeze = fs_bdev_freeze,
> + .thaw = fs_bdev_thaw,
> };
> EXPORT_SYMBOL_GPL(fs_holder_ops);
>
> @@ -1527,15 +1607,10 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
> }
>
> /*
> - * Until SB_BORN flag is set, there can be no active superblock
> - * references and thus no filesystem freezing. get_active_super() will
> - * just loop waiting for SB_BORN so even bdev_freeze() cannot proceed.
> - *
> - * It is enough to check bdev was not frozen before we set s_bdev.
> + * It is enough to check bdev was not frozen before we set
> + * s_bdev as freezing will wait until SB_BORN is set.
> */
> - mutex_lock(&bdev->bd_fsfreeze_mutex);
> - if (bdev->bd_fsfreeze_count > 0) {
> - mutex_unlock(&bdev->bd_fsfreeze_mutex);
> + if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
> if (fc)
> warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
> bdev_release(bdev_handle);
> @@ -1548,7 +1623,6 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
> if (bdev_stable_writes(bdev))
> sb->s_iflags |= SB_I_STABLE_WRITES;
> spin_unlock(&sb_lock);
> - mutex_unlock(&bdev->bd_fsfreeze_mutex);
>
> snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
> shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name,
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index d5c5e59ddbd2..88e1848b0869 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -57,7 +57,7 @@ struct block_device {
> const struct blk_holder_ops *bd_holder_ops;
> struct mutex bd_holder_lock;
> /* The counter of freeze processes */
> - int bd_fsfreeze_count;
> + atomic_t bd_fsfreeze_count;
> int bd_holders;
> struct kobject *bd_holder_dir;
>
>
> --
> 2.34.1
>
next prev parent reply other threads:[~2023-10-24 15:21 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-10-24 13:01 [PATCH v2 00/10] Implement freeze and thaw as holder operations Christian Brauner
2023-10-24 13:01 ` [PATCH v2 01/10] fs: massage locking helpers Christian Brauner
2023-10-25 12:34 ` Jan Kara
2023-10-25 13:21 ` Christian Brauner
2023-10-25 14:01 ` Jan Kara
2023-10-27 6:25 ` Christoph Hellwig
2023-10-24 13:01 ` [PATCH v2 02/10] bdev: rename freeze and thaw helpers Christian Brauner
2023-10-24 13:01 ` [PATCH v2 03/10] bdev: surface the error from sync_blockdev() Christian Brauner
2023-10-24 15:14 ` Darrick J. Wong
2023-10-25 12:36 ` Jan Kara
2023-10-27 6:25 ` Christoph Hellwig
2023-10-24 13:01 ` [PATCH v2 04/10] bdev: add freeze and thaw holder operations Christian Brauner
2023-10-27 6:26 ` Christoph Hellwig
2023-10-24 13:01 ` [PATCH v2 05/10] bdev: implement " Christian Brauner
2023-10-24 15:21 ` Darrick J. Wong [this message]
2023-10-25 14:01 ` Jan Kara
2023-10-26 8:44 ` Christian Brauner
2023-10-26 9:31 ` Jan Kara
2023-10-27 6:29 ` Christoph Hellwig
2023-10-24 13:01 ` [PATCH v2 06/10] fs: remove get_active_super() Christian Brauner
2023-10-24 13:01 ` [PATCH v2 07/10] super: remove bd_fsfreeze_sb Christian Brauner
2023-10-24 13:01 ` [PATCH v2 08/10] fs: remove unused helper Christian Brauner
2023-10-27 6:30 ` Christoph Hellwig
2023-10-24 13:01 ` [PATCH v2 09/10] porting: document block device freeze and thaw changes Christian Brauner
2023-10-24 15:17 ` Darrick J. Wong
2023-10-25 14:05 ` Jan Kara
2023-10-24 13:01 ` [PATCH v2 10/10] blkdev: comment fs_holder_ops Christian Brauner
2023-10-24 15:16 ` Darrick J. Wong
2023-10-25 14:06 ` Jan Kara
2023-10-25 14:27 ` Christoph Hellwig
2023-10-26 11:45 ` [PATCH v2 00/10] Implement freeze and thaw as holder operations Christian Brauner
2023-10-27 6:40 ` Christoph Hellwig
2023-10-27 11:03 ` Jan Kara
2023-10-27 13:20 ` [PATCH] fs: streamline thaw_super_locked Christian Brauner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231024152122.GH11424@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=brauner@kernel.org \
--cc=hch@lst.de \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).