All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sunil Mushran <sunil.mushran@oracle.com>
To: Josef Bacik <josef@redhat.com>
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	chris.mason@oracle.com, viro@ZenIV.linux.org.uk, hch@lst.de
Subject: Re: [PATCH] Introduce freeze_super and thaw_super for the fsfreeze ioctl
Date: Tue, 23 Mar 2010 11:19:35 -0700	[thread overview]
Message-ID: <4BA90637.5000202@oracle.com> (raw)
In-Reply-To: <20100323142200.GA2381@localhost.localdomain>

Thanks for doing this. This helps ocfs2 too because we need to be able
to freeze the fs both from the user-context (node the ioctl was issued)
and the kernel-context (other nodes). In the older scheme, it was tricky
to handle nodes racing to freeze the fs. This will make it a lot easier.

Sunil

Josef Bacik wrote:
> Currently the way we do freezing is by passing sb>s_bdev to freeze_bdev and then
> letting it do all the work.  But freezing is more of an fs thing, and doesn't
> really have much to do with the bdev at all, all the work gets done with the
> super.  In btrfs we do not populate s_bdev, since we can have multiple bdev's
> for one fs and setting s_bdev makes removing devices from a pool kind of tricky.
> This means that freezing a btrfs filesystem fails, which causes us to corrupt
> with things like tux-on-ice which use the fsfreeze mechanism.  So instead of
> populating sb->s_bdev with a random bdev in our pool, I've broken the actual fs
> freezing stuff into freeze_super and thaw_super.  These just take the
> super_block that we're freezing and does the appropriate work.  It's basically
> just copy and pasted from freeze_bdev.  I've then converted freeze_bdev over to
> use the new super helpers.  I've tested this with ext4 and btrfs and verified
> everything continues to work the same as before.
>
> The only new gotcha is multiple calls to the fsfreeze ioctl will return EBUSY if
> the fs is already frozen.  I thought this was a better solution than adding a
> freeze counter to the super_block, but if everybody hates this idea I'm open to
> suggestions.  Thanks,
>
> Signed-off-by: Josef Bacik <josef@redhat.com>
> ---
>  fs/block_dev.c     |   66 ++++++++------------------------------
>  fs/ioctl.c         |   18 +++--------
>  fs/super.c         |   88 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/fs.h |    2 +
>  4 files changed, 109 insertions(+), 65 deletions(-)
>
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 8bed055..71b6165 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -245,35 +245,13 @@ struct super_block *freeze_bdev(struct block_device *bdev)
>  	sb = get_active_super(bdev);
>  	if (!sb)
>  		goto out;
> -	if (sb->s_flags & MS_RDONLY) {
> -		deactivate_locked_super(sb);
> -		mutex_unlock(&bdev->bd_fsfreeze_mutex);
> -		return sb;
> -	}
> -
> -	sb->s_frozen = SB_FREEZE_WRITE;
> -	smp_wmb();
> -
> -	sync_filesystem(sb);
>  
> -	sb->s_frozen = SB_FREEZE_TRANS;
> -	smp_wmb();
> -
> -	sync_blockdev(sb->s_bdev);
> -
> -	if (sb->s_op->freeze_fs) {
> -		error = sb->s_op->freeze_fs(sb);
> -		if (error) {
> -			printk(KERN_ERR
> -				"VFS:Filesystem freeze failed\n");
> -			sb->s_frozen = SB_UNFROZEN;
> -			deactivate_locked_super(sb);
> -			bdev->bd_fsfreeze_count--;
> -			mutex_unlock(&bdev->bd_fsfreeze_mutex);
> -			return ERR_PTR(error);
> -		}
> +	error = freeze_super(sb, 1);
> +	if (error) {
> +		bdev->bd_fsfreeze_count--;
> +		mutex_unlock(&bdev->bd_fsfreeze_mutex);
> +		return ERR_PTR(error);
>  	}
> -	up_write(&sb->s_umount);
>  
>   out:
>  	sync_blockdev(bdev);
> @@ -295,40 +273,24 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
>  
>  	mutex_lock(&bdev->bd_fsfreeze_mutex);
>  	if (!bdev->bd_fsfreeze_count)
> -		goto out_unlock;
> +		goto out;
>  
>  	error = 0;
>  	if (--bdev->bd_fsfreeze_count > 0)
> -		goto out_unlock;
> +		goto out;
>  
>  	if (!sb)
> -		goto out_unlock;
> +		goto out;
>  
>  	BUG_ON(sb->s_bdev != bdev);
> -	down_write(&sb->s_umount);
> -	if (sb->s_flags & MS_RDONLY)
> -		goto out_deactivate;
> -
> -	if (sb->s_op->unfreeze_fs) {
> -		error = sb->s_op->unfreeze_fs(sb);
> -		if (error) {
> -			printk(KERN_ERR
> -				"VFS:Filesystem thaw failed\n");
> -			sb->s_frozen = SB_FREEZE_TRANS;
> -			bdev->bd_fsfreeze_count++;
> -			mutex_unlock(&bdev->bd_fsfreeze_mutex);
> -			return error;
> -		}
> +	error = thaw_super(sb);
> +	if (error) {
> +		bdev->bd_fsfreeze_count++;
> +		mutex_unlock(&bdev->bd_fsfreeze_mutex);
> +		return error;
>  	}
>  
> -	sb->s_frozen = SB_UNFROZEN;
> -	smp_wmb();
> -	wake_up(&sb->s_wait_unfrozen);
> -
> -out_deactivate:
> -	if (sb)
> -		deactivate_locked_super(sb);
> -out_unlock:
> +out:
>  	mutex_unlock(&bdev->bd_fsfreeze_mutex);
>  	return 0;
>  }
> diff --git a/fs/ioctl.c b/fs/ioctl.c
> index 6c75110..a065eff 100644
> --- a/fs/ioctl.c
> +++ b/fs/ioctl.c
> @@ -503,6 +503,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
>  static int ioctl_fsfreeze(struct file *filp)
>  {
>  	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
> +	int ret;
>  
>  	if (!capable(CAP_SYS_ADMIN))
>  		return -EPERM;
> @@ -511,15 +512,10 @@ static int ioctl_fsfreeze(struct file *filp)
>  	if (sb->s_op->freeze_fs == NULL)
>  		return -EOPNOTSUPP;
>  
> -	/* If a blockdevice-backed filesystem isn't specified, return. */
> -	if (sb->s_bdev == NULL)
> -		return -EINVAL;
> -
>  	/* Freeze */
> -	sb = freeze_bdev(sb->s_bdev);
> -	if (IS_ERR(sb))
> -		return PTR_ERR(sb);
> -	return 0;
> +	ret = freeze_super(sb, 0);
> +
> +	return ret;
>  }
>  
>  static int ioctl_fsthaw(struct file *filp)
> @@ -529,12 +525,8 @@ static int ioctl_fsthaw(struct file *filp)
>  	if (!capable(CAP_SYS_ADMIN))
>  		return -EPERM;
>  
> -	/* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
> -	if (sb->s_bdev == NULL)
> -		return -EINVAL;
> -
>  	/* Thaw */
> -	return thaw_bdev(sb->s_bdev, sb);
> +	return thaw_super(sb);
>  }
>  
>  /*
> diff --git a/fs/super.c b/fs/super.c
> index 19eb70b..305d475 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -971,6 +971,94 @@ out:
>  
>  EXPORT_SYMBOL_GPL(vfs_kern_mount);
>  
> +/**
> + * freeze_super -- lock the filesystem and force it into a consistent state
> + * @super: the super to lock
> + *
> + * Syncs the super to make sure the filesystem is consistent and calls the fs's
> + * freeze_fs.  We hold the s_umount semaphore in order to make sure the fs is
> + * not unmounted until after we thaw the fs.  This cannot be called multiple
> + * times like freeze_bdev, if we're already frozen we'll return -EBUSY.
> + */
> +int freeze_super(struct super_block *sb, int locked)
> +{
> +	int ret;
> +
> +	if (!locked) {
> +		spin_lock(&sb_lock);
> +		ret = grab_super(sb);
> +		if (!ret)
> +			return 0;
> +	}
> +
> +	if (sb->s_flags & MS_RDONLY) {
> +		deactivate_locked_super(sb);
> +		return 0;
> +	}
> +
> +	if (sb->s_frozen) {
> +		deactivate_locked_super(sb);
> +		return -EBUSY;
> +	}
> +
> +	sb->s_frozen = SB_FREEZE_WRITE;
> +	smp_wmb();
> +
> +	sync_filesystem(sb);
> +
> +	sb->s_frozen = SB_FREEZE_TRANS;
> +	smp_wmb();
> +
> +	sync_blockdev(sb->s_bdev);
> +	if (sb->s_op->freeze_fs) {
> +		ret = sb->s_op->freeze_fs(sb);
> +		if (ret) {
> +			printk(KERN_ERR
> +				"VFS:Filesystem freeze failed\n");
> +			sb->s_frozen = SB_UNFROZEN;
> +			deactivate_locked_super(sb);
> +			return ret;
> +		}
> +	}
> +	up_write(&sb->s_umount);
> +	return 0;
> +}
> +EXPORT_SYMBOL(freeze_super);
> +
> +/**
> + * thaw_super -- unlock filesystem
> + * @sb: the super to thaw
> + *
> + * Unlocks the filesystem and marks it writeable again after freeze_super().
> + */
> +int thaw_super(struct super_block *sb)
> +{
> +	int error;
> +
> +	down_write(&sb->s_umount);
> +	if (sb->s_flags & MS_RDONLY)
> +		goto out;
> +
> +	if (sb->s_op->unfreeze_fs) {
> +		error = sb->s_op->unfreeze_fs(sb);
> +		if (error) {
> +			printk(KERN_ERR
> +				"VFS:Filesystem thaw failed\n");
> +			sb->s_frozen = SB_FREEZE_TRANS;
> +			return error;
> +		}
> +	}
> +
> +	sb->s_frozen = SB_UNFROZEN;
> +	smp_wmb();
> +	wake_up(&sb->s_wait_unfrozen);
> +
> +out:
> +	deactivate_locked_super(sb);
> +	return 0;
> +}
> +EXPORT_SYMBOL(thaw_super);
> +
>  static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
>  {
>  	int err;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 2620a8c..a5778ae 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1802,6 +1802,8 @@ extern int may_umount(struct vfsmount *);
>  extern long do_mount(char *, char *, char *, unsigned long, void *);
>  extern struct vfsmount *collect_mounts(struct path *);
>  extern void drop_collected_mounts(struct vfsmount *);
> +extern int freeze_super(struct super_block *super, int locked);
> +extern int thaw_super(struct super_block *super);
>  
>  extern int vfs_statfs(struct dentry *, struct kstatfs *);
>  


  parent reply	other threads:[~2010-03-23 18:21 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-23 14:22 [PATCH] Introduce freeze_super and thaw_super for the fsfreeze ioctl Josef Bacik
2010-03-23 14:28 ` Al Viro
2010-03-23 14:34   ` Josef Bacik
2010-03-23 14:48     ` Al Viro
2010-03-23 15:03       ` Josef Bacik
2010-03-23 15:09         ` Al Viro
2010-03-23 15:12           ` Al Viro
2010-03-23 15:15             ` Al Viro
2010-03-23 22:31           ` Nigel Cunningham
2010-03-23 23:18             ` Al Viro
2010-03-23 23:47               ` Al Viro
2010-03-23 23:52               ` Nigel Cunningham
2010-03-23 23:55                 ` Nigel Cunningham
2010-03-24  0:21                   ` Al Viro
2010-03-24  0:25                     ` Nigel Cunningham
2010-03-24  0:03               ` Nigel Cunningham
2010-03-23 18:19 ` Sunil Mushran [this message]
2010-03-23 22:25 ` Nigel Cunningham
2010-03-24  1:17   ` Josef Bacik
2010-03-24  5:16     ` Nigel Cunningham

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4BA90637.5000202@oracle.com \
    --to=sunil.mushran@oracle.com \
    --cc=chris.mason@oracle.com \
    --cc=hch@lst.de \
    --cc=josef@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.