public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Qu Wenruo <quwenruo.btrfs@gmx.com>
To: Josef Bacik <josef@toxicpanda.com>,
	linux-btrfs@vger.kernel.org, kernel-team@fb.com
Subject: Re: [PATCH] btrfs: introduce rescue=all
Date: Wed, 9 Sep 2020 07:10:49 +0800	[thread overview]
Message-ID: <b862077c-0903-b4f0-ccc9-ee1c815534bc@gmx.com> (raw)
In-Reply-To: <921662f28b90d7e5a67bb52a1e0b0b2e9584f946.1599579772.git.josef@toxicpanda.com>


[-- Attachment #1.1: Type: text/plain, Size: 12654 bytes --]



On 2020/9/8 下午11:43, Josef Bacik wrote:
> One of the things that came up consistently in talking with Fedora about
> switching to btrfs as default is that btrfs is particularly vulnerable
> to metadata corruption.  If any of the core global roots are corrupted,
> the fs is unmountable and fsck can't usually do anything for you without
> some special options.
> 
> What we really want is a simple mount option we can tell all users to
> try if things are really wrong that are going to give them the highest
> chance of allowing them to mount their file system and copy off their
> data in the most direct way possible.

Then "all" doesn't look correct for such usage.
I'd prefer "salvage" then.

Thanks,
Qu

> 
> Obviously if the fs roots are screwed then the user is in trouble, but
> otherwise this makes it much easier to pull stuff off the disk without
> needing our special rescue tools.  I tested this with an xfstest that
> I've submitted along side this patch that clears the roots and makes
> sure we're still able to read the data on the disk.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> ---
>  fs/btrfs/block-group.c | 46 ++++++++++++++++++++++++++
>  fs/btrfs/ctree.h       |  1 +
>  fs/btrfs/disk-io.c     | 73 ++++++++++++++++++++++++++++--------------
>  fs/btrfs/inode.c       |  6 +++-
>  fs/btrfs/super.c       | 29 +++++++++++++++--
>  fs/btrfs/volumes.c     |  7 ++++
>  6 files changed, 135 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index 01e8ba1da1d3..dfff2b6e7708 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -1991,6 +1991,49 @@ static int read_one_block_group(struct btrfs_fs_info *info,
>  	return ret;
>  }
>  
> +static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
> +{
> +	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
> +	struct extent_map *em;
> +	struct map_lookup *map;
> +	struct btrfs_block_group *bg;
> +	struct btrfs_space_info *space_info;
> +	struct rb_node *node;
> +	int ret = 0;
> +
> +	for (node = rb_first_cached(&em_tree->map); node;
> +	     node = rb_next(node)) {
> +		em = rb_entry(node, struct extent_map, rb_node);
> +		map = em->map_lookup;
> +		bg = btrfs_create_block_group_cache(fs_info, em->start);
> +		if (!bg) {
> +			ret = -ENOMEM;
> +			break;
> +		}
> +
> +		/* Fill dummy cache as FULL */
> +		bg->length = em->len;
> +		bg->flags = map->type;
> +		bg->last_byte_to_unpin = (u64)-1;
> +		bg->cached = BTRFS_CACHE_FINISHED;
> +		bg->used = em->len;
> +		bg->flags = map->type;
> +		ret = btrfs_add_block_group_cache(fs_info, bg);
> +		if (ret) {
> +			btrfs_remove_free_space_cache(bg);
> +			btrfs_put_block_group(bg);
> +			break;
> +		}
> +		btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
> +					0, &space_info);
> +		bg->space_info = space_info;
> +		link_block_group(bg);
> +
> +		set_avail_alloc_bits(fs_info, bg->flags);
> +	}
> +	return ret;
> +}
> +
>  int btrfs_read_block_groups(struct btrfs_fs_info *info)
>  {
>  	struct btrfs_path *path;
> @@ -2001,6 +2044,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
>  	int need_clear = 0;
>  	u64 cache_gen;
>  
> +	if (btrfs_test_opt(info, RESCUE_ALL))
> +		return fill_dummy_bgs(info);
> +
>  	key.objectid = 0;
>  	key.offset = 0;
>  	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 98c5f6178efc..f84b3f67faa4 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1275,6 +1275,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
>  #define BTRFS_MOUNT_NOLOGREPLAY		(1 << 27)
>  #define BTRFS_MOUNT_REF_VERIFY		(1 << 28)
>  #define BTRFS_MOUNT_DISCARD_ASYNC	(1 << 29)
> +#define BTRFS_MOUNT_RESCUE_ALL		(1 << 30)
>  
>  #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
>  #define BTRFS_DEFAULT_MAX_INLINE	(2048)
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 7147237d9bf0..f61366a57be4 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2303,30 +2303,39 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
>  
>  	root = btrfs_read_tree_root(tree_root, &location);
>  	if (IS_ERR(root)) {
> -		ret = PTR_ERR(root);
> -		goto out;
> +		if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> +			ret = PTR_ERR(root);
> +			goto out;
> +		}
> +	} else {
> +		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> +		fs_info->extent_root = root;
>  	}
> -	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> -	fs_info->extent_root = root;
>  
>  	location.objectid = BTRFS_DEV_TREE_OBJECTID;
>  	root = btrfs_read_tree_root(tree_root, &location);
>  	if (IS_ERR(root)) {
> -		ret = PTR_ERR(root);
> -		goto out;
> +		if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> +			ret = PTR_ERR(root);
> +			goto out;
> +		}
> +	} else {
> +		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> +		fs_info->dev_root = root;
> +		btrfs_init_devices_late(fs_info);
>  	}
> -	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> -	fs_info->dev_root = root;
> -	btrfs_init_devices_late(fs_info);
>  
>  	location.objectid = BTRFS_CSUM_TREE_OBJECTID;
>  	root = btrfs_read_tree_root(tree_root, &location);
>  	if (IS_ERR(root)) {
> -		ret = PTR_ERR(root);
> -		goto out;
> +		if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> +			ret = PTR_ERR(root);
> +			goto out;
> +		}
> +	} else {
> +		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> +		fs_info->csum_root = root;
>  	}
> -	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> -	fs_info->csum_root = root;
>  
>  	/*
>  	 * This tree can share blocks with some other fs tree during relocation
> @@ -2335,11 +2344,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
>  	root = btrfs_get_fs_root(tree_root->fs_info,
>  				 BTRFS_DATA_RELOC_TREE_OBJECTID, true);
>  	if (IS_ERR(root)) {
> -		ret = PTR_ERR(root);
> -		goto out;
> +		if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> +			ret = PTR_ERR(root);
> +			goto out;
> +		}
> +	} else {
> +		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> +		fs_info->data_reloc_root = root;
>  	}
> -	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> -	fs_info->data_reloc_root = root;
>  
>  	location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
>  	root = btrfs_read_tree_root(tree_root, &location);
> @@ -2352,9 +2364,11 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
>  	location.objectid = BTRFS_UUID_TREE_OBJECTID;
>  	root = btrfs_read_tree_root(tree_root, &location);
>  	if (IS_ERR(root)) {
> -		ret = PTR_ERR(root);
> -		if (ret != -ENOENT)
> -			goto out;
> +		if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> +			ret = PTR_ERR(root);
> +			if (ret != -ENOENT)
> +				goto out;
> +		}
>  	} else {
>  		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
>  		fs_info->uuid_root = root;
> @@ -2364,11 +2378,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
>  		location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
>  		root = btrfs_read_tree_root(tree_root, &location);
>  		if (IS_ERR(root)) {
> -			ret = PTR_ERR(root);
> -			goto out;
> +			if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> +				ret = PTR_ERR(root);
> +				goto out;
> +			}
> +		}  else {
> +			set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> +			fs_info->free_space_root = root;
>  		}
> -		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> -		fs_info->free_space_root = root;
>  	}
>  
>  	return 0;
> @@ -3082,6 +3099,14 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
>  		goto fail_alloc;
>  	}
>  
> +	/* Skip bg needs RO and no tree-log to replay */
> +	if (btrfs_test_opt(fs_info, RESCUE_ALL) && !sb_rdonly(sb)) {
> +		btrfs_err(fs_info,
> +			  "rescue=all can only be used on read-only mount");
> +		err = -EINVAL;
> +		goto fail_alloc;
> +	}
> +
>  	ret = btrfs_init_workqueues(fs_info, fs_devices);
>  	if (ret) {
>  		err = ret;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index cce6f8789a4e..d90c86ac9ebe 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2196,7 +2196,8 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
>  	int skip_sum;
>  	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
>  
> -	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
> +	skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
> +		!fs_info->csum_root;
>  
>  	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
>  		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
> @@ -2851,6 +2852,9 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
>  	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
>  		return 0;
>  
> +	if (!root->fs_info->csum_root)
> +		return 0;
> +
>  	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
>  	    test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
>  		clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 3ebe7240c63d..89703c2ec1f3 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -360,6 +360,7 @@ enum {
>  	Opt_rescue,
>  	Opt_usebackuproot,
>  	Opt_nologreplay,
> +	Opt_rescue_all,
>  
>  	/* Deprecated options */
>  	Opt_recovery,
> @@ -455,6 +456,7 @@ static const match_table_t tokens = {
>  static const match_table_t rescue_tokens = {
>  	{Opt_usebackuproot, "usebackuproot"},
>  	{Opt_nologreplay, "nologreplay"},
> +	{Opt_rescue_all, "all"},
>  	{Opt_err, NULL},
>  };
>  
> @@ -487,6 +489,11 @@ static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
>  			btrfs_set_and_info(info, NOLOGREPLAY,
>  					   "disabling log replay at mount time");
>  			break;
> +		case Opt_rescue_all:
> +			btrfs_set_and_info(info, RESCUE_ALL,
> +					   "only reading fs roots, also setting  nologreplay");
> +			btrfs_set_opt(info->mount_opt, NOLOGREPLAY);
> +			break;
>  		case Opt_err:
>  			btrfs_info(info, "unrecognized rescue option '%s'", p);
>  			ret = -EINVAL;
> @@ -1421,6 +1428,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
>  		seq_puts(seq, ",notreelog");
>  	if (btrfs_test_opt(info, NOLOGREPLAY))
>  		seq_puts(seq, ",rescue=nologreplay");
> +	if (btrfs_test_opt(info, RESCUE_ALL))
> +		seq_puts(seq, ",rescue=all");
>  	if (btrfs_test_opt(info, FLUSHONCOMMIT))
>  		seq_puts(seq, ",flushoncommit");
>  	if (btrfs_test_opt(info, DISCARD_SYNC))
> @@ -1858,6 +1867,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
>  	if (ret)
>  		goto restore;
>  
> +	if (btrfs_test_opt(fs_info, RESCUE_ALL) !=
> +	    (old_opts & BTRFS_MOUNT_RESCUE_ALL)) {
> +		btrfs_err(fs_info,
> +		"rescue=all mount option can't be changed during remount");
> +		ret = -EINVAL;
> +		goto restore;
> +	}
> +
>  	btrfs_remount_begin(fs_info, old_opts, *flags);
>  	btrfs_resize_thread_pool(fs_info,
>  		fs_info->thread_pool_size, old_thread_pool_size);
> @@ -1924,6 +1941,13 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
>  			goto restore;
>  		}
>  
> +		if (btrfs_test_opt(fs_info, RESCUE_ALL)) {
> +			btrfs_err(fs_info,
> +		"remounting read-write with rescue=all is not allowed");
> +			ret = -EINVAL;
> +			goto restore;
> +		}
> +
>  		ret = btrfs_cleanup_fs_roots(fs_info);
>  		if (ret)
>  			goto restore;
> @@ -2238,8 +2262,9 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
>  	 * still can allocate chunks and thus are fine using the currently
>  	 * calculated f_bavail.
>  	 */
> -	if (!mixed && block_rsv->space_info->full &&
> -	    total_free_meta - thresh < block_rsv->size)
> +	if (btrfs_test_opt(fs_info, RESCUE_ALL) ||
> +	    (!mixed && block_rsv->space_info->full &&
> +	     total_free_meta - thresh < block_rsv->size))
>  		buf->f_bavail = 0;
>  
>  	buf->f_type = BTRFS_SUPER_MAGIC;
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 2a55356ef4a2..614acd4b9988 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -7648,6 +7648,13 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
>  	u64 prev_dev_ext_end = 0;
>  	int ret = 0;
>  
> +	/*
> +	 * For rescue=all mount option, we're already RO and are salvaging
> +	 * data, no need for such strict check.
> +	 */
> +	if (btrfs_test_opt(fs_info, RESCUE_ALL))
> +		return 0;
> +
>  	key.objectid = 1;
>  	key.type = BTRFS_DEV_EXTENT_KEY;
>  	key.offset = 0;
> 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

  reply	other threads:[~2020-09-08 23:11 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-08 15:43 [PATCH] btrfs: introduce rescue=all Josef Bacik
2020-09-08 23:10 ` Qu Wenruo [this message]
2020-09-09  7:20   ` David Sterba
2020-09-09  7:25     ` Qu Wenruo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b862077c-0903-b4f0-ccc9-ee1c815534bc@gmx.com \
    --to=quwenruo.btrfs@gmx.com \
    --cc=josef@toxicpanda.com \
    --cc=kernel-team@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox