linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Su Yue <suy.fnst@cn.fujitsu.com>
To: Qu Wenruo <wqu@suse.com>, <linux-btrfs@vger.kernel.org>
Subject: Re: [PATCH v2 4/6] btrfs: Introduce mount time chunk <-> dev extent mapping check
Date: Wed, 1 Aug 2018 11:18:04 +0800	[thread overview]
Message-ID: <6e062d6b-6e8c-94e5-d742-645b7dbdd8ed@cn.fujitsu.com> (raw)
In-Reply-To: <20180801023721.32143-5-wqu@suse.com>



On 08/01/2018 10:37 AM, Qu Wenruo wrote:
> This patch introduces a chunk <-> dev extent mapping check, to protect
> us against invalid dev extents or chunks.
> 
> Since chunk mapping is fundamental infrastructure of btrfs, an extra
> check at mount time could prevent a lot of unexpected behavior (BUG_ON).
> 
> Reported-by: Xu Wen <wen.xu@gatech.edu>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=200403
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=200407
> Signed-off-by: Qu Wenruo <wqu@suse.com>

LGTM.
Reviewed-by: Su Yue <suy.fnst@cn.fujitsu.com>

> ---
>   fs/btrfs/disk-io.c |   7 ++
>   fs/btrfs/volumes.c | 183 +++++++++++++++++++++++++++++++++++++++++++++
>   fs/btrfs/volumes.h |   2 +
>   3 files changed, 192 insertions(+)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 205092dc9390..068ca7498e94 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3075,6 +3075,13 @@ int open_ctree(struct super_block *sb,
>   	fs_info->generation = generation;
>   	fs_info->last_trans_committed = generation;
>   
> +	ret = btrfs_verify_dev_extents(fs_info);
> +	if (ret) {
> +		btrfs_err(fs_info,
> +			  "failed to verify dev extents against chunks: %d",
> +			  ret);
> +		goto fail_block_groups;
> +	}
>   	ret = btrfs_recover_balance(fs_info);
>   	if (ret) {
>   		btrfs_err(fs_info, "failed to recover balance: %d", ret);
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index e6a8e4aabc66..467a589854fa 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6440,6 +6440,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
>   	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
>   	map->type = btrfs_chunk_type(leaf, chunk);
>   	map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
> +	map->verified_stripes = 0;
>   	for (i = 0; i < num_stripes; i++) {
>   		map->stripes[i].physical =
>   			btrfs_stripe_offset_nr(leaf, chunk, i);
> @@ -7295,3 +7296,185 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
>   		fs_devices = fs_devices->seed;
>   	}
>   }
> +
> +static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
> +{
> +	int index = btrfs_bg_flags_to_raid_index(type);
> +	int ncopies = btrfs_raid_array[index].ncopies;
> +	int data_stripes;
> +
> +	switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> +	case BTRFS_BLOCK_GROUP_RAID5:
> +		data_stripes = num_stripes - 1;
> +		break;
> +	case BTRFS_BLOCK_GROUP_RAID6:
> +		data_stripes = num_stripes - 2;
> +		break;
> +	default:
> +		data_stripes = num_stripes / ncopies;
> +		break;
> +	}
> +	return div_u64(chunk_len, data_stripes);
> +}
> +static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
> +				 u64 chunk_offset, u64 devid,
> +				 u64 physical_offset, u64 physical_len)
> +{
> +	struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
> +	struct extent_map *em;
> +	struct map_lookup *map;
> +	u64 stripe_len;
> +	bool found = false;
> +	int ret = 0;
> +	int i;
> +
> +	read_lock(&em_tree->lock);
> +	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
> +	read_unlock(&em_tree->lock);
> +
> +	if (!em) {
> +		ret = -EUCLEAN;
> +		btrfs_err(fs_info,
> +		"dev extent (%llu, %llu) doesn't have corresponding chunk",
> +			  devid, physical_offset);
> +		goto out;
> +	}
> +
> +	map = em->map_lookup;
> +	stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
> +	if (physical_len != stripe_len) {
> +		btrfs_err(fs_info,
> +"dev extent (%llu, %llu) length doesn't match with chunk %llu, have %llu expect %llu",
> +			  devid, physical_offset, em->start, physical_len,
> +			  stripe_len);
> +		ret = -EUCLEAN;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < map->num_stripes; i++) {
> +		if (map->stripes[i].dev->devid == devid &&
> +		    map->stripes[i].physical == physical_offset) {
> +			found = true;
> +			if (map->verified_stripes >= map->num_stripes) {
> +				btrfs_err(fs_info,
> +			"too many dev extent for chunk %llu is detected",
> +					  em->start);
> +				ret = -EUCLEAN;
> +				goto out;
> +			}
> +			map->verified_stripes++;
> +			break;
> +		}
> +	}
> +	if (!found) {
> +		ret = -EUCLEAN;
> +		btrfs_err(fs_info,
> +			"dev extent (%llu, %llu) has no corresponding chunk",
> +			devid, physical_offset);
> +	}
> +out:
> +	free_extent_map(em);
> +	return ret;
> +}
> +
> +static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
> +{
> +	struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
> +	struct extent_map *em;
> +	struct rb_node *node;
> +	int ret = 0;
> +
> +	read_lock(&em_tree->lock);
> +	for (node = rb_first(&em_tree->map); node; node = rb_next(node)) {
> +		em = rb_entry(node, struct extent_map, rb_node);
> +		if (em->map_lookup->num_stripes !=
> +		    em->map_lookup->verified_stripes) {
> +			btrfs_err(fs_info,
> +			"chunk %llu has missing dev extent, have %d expect %d",
> +				  em->start, em->map_lookup->verified_stripes,
> +				  em->map_lookup->num_stripes);
> +			ret = -EUCLEAN;
> +			goto out;
> +		}
> +	}
> +out:
> +	read_unlock(&em_tree->lock);
> +	return ret;
> +}
> +
> +/*
> + * Ensure all dev extents are mapped to the correct chunk, otherwise
> + * later chunk allocation/free would cause unexpected behavior.
> + *
> + * NOTE: This will iterate through the whole device tree, which should be
> + * of the same order of size as the chunk tree.
> + * This would increase mount time by a tiny fraction.
> + */
> +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
> +{
> +	struct btrfs_path *path;
> +	struct btrfs_root *root = fs_info->dev_root;
> +	struct btrfs_key key;
> +	int ret = 0;
> +
> +	key.objectid = 1;
> +	key.type = BTRFS_DEV_EXTENT_KEY;
> +	key.offset = 0;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +
> +	path->reada = READA_FORWARD;
> +	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +	if (ret < 0)
> +		goto out;
> +
> +	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
> +		ret = btrfs_next_item(root, path);
> +		if (ret < 0)
> +			goto out;
> +		/* No dev extents at all? Not good */
> +		if (ret > 0) {
> +			ret = -EUCLEAN;
> +			goto out;
> +		}
> +	}
> +	while (1) {
> +		struct extent_buffer *leaf = path->nodes[0];
> +		struct btrfs_dev_extent *dext;
> +		int slot = path->slots[0];
> +		u64 chunk_offset;
> +		u64 physical_offset;
> +		u64 physical_len;
> +		u64 devid;
> +
> +		btrfs_item_key_to_cpu(leaf, &key, slot);
> +		if (key.type != BTRFS_DEV_EXTENT_KEY)
> +			break;
> +		devid = key.objectid;
> +		physical_offset = key.offset;
> +
> +		dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
> +		chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
> +		physical_len = btrfs_dev_extent_length(leaf, dext);
> +
> +		ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
> +					    physical_offset, physical_len);
> +		if (ret < 0)
> +			goto out;
> +		ret = btrfs_next_item(root, path);
> +		if (ret < 0)
> +			goto out;
> +		if (ret > 0) {
> +			ret = 0;
> +			break;
> +		}
> +	}
> +
> +	/* Ensure all chunks have corresponding dev extents */
> +	ret = verify_chunk_dev_extent_mapping(fs_info);
> +out:
> +	btrfs_free_path(path);
> +	return ret;
> +}
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 6d4f38ad9f5c..4301bf2d0534 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -345,6 +345,7 @@ struct map_lookup {
>   	u64 stripe_len;
>   	int num_stripes;
>   	int sub_stripes;
> +	int verified_stripes; /* For mount time dev extent verification */
>   	struct btrfs_bio_stripe stripes[];
>   };
>   
> @@ -559,5 +560,6 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
>   void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
>   bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
>   					struct btrfs_device *failing_dev);
> +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
>   
>   #endif
> 



  reply	other threads:[~2018-08-01  4:54 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-01  2:37 [PATCH v2 0/6] btrfs: Enhanced validation check for fuzzed images Qu Wenruo
2018-08-01  2:37 ` [PATCH v2 1/6] btrfs: Check each block group has corresponding chunk at mount time Qu Wenruo
2018-08-01  2:54   ` Su Yue
2018-08-01  2:37 ` [PATCH v2 2/6] btrfs: Verify every chunk has corresponding block group " Qu Wenruo
2018-08-01  2:37 ` [PATCH v2 3/6] btrfs: Remove unused function btrfs_account_dev_extents_size() Qu Wenruo
2018-08-01  2:37 ` [PATCH v2 4/6] btrfs: Introduce mount time chunk <-> dev extent mapping check Qu Wenruo
2018-08-01  3:18   ` Su Yue [this message]
2019-01-14 11:09   ` Filipe Manana
2019-01-14 11:28     ` Qu Wenruo
2018-08-01  2:37 ` [PATCH v2 5/6] btrfs: Exit gracefully when failed to add chunk map Qu Wenruo
2018-08-01  2:37 ` [PATCH v2 6/6] btrfs: locking: Allow btrfs_tree_lock() to return error to avoid deadlock Qu Wenruo
2018-08-01  2:55   ` Su Yue
2018-08-02 16:40 ` [PATCH v2 0/6] btrfs: Enhanced validation check for fuzzed images David Sterba
2018-08-03  0:06   ` Qu Wenruo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6e062d6b-6e8c-94e5-d742-645b7dbdd8ed@cn.fujitsu.com \
    --to=suy.fnst@cn.fujitsu.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=wqu@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).