From: Nikolay Borisov <nborisov@suse.com>
To: Qu Wenruo <wqu@suse.com>, linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 1/3] btrfs: tree-checker: Add EXTENT_ITEM and METADATA_ITEM check
Date: Mon, 29 Jul 2019 16:42:30 +0300 [thread overview]
Message-ID: <bd6c75f8-c9b5-c29f-a691-101fee33cef5@suse.com> (raw)
In-Reply-To: <20190729074337.10573-2-wqu@suse.com>
On 29.07.19 г. 10:43 ч., Qu Wenruo wrote:
> This patch introduces the ability to check extent items.
>
> This check involves:
> - key->objectid check
> Basic alignment check.
>
> - key->type check
> Against btrfs_extent_item::type and SKINNY_METADATA feature.
>
> - key->offset alignment check for EXTENT_ITEM
>
> - key->offset check for METADATA_ITEM
>
> - item size check
> Both against minimal size and stepping check.
>
> - btrfs_extent_item check
> Checks its flags and generation.
>
> - btrfs_extent_inline_ref checks
> Against 4 types inline ref.
> Checks bytenr alignment and tree level.
>
> - btrfs_extent_item::refs check
> Check against total refs found in inline refs.
>
> This check would be the most complex single item check due to its nature
> of inlined items.
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> fs/btrfs/tree-checker.c | 249 ++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 249 insertions(+)
>
> diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
> index 71bbbce3076d..1e97bb127893 100644
> --- a/fs/btrfs/tree-checker.c
> +++ b/fs/btrfs/tree-checker.c
> @@ -899,6 +899,251 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
> return 0;
> }
>
> +__printf(3,4)
> +__cold
> +static void extent_err(const struct extent_buffer *eb, int slot,
> + const char *fmt, ...)
> +{
> + struct btrfs_key key;
> + struct va_format vaf;
> + va_list args;
> + u64 bytenr;
> + u64 len;
> +
> + btrfs_item_key_to_cpu(eb, &key, slot);
> + bytenr = key.objectid;
> + if (key.type == BTRFS_METADATA_ITEM_KEY)
> + len = eb->fs_info->nodesize;
> + else
> + len = key.offset;
> + va_start(args, fmt);
> +
> + vaf.fmt = fmt;
> + vaf.va = &args;
> +
> + btrfs_crit(eb->fs_info,
> + "corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV",
> + btrfs_header_level(eb) == 0 ? "leaf" : "node",
> + eb->start, slot, bytenr, len, &vaf);
> + va_end(args);
> +}
> +
> +static int check_extent_item(struct extent_buffer *leaf,
> + struct btrfs_key *key, int slot)
> +{
> + struct btrfs_fs_info *fs_info = leaf->fs_info;
> + struct btrfs_extent_item *ei;
> + bool is_tree_block = false;
> + u64 ptr; /* Current pointer inside inline refs */
> + u64 end; /* Extent item end */
> + u32 item_size = btrfs_item_size_nr(leaf, slot);
> + u64 flags;
> + u64 generation;
> + u64 total_refs; /* Total refs in btrfs_extent_item */
> + u64 inline_refs = 0; /* found total inline refs */
> +
> + if (key->type == BTRFS_METADATA_ITEM_KEY &&
> + !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
> + generic_err(leaf, slot,
> +"invalid key type, METADATA_ITEM type invalid when SKINNY_METADATA feature disabled");
> + return -EUCLEAN;
> + }
> + /* key->objectid is the bytenr for both key types */
> + if (!IS_ALIGNED(key->objectid, fs_info->sectorsize)) {
> + generic_err(leaf, slot,
> +"invalid key objectid, have %llu expect to be aligned to %u",
> + key->objectid, fs_info->sectorsize);
> + return -EUCLEAN;
> + }
> +
> + /* key->offset is tree level for METADATA_ITEM_KEY */
> + if (key->type == BTRFS_METADATA_ITEM_KEY) {
> + if (key->offset >= BTRFS_MAX_LEVEL) {
make it a compound statement:
if key->type && key->offset. The extra if doesn't bring anything...
> + extent_err(leaf, slot,
> + "invalid tree level, have %llu expect [0, %u]",
> + key->offset, BTRFS_MAX_LEVEL - 1);
> + return -EUCLEAN;
> + }
> + }
> +
> + /*
> + * EXTENT/METADATA_ITEM is consistent of:
> + * 1) One btrfs_extent_item
> + * Records the total refs, type and generation of the extent.
> + *
> + * 2) One btrfs_tree_block_info (for EXTENT_ITEM and tree backref only)
> + * Records the first key and level of the tree block.
> + *
> + * 2) *Zero* or more btrfs_extent_inline_ref(s)
> + * Each inline ref has one btrfs_extent_inline_ref shows:
> + * 2.1) The ref type, one of the 4
> + * TREE_BLOCK_REF Tree block only
> + * SHARED_BLOCK_REF Tree block only
> + * EXTENT_DATA_REF Data only
> + * SHARED_DATA_REF Data only
> + * 2.2) Ref type specific data
> + * Either using btrfs_extent_inline_ref::offset, or specific
> + * data structure.
> + */
> + if (item_size < sizeof(*ei)) {
> + extent_err(leaf, slot,
> + "invalid item size, have %u expect >= %lu",
> + item_size, sizeof(*ei));
> + return -EUCLEAN;
> + }
> + end = item_size + btrfs_item_ptr_offset(leaf, slot);
> +
> + /* Checks against extent_item */
> + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
> + flags = btrfs_extent_flags(leaf, ei);
> + total_refs = btrfs_extent_refs(leaf, ei);
> + generation = btrfs_extent_generation(leaf, ei);
> + if (generation > btrfs_super_generation(fs_info->super_copy) + 1) {
> + extent_err(leaf, slot,
> + "invalid generation, have %llu expect <= %llu",
nit: I find the '<= [number]' somewhat confusing, wouldn't it be better
if it's spelled our e.g 'expecting less than or equal than [number]'.
Might be just me.
> + generation,
> + btrfs_super_generation(fs_info->super_copy) + 1);
> + return -EUCLEAN;
> + }
> + if (!is_power_of_2(flags & (BTRFS_EXTENT_FLAG_DATA |
> + BTRFS_EXTENT_FLAG_TREE_BLOCK))) {
> + extent_err(leaf, slot,
> + "invalid extent flag, have 0x%llx expect 1 bit set in 0x%llx",
> + flags, BTRFS_EXTENT_FLAG_DATA |
> + BTRFS_EXTENT_FLAG_TREE_BLOCK);
> + return -EUCLEAN;
> + }
> + is_tree_block = !!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK);
> + if (is_tree_block && key->type == BTRFS_EXTENT_ITEM_KEY &&
> + key->offset != fs_info->nodesize) {
> + extent_err(leaf, slot,
> + "invalid extent length, have %llu expect %u",
> + key->offset, fs_info->nodesize);
> + return -EUCLEAN;
> + }
> + if (!is_tree_block) {
> + if (key->type != BTRFS_EXTENT_ITEM_KEY) {
> + extent_err(leaf, slot,
> + "invalid key type, have %u expect %u for data backref",
> + key->type, BTRFS_EXTENT_ITEM_KEY);
> + return -EUCLEAN;
> + }
> + if (!IS_ALIGNED(key->offset, fs_info->sectorsize)) {
> + extent_err(leaf, slot,
> + "invalid extent length, have %llu expect aliged to %u",
> + key->offset, fs_info->sectorsize);
> + return -EUCLEAN;
> + }
> + }
unify the two is_tree_block/!is_tree_block either in:
if (is_tree_block) {
if (keypt->type = EXTENT_ITEM_KEY && offset != nodesize {
foo;
}
} else {
bar;
}
or
if (is_tree_block && key->type == BTRFS_EXTENT_ITEM_KEY ..) {
} else if (!is_tree_block) {
bar
}
> + ptr = (u64)(struct btrfs_extent_item *)(ei + 1);
> +
> + /* Check the special case of btrfs_tree_block_info */
> + if (is_tree_block && key->type != BTRFS_METADATA_ITEM_KEY) {
> + struct btrfs_tree_block_info *info;
> +
> + info = (struct btrfs_tree_block_info *)ptr;
> + if (btrfs_tree_block_level(leaf, info) >= BTRFS_MAX_LEVEL) {
> + extent_err(leaf, slot,
> + "invalid tree block info level, have %u expect [0, %u)",
nit: Strictly speaking using [0, 7) is wrong, because ')' already
implies an open interval. Since we have 8 levels 0/7 inclusive this
means the correct way to print it would be [0, 7] or [0, 8).
> + btrfs_tree_block_level(leaf, info),
> + BTRFS_MAX_LEVEL - 1);
> + return -EUCLEAN;
> + }
> + ptr = (u64)(struct btrfs_tree_block_info *)(info + 1);
> + }
> +
> + /* Check inline refs */
> + while (ptr < end) {
> + struct btrfs_extent_inline_ref *iref;
> + struct btrfs_extent_data_ref *dref;
> + struct btrfs_shared_data_ref *sref;
> + u64 dref_offset;
> + u64 inline_offset;
> + u8 inline_type;
> +
> + if (ptr + sizeof(*iref) > end) {
> + extent_err(leaf, slot,
> + "invalid item size, size too small, ptr %llu end %llu",
> + ptr, end);
> + goto err;
> + }
> + iref = (struct btrfs_extent_inline_ref *)ptr;
> + inline_type = btrfs_extent_inline_ref_type(leaf, iref);
> + inline_offset = btrfs_extent_inline_ref_offset(leaf, iref);
> + if (ptr + btrfs_extent_inline_ref_size(inline_type) > end) {
> + extent_err(leaf, slot,
> + "invalid item size, size too small, ptr %llu end %llu",
Make that text explicit:
"inline ref item overflows extent item" or some such.
> + ptr, end);
> + goto err;
> + }
> +
> + switch (inline_type) {
> + /* inline_offset is subvolid of the owner, no need to check */
> + case BTRFS_TREE_BLOCK_REF_KEY:
> + inline_refs++;
> + break;
> + /* contains parent bytenr */
> + case BTRFS_SHARED_BLOCK_REF_KEY:
> + if (!IS_ALIGNED(inline_offset, fs_info->sectorsize)) {
> + extent_err(leaf, slot,
> + "invalid tree parent bytenr, have %llu expect aligned to %u",
> + inline_offset, fs_info->sectorsize);
> + goto err;
> + }
> + inline_refs++;
> + break;
> + /*
> + * contains owner subvolid, owner key objectid, adjusted offset.
> + * the only obvious corruption can happen in that offset.
> + */
> + case BTRFS_EXTENT_DATA_REF_KEY:
> + dref = (struct btrfs_extent_data_ref *)(&iref->offset);
> + dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
> + if (!IS_ALIGNED(dref_offset, fs_info->sectorsize)) {
> + extent_err(leaf, slot,
> + "invalid data ref offset, have %llu expect aligned to %u",
> + dref_offset, fs_info->sectorsize);
> + goto err;
> + }
> + inline_refs += btrfs_extent_data_ref_count(leaf, dref);
> + break;
> + /* contains parent bytenr and ref count */
> + case BTRFS_SHARED_DATA_REF_KEY:
> + sref = (struct btrfs_shared_data_ref *)(iref + 1);
> + if (!IS_ALIGNED(inline_offset, fs_info->sectorsize)) {
> + extent_err(leaf, slot,
> + "invalid data parent bytenr, have %llu expect aligned to %u",
> + inline_offset, fs_info->sectorsize);
> + goto err;
> + }
> + inline_refs += btrfs_shared_data_ref_count(leaf, sref);
> + break;
> + default:
> + extent_err(leaf, slot, "unknown inline ref type: %u",
> + inline_type);
> + goto err;
> + }
> + ptr += btrfs_extent_inline_ref_size(inline_type);
> + }
> + /* No padding is allowed */
> + if (ptr != end) {
> + extent_err(leaf, slot,
> + "invalid extent item size, padding bytes found");
> + goto err;
> + }
> +
> + /* Finally, check the inline refs against total refs */
> + if (total_refs < inline_refs) {
nit: if (inline_refs > total_refs) {} looks saner to me.
> + extent_err(leaf, slot,
> + "invalid extent refs, have %llu expect >= %llu",
> + total_refs, inline_refs);
> + goto err;
> + }
> + return 0;
> +err:
> + return -EUCLEAN;
> +}
> +
> /*
> * Common point to switch the item-specific validation.
> */
> @@ -937,6 +1182,10 @@ static int check_leaf_item(struct extent_buffer *leaf,
> case BTRFS_ROOT_ITEM_KEY:
> ret = check_root_item(leaf, key, slot);
> break;
> + case BTRFS_EXTENT_ITEM_KEY:
> + case BTRFS_METADATA_ITEM_KEY:
> + ret = check_extent_item(leaf, key, slot);
> + break;
> }
> return ret;
> }
>
next prev parent reply other threads:[~2019-07-29 13:42 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-07-29 7:43 [PATCH 0/3] btrfs: tree-checker: Add extent items check Qu Wenruo
2019-07-29 7:43 ` [PATCH 1/3] btrfs: tree-checker: Add EXTENT_ITEM and METADATA_ITEM check Qu Wenruo
2019-07-29 13:42 ` Nikolay Borisov [this message]
2019-07-29 13:53 ` Qu Wenruo
2019-07-29 7:43 ` [PATCH 2/3] btrfs: tree-checker: Add simple keyed refs check Qu Wenruo
2019-07-29 7:43 ` [PATCH 3/3] btrfs: tree-checker: Add EXTENT_DATA_REF check Qu Wenruo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=bd6c75f8-c9b5-c29f-a691-101fee33cef5@suse.com \
--to=nborisov@suse.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=wqu@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).