All of lore.kernel.org
 help / color / mirror / Atom feed
From: Josef Bacik <josef@toxicpanda.com>
To: Mark Harmstone <maharmstone@fb.com>
Cc: linux-btrfs@vger.kernel.org, Mark Harmstone <maharmstone@meta.com>
Subject: Re: [PATCH] btrfs-progs: add rudimentary log checking
Date: Wed, 3 Jul 2024 13:13:59 -0400	[thread overview]
Message-ID: <20240703171359.GA736953@perftesting> (raw)
In-Reply-To: <20240703162925.493914-1-maharmstone@fb.com>

On Wed, Jul 03, 2024 at 05:28:32PM +0100, Mark Harmstone wrote:
> From: Mark Harmstone <maharmstone@meta.com>
> 
> Currently the transaction log is more or less ignored by btrfs check,
> meaning that it's possible for a FS with a corrupt log to pass btrfs
> check, but be immediately corrupted by the kernel when it's mounted.
> 
> This patch adds a check that if there's an inode in the log, any pending
> non-compressed writes also have corresponding csum entries.
> 
> Signed-off-by: Mark Harmstone <maharmstone@meta.com>
> ---
>  check/main.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 281 insertions(+), 12 deletions(-)
> 
> diff --git a/check/main.c b/check/main.c
> index 83c721d3..6f3fab35 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -9787,6 +9787,263 @@ static int zero_log_tree(struct btrfs_root *root)
>  	return ret;
>  }
>  
> +static int check_log_csum(struct btrfs_root *root, u64 addr, u64 length)
> +{
> +	struct btrfs_path path = { 0 };
> +	struct btrfs_key key;
> +	struct extent_buffer *leaf;
> +	u16 csum_size = gfs_info->csum_size;
> +	u16 num_entries;
> +	u64 data_len;
> +	int ret;
> +
> +	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
> +	key.type = BTRFS_EXTENT_CSUM_KEY;
> +	key.offset = addr;
> +
> +	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (ret > 0 && path.slots[0])
> +		path.slots[0]--;
> +
> +	ret = 0;
> +
> +	while (1) {
> +		leaf = path.nodes[0];
> +		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> +			ret = btrfs_next_leaf(root, &path);
> +			if (ret) {
> +				if (ret > 0)
> +					ret = 0;
> +
> +				break;
> +			}
> +			leaf = path.nodes[0];
> +		}
> +
> +		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> +		if (key.objectid > BTRFS_EXTENT_CSUM_OBJECTID)
> +			break;
> +
> +		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
> +		    key.type == BTRFS_EXTENT_CSUM_KEY) {
> +			if (key.offset >= addr + length)
> +				break;
> +

You can turn this into

if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
    key.type != BTRFS_EXTENT_CSUM_KEY)
	goto next;

if (key.offset >= addr + length)
	break;

and put a next at path.slots[0]++;

and then you don't have to indent the bits below.

> +			num_entries = btrfs_item_size(leaf, path.slots[0]) / csum_size;
> +			data_len = num_entries * gfs_info->sectorsize;
> +
> +			if (addr >= key.offset && addr + length <= key.offset + data_len) {
> +				u64 end = min(addr + length, key.offset + data_len);
> +
> +				length = addr + length - end;
> +				addr = end;
> +
> +				if (length == 0)
> +					break;
> +			}
> +		}
> +
> +		path.slots[0]++;
> +	}
> +
> +	btrfs_release_path(&path);
> +
> +	if (ret >= 0)
> +		ret = length == 0 ? 1 : 0;
> +
> +	return ret;
> +}
> +
> +static int check_log_root(struct btrfs_root *root, struct cache_tree *root_cache,
> +			  struct walk_control *wc)
> +{
> +	struct btrfs_path path = { 0 };
> +	struct btrfs_key key;
> +	struct extent_buffer *leaf;
> +	int ret, err = 0;
> +	u64 last_csum_inode = 0;
> +
> +	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
> +	key.type = BTRFS_INODE_ITEM_KEY;
> +	key.offset = 0;
> +	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
> +	if (ret < 0)
> +		return 1;
> +
> +	while (1) {
> +		leaf = path.nodes[0];
> +		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> +			ret = btrfs_next_leaf(root, &path);
> +			if (ret) {
> +				if (ret < 0)
> +					err = 1;
> +
> +				break;
> +			}
> +			leaf = path.nodes[0];
> +		}
> +		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> +		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID)
> +			break;
> +
> +		if (key.type == BTRFS_INODE_ITEM_KEY) {
> +			struct btrfs_inode_item *item;
> +
> +			item = btrfs_item_ptr(leaf, path.slots[0],
> +					      struct btrfs_inode_item);
> +
> +			if (!(btrfs_inode_flags(leaf, item) & BTRFS_INODE_NODATASUM))
> +				last_csum_inode = key.objectid;
> +		} else if (key.type == BTRFS_EXTENT_DATA_KEY &&
> +			   key.objectid == last_csum_inode) {
> +			struct btrfs_file_extent_item *fi;
> +			u64 addr, length;
> +
> +			fi = btrfs_item_ptr(leaf, path.slots[0],
> +					    struct btrfs_file_extent_item);
> +
> +			if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
> +				goto next;
> +
> +			if (btrfs_file_extent_compression(leaf, fi) != 0)
> +				goto next;

Compressed file extents should definitely have a csum associated with them.

> +
> +			addr = btrfs_file_extent_disk_bytenr(leaf, fi) +
> +				btrfs_file_extent_offset(leaf, fi);
> +			length = btrfs_file_extent_disk_num_bytes(leaf, fi);
> +
> +			ret = check_log_csum(root, addr, length);
> +			if (ret < 0) {
> +				err = 1;
> +				break;
> +			}
> +
> +			if (!ret) {
> +				error("csum missing in log (root %llu, inode %llu, "
> +				      "offset %llu, address 0x%llx, length %llu)",
> +				      root->objectid, last_csum_inode, key.offset,
> +				      addr, length);
> +				err = 1;
> +			}
> +		}
> +
> +next:
> +		path.slots[0]++;
> +	}
> +
> +	btrfs_release_path(&path);
> +
> +	return err;
> +}
> +
> +static int check_log(struct cache_tree *root_cache)
> +{
> +	struct btrfs_path path = { 0 };
> +	struct walk_control wc;
> +
> +	memset(&wc, 0, sizeof(wc));

You can just do

struct walk_contro wc = { 0 };

as well.

> +	cache_tree_init(&wc.shared);
> +
> +	struct btrfs_key key;
> +	struct extent_buffer *leaf;
> +	struct btrfs_root *log_root = gfs_info->log_root_tree;
> +	int ret;
> +	int err = 0;

We tend to prefer the declarations first, then code, so move this above please.

> +
> +	key.objectid = BTRFS_TREE_LOG_OBJECTID;
> +	key.type = BTRFS_ROOT_ITEM_KEY;
> +	key.offset = 0;
> +	ret = btrfs_search_slot(NULL, log_root, &key, &path, 0, 0);
> +	if (ret < 0) {
> +		err = 1;
> +		goto out;
> +	}
> +
> +	while (1) {
> +		leaf = path.nodes[0];
> +		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> +			ret = btrfs_next_leaf(log_root, &path);
> +			if (ret) {
> +				if (ret < 0)
> +					err = 1;
> +				break;
> +			}
> +			leaf = path.nodes[0];
> +		}
> +		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> +		if (key.objectid > BTRFS_TREE_LOG_OBJECTID ||
> +		    key.type > BTRFS_ROOT_ITEM_KEY)
> +			break;
> +
> +		if (key.objectid == BTRFS_TREE_LOG_OBJECTID &&
> +		    key.type == BTRFS_ROOT_ITEM_KEY &&
> +		    fs_root_objectid(key.offset)) {
> +			struct btrfs_root tmp_root;
> +			struct extent_buffer *l;
> +			struct btrfs_tree_parent_check check = { 0 };
> +
> +			memset(&tmp_root, 0, sizeof(tmp_root));
> +
> +			btrfs_setup_root(&tmp_root, gfs_info, key.offset);
> +
> +			l = path.nodes[0];
> +			read_extent_buffer(l, &tmp_root.root_item,
> +					btrfs_item_ptr_offset(l, path.slots[0]),
> +					sizeof(tmp_root.root_item));
> +
> +			tmp_root.root_key.objectid = key.offset;
> +			tmp_root.root_key.type = BTRFS_ROOT_ITEM_KEY;
> +			tmp_root.root_key.offset = 0;
> +
> +			check.owner_root = btrfs_root_id(&tmp_root);
> +			check.transid = btrfs_root_generation(&tmp_root.root_item);
> +			check.level = btrfs_root_level(&tmp_root.root_item);
> +
> +			tmp_root.node = read_tree_block(gfs_info,
> +							btrfs_root_bytenr(&tmp_root.root_item),
> +							&check);
> +			if (IS_ERR(tmp_root.node)) {
> +				tmp_root.node = NULL;
> +				err = 1;
> +				goto next;
> +			}
> +
> +			if (btrfs_header_level(tmp_root.node) != btrfs_root_level(&tmp_root.root_item)) {
> +				error("root [%llu %llu] level %d does not match %d",
> +					tmp_root.root_key.objectid,
> +					tmp_root.root_key.offset,
> +					btrfs_header_level(tmp_root.node),
> +					btrfs_root_level(&tmp_root.root_item));
> +				err = 1;
> +				goto next;
> +			}

Turn the above into a helper.  Thanks,

Josef

  reply	other threads:[~2024-07-03 17:14 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-03 16:28 [PATCH] btrfs-progs: add rudimentary log checking Mark Harmstone
2024-07-03 17:13 ` Josef Bacik [this message]
2024-07-03 23:14 ` Qu Wenruo
2024-07-11 14:12   ` Mark Harmstone

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240703171359.GA736953@perftesting \
    --to=josef@toxicpanda.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=maharmstone@fb.com \
    --cc=maharmstone@meta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.