From: Josef Bacik <josef@toxicpanda.com>
To: Mark Harmstone <maharmstone@fb.com>
Cc: linux-btrfs@vger.kernel.org, Mark Harmstone <maharmstone@meta.com>
Subject: Re: [PATCH] btrfs-progs: add rudimentary log checking
Date: Wed, 3 Jul 2024 13:13:59 -0400 [thread overview]
Message-ID: <20240703171359.GA736953@perftesting> (raw)
In-Reply-To: <20240703162925.493914-1-maharmstone@fb.com>
On Wed, Jul 03, 2024 at 05:28:32PM +0100, Mark Harmstone wrote:
> From: Mark Harmstone <maharmstone@meta.com>
>
> Currently the transaction log is more or less ignored by btrfs check,
> meaning that it's possible for a FS with a corrupt log to pass btrfs
> check, but be immediately corrupted by the kernel when it's mounted.
>
> This patch adds a check that if there's an inode in the log, any pending
> non-compressed writes also have corresponding csum entries.
>
> Signed-off-by: Mark Harmstone <maharmstone@meta.com>
> ---
> check/main.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 281 insertions(+), 12 deletions(-)
>
> diff --git a/check/main.c b/check/main.c
> index 83c721d3..6f3fab35 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -9787,6 +9787,263 @@ static int zero_log_tree(struct btrfs_root *root)
> return ret;
> }
>
> +static int check_log_csum(struct btrfs_root *root, u64 addr, u64 length)
> +{
> + struct btrfs_path path = { 0 };
> + struct btrfs_key key;
> + struct extent_buffer *leaf;
> + u16 csum_size = gfs_info->csum_size;
> + u16 num_entries;
> + u64 data_len;
> + int ret;
> +
> + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
> + key.type = BTRFS_EXTENT_CSUM_KEY;
> + key.offset = addr;
> +
> + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
> + if (ret < 0)
> + return ret;
> +
> + if (ret > 0 && path.slots[0])
> + path.slots[0]--;
> +
> + ret = 0;
> +
> + while (1) {
> + leaf = path.nodes[0];
> + if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> + ret = btrfs_next_leaf(root, &path);
> + if (ret) {
> + if (ret > 0)
> + ret = 0;
> +
> + break;
> + }
> + leaf = path.nodes[0];
> + }
> +
> + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> + if (key.objectid > BTRFS_EXTENT_CSUM_OBJECTID)
> + break;
> +
> + if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
> + key.type == BTRFS_EXTENT_CSUM_KEY) {
> + if (key.offset >= addr + length)
> + break;
> +
You can turn this into
if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
key.type != BTRFS_EXTENT_CSUM_KEY)
goto next;
if (key.offset >= addr + length)
break;
and put a next: label at path.slots[0]++;
and then you don't have to indent the bits below.
> + num_entries = btrfs_item_size(leaf, path.slots[0]) / csum_size;
> + data_len = num_entries * gfs_info->sectorsize;
> +
> + if (addr >= key.offset && addr + length <= key.offset + data_len) {
> + u64 end = min(addr + length, key.offset + data_len);
> +
> + length = addr + length - end;
> + addr = end;
> +
> + if (length == 0)
> + break;
> + }
> + }
> +
> + path.slots[0]++;
> + }
> +
> + btrfs_release_path(&path);
> +
> + if (ret >= 0)
> + ret = length == 0 ? 1 : 0;
> +
> + return ret;
> +}
> +
> +static int check_log_root(struct btrfs_root *root, struct cache_tree *root_cache,
> + struct walk_control *wc)
> +{
> + struct btrfs_path path = { 0 };
> + struct btrfs_key key;
> + struct extent_buffer *leaf;
> + int ret, err = 0;
> + u64 last_csum_inode = 0;
> +
> + key.objectid = BTRFS_FIRST_FREE_OBJECTID;
> + key.type = BTRFS_INODE_ITEM_KEY;
> + key.offset = 0;
> + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
> + if (ret < 0)
> + return 1;
> +
> + while (1) {
> + leaf = path.nodes[0];
> + if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> + ret = btrfs_next_leaf(root, &path);
> + if (ret) {
> + if (ret < 0)
> + err = 1;
> +
> + break;
> + }
> + leaf = path.nodes[0];
> + }
> + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> + if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID)
> + break;
> +
> + if (key.type == BTRFS_INODE_ITEM_KEY) {
> + struct btrfs_inode_item *item;
> +
> + item = btrfs_item_ptr(leaf, path.slots[0],
> + struct btrfs_inode_item);
> +
> + if (!(btrfs_inode_flags(leaf, item) & BTRFS_INODE_NODATASUM))
> + last_csum_inode = key.objectid;
> + } else if (key.type == BTRFS_EXTENT_DATA_KEY &&
> + key.objectid == last_csum_inode) {
> + struct btrfs_file_extent_item *fi;
> + u64 addr, length;
> +
> + fi = btrfs_item_ptr(leaf, path.slots[0],
> + struct btrfs_file_extent_item);
> +
> + if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
> + goto next;
> +
> + if (btrfs_file_extent_compression(leaf, fi) != 0)
> + goto next;
Compressed file extents should definitely have a csum associated with them.
> +
> + addr = btrfs_file_extent_disk_bytenr(leaf, fi) +
> + btrfs_file_extent_offset(leaf, fi);
> + length = btrfs_file_extent_disk_num_bytes(leaf, fi);
> +
> + ret = check_log_csum(root, addr, length);
> + if (ret < 0) {
> + err = 1;
> + break;
> + }
> +
> + if (!ret) {
> + error("csum missing in log (root %llu, inode %llu, "
> + "offset %llu, address 0x%llx, length %llu)",
> + root->objectid, last_csum_inode, key.offset,
> + addr, length);
> + err = 1;
> + }
> + }
> +
> +next:
> + path.slots[0]++;
> + }
> +
> + btrfs_release_path(&path);
> +
> + return err;
> +}
> +
> +static int check_log(struct cache_tree *root_cache)
> +{
> + struct btrfs_path path = { 0 };
> + struct walk_control wc;
> +
> + memset(&wc, 0, sizeof(wc));
You can just do
struct walk_control wc = { 0 };
as well.
> + cache_tree_init(&wc.shared);
> +
> + struct btrfs_key key;
> + struct extent_buffer *leaf;
> + struct btrfs_root *log_root = gfs_info->log_root_tree;
> + int ret;
> + int err = 0;
We tend to prefer the declarations first, then code, so move this above please.
> +
> + key.objectid = BTRFS_TREE_LOG_OBJECTID;
> + key.type = BTRFS_ROOT_ITEM_KEY;
> + key.offset = 0;
> + ret = btrfs_search_slot(NULL, log_root, &key, &path, 0, 0);
> + if (ret < 0) {
> + err = 1;
> + goto out;
> + }
> +
> + while (1) {
> + leaf = path.nodes[0];
> + if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> + ret = btrfs_next_leaf(log_root, &path);
> + if (ret) {
> + if (ret < 0)
> + err = 1;
> + break;
> + }
> + leaf = path.nodes[0];
> + }
> + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> + if (key.objectid > BTRFS_TREE_LOG_OBJECTID ||
> + key.type > BTRFS_ROOT_ITEM_KEY)
> + break;
> +
> + if (key.objectid == BTRFS_TREE_LOG_OBJECTID &&
> + key.type == BTRFS_ROOT_ITEM_KEY &&
> + fs_root_objectid(key.offset)) {
> + struct btrfs_root tmp_root;
> + struct extent_buffer *l;
> + struct btrfs_tree_parent_check check = { 0 };
> +
> + memset(&tmp_root, 0, sizeof(tmp_root));
> +
> + btrfs_setup_root(&tmp_root, gfs_info, key.offset);
> +
> + l = path.nodes[0];
> + read_extent_buffer(l, &tmp_root.root_item,
> + btrfs_item_ptr_offset(l, path.slots[0]),
> + sizeof(tmp_root.root_item));
> +
> + tmp_root.root_key.objectid = key.offset;
> + tmp_root.root_key.type = BTRFS_ROOT_ITEM_KEY;
> + tmp_root.root_key.offset = 0;
> +
> + check.owner_root = btrfs_root_id(&tmp_root);
> + check.transid = btrfs_root_generation(&tmp_root.root_item);
> + check.level = btrfs_root_level(&tmp_root.root_item);
> +
> + tmp_root.node = read_tree_block(gfs_info,
> + btrfs_root_bytenr(&tmp_root.root_item),
> + &check);
> + if (IS_ERR(tmp_root.node)) {
> + tmp_root.node = NULL;
> + err = 1;
> + goto next;
> + }
> +
> + if (btrfs_header_level(tmp_root.node) != btrfs_root_level(&tmp_root.root_item)) {
> + error("root [%llu %llu] level %d does not match %d",
> + tmp_root.root_key.objectid,
> + tmp_root.root_key.offset,
> + btrfs_header_level(tmp_root.node),
> + btrfs_root_level(&tmp_root.root_item));
> + err = 1;
> + goto next;
> + }
Turn the above into a helper. Thanks,
Josef
next prev parent reply other threads:[~2024-07-03 17:14 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-03 16:28 [PATCH] btrfs-progs: add rudimentary log checking Mark Harmstone
2024-07-03 17:13 ` Josef Bacik [this message]
2024-07-03 23:14 ` Qu Wenruo
2024-07-11 14:12 ` Mark Harmstone
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240703171359.GA736953@perftesting \
--to=josef@toxicpanda.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=maharmstone@fb.com \
--cc=maharmstone@meta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox