* [PATCH 1/2] btrfs: concentrate all tree block parentness check parameters into one structure
2022-09-14 5:32 [PATCH 0/2] btrfs: do most metadata parentnesss check at endio time Qu Wenruo
@ 2022-09-14 5:32 ` Qu Wenruo
2022-11-11 16:09 ` David Sterba
2022-09-14 5:32 ` [PATCH 2/2] btrfs: move tree block parentness check into validate_extent_buffer() Qu Wenruo
2022-11-11 16:07 ` [PATCH 0/2] btrfs: do most metadata parentnesss check at endio time David Sterba
2 siblings, 1 reply; 5+ messages in thread
From: Qu Wenruo @ 2022-09-14 5:32 UTC (permalink / raw)
To: linux-btrfs
There are several different tree block parentness check parameters used
across several helpers:
- level
Mandatory
- transid
Under most cases it's mandatory, but there are several backref cases
which skips this check.
- owner_root
- first_key
Utilized by most top-down tree search routine. Otherwise can be
skipped.
Those four members are not always mandatory checks, and some of them are
the same u64, which means if some arguments got swapped compiler will
not catch it.
Furthermore if we're going to further expand the parentness check, we
need to modify quite some helpers just to add one more parameter.
This patch will concentrate all these members into a structure called
btrfs_tree_parent_check, and pass that structure for the following
helpers:
- btrfs_read_extent_buffer()
- read_tree_block()
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/backref.c | 15 ++++++++---
fs/btrfs/ctree.c | 28 +++++++++++--------
fs/btrfs/disk-io.c | 59 ++++++++++++++++++++++++-----------------
fs/btrfs/disk-io.h | 36 ++++++++++++++++++++++---
fs/btrfs/extent-tree.c | 12 ++++++---
fs/btrfs/print-tree.c | 13 +++++----
fs/btrfs/qgroup.c | 18 ++++++++++---
fs/btrfs/relocation.c | 11 +++++---
fs/btrfs/tree-log.c | 25 ++++++++++++-----
fs/btrfs/tree-mod-log.c | 9 +++++--
10 files changed, 158 insertions(+), 68 deletions(-)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 9d06f8c18b15..abaed23edc7c 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -777,6 +777,8 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
struct rb_node *node;
while ((node = rb_first_cached(&tree->root))) {
+ struct btrfs_tree_parent_check check = {0};
+
ref = rb_entry(node, struct prelim_ref, rbnode);
rb_erase_cached(node, &tree->root);
@@ -784,8 +786,10 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
BUG_ON(ref->key_for_search.type);
BUG_ON(!ref->wanted_disk_byte);
- eb = read_tree_block(fs_info, ref->wanted_disk_byte,
- ref->root_id, 0, ref->level - 1, NULL);
+ check.level = ref->level - 1;
+ check.owner_root = ref->root_id;
+
+ eb = read_tree_block(fs_info, ref->wanted_disk_byte, &check);
if (IS_ERR(eb)) {
free_pref(ref);
return PTR_ERR(eb);
@@ -1330,10 +1334,13 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
if (ref->count && ref->parent) {
if (extent_item_pos && !ref->inode_list &&
ref->level == 0) {
+ struct btrfs_tree_parent_check check = {0};
struct extent_buffer *eb;
- eb = read_tree_block(fs_info, ref->parent, 0,
- 0, ref->level, NULL);
+ check.level = ref->level;
+
+ eb = read_tree_block(fs_info, ref->parent,
+ &check);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ebfa35fe1c38..44dd9ed3fe63 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -834,19 +834,22 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
int slot)
{
int level = btrfs_header_level(parent);
+ struct btrfs_tree_parent_check check = {0};
struct extent_buffer *eb;
- struct btrfs_key first_key;
if (slot < 0 || slot >= btrfs_header_nritems(parent))
return ERR_PTR(-ENOENT);
BUG_ON(level == 0);
- btrfs_node_key_to_cpu(parent, &first_key, slot);
+ check.level = level - 1;
+ check.transid = btrfs_node_ptr_generation(parent, slot);
+ check.owner_root = btrfs_header_owner(parent);
+ check.has_first_key = true;
+ btrfs_node_key_to_cpu(parent, &check.first_key, slot);
+
eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
- btrfs_header_owner(parent),
- btrfs_node_ptr_generation(parent, slot),
- level - 1, &first_key);
+ &check);
if (IS_ERR(eb))
return eb;
if (!extent_buffer_uptodate(eb)) {
@@ -1405,10 +1408,10 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
const struct btrfs_key *key)
{
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_tree_parent_check check = {0};
u64 blocknr;
u64 gen;
struct extent_buffer *tmp;
- struct btrfs_key first_key;
int ret;
int parent_level;
bool unlock_up;
@@ -1417,7 +1420,11 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
blocknr = btrfs_node_blockptr(*eb_ret, slot);
gen = btrfs_node_ptr_generation(*eb_ret, slot);
parent_level = btrfs_header_level(*eb_ret);
- btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
+ btrfs_node_key_to_cpu(*eb_ret, &check.first_key, slot);
+ check.has_first_key = true;
+ check.level = parent_level - 1;
+ check.transid = gen;
+ check.owner_root = root->root_key.objectid;
/*
* If we need to read an extent buffer from disk and we are holding locks
@@ -1439,7 +1446,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
* parents (shared tree blocks).
*/
if (btrfs_verify_level_key(tmp,
- parent_level - 1, &first_key, gen)) {
+ parent_level - 1, &check.first_key, gen)) {
free_extent_buffer(tmp);
return -EUCLEAN;
}
@@ -1451,7 +1458,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
btrfs_unlock_up_safe(p, level + 1);
/* now we're allowed to do a blocking uptodate check */
- ret = btrfs_read_extent_buffer(tmp, gen, parent_level - 1, &first_key);
+ ret = btrfs_read_extent_buffer(tmp, &check);
if (ret) {
free_extent_buffer(tmp);
btrfs_release_path(p);
@@ -1479,8 +1486,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
if (p->reada != READA_NONE)
reada_for_search(fs_info, p, level, slot, key->objectid);
- tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
- gen, parent_level - 1, &first_key);
+ tmp = read_tree_block(fs_info, blocknr, &check);
if (IS_ERR(tmp)) {
btrfs_release_path(p);
return PTR_ERR(tmp);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 54b5784a59e5..80aa0ba4ac55 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -253,13 +253,11 @@ int btrfs_verify_level_key(struct extent_buffer *eb, int level,
* helper to read a given tree block, doing retries as required when
* the checksums don't match and we have alternate mirrors to try.
*
- * @parent_transid: expected transid, skip check if 0
- * @level: expected level, mandatory check
- * @first_key: expected key of first slot, skip check if NULL
+ * @check: expected tree parentness check, see the comments of the
+ * structure for details.
*/
int btrfs_read_extent_buffer(struct extent_buffer *eb,
- u64 parent_transid, int level,
- struct btrfs_key *first_key)
+ struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
struct extent_io_tree *io_tree;
@@ -269,16 +267,20 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
int mirror_num = 0;
int failed_mirror = 0;
+ ASSERT(check);
+
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
while (1) {
clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num);
if (!ret) {
if (verify_parent_transid(io_tree, eb,
- parent_transid, 0))
+ check->transid, 0))
ret = -EIO;
- else if (btrfs_verify_level_key(eb, level,
- first_key, parent_transid))
+ else if (btrfs_verify_level_key(eb, check->level,
+ check->has_first_key ?
+ &check->first_key : NULL,
+ check->transid))
ret = -EUCLEAN;
else
break;
@@ -922,28 +924,28 @@ struct extent_buffer *btrfs_find_create_tree_block(
* Read tree block at logical address @bytenr and do variant basic but critical
* verification.
*
- * @owner_root: the objectid of the root owner for this block.
- * @parent_transid: expected transid of this tree block, skip check if 0
- * @level: expected level, mandatory check
- * @first_key: expected key in slot 0, skip check if NULL
+ * @check: expected tree parentness check, see comments of the
+ * structure for details.
*/
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 owner_root, u64 parent_transid,
- int level, struct btrfs_key *first_key)
+ struct btrfs_tree_parent_check *check)
{
struct extent_buffer *buf = NULL;
int ret;
- buf = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level);
+ ASSERT(check);
+
+ buf = btrfs_find_create_tree_block(fs_info, bytenr, check->owner_root,
+ check->level);
if (IS_ERR(buf))
return buf;
- ret = btrfs_read_extent_buffer(buf, parent_transid, level, first_key);
+ ret = btrfs_read_extent_buffer(buf, check);
if (ret) {
free_extent_buffer_stale(buf);
return ERR_PTR(ret);
}
- if (btrfs_check_eb_owner(buf, owner_root)) {
+ if (btrfs_check_eb_owner(buf, check->owner_root)) {
free_extent_buffer_stale(buf);
return ERR_PTR(-EUCLEAN);
}
@@ -1355,6 +1357,7 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
struct btrfs_key *key)
{
struct btrfs_root *root;
+ struct btrfs_tree_parent_check check = {0};
struct btrfs_fs_info *fs_info = tree_root->fs_info;
u64 generation;
int ret;
@@ -1374,9 +1377,12 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
generation = btrfs_root_generation(&root->root_item);
level = btrfs_root_level(&root->root_item);
+ check.level = level;
+ check.transid = generation;
+ check.owner_root = key->objectid;
root->node = read_tree_block(fs_info,
btrfs_root_bytenr(&root->root_item),
- key->objectid, generation, level, NULL);
+ &check);
if (IS_ERR(root->node)) {
ret = PTR_ERR(root->node);
root->node = NULL;
@@ -2351,6 +2357,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices)
{
int ret;
+ struct btrfs_tree_parent_check check = {0};
struct btrfs_root *log_tree_root;
struct btrfs_super_block *disk_super = fs_info->super_copy;
u64 bytenr = btrfs_super_log_root(disk_super);
@@ -2366,10 +2373,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
if (!log_tree_root)
return -ENOMEM;
- log_tree_root->node = read_tree_block(fs_info, bytenr,
- BTRFS_TREE_LOG_OBJECTID,
- fs_info->generation + 1, level,
- NULL);
+ check.level = level;
+ check.transid = fs_info->generation + 1;
+ check.owner_root = BTRFS_TREE_LOG_OBJECTID;
+ log_tree_root->node = read_tree_block(fs_info, bytenr, &check);
if (IS_ERR(log_tree_root->node)) {
btrfs_warn(fs_info, "failed to read log tree");
ret = PTR_ERR(log_tree_root->node);
@@ -2845,10 +2852,14 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level)
{
+ struct btrfs_tree_parent_check check = {0};
int ret = 0;
- root->node = read_tree_block(root->fs_info, bytenr,
- root->root_key.objectid, gen, level, NULL);
+ check.level = level;
+ check.transid = gen;
+ check.owner_root = root->root_key.objectid;
+
+ root->node = read_tree_block(root->fs_info, bytenr, &check);
if (IS_ERR(root->node)) {
ret = PTR_ERR(root->node);
root->node = NULL;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 7e545ec09a10..e415d44a6948 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -25,6 +25,35 @@ static inline u64 btrfs_sb_offset(int mirror)
return BTRFS_SUPER_INFO_OFFSET;
}
+/* All the extra info needed to verify the parentness of a tree block. */
+struct btrfs_tree_parent_check {
+ /*
+ * The owner check against the tree block.
+ *
+ * Can be 0 to skip the owner check.
+ */
+ u64 owner_root;
+
+ /*
+ * Expected transid, can be 0 to skip the check, but such skip
+ * should only be utlized for backref walk related code.
+ */
+ u64 transid;
+
+ /*
+ * The expected first key.
+ *
+ * This check can be skipped if @has_first_key is false, such skip
+ * can happen for case where we don't have the parent node key,
+ * e.g. reading the tree root, doing backref walk.
+ */
+ struct btrfs_key first_key;
+ bool has_first_key;
+
+ /* The expected level. Should always be set. */
+ u8 level;
+};
+
struct btrfs_device;
struct btrfs_fs_devices;
@@ -33,8 +62,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
struct btrfs_key *first_key, u64 parent_transid);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 owner_root, u64 parent_transid,
- int level, struct btrfs_key *first_key);
+ struct btrfs_tree_parent_check *check);
struct extent_buffer *btrfs_find_create_tree_block(
struct btrfs_fs_info *fs_info,
u64 bytenr, u64 owner_root,
@@ -114,8 +142,8 @@ void btrfs_put_root(struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
-int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
- int level, struct btrfs_key *first_key);
+int btrfs_read_extent_buffer(struct extent_buffer *buf,
+ struct btrfs_tree_parent_check *check);
bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
u64 dio_file_offset,
extent_submit_bio_start_t *submit_bio_start);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9818285dface..fe2157e1bdf4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5245,8 +5245,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
u64 bytenr;
u64 generation;
u64 parent;
+ struct btrfs_tree_parent_check check = {0};
struct btrfs_key key;
- struct btrfs_key first_key;
struct btrfs_ref ref = { 0 };
struct extent_buffer *next;
int level = wc->level;
@@ -5268,7 +5268,12 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
}
bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
- btrfs_node_key_to_cpu(path->nodes[level], &first_key,
+
+ check.level = level - 1;
+ check.transid = generation;
+ check.owner_root = root->root_key.objectid;
+ check.has_first_key = true;
+ btrfs_node_key_to_cpu(path->nodes[level], &check.first_key,
path->slots[level]);
next = find_extent_buffer(fs_info, bytenr);
@@ -5330,8 +5335,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
if (!next) {
if (reada && level == 1)
reada_walk_down(trans, root, wc, path);
- next = read_tree_block(fs_info, bytenr, root->root_key.objectid,
- generation, level - 1, &first_key);
+ next = read_tree_block(fs_info, bytenr, &check);
if (IS_ERR(next)) {
return PTR_ERR(next);
} else if (!extent_buffer_uptodate(next)) {
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index dd8777872143..9a2d1ec5d3fe 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -384,14 +384,17 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
if (!follow)
return;
for (i = 0; i < nr; i++) {
- struct btrfs_key first_key;
+ struct btrfs_tree_parent_check check = {0};
struct extent_buffer *next;
- btrfs_node_key_to_cpu(c, &first_key, i);
+ check.level = level - 1;
+ check.transid = btrfs_node_ptr_generation(c, i);
+ check.owner_root = btrfs_header_owner(c);
+ check.has_first_key = true;
+ btrfs_node_key_to_cpu(c, &check.first_key, i);
+
next = read_tree_block(fs_info, btrfs_node_blockptr(c, i),
- btrfs_header_owner(c),
- btrfs_node_ptr_generation(c, i),
- level - 1, &first_key);
+ &check);
if (IS_ERR(next))
continue;
if (!extent_buffer_uptodate(next)) {
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index ba323dcb0a0b..ef74706ed670 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2305,7 +2305,13 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
return 0;
if (!extent_buffer_uptodate(root_eb)) {
- ret = btrfs_read_extent_buffer(root_eb, root_gen, root_level, NULL);
+ struct btrfs_tree_parent_check check = {0};
+
+ check.has_first_key = false;
+ check.transid = root_gen;
+ check.level = root_level;
+
+ ret = btrfs_read_extent_buffer(root_eb, &check);
if (ret)
goto out;
}
@@ -4262,6 +4268,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
struct extent_buffer *subvol_eb)
{
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_tree_parent_check check = {0};
struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks;
struct btrfs_qgroup_swapped_block *block;
struct extent_buffer *reloc_eb = NULL;
@@ -4310,10 +4317,13 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
blocks->swapped = swapped;
spin_unlock(&blocks->lock);
+ check.level = block->level;
+ check.transid = block->reloc_generation;
+ check.has_first_key = true;
+ memcpy(&check.first_key, &block->first_key, sizeof(check.first_key));
+
/* Read out reloc subtree root */
- reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, 0,
- block->reloc_generation, block->level,
- &block->first_key);
+ reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, &check);
if (IS_ERR(reloc_eb)) {
ret = PTR_ERR(reloc_eb);
reloc_eb = NULL;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 45c02aba2492..231284c15224 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2597,10 +2597,14 @@ static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
static int get_tree_block_key(struct btrfs_fs_info *fs_info,
struct tree_block *block)
{
+ struct btrfs_tree_parent_check check = {0};
struct extent_buffer *eb;
- eb = read_tree_block(fs_info, block->bytenr, block->owner,
- block->key.offset, block->level, NULL);
+ check.level = block->level;
+ check.owner_root = block->owner;
+ check.transid = block->key.offset;
+
+ eb = read_tree_block(fs_info, block->bytenr, &check);
if (IS_ERR(eb))
return PTR_ERR(eb);
if (!extent_buffer_uptodate(eb)) {
@@ -3411,9 +3415,10 @@ int add_data_references(struct reloc_control *rc,
ULIST_ITER_INIT(&leaf_uiter);
while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
+ struct btrfs_tree_parent_check check = {0};
struct extent_buffer *eb;
- eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL);
+ eb = read_tree_block(fs_info, ref_node->val, &check);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
break;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 020e01ea44bc..9749397273c2 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -333,7 +333,12 @@ static int process_one_buffer(struct btrfs_root *log,
* pin down any logged extents, so we have to read the block.
*/
if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
- ret = btrfs_read_extent_buffer(eb, gen, level, NULL);
+ struct btrfs_tree_parent_check check = {0};
+
+ check.level = level;
+ check.transid = gen;
+
+ ret = btrfs_read_extent_buffer(eb, &check);
if (ret)
return ret;
}
@@ -2430,13 +2435,17 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
struct walk_control *wc, u64 gen, int level)
{
int nritems;
+ struct btrfs_tree_parent_check check = {0};
struct btrfs_path *path;
struct btrfs_root *root = wc->replay_dest;
struct btrfs_key key;
int i;
int ret;
- ret = btrfs_read_extent_buffer(eb, gen, level, NULL);
+ check.transid = gen;
+ check.level = level;
+
+ ret = btrfs_read_extent_buffer(eb, &check);
if (ret)
return ret;
@@ -2616,7 +2625,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
int ret = 0;
while (*level > 0) {
- struct btrfs_key first_key;
+ struct btrfs_tree_parent_check check = {0};
cur = path->nodes[*level];
@@ -2628,7 +2637,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
- btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]);
+ check.transid = ptr_gen;
+ check.level = *level - 1;
+ check.has_first_key = true;
+ btrfs_node_key_to_cpu(cur, &check.first_key, path->slots[*level]);
blocksize = fs_info->nodesize;
next = btrfs_find_create_tree_block(fs_info, bytenr,
@@ -2647,8 +2659,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
path->slots[*level]++;
if (wc->free) {
- ret = btrfs_read_extent_buffer(next, ptr_gen,
- *level - 1, &first_key);
+ ret = btrfs_read_extent_buffer(next, &check);
if (ret) {
free_extent_buffer(next);
return ret;
@@ -2676,7 +2687,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
free_extent_buffer(next);
continue;
}
- ret = btrfs_read_extent_buffer(next, ptr_gen, *level - 1, &first_key);
+ ret = btrfs_read_extent_buffer(next, &check);
if (ret) {
free_extent_buffer(next);
return ret;
diff --git a/fs/btrfs/tree-mod-log.c b/fs/btrfs/tree-mod-log.c
index 8a3a14686d3e..a2ee149b9cf2 100644
--- a/fs/btrfs/tree-mod-log.c
+++ b/fs/btrfs/tree-mod-log.c
@@ -819,10 +819,15 @@ struct extent_buffer *btrfs_get_old_root(struct btrfs_root *root, u64 time_seq)
tm = tree_mod_log_search(fs_info, logical, time_seq);
if (old_root && tm && tm->op != BTRFS_MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
+ struct btrfs_tree_parent_check check = {0};
+
btrfs_tree_read_unlock(eb_root);
free_extent_buffer(eb_root);
- old = read_tree_block(fs_info, logical, root->root_key.objectid,
- 0, level, NULL);
+
+ check.level = level;
+ check.owner_root = root->root_key.objectid;
+
+ old = read_tree_block(fs_info, logical, &check);
if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
if (!IS_ERR(old))
free_extent_buffer(old);
--
2.37.3
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 2/2] btrfs: move tree block parentness check into validate_extent_buffer()
2022-09-14 5:32 [PATCH 0/2] btrfs: do most metadata parentnesss check at endio time Qu Wenruo
2022-09-14 5:32 ` [PATCH 1/2] btrfs: concentrate all tree block parentness check parameters into one structure Qu Wenruo
@ 2022-09-14 5:32 ` Qu Wenruo
2022-11-11 16:07 ` [PATCH 0/2] btrfs: do most metadata parentnesss check at endio time David Sterba
2 siblings, 0 replies; 5+ messages in thread
From: Qu Wenruo @ 2022-09-14 5:32 UTC (permalink / raw)
To: linux-btrfs
[BACKGROUND]
Although both btrfs metadata and data has their read time verification
done at endio time (btrfs_validate_metadata_buffer() and
btrfs_verify_data_csum()), metadata has extra verification, mostly
parentness check including first key/transid/owner_root/level, done at
read_tree_block() and btrfs_read_extent_buffer().
On the other hand, all data verification is done at endio context.
[ENHANCEMENT]
This patch will make a new union in btrfs_bio, taking the space of the
old data checksums, thus it will not increase the memory usage.
With that extra btrfs_tree_parent_check inside btrfs_bio, we can just
pass the check parameter into read_extent_buffer_pages(), and before
submitting the bio, we can copy the check structure into btrfs_bio.
And finally at endio time, we can grab btrfs_bio::parent_check and pass
it to validate_extent_buffer(), to move the remaining checks into it.
This brings the following benefits:
- Much simpler btrfs_read_extent_buffer()
Now it only needs to iterate through all mirrors.
- Simpler read-time transid check
Previously we go verify_parent_transid() after reading out the extent
buffer.
Now the transid check is done inside the endio function, no other
code can modify the content.
Thus no need to use the extent lock anymore.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/disk-io.c | 76 ++++++++++++++++++++++++++++++++------------
fs/btrfs/extent_io.c | 18 ++++++++---
fs/btrfs/extent_io.h | 5 +--
fs/btrfs/volumes.h | 25 +++++++++++++--
4 files changed, 95 insertions(+), 29 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 80aa0ba4ac55..0b7d297f9367 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -260,7 +260,6 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
- struct extent_io_tree *io_tree;
int failed = 0;
int ret;
int num_copies = 0;
@@ -269,22 +268,12 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
ASSERT(check);
- io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
while (1) {
clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
- ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num);
- if (!ret) {
- if (verify_parent_transid(io_tree, eb,
- check->transid, 0))
- ret = -EIO;
- else if (btrfs_verify_level_key(eb, check->level,
- check->has_first_key ?
- &check->first_key : NULL,
- check->transid))
- ret = -EUCLEAN;
- else
- break;
- }
+ ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num,
+ check);
+ if (!ret)
+ break;
num_copies = btrfs_num_copies(fs_info,
eb->start, eb->len);
@@ -460,7 +449,8 @@ static int check_tree_block_fsid(struct extent_buffer *eb)
}
/* Do basic extent buffer checks at read time */
-static int validate_extent_buffer(struct extent_buffer *eb)
+static int validate_extent_buffer(struct extent_buffer *eb,
+ struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
u64 found_start;
@@ -470,6 +460,8 @@ static int validate_extent_buffer(struct extent_buffer *eb)
const u8 *header_csum;
int ret = 0;
+ ASSERT(check);
+
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
btrfs_err_rl(fs_info,
@@ -508,6 +500,45 @@ static int validate_extent_buffer(struct extent_buffer *eb)
goto out;
}
+ if (found_level != check->level) {
+ ret = -EIO;
+ goto out;
+ }
+ if (check->transid && btrfs_header_generation(eb) != check->transid) {
+ btrfs_err_rl(eb->fs_info,
+"parent transid verify failed on logical %llu mirror %u wanted %llu found %llu",
+ eb->start, eb->read_mirror,
+ check->transid,
+ btrfs_header_generation(eb));
+ ret = -EIO;
+ goto out;
+ }
+ if (check->has_first_key) {
+ struct btrfs_key *expect_key = &check->first_key;
+ struct btrfs_key found_key;
+
+ if (found_level)
+ btrfs_node_key_to_cpu(eb, &found_key, 0);
+ else
+ btrfs_item_key_to_cpu(eb, &found_key, 0);
+ if (btrfs_comp_cpu_keys(expect_key, &found_key)) {
+ btrfs_err(fs_info,
+"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
+ eb->start, check->transid,
+ expect_key->objectid,
+ expect_key->type, expect_key->offset,
+ found_key.objectid, found_key.type,
+ found_key.offset);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+ if (check->owner_root) {
+ ret = btrfs_check_eb_owner(eb, check->owner_root);
+ if (ret < 0)
+ goto out;
+ }
+
/*
* If this is a leaf block and it is corrupt, set the corrupt bit so
* that we don't try and read the other copies of this block, just
@@ -532,13 +563,16 @@ static int validate_extent_buffer(struct extent_buffer *eb)
}
static int validate_subpage_buffer(struct page *page, u64 start, u64 end,
- int mirror)
+ int mirror,
+ struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
struct extent_buffer *eb;
bool reads_done;
int ret = 0;
+ ASSERT(check);
+
/*
* We don't allow bio merge for subpage metadata read, so we should
* only get one eb for each endio hook.
@@ -562,7 +596,7 @@ static int validate_subpage_buffer(struct page *page, u64 start, u64 end,
ret = -EIO;
goto err;
}
- ret = validate_extent_buffer(eb);
+ ret = validate_extent_buffer(eb, check);
if (ret < 0)
goto err;
@@ -592,7 +626,8 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
ASSERT(page->private);
if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
- return validate_subpage_buffer(page, start, end, mirror);
+ return validate_subpage_buffer(page, start, end, mirror,
+ &bbio->parent_check);
eb = (struct extent_buffer *)page->private;
@@ -611,7 +646,7 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
ret = -EIO;
goto err;
}
- ret = validate_extent_buffer(eb);
+ ret = validate_extent_buffer(eb, &bbio->parent_check);
err:
if (ret) {
/*
@@ -761,6 +796,7 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
blk_status_t ret;
bio->bi_opf |= REQ_META;
+ bbio->is_metadata = 1;
if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
btrfs_submit_bio(fs_info, bio, mirror_num);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2499a01f7638..abd8dbd80065 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -6749,7 +6749,8 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
}
static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
- int mirror_num)
+ int mirror_num,
+ struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
struct extent_io_tree *io_tree;
@@ -6761,6 +6762,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
ASSERT(PagePrivate(page));
+ ASSERT(check);
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
if (wait == WAIT_NONE) {
@@ -6801,6 +6803,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
*/
atomic_dec(&eb->io_pages);
}
+ memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check));
submit_one_bio(&bio_ctrl);
if (ret || wait != WAIT_COMPLETE)
return ret;
@@ -6811,7 +6814,8 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
return ret;
}
-int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
+int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
+ struct btrfs_tree_parent_check *check)
{
int i;
struct page *page;
@@ -6837,7 +6841,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
return -EIO;
if (eb->fs_info->nodesize < PAGE_SIZE)
- return read_extent_buffer_subpage(eb, wait, mirror_num);
+ return read_extent_buffer_subpage(eb, wait, mirror_num, check);
num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
@@ -6914,6 +6918,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
}
}
+ memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check));
submit_one_bio(&bio_ctrl);
if (ret || wait != WAIT_COMPLETE)
@@ -7646,9 +7651,14 @@ int try_release_extent_buffer(struct page *page)
void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 owner_root, u64 gen, int level)
{
+ struct btrfs_tree_parent_check check = {0};
struct extent_buffer *eb;
int ret;
+ check.has_first_key = 0;
+ check.level = level;
+ check.transid = gen;
+
eb = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level);
if (IS_ERR(eb))
return;
@@ -7658,7 +7668,7 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
return;
}
- ret = read_extent_buffer_pages(eb, WAIT_NONE, 0);
+ ret = read_extent_buffer_pages(eb, WAIT_NONE, 0, &check);
if (ret < 0)
free_extent_buffer_stale(eb);
else
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 69a86ae6fd50..d4b4794595b5 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -63,6 +63,7 @@ struct btrfs_inode;
struct btrfs_fs_info;
struct io_failure_record;
struct extent_io_tree;
+struct btrfs_tree_parent_check;
typedef void (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
int mirror_num,
@@ -171,8 +172,8 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_NONE 0
#define WAIT_COMPLETE 1
#define WAIT_PAGE_LOCK 2
-int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
- int mirror_num);
+int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
+ struct btrfs_tree_parent_check *parent_check);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 owner_root, u64 gen, int level);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index f19a1cd1bfcf..6e0d9565b36d 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -10,6 +10,7 @@
#include <linux/sort.h>
#include <linux/btrfs.h>
#include "async-thread.h"
+#include "disk-io.h"
#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
@@ -369,15 +370,31 @@ typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
* Mostly for btrfs specific features like csum and mirror_num.
*/
struct btrfs_bio {
- unsigned int mirror_num;
+ unsigned int mirror_num:7;
+
+ /*
+ * Extra indicator for metadata bios.
+ * For some btrfs bios they use pages without a mapping, thus
+ * we can not rely on page->mapping->host to determine if
+ * it's a metadata bio.
+ */
+ unsigned int is_metadata:1;
/* for direct I/O */
u64 file_offset;
/* @device is for stripe IO submission. */
struct btrfs_device *device;
- u8 *csum;
- u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
+ union {
+ /* For data checksum verification. */
+ struct {
+ u8 *csum;
+ u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
+ };
+
+ /* For metadata parentness verification. */
+ struct btrfs_tree_parent_check parent_check;
+ };
struct bvec_iter iter;
/* End I/O information supplied to btrfs_bio_alloc */
@@ -415,6 +432,8 @@ static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
{
+ if (bbio->is_metadata)
+ return;
if (bbio->csum != bbio->csum_inline) {
kfree(bbio->csum);
bbio->csum = NULL;
--
2.37.3
^ permalink raw reply related [flat|nested] 5+ messages in thread