* [PATCH 1/3] btrfs-progs: record and report leaf/node corruption in fs/subvol tree
2014-12-04 2:52 [PATCH 0/3] Add repair function for btree and missing inode Qu Wenruo
@ 2014-12-04 2:52 ` Qu Wenruo
2014-12-04 2:52 ` [PATCH 2/3] btrfs-progs:fsck: Recover btree with some corrupted leaf/node Qu Wenruo
2014-12-04 2:52 ` [PATCH 3/3] btrfs-progs: Add inode item rebuild function Qu Wenruo
2 siblings, 0 replies; 8+ messages in thread
From: Qu Wenruo @ 2014-12-04 2:52 UTC (permalink / raw)
To: linux-btrfs
When leaf/node is corrupted in fs/subvolume root, btrfsck can ignore it
without much pain except some stderr messages complaining about it.
This works fine for read-only work, but if we want to do deeper
recovery, like rebuilding missing inodes in the b+tree, it will cause
problems.
At the least, inform the user that there is something wrong in the btree.
This patch also provides the base for later btree repair.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
cmds-check.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 50 insertions(+), 10 deletions(-)
diff --git a/cmds-check.c b/cmds-check.c
index 627b794..02d9b32 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -1475,6 +1475,15 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
next = read_tree_block(root, bytenr, blocksize,
ptr_gen);
if (!next) {
+ struct btrfs_key node_key;
+
+ btrfs_node_key_to_cpu(path->nodes[*level],
+ &node_key,
+ path->slots[*level]);
+ btrfs_add_corrupt_extent_record(root->fs_info,
+ &node_key,
+ path->nodes[*level]->start,
+ root->leafsize, *level);
err = -EIO;
goto out;
}
@@ -2614,6 +2623,16 @@ static int process_root_ref(struct extent_buffer *eb, int slot,
return 0;
}
+/*
+ * Release one corrupt-block record.
+ *
+ * @cache is the cache_extent embedded in a struct btrfs_corrupt_block; the
+ * enclosing record is recovered with container_of() and handed to free().
+ */
+static void free_corrupt_block(struct cache_extent *cache)
+{
+	free(container_of(cache, struct btrfs_corrupt_block, cache));
+}
+
+FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
+
static int check_fs_root(struct btrfs_root *root,
struct cache_tree *root_cache,
struct walk_control *wc)
@@ -2626,8 +2645,17 @@ static int check_fs_root(struct btrfs_root *root,
struct shared_node root_node;
struct root_record *rec;
struct btrfs_root_item *root_item = &root->root_item;
+ struct cache_tree corrupt_blocks;
enum btrfs_tree_block_status status;
+ /*
+ * Reuse the corrupt_block cache tree to record corrupted tree block
+ *
+ * Unlike the usage in extent tree check, here we do it in a per
+ * fs/subvol tree base.
+ */
+ cache_tree_init(&corrupt_blocks);
+ root->fs_info->corrupt_blocks = &corrupt_blocks;
if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
rec = get_root_rec(root_cache, root->root_key.objectid);
if (btrfs_root_refs(root_item) > 0)
@@ -2690,6 +2718,25 @@ static int check_fs_root(struct btrfs_root *root,
skip_walking:
btrfs_release_path(&path);
+ if (!cache_tree_empty(&corrupt_blocks)) {
+ struct cache_extent *cache;
+ struct btrfs_corrupt_block *corrupt;
+
+ printf("The following tree block(s) is corrupted in tree %llu:\n",
+ root->root_key.objectid);
+ cache = first_cache_extent(&corrupt_blocks);
+ while (cache) {
+ corrupt = container_of(cache,
+ struct btrfs_corrupt_block,
+ cache);
+ printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
+ cache->start, corrupt->level,
+ corrupt->key.objectid, corrupt->key.type,
+ corrupt->key.offset);
+ cache = next_cache_extent(cache);
+ }
+ }
+
err = merge_root_recs(root, &root_node.root_cache, root_cache);
if (err < 0)
ret = err;
@@ -2703,6 +2750,9 @@ skip_walking:
err = check_inode_recs(root, &root_node.inode_cache);
if (!ret)
ret = err;
+
+ free_corrupt_blocks_tree(&corrupt_blocks);
+ root->fs_info->corrupt_blocks = NULL;
return ret;
}
@@ -6274,16 +6324,6 @@ static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
return 0;
}
-static void free_corrupt_block(struct cache_extent *cache)
-{
- struct btrfs_corrupt_block *corrupt;
-
- corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
- free(corrupt);
-}
-
-FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
-
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
{
struct btrfs_block_group_cache *cache;
--
2.1.3
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 2/3] btrfs-progs:fsck: Recover btree with some corrupted leaf/node.
2014-12-04 2:52 [PATCH 0/3] Add repair function for btree and missing inode Qu Wenruo
2014-12-04 2:52 ` [PATCH 1/3] btrfs-progs: record and report leaf/node corruption in fs/subvol tree Qu Wenruo
@ 2014-12-04 2:52 ` Qu Wenruo
2014-12-04 17:35 ` David Sterba
2014-12-04 2:52 ` [PATCH 3/3] btrfs-progs: Add inode item rebuild function Qu Wenruo
2 siblings, 1 reply; 8+ messages in thread
From: Qu Wenruo @ 2014-12-04 2:52 UTC (permalink / raw)
To: linux-btrfs
Current btrfsck can skip corrupted leaves and even repair some corrupted
ones if their bytenr or key order is wrong.
However, when it comes to a leaf with a csum error, btrfsck can only skip it,
which is OK for read-only iteration, but is bad for writes.
This patch introduces the new repair_btree() function to recover the
btree, deleting all the corrupted leaves/nodes including the corresponding
extents, allowing later writes into the btree.
This patch provides the basis for later recovery with corrupted leaves.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
cmds-check.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 102 insertions(+)
diff --git a/cmds-check.c b/cmds-check.c
index 02d9b32..79dc6f6 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -2633,6 +2633,97 @@ static void free_corrupt_block(struct cache_extent *cache)
FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
+/*
+ * Repair the btree of the given root.
+ *
+ * Every record in @corrupt_blocks names a key and a level.  For each record
+ * the tree is searched down to that level, the pointer found in that slot is
+ * removed from the node and the extent it pointed to is freed.  Once all
+ * corrupted pointers are gone, the tree is rebalanced by searching for each
+ * key again with a negative ins_len.
+ * After the fix, the btree should be writeable.
+ *
+ * The transaction is committed even when the repair stops halfway.
+ *
+ * Returns 0 on success or when @corrupt_blocks is empty, and a negative
+ * errno on failure.
+ */
+static int repair_btree(struct btrfs_root *root,
+			struct cache_tree *corrupt_blocks)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	struct btrfs_corrupt_block *corrupt;
+	struct cache_extent *cache;
+	struct btrfs_key key;
+	u64 offset;
+	int level;
+	int err;
+	int ret = 0;
+
+	if (cache_tree_empty(corrupt_blocks))
+		return 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, -1);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		fprintf(stderr, "Error starting transaction: %s\n",
+			strerror(-ret));
+		/* The path was already allocated, do not leak it */
+		btrfs_free_path(path);
+		return ret;
+	}
+	cache = first_cache_extent(corrupt_blocks);
+	while (cache) {
+		corrupt = container_of(cache, struct btrfs_corrupt_block,
+				       cache);
+		level = corrupt->level;
+		path->lowest_level = level;
+		key.objectid = corrupt->key.objectid;
+		key.type = corrupt->key.type;
+		key.offset = corrupt->key.offset;
+
+		/*
+		 * Here we don't want to do any tree balance, since it may
+		 * cause a balance with corrupted brother leaf/node,
+		 * so ins_len set to 0 here.
+		 * Balance will be done after all corrupt node/leaf is deleted.
+		 */
+		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+		if (ret < 0)
+			goto out;
+		/* Remember where the pointer led before it is removed */
+		offset = btrfs_node_blockptr(path->nodes[level],
+					     path->slots[level]);
+
+		/* Remove the ptr */
+		ret = btrfs_del_ptr(trans, root, path, level,
+				    path->slots[level]);
+		if (ret < 0)
+			goto out;
+		/*
+		 * Remove the corresponding extent.
+		 * The return value is deliberately ignored, so it is not
+		 * stored in ret.
+		 */
+		btrfs_release_path(path);
+		(void)btrfs_free_extent(trans, root, offset, root->nodesize,
+					0, root->root_key.objectid,
+					level - 1, 0);
+		cache = next_cache_extent(cache);
+	}
+
+	/* Balance the btree using btrfs_search_slot() */
+	cache = first_cache_extent(corrupt_blocks);
+	while (cache) {
+		corrupt = container_of(cache, struct btrfs_corrupt_block,
+				       cache);
+		memcpy(&key, &corrupt->key, sizeof(key));
+		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+		if (ret < 0)
+			goto out;
+		/* return will always >0 since it won't find the item */
+		ret = 0;
+		btrfs_release_path(path);
+		cache = next_cache_extent(cache);
+	}
+out:
+	btrfs_free_path(path);
+	/*
+	 * Report a failed commit unless an earlier error is already being
+	 * returned, so the caller does not announce a fix that never hit
+	 * the disk.
+	 */
+	err = btrfs_commit_transaction(trans, root);
+	if (!ret && err < 0)
+		ret = err;
+	return ret;
+}
+
static int check_fs_root(struct btrfs_root *root,
struct cache_tree *root_cache,
struct walk_control *wc)
@@ -2735,6 +2826,17 @@ skip_walking:
corrupt->key.offset);
cache = next_cache_extent(cache);
}
+ if (repair) {
+ printf("Try to repair the btree for root %llu\n",
+ root->root_key.objectid);
+ ret = repair_btree(root, &corrupt_blocks);
+ if (ret < 0)
+ fprintf(stderr, "Failed to repair btree: %s\n",
+ strerror(-ret));
+ if (!ret)
+ printf("Btree for root %llu is fixed\n",
+ root->root_key.objectid);
+ }
}
err = merge_root_recs(root, &root_node.root_cache, root_cache);
--
2.1.3
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 3/3] btrfs-progs: Add inode item rebuild function.
2014-12-04 2:52 [PATCH 0/3] Add repair function for btree and missing inode Qu Wenruo
2014-12-04 2:52 ` [PATCH 1/3] btrfs-progs: record and report leaf/node corruption in fs/subvol tree Qu Wenruo
2014-12-04 2:52 ` [PATCH 2/3] btrfs-progs:fsck: Recover btree with some corrupted leaf/node Qu Wenruo
@ 2014-12-04 2:52 ` Qu Wenruo
2014-12-04 17:41 ` David Sterba
2 siblings, 1 reply; 8+ messages in thread
From: Qu Wenruo @ 2014-12-04 2:52 UTC (permalink / raw)
To: linux-btrfs
Add a basic inode item rebuild function for I_ERR_NO_INODE_ITEM.
The main use case is to repair a btrfs whose fs root has a corrupted leaf,
and it already works for the case where the corrupted fs root leaf/node
contains no inode extent_data.
The repair needs 3 elements for inode rebuild:
1. inode number
This is quite easy, existing inode_record codes will detect it quite
well.
2. inode name
At least one inode name is needed. This can be recovered from its
backref or the parent inode's dir_index/item.
If none is found, the fallback will be the inode number.
3. inode type
This is the tricky part. The only reliable method is to recover it from
the parent's dir_index/item.
The remaining methods search for a regular file extent for FILE
type or a child's backref for DIR (todo).
The fallback will be FILE.
This is just a basic implementation; the recovery of some details can be
improved later with btrfs-progs infrastructure changes.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
cmds-check.c | 178 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
ctree.h | 3 +
inode.c | 6 +-
3 files changed, 180 insertions(+), 7 deletions(-)
diff --git a/cmds-check.c b/cmds-check.c
index 79dc6f6..f4fb304 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -2100,6 +2100,173 @@ out:
return ret;
}
+/*
+ * Check if there is any normal(reg or prealloc) file extent for given
+ * ino.
+ * This is used to determine the file type when neither its dir_index/item or
+ * inode_item exists.
+ *
+ * All EXTENT_DATA items of @ino are walked, starting at offset 0, until one
+ * that is not an inline extent is found.
+ *
+ * This will not report error, if any error happens, just consider it does
+ * not have any normal file extent.
+ *
+ * Returns 1 if a non-inline file extent exists for @ino, 0 otherwise.
+ */
+static int find_normal_file_extent(struct btrfs_root *root,
+				   u64 ino)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_file_extent_item *fi;
+	u8 type;
+	int found = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return 0;
+	key.objectid = ino;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = 0;
+
+	/*
+	 * A positive return only means there is no exact match; it must not
+	 * be mistaken for "extent found", so the result is tracked in
+	 * 'found' and not in the search return value.
+	 */
+	if (btrfs_search_slot(NULL, root, &key, path, 0, 0) < 0)
+		goto out;
+
+	while (1) {
+		/* The slot may sit past the last item of the leaf */
+		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+			/* Non-zero: either an error or no more leaves */
+			if (btrfs_next_leaf(root, path))
+				break;
+		}
+		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+				      path->slots[0]);
+		if (found_key.objectid != ino ||
+		    found_key.type != BTRFS_EXTENT_DATA_KEY)
+			break;
+		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_file_extent_item);
+		type = btrfs_file_extent_type(path->nodes[0], fi);
+		if (type != BTRFS_FILE_EXTENT_INLINE) {
+			found = 1;
+			break;
+		}
+		path->slots[0]++;
+	}
+out:
+	btrfs_free_path(path);
+	return found;
+}
+
+/*
+ * Translate a BTRFS_FT_* file type into the matching S_IF* mode bits.
+ *
+ * Types that have no entry in the table yield 0.  Values beyond the end of
+ * the table are rejected the same way instead of indexing out of bounds.
+ */
+static u32 btrfs_type_to_imode(u8 type)
+{
+	static const u32 imode_by_btrfs_type[] = {
+		[BTRFS_FT_REG_FILE]	= S_IFREG,
+		[BTRFS_FT_DIR]		= S_IFDIR,
+		[BTRFS_FT_CHRDEV]	= S_IFCHR,
+		[BTRFS_FT_BLKDEV]	= S_IFBLK,
+		[BTRFS_FT_FIFO]		= S_IFIFO,
+		[BTRFS_FT_SOCK]		= S_IFSOCK,
+		[BTRFS_FT_SYMLINK]	= S_IFLNK,
+	};
+
+	if (type >= sizeof(imode_by_btrfs_type) /
+		    sizeof(imode_by_btrfs_type[0]))
+		return 0;
+
+	return imode_by_btrfs_type[type];
+}
+
+/*
+ * Rebuild a missing inode item for @rec and link it into "lost+found".
+ *
+ * The name and file type are taken from the inode's backrefs when possible:
+ * a backref with a dir_item/dir_index gives both, a backref with only an
+ * inode_ref gives the name.  Missing pieces fall back to the inode number
+ * as the name and to a regular file as the type.
+ *
+ * @path is not used here; it is kept so the signature matches the other
+ * repair helpers called from try_repair_inode().
+ *
+ * Returns the result of the last btrfs_mkdir()/btrfs_new_inode()/
+ * btrfs_add_link() call made; a negative value from any of them stops the
+ * rebuild.
+ */
+static int repair_inode_no_item(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_path *path,
+				struct inode_record *rec)
+{
+	struct inode_backref *backref;
+	char namebuf[BTRFS_NAME_LEN];
+	char *lost_found_name = "lost+found";
+	u8 filetype = BTRFS_FT_REG_FILE;
+	u32 mode = 0700;
+	u64 lost_found_ino = 0;
+	int namelen = 0;
+	int name_recover = 0;
+	int type_recover = 0;
+	int ret = 0;
+
+	/*
+	 * TODO:
+	 * 1. salvage data from existing file extent and
+	 *    punch hole to keep fi ext consistent.
+	 * 2. salvage data from extent tree
+	 */
+
+	/* Searching for filename */
+	list_for_each_entry(backref, &rec->backrefs, list) {
+		/*
+		 * If dir_item/index is found, name and type can both
+		 * be recovered.
+		 * If only inode_ref is found, only name can be recovered.
+		 */
+		if (backref->found_dir_index || backref->found_dir_item) {
+			type_recover = 1;
+			name_recover = 1;
+			memcpy(namebuf, backref->name, backref->namelen);
+			namelen = backref->namelen;
+			filetype = backref->filetype;
+			/* Best match found, break */
+			break;
+		}
+		if (!name_recover && backref->found_inode_ref) {
+			name_recover = 1;
+			memcpy(namebuf, backref->name, backref->namelen);
+			namelen = backref->namelen;
+			/* Continue searching, there may be better match */
+		}
+	}
+
+	printf("Trying to rebuild inode:%llu\n", rec->ino);
+
+	if (!name_recover) {
+		printf("Can't find the filename for inode:%llu, use its inode number as filename\n",
+		       rec->ino);
+		/*
+		 * snprintf() returns the number of characters written, not
+		 * counting the ending '\0', which is exactly the name
+		 * length.  This avoids a floating point digit count.
+		 */
+		namelen = snprintf(namebuf, sizeof(namebuf), "%llu",
+				   rec->ino);
+	}
+	/*
+	 * Try to determine inode type if type not found.
+	 *
+	 * For found regular file extent, it must be FILE.
+	 * For found dir_item/index, it must be DIR.
+	 *
+	 * For undetermined one, use FILE as fallback.
+	 *
+	 * TODO:
+	 * 1. If found extent belong to it in extent tree, it must be FILE
+	 *    Need extra hook in extent tree scan.
+	 * 2. If found backref(inode_index/item is already handled) to it,
+	 *    it must be DIR.
+	 *    Need new inode-inode ref structure to allow search for that.
+	 */
+	if (!type_recover) {
+		if (rec->found_file_extent &&
+		    find_normal_file_extent(root, rec->ino)) {
+			filetype = BTRFS_FT_REG_FILE;
+		} else if (rec->found_dir_item) {
+			filetype = BTRFS_FT_DIR;
+		} else {
+			printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
+			       rec->ino);
+			filetype = BTRFS_FT_REG_FILE;
+		}
+	}
+
+	ret = btrfs_mkdir(trans, root, lost_found_name, strlen(lost_found_name),
+			  BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino, mode);
+	if (ret < 0)
+		goto out;
+	ret = btrfs_new_inode(trans, root, rec->ino,
+			      mode | btrfs_type_to_imode(filetype));
+	if (ret < 0)
+		goto out;
+	ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
+			     namebuf, namelen, filetype, NULL, 1);
+out:
+	return ret;
+}
+
static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
{
struct btrfs_trans_handle *trans;
@@ -2108,7 +2275,8 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
I_ERR_NO_ORPHAN_ITEM |
- I_ERR_LINK_COUNT_WRONG)))
+ I_ERR_LINK_COUNT_WRONG |
+ I_ERR_NO_INODE_ITEM)))
return rec->errors;
path = btrfs_alloc_path();
@@ -2128,7 +2296,9 @@ static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
return PTR_ERR(trans);
}
- if (rec->errors & I_ERR_DIR_ISIZE_WRONG)
+ if (rec->errors & I_ERR_NO_INODE_ITEM)
+ ret = repair_inode_no_item(trans, root, path, rec);
+ if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
ret = repair_inode_isize(trans, root, path, rec);
if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
ret = repair_inode_orphan_item(trans, root, path, rec);
@@ -2269,6 +2439,8 @@ static int check_inode_recs(struct btrfs_root *root,
}
}
+ if (!rec->found_inode_item)
+ rec->errors |= I_ERR_NO_INODE_ITEM;
if (rec->found_link != rec->nlink)
rec->errors |= I_ERR_LINK_COUNT_WRONG;
if (repair) {
@@ -2281,8 +2453,6 @@ static int check_inode_recs(struct btrfs_root *root,
}
error++;
- if (!rec->found_inode_item)
- rec->errors |= I_ERR_NO_INODE_ITEM;
print_inode_error(root, rec);
list_for_each_entry(backref, &rec->backrefs, list) {
if (!backref->found_dir_item)
diff --git a/ctree.h b/ctree.h
index 682255c..dbe9b39 100644
--- a/ctree.h
+++ b/ctree.h
@@ -2449,6 +2449,9 @@ static inline int is_fstree(u64 rootid)
int check_dir_conflict(struct btrfs_root *root,
char *name, int namelen,
u64 dir, u64 index);
+int btrfs_new_inode(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 ino, u32 mode);
int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 ino, u64 parent_ino, char *name, int namelen,
u8 type, u64 *index, int add_backref);
diff --git a/inode.c b/inode.c
index b354f5a..a4cb8fb 100644
--- a/inode.c
+++ b/inode.c
@@ -463,9 +463,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
* its backref.
* The backref is added by btrfs_add_link().
*/
-static int btrfs_new_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 ino, u32 mode)
+int btrfs_new_inode(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 ino, u32 mode)
{
struct btrfs_inode_item inode_item = {0};
int ret = 0;
--
2.1.3
^ permalink raw reply related [flat|nested] 8+ messages in thread