* [PATCH v2 1/7] added helper functions to iterate backrefs
2011-06-18 11:45 [PATCH v2 0/7] Btrfs: scrub: print path to corrupted files and trigger nodatasum fixup Jan Schmidt
@ 2011-06-18 11:45 ` Jan Schmidt
2011-06-21 14:37 ` David Sterba
2011-06-18 11:45 ` [PATCH v2 2/7] scrub: added unverified_errors Jan Schmidt
` (5 subsequent siblings)
6 siblings, 1 reply; 11+ messages in thread
From: Jan Schmidt @ 2011-06-18 11:45 UTC (permalink / raw)
To: chris.mason, linux-btrfs
These helper functions iterate back references and call a function for each
backref. There is also a function to resolve an inode to a path in the
file system.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
---
fs/btrfs/Makefile | 3 +-
fs/btrfs/backref.c | 445 ++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/backref.h | 59 +++++++
3 files changed, 506 insertions(+), 1 deletions(-)
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b72dcf..c63f649 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -7,4 +7,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
- compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
+ compression.o delayed-ref.o relocation.o delayed-inode.o backref.o \
+ scrub.o
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
new file mode 100644
index 0000000..64611e6
--- /dev/null
+++ b/fs/btrfs/backref.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (C) 2011 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "ctree.h"
+#include "backref.h"
+
+/*
+ * searches fs_root for (inum key_type ioff). when the search does not hit an
+ * exact match and ends behind the last item of a leaf, the first item of the
+ * next leaf is looked at instead. the key of the item the path points to is
+ * stored in found_key; the path is not released here.
+ * returns 0 if that item has the wanted objectid and type, >0 if it has not
+ * (or if btrfs_next_leaf returned >0) and <0 on error.
+ */
+static int __inode_info(u64 inum, u64 ioff, u8 key_type,
+				struct btrfs_root *fs_root, struct btrfs_path *path,
+				struct btrfs_key *found_key)
+{
+	struct btrfs_key search_key = {
+		.objectid = inum,
+		.type = key_type,
+		.offset = ioff,
+	};
+	struct extent_buffer *leaf;
+	int err;
+
+	err = btrfs_search_slot(NULL, fs_root, &search_key, path, 0, 0);
+	if (err < 0)
+		return err;
+
+	leaf = path->nodes[0];
+	if (err > 0 && path->slots[0] >= btrfs_header_nritems(leaf)) {
+		/* no exact match and we ran off the end of this leaf */
+		err = btrfs_next_leaf(fs_root, path);
+		if (err)
+			return err;
+		leaf = path->nodes[0];
+	}
+
+	btrfs_item_key_to_cpu(leaf, found_key, path->slots[0]);
+	if (found_key->objectid == search_key.objectid &&
+	    found_key->type == search_key.type)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * this makes the path point to (inum INODE_ITEM ioff). the key that was found
+ * is not handed back to the caller. return values are those of __inode_info.
+ */
+int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
+				struct btrfs_path *path)
+{
+	struct btrfs_key unused_key;
+	int ret;
+
+	ret = __inode_info(inum, ioff, BTRFS_INODE_ITEM_KEY, fs_root, path,
+				&unused_key);
+	return ret;
+}
+
+/*
+ * looks up (inum INODE_REF ioff) through __inode_info. on success (0) the
+ * optional out parameters receive the slot and leaf the path pointed to and
+ * the offset of the key found (stored as the parent inode number). the path is
+ * always released before returning. return values are those of __inode_info.
+ * the strict parameter is not used.
+ * NOTE(review): *out_iref_eb is taken from path->nodes[0] and the path is
+ * released right afterwards; no reference on that extent buffer is taken
+ * here, so callers use it without holding one - confirm this is safe.
+ */
+static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
+ struct btrfs_path *path, int strict,
+ u64 *out_parent_inum,
+ struct extent_buffer **out_iref_eb,
+ int *out_slot)
+{
+ int ret;
+ struct btrfs_key found_key;
+
+ ret = __inode_info(inum, ioff, BTRFS_INODE_REF_KEY, fs_root, path,
+ &found_key);
+
+ /* out parameters are only written when a matching item was found */
+ if (!ret) {
+ if (out_slot)
+ *out_slot = path->slots[0];
+ if (out_iref_eb)
+ *out_iref_eb = path->nodes[0];
+ if (out_parent_inum)
+ *out_parent_inum = found_key.offset;
+ }
+
+ btrfs_release_path(path);
+ return ret;
+}
+
+/*
+ * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements
+ * of the path are separated by '/' and the path is guaranteed to be
+ * 0-terminated. the path is only given within the current file system.
+ * Therefore, it never starts with a '/'. the caller is responsible to provide
+ * "size" bytes in "dest". the dest buffer will be filled backwards! the idea is
+ * that in case of an overflow, the lower part in the hierarchy is most
+ * important to the user. finally, the start point of the resulting string is
+ * returned. in case the path would overflow, "..." is added at the front of
+ * the string and iteration stops regularly.
+ *
+ * iref must be located in eb; eb stays owned by the caller. every further leaf
+ * holding the INODE_REF of a parent directory is referenced here before the
+ * path is released and dropped again when it is no longer needed. the path is
+ * released on return.
+ * returns an ERR_PTR on failure: -EINVAL for size == 0, -ENOENT if a parent
+ * has no INODE_REF item, or the error of the tree search.
+ */
+static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
+				struct btrfs_inode_ref *iref,
+				struct extent_buffer *eb, u64 parent,
+				char *dest, u32 size)
+{
+	u32 len;
+	int slot;
+	int ret = 0;
+	u32 bytes_left;
+	struct btrfs_key found_key;
+	struct extent_buffer *eb_in = eb;
+	struct extent_buffer *next_eb;
+
+	/* size - 1 below would wrap around for an empty buffer */
+	if (!size)
+		return ERR_PTR(-EINVAL);
+
+	bytes_left = size - 1;
+	dest[bytes_left] = '\0';
+
+	while (1) {
+		len = btrfs_inode_ref_name_len(eb, iref);
+		if (len > bytes_left) {
+			/* overflow: mark the truncation if there is room */
+			if (size < 4)
+				break;
+			if (bytes_left > 3)
+				bytes_left -= 3;
+			else
+				bytes_left = 0;
+			memcpy(dest + bytes_left, "...", 3);
+			break;
+		}
+		bytes_left -= len;
+		read_extent_buffer(eb, dest + bytes_left,
+					(unsigned long)(iref + 1), len);
+
+		/*
+		 * look up the INODE_REF of the parent directory. its leaf and
+		 * slot must be used together; mixing the slot with the leaf of
+		 * some other item yields garbage name lengths.
+		 */
+		ret = __inode_info(parent, 0, BTRFS_INODE_REF_KEY, fs_root,
+					path, &found_key);
+		/* ERR_PTR(1) would not be recognized by IS_ERR() */
+		if (ret > 0)
+			ret = -ENOENT;
+		if (ret)
+			break;
+
+		/* regular exit ahead */
+		if (parent == found_key.offset)
+			break;
+
+		slot = path->slots[0];
+		next_eb = path->nodes[0];
+		/* make sure we can use the leaf after releasing the path */
+		if (next_eb != eb_in)
+			extent_buffer_get(next_eb);
+		btrfs_release_path(path);
+		if (eb != eb_in)
+			free_extent_buffer(eb);
+		eb = next_eb;
+
+		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
+		parent = found_key.offset;
+		if (bytes_left > 0) {
+			--bytes_left;
+			dest[bytes_left] = '/';
+		}
+	}
+
+	btrfs_release_path(path);
+	if (eb != eb_in)
+		free_extent_buffer(eb);
+
+	if (ret)
+		return ERR_PTR(ret);
+
+	return dest + bytes_left;
+}
+
+/*
+ * this makes the path point to (logical EXTENT_ITEM *)
+ * the key of the extent item is stored in found_key.
+ * returns 0 for data blocks, 1 for tree blocks and <0 on error (-ENOENT if no
+ * extent item covering logical was found)
+ */
+int data_extent_from_logical(struct btrfs_root *root, u64 logical,
+				struct btrfs_path *path,
+				struct btrfs_key *found_key)
+{
+	int ret;
+	u64 flags;
+	u32 item_size;
+	struct extent_buffer *eb;
+	struct btrfs_extent_item *ei;
+	struct btrfs_key key;
+
+	/* search behind the last possible item for logical, then step back */
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.objectid = logical;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		return ret;
+	ret = btrfs_previous_item(root->fs_info->extent_root, path,
+					0, BTRFS_EXTENT_ITEM_KEY);
+	if (ret < 0)
+		return ret;
+	/*
+	 * >0 means no previous EXTENT_ITEM exists; the path does not point to
+	 * a usable item then, so do not read the slot.
+	 */
+	if (ret > 0)
+		return -ENOENT;
+
+	btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
+	if (found_key->type != BTRFS_EXTENT_ITEM_KEY ||
+	    found_key->objectid > logical ||
+	    found_key->objectid + found_key->offset <= logical)
+		return -ENOENT;
+
+	eb = path->nodes[0];
+	item_size = btrfs_item_size_nr(eb, path->slots[0]);
+	BUG_ON(item_size < sizeof(*ei));
+
+	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
+	flags = btrfs_extent_flags(eb, ei);
+
+	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * helper function to iterate extent backrefs. ptr must point to a 0 value for
+ * the first call and may be modified. it is used to track state.
+ * if more backrefs exist, 0 is returned and the next call to __get_extent_ref
+ * must pass the modified ptr parameter to get to the next backref.
+ * after the last backref was processed, 1 is returned.
+ * returns <0 on error (-EINVAL on the first call if the extent flags contain
+ * none of flags_wanted).
+ * on return *eiref points to the inline ref that was read last.
+ * NOTE(review): 1 is returned as soon as the ref just read ends exactly at the
+ * end of the item, before its type is compared to type_wanted - so the last
+ * ref may be of a different type. also, an item without any inline ref makes
+ * the first read start at the end of the item. confirm callers cope with both.
+ */
+static int __get_extent_ref(u64 flags_wanted, u8 type_wanted,
+ unsigned long *ptr, struct extent_buffer *eb,
+ struct btrfs_extent_item *ei, u32 item_size,
+ struct btrfs_extent_inline_ref **eiref)
+{
+ int type;
+ unsigned long end;
+ u64 flags;
+ struct btrfs_tree_block_info *info;
+
+ if (!*ptr) {
+ /* first call */
+ flags = btrfs_extent_flags(eb, ei);
+ if (!(flags & flags_wanted))
+ return -EINVAL;
+ /* inline refs start behind the tree block info, if there is one */
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ info = (struct btrfs_tree_block_info *)(ei + 1);
+ *eiref = (struct btrfs_extent_inline_ref *)(info + 1);
+ } else {
+ *eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
+ }
+ *ptr = (unsigned long)*eiref;
+ }
+
+ end = (unsigned long)ei + item_size;
+
+ /* skip over inline refs until one of the wanted type was read */
+ do {
+ *eiref = (struct btrfs_extent_inline_ref *)*ptr;
+ type = btrfs_extent_inline_ref_type(eb, *eiref);
+
+ *ptr += btrfs_extent_inline_ref_size(type);
+
+ /* running past the item only warns, iteration goes on */
+ WARN_ON(*ptr > end);
+ if (*ptr == end)
+ return 1; /* last */
+ } while (type != type_wanted);
+
+ return 0;
+}
+
+/*
+ * reads the tree block backref for an extent. tree level and root are returned
+ * through out_level and out_root. ptr must point to a 0 value for the first
+ * call and may be modified (see __get_extent_ref comment).
+ * returns 0 on success, <0 on error. note: in contrast to __get_extent_ref this
+ * one never returns 1!
+ * when __get_extent_ref reports that more backrefs follow, this is logged and
+ * warned about, but the first backref is still returned.
+ */
+int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
+				struct btrfs_extent_item *ei, u32 item_size,
+				u64 *out_root, u8 *out_level)
+{
+	int ret;
+	struct btrfs_tree_block_info *info;
+	struct btrfs_extent_inline_ref *eiref;
+
+	ret = __get_extent_ref(BTRFS_EXTENT_FLAG_TREE_BLOCK,
+				BTRFS_TREE_BLOCK_REF_KEY, ptr, eb, ei,
+				item_size, &eiref);
+	if (ret < 0)
+		return ret;
+
+	/* 0 means "not the last backref", which is unexpected here */
+	if (!ret) {
+		printk(KERN_ERR "btrfs: tree_backref_for_extent detected "
+			"multiple tree backrefs for an extent at %llu\n",
+			(unsigned long long)eb->start);
+		WARN_ON(1);
+	}
+
+	/* the tree block info directly follows the extent item */
+	info = (struct btrfs_tree_block_info *)(ei + 1);
+	*out_root = btrfs_extent_inline_ref_offset(eb, eiref);
+	*out_level = btrfs_tree_block_level(eb, info);
+
+	return 0;
+}
+
+/*
+ * reads the next inline EXTENT_DATA_REF of a data extent (see __get_extent_ref
+ * comment for the ptr protocol). the objectid and offset stored in the ref are
+ * returned through out_inum and out_ioff.
+ * returns what __get_extent_ref returned (0 if more backrefs exist, 1 after
+ * the last one, <0 on error) or -EIO if the ref does not belong to
+ * BTRFS_FS_TREE_OBJECTID.
+ */
+static int data_inode_for_extent(unsigned long *ptr, struct extent_buffer *eb,
+				struct btrfs_extent_item *ei,
+				u32 item_size, u64 *out_inum,
+				u64 *out_ioff)
+{
+	struct btrfs_extent_inline_ref *inline_ref;
+	struct btrfs_extent_data_ref *data_ref;
+	int status;
+
+	status = __get_extent_ref(BTRFS_EXTENT_FLAG_DATA,
+				BTRFS_EXTENT_DATA_REF_KEY, ptr, eb, ei,
+				item_size, &inline_ref);
+	if (status < 0)
+		return status;
+
+	/* the data ref is stored in place of the inline ref's offset field */
+	data_ref = (struct btrfs_extent_data_ref *)(&inline_ref->offset);
+	if (btrfs_extent_data_ref_root(eb, data_ref) == BTRFS_FS_TREE_OBJECTID) {
+		*out_inum = btrfs_extent_data_ref_objectid(eb, data_ref);
+		*out_ioff = btrfs_extent_data_ref_offset(eb, data_ref);
+		return status;
+	}
+
+	WARN_ON(1);
+	return -EIO;
+}
+
+/*
+ * walks the data backrefs of the extent item ei (located in eb, item_size
+ * bytes long) and calls iterate() once per backref, passing the inode number
+ * and extent_item_offset plus the offset stored in the backref.
+ * iteration stops as soon as iterate() returns a non-zero value, which is then
+ * returned. returns 0 after the last backref and <0 if reading a backref
+ * failed.
+ */
+int iterate_extent_inodes(struct extent_buffer *eb,
+				struct btrfs_extent_item *ei,
+				u64 extent_item_offset, u32 item_size,
+				iterate_extent_inodes_t *iterate, void *ctx)
+{
+	unsigned long ptr = 0;
+	u64 ref_offset;
+	u64 inum;
+	int status;
+	int ret;
+
+	for (;;) {
+		status = data_inode_for_extent(&ptr, eb, ei, item_size, &inum,
+						&ref_offset);
+		if (status < 0)
+			return status;
+
+		ret = iterate(inum, extent_item_offset + ref_offset, ctx);
+		if (ret)
+			return ret;
+
+		/* non-zero status: that was the last backref */
+		if (status)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * looks up the extent item covering logical and calls iterate() for every
+ * inode referencing it (see iterate_extent_inodes). the offset passed on is
+ * logical's offset within the extent added to the offset from the backref.
+ * the path is released before iterate() is called; the leaf holding the
+ * extent item is kept alive by a reference taken here.
+ * returns the non-zero result of data_extent_from_logical (1 for tree blocks,
+ * <0 on error) without iterating, otherwise the result of
+ * iterate_extent_inodes.
+ */
+int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
+				struct btrfs_path *path,
+				iterate_extent_inodes_t *iterate, void *ctx)
+{
+	int ret;
+	u32 item_size;
+	struct extent_buffer *l;
+	struct btrfs_extent_item *extent;
+	u64 offset;
+	struct btrfs_key found_key;
+
+	ret = data_extent_from_logical(fs_info->extent_root, logical, path,
+					&found_key);
+	if (ret)
+		return ret;
+
+	offset = logical - found_key.objectid;
+	l = path->nodes[0];
+	extent = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
+	item_size = btrfs_item_size_nr(l, path->slots[0]);
+	/* make sure we can use the leaf after releasing the path */
+	extent_buffer_get(l);
+	btrfs_release_path(path);
+
+	ret = iterate_extent_inodes(l, extent, offset, item_size, iterate, ctx);
+	free_extent_buffer(l);
+
+	return ret;
+}
+
+/*
+ * calls iterate() for every name stored in the INODE_REF items of inum. the
+ * callback gets the parent inode number (offset of the item's key), the ref,
+ * the leaf the ref is located in and the item's slot in that leaf.
+ * the leaf is referenced here and the path is released before iterate() is
+ * called, so the callback may use the path for its own searches.
+ * eb_i is not used; refs are always read from the leaf they were found in.
+ * returns 0 when all refs were processed, the non-zero return value of
+ * iterate() if it stopped the iteration and <0 on error.
+ */
+static int iterate_irefs(u64 inum, struct extent_buffer *eb_i,
+				struct btrfs_root *fs_root,
+				struct btrfs_path *path,
+				iterate_irefs_t *iterate, void *ctx)
+{
+	int ret;
+	int slot;
+	u32 cur;
+	u32 len;
+	u32 name_len;
+	u32 item_size;
+	u64 parent = 0;
+	struct extent_buffer *eb_ir;
+	struct btrfs_item *item;
+	struct btrfs_inode_ref *iref;
+	struct btrfs_key found_key;
+
+	while (1) {
+		/* continue behind the INODE_REF item processed last */
+		ret = __inode_info(inum, parent ? parent+1 : 0,
+					BTRFS_INODE_REF_KEY, fs_root, path,
+					&found_key);
+		if (ret)
+			break;
+
+		parent = found_key.offset;
+		slot = path->slots[0];
+		eb_ir = path->nodes[0];
+		/* make sure we can use the leaf after releasing the path */
+		extent_buffer_get(eb_ir);
+		btrfs_release_path(path);
+
+		item = btrfs_item_nr(eb_ir, slot);
+		item_size = btrfs_item_size(eb_ir, item);
+		iref = btrfs_item_ptr(eb_ir, slot, struct btrfs_inode_ref);
+
+		/* one item may hold several names for the same parent */
+		for (cur = 0; cur < item_size; cur += len) {
+			name_len = btrfs_inode_ref_name_len(eb_ir, iref);
+			ret = iterate(parent, iref, eb_ir, slot, ctx);
+			if (ret)
+				break;
+			len = sizeof(*iref) + name_len;
+			iref = (struct btrfs_inode_ref *)((char *)iref + len);
+		}
+
+		free_extent_buffer(eb_ir);
+		if (ret)
+			return ret;
+	}
+
+	btrfs_release_path(path);
+
+	/* >0 only says that there are no more INODE_REF items */
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * iterate_irefs callback: appends a space and the path belonging to iref to
+ * ipath->dest. iref must be located in eb_ir. the path is built in
+ * ipath->scratch_buf first, limited to the smaller of the bytes left in dest
+ * and scratch_bufsize.
+ * returns 0 if the path could be dumped (probably truncated)
+ * returns <0 in case of an error (-EOVERFLOW if nothing more fits)
+ */
+static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
+				struct extent_buffer *eb_ir, int slot,
+				void *ctx)
+{
+	struct inode_fs_paths *ipath = ctx;
+	int size;
+	u32 path_len;
+	char *fs_path;
+
+	if (ipath->bytes_left < 2)
+		return -EOVERFLOW;
+
+	/*
+	 * never let iref_to_path write beyond the scratch buffer.
+	 * NOTE(review): assumes callers set scratch_bufsize to the size of
+	 * scratch_buf - confirm.
+	 */
+	size = min(ipath->bytes_left - 1, ipath->scratch_bufsize);
+	if (size < 1)
+		return -EOVERFLOW;
+
+	*ipath->dest++ = ' ';
+	--ipath->bytes_left;
+
+	fs_path = iref_to_path(ipath->fs_root, ipath->path, iref, eb_ir,
+				inum, ipath->scratch_buf, size);
+	if (IS_ERR(fs_path))
+		return PTR_ERR(fs_path);
+	/* the path ends with its 0-terminator at scratch_buf[size - 1] */
+	path_len = ipath->scratch_buf + size - fs_path - 1;
+	if (path_len+1 > ipath->bytes_left)
+		return -EOVERFLOW;
+	/* the terminator is copied too, but overwritten by the next path */
+	memcpy(ipath->dest, fs_path, path_len+1);
+	ipath->bytes_left -= path_len;
+	ipath->dest += path_len;
+
+	return 0;
+}
+
+/*
+ * dumps the paths of all names of inode inum into ipath->dest, each one
+ * preceded by a space. return values are those of iterate_irefs.
+ */
+int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
+{
+	return iterate_irefs(inum, ipath->eb, ipath->fs_root, ipath->path,
+				inode_to_path, ipath);
+}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
new file mode 100644
index 0000000..d208321
--- /dev/null
+++ b/fs/btrfs/backref.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2011 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_BACKREF__
+#define __BTRFS_BACKREF__
+
+/* state handed to paths_from_inode and passed on to its iteration callback */
+struct inode_fs_paths {
+ int bytes_left; /* bytes still free in dest */
+ char *dest; /* current write position of the output buffer */
+ struct btrfs_path *path; /* path used for the tree searches */
+ char *scratch_buf; /* buffer each single path is built in */
+ struct btrfs_root *fs_root; /* root the inode refs are searched in */
+ int scratch_bufsize; /* presumably the size of scratch_buf - confirm */
+ struct extent_buffer *eb; /* extent buffer handed to iterate_irefs */
+};
+
+typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, void *ctx);
+typedef int (iterate_irefs_t)(u64 parent, struct btrfs_inode_ref *iref,
+ struct extent_buffer *eb_ir,
+ int slot, void *ctx);
+
+int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
+ struct btrfs_path *path);
+
+int data_extent_from_logical(struct btrfs_root *root, u64 logical,
+ struct btrfs_path *path,
+ struct btrfs_key *found_key);
+
+int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
+ struct btrfs_extent_item *ei, u32 item_size,
+ u64 *out_root, u8 *out_level);
+
+int iterate_extent_inodes(struct extent_buffer *eb,
+ struct btrfs_extent_item *ei,
+ u64 extent_item_offset, u32 item_size,
+ iterate_extent_inodes_t *iterate, void *ctx);
+
+int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
+ iterate_extent_inodes_t *iterate, void *ctx);
+
+int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
+
+#endif
--
1.7.3.4
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH v2 1/7] added helper functions to iterate backrefs
2011-06-18 11:45 ` [PATCH v2 1/7] added helper functions to iterate backrefs Jan Schmidt
@ 2011-06-21 14:37 ` David Sterba
2011-06-21 15:12 ` Jan Schmidt
0 siblings, 1 reply; 11+ messages in thread
From: David Sterba @ 2011-06-21 14:37 UTC (permalink / raw)
To: Jan Schmidt; +Cc: chris.mason, linux-btrfs
On Sat, Jun 18, 2011 at 01:45:58PM +0200, Jan Schmidt wrote:
> +/*
> + * returns 0 if the path could be dumped (probably truncated)
> + * returns <0 in case of an error
> + */
> +static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
> + struct extent_buffer *eb_ir, int slot,
> + void *ctx)
> +{
> + struct inode_fs_paths *ipath = ctx;
> + struct extent_buffer *eb_i = ipath->eb;
> + u32 path_len;
> + char *fs_path;
> +
> + if (ipath->bytes_left < 2)
> + return -EOVERFLOW;
> +
> + *ipath->dest++ = ' ';
> + --ipath->bytes_left;
this actually prepends a space before the first entry.
I've played a bit with this interface, it's the missing piece to
implement the 'inode number -> directory names' search :)
All the names are placed into one buffer, 4k sized, separated by a
space. Not all files fit, so I suggest to print one filename at a time.
Me-user wants to see all the filenames, even if the list is potentially
long.
I've also noticed that in iref_to_path()
130 len = btrfs_inode_ref_name_len(eb, iref);
returns very large values, which do not fit into the 4k buffer and the
function returns. When trying to print the data by read_extent_buffer to
a temporary array, a pagefault occurs (and seems to come from reading
the extent buffer). The numbers are like 19000 or higher, which are way
above path or filename maximum size. I don't think this is the
intentional behaviour, it relies on some side effect rather than clear
logic.
Example:
ipath buffer and scratch are 32K, each, ie. the overly sized
ref_name_len will fit there:
[ 8766.928232] btrfs: ino2name: 266 p1/
[ 8767.440964] i2p: [4] namelen 10, left 32766
[ 8767.446417] i2p: [7] path:
[ 8767.450445] i2p: [4] namelen 2, left 32755
[ 8767.455733] i2p: [7] path: /
[ 8767.459834] i2p: [2] p1/
[ 8767.463593] i2p: [4] namelen 0, left 32752
[ 8767.468903] i2p: [7] path:
[ 8767.472908] i2p: [4] namelen 2, left 32751
[ 8767.478188] i2p: [7] path: /
[ 8767.482280] i2p: [2] p1/
[ 8767.486024] i2p: [4] namelen 0, left 32748
[ 8767.491293] i2p: [7] path:
[ 8767.495283] i2p: [4] namelen 2, left 32747
[ 8767.500587] i2p: [7] path: /
[ 8767.504680] i2p: [2] p1/
[ 8767.508430] i2p: [4] namelen 0, left 32744
[ 8767.513708] i2p: [7] path:
[ 8767.517702] i2p: [4] namelen 2, left 32743
[ 8767.522969] i2p: [7] path: /
[ 8767.527042] i2p: [2] p1/
[ 8767.530787] i2p: [4] namelen 0, left 32740
[ 8767.536049] i2p: [7] path:
[ 8767.539991] i2p: [4] namelen 2, left 32739
[ 8767.545282] i2p: [7] path: /
[ 8767.549374] i2p: [2] p1/
[ 8767.553109] i2p: [4] namelen 0, left 32736
[ 8767.558377] i2p: [7] path:
[ 8767.562354] i2p: [4] namelen 2, left 32735
[ 8767.567615] i2p: [7] path: /
[ 8767.571713] i2p: [2] p1/
[ 8767.575443] i2p: [4] namelen 0, left 32732
[ 8767.580701] i2p: [7] path:
[ 8767.584678] i2p: [4] namelen 2, left 32731
[ 8767.589933] i2p: [7] path: /
[ 8767.594007] i2p: [2] p1/
[ 8767.597713] i2p: [4] namelen 0, left 32728
[ 8767.602908] i2p: [7] path:
[ 8767.606832] i2p: [4] namelen 2, left 32727
[ 8767.612030] i2p: [7] path: /
[ 8767.616050] i2p: [2] p1/
[ 8767.619676] i2p: [4] namelen 0, left 32724
[ 8767.624873] i2p: [7] path:
[ 8767.628782] i2p: [4] namelen 2, left 32723
[ 8767.633970] i2p: [7] path: /
[ 8767.637962] i2p: [2] p1/
[ 8767.641639] i2p: [4] namelen 0, left 32720
[ 8767.646838] i2p: [7] path:
[ 8767.650757] i2p: [4] namelen 2, left 32719
[ 8767.655952] i2p: [7] path: /
[ 8767.659940] i2p: [2] p1/
[ 8767.663604] i2p: [4] namelen 0, left 32716
[ 8767.668790] i2p: [7] path:
[ 8767.672696] i2p: [4] namelen 2, left 32715
[ 8767.677881] i2p: [7] path: /
[ 8767.681878] i2p: [2] p1/
[ 8767.685549] i2p: [4] namelen 0, left 32712
[ 8767.690742] i2p: [7] path:
[ 8767.694655] i2p: [4] namelen 2, left 32711
[ 8767.699843] i2p: [7] path: /
[ 8767.703840] i2p: [2] p1/
[ 8767.707520] i2p: [4] namelen 19967, left 32708
[ 8767.713057] i2p: [7] path:
[ 8767.716955] BUG: unable to handle kernel NULL pointer dereference at (null)
[ 8767.720932] IP: [<ffffffffa005f1d2>] read_extent_buffer+0xa2/0x220 [btrfs]
[ 8767.720932] PGD 77bd0067 PUD 79d35067 PMD 0
[ 8767.720932] Oops: 0000 [#1] SMP
[ 8767.720932] CPU 1
[ 8767.720932] Modules linked in: btrfs loop
[ 8767.720932]
[ 8767.720932] Pid: 10859, comm: btrfs-ino2path Not tainted 3.0.0-rc3-default+ #75 Intel Corporation Santa Rosa platform/Matanzas
[ 8767.720932] RIP: 0010:[<ffffffffa005f1d2>] [<ffffffffa005f1d2>] read_extent_buffer+0xa2/0x220 [btrfs]
[ 8767.720932] RSP: 0018:ffff880074acbc58 EFLAGS: 00010202
[ 8767.720932] RAX: 0000000000000000 RBX: 0000000000003deb RCX: 0000000000001000
[ 8767.720932] RDX: 00000000000001e0 RSI: 0000000000000000 RDI: 0000000000000246
[ 8767.720932] RBP: ffff880074acbcb8 R08: 0000000000000000 R09: 0000000000000001
[ 8767.720932] R10: 0000000000000000 R11: 0000000000001c04 R12: 0000000000000002
[ 8767.720932] R13: 0000000000000000 R14: ffff880079bac1d9 R15: ffff880074acbfd8
[ 8767.720932] FS: 00007f1399198740(0000) GS:ffff88007de00000(0000) knlGS:0000000000000000
[ 8767.720932] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 8767.720932] CR2: 0000000000000000 CR3: 0000000074b13000 CR4: 00000000000006e0
[ 8767.720932] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 8767.720932] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 8767.720932] Process btrfs-ino2path (pid: 10859, threadinfo ffff880074aca000, task ffff880077085340)
[ 8767.720932] Stack:
[ 8767.720932] ffffffffa005f282 ffffffff81b21e74 ffff88002f2ef668 0000000000000000
[ 8767.720932] ffff880073f94dd8 ffff880074acbfd8 ffff8800780bd000 00000000000031c5
[ 8767.720932] 00000000000031c5 0000000000000104 ffff880073f94dd8 ffff880074132130
[ 8767.720932] Call Trace:
[ 8767.720932] [<ffffffffa005f282>] ? read_extent_buffer+0x152/0x220 [btrfs]
[ 8767.720932] [<ffffffff81b21e74>] ? printk+0x68/0x6c
[ 8767.720932] [<ffffffffa008924d>] inode_to_path+0x10d/0x2d0 [btrfs]
[ 8767.720932] [<ffffffffa0089883>] paths_from_inode+0xe3/0x120 [btrfs]
[ 8767.720932] [<ffffffffa006de9f>] btrfs_ioctl+0xb5f/0xf80 [btrfs]
[ 8767.720932] [<ffffffff810d3e55>] ? trace_hardirqs_on_caller+0x155/0x1a0
[ 8767.720932] [<ffffffff81080e4c>] ? finish_task_switch+0x7c/0x110
[ 8767.720932] [<ffffffff81080e0f>] ? finish_task_switch+0x3f/0x110
[ 8767.720932] [<ffffffff81193578>] do_vfs_ioctl+0x98/0x570
[ 8767.720932] [<ffffffff811828be>] ? fget_light+0x33e/0x430
[ 8767.720932] [<ffffffff81b2ed3a>] ? sysret_check+0x2e/0x69
[ 8767.720932] [<ffffffff81193a9f>] sys_ioctl+0x4f/0x80
[ 8767.720932] [<ffffffff81b2ed02>] system_call_fastpath+0x16/0x1b
[ 8767.720932] Code: 66 0f 1f 84 00 00 00 00 00 48 8b 55 c0 48 8b 42 30 48 89 c6 b9 00 10 00 00 4c 29 e9 48 39 d9 48 0f 47 cb 41 83 87 44 e0 ff ff 01
[ 8767.720932] 8b 00 48 c1 e8 2d 48 89 c2 48 c1 ea 08 48 8b 3c d5 00 95 f7
[ 8767.720932] RIP [<ffffffffa005f1d2>] read_extent_buffer+0xa2/0x220 [btrfs]
[ 8767.720932] RSP <ffff880074acbc58>
[ 8767.720932] CR2: 0000000000000000
[ 8768.067994] ---[ end trace b9afe483f6289b6f ]---
Let's see the inode 266 by hand:
dsterba@:/mnt/sda10$ find . -inum 266
./p1/d6/d7/d1f/c41
something is wrong ...
The file hierarchy is a leftover from fs_mark runs, directories to depth 9,
short names.
david
> +
> + fs_path = iref_to_path(ipath->fs_root, ipath->path, iref, eb_i,
> + inum, ipath->scratch_buf, ipath->bytes_left);
> + if (IS_ERR(fs_path))
> + return PTR_ERR(fs_path);
> + path_len = ipath->scratch_buf + ipath->bytes_left - fs_path - 1;
> + if (path_len+1 > ipath->bytes_left)
> + return -EOVERFLOW;
> + memcpy(ipath->dest, fs_path, path_len+1);
> + ipath->bytes_left -= path_len;
> + ipath->dest += path_len;
> +
> + return 0;
> +}
> +
> +int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
> +{
> + return iterate_irefs(inum, ipath->eb, ipath->fs_root, ipath->path,
> + inode_to_path, ipath);
> +}
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH v2 1/7] added helper functions to iterate backrefs
2011-06-21 14:37 ` David Sterba
@ 2011-06-21 15:12 ` Jan Schmidt
2011-06-23 8:20 ` Jan Schmidt
0 siblings, 1 reply; 11+ messages in thread
From: Jan Schmidt @ 2011-06-21 15:12 UTC (permalink / raw)
To: chris.mason, linux-btrfs
On 21.06.2011 16:37, David Sterba wrote:
> On Sat, Jun 18, 2011 at 01:45:58PM +0200, Jan Schmidt wrote:
>> +/*
>> + * returns 0 if the path could be dumped (probably truncated)
>> + * returns <0 in case of an error
>> + */
>> +static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
>> + struct extent_buffer *eb_ir, int slot,
>> + void *ctx)
>> +{
>> + struct inode_fs_paths *ipath = ctx;
>> + struct extent_buffer *eb_i = ipath->eb;
>> + u32 path_len;
>> + char *fs_path;
>> +
>> + if (ipath->bytes_left < 2)
>> + return -EOVERFLOW;
>> +
>> + *ipath->dest++ = ' ';
>> + --ipath->bytes_left;
>
> this actually prepends a space before the first entry.
Which is fine because we print the buffer with "(path:%s)" in
scrub.c:257, and a space is well appropriate there. That way we don't
need to handle the first path differently. (Makes only sense as long as
we stick to multiple files per line).
> I've played a bit with this interface, it's the missing piece to
> implement the 'inode number -> directory names' search :)
> All the names are placed into one buffer, 4k sized, separated by a
> space. Not all files fit, so I suggest to print one filename at a time.
It does print all references to one *inode* to the same buffer, right.
If there are more inodes referring to that extent, those paths are
printed to a separate buffer. You can make some reflinks to try that out.
> Me-user wants to see all the filenames, even if the list is potentially
> long.
Well, maybe. However, for me, the number of hardlinks created is either
very low, or I know why a file has a whole bunch of hardlinks myself
(e.g. some kind of backup strategy). In both cases, this approach would
fit. I don't want to see some thousands of printk messages for that one
file I know to have a lot of hardlinks - maybe even missing some error
of a non-hardlinked file due to rate limiting.
On the other hand, you could argue, when I deliberately create a bunch
of reflinks, I will get the same problem. Another approach would be to
put it all into one buffer, no matter which inode it comes from. Either
way shouldn't be hard to accomplish.
> I've also noticed that in iref_to_path()
>
> 130 len = btrfs_inode_ref_name_len(eb, iref);
>
> returns very large values, which do not fit into the 4k buffer and the
> function returns. When trying to print the data by read_extent_buffer to
> a temporary array, a pagefault occurs (and seems to come from reading
> the extent buffer). The numbers are like 19000 or higher, which are way
> above path or filename maximum size. I don't think this is the
> intentional behaviour, it relies on some side effect rather than clear
> logic.
Something is going wrong here:
> Example:
> ipath buffer and scratch are 32K, each, ie. the overly sized
> ref_name_len will fit there:
>
> [ 8766.928232] btrfs: ino2name: 266 p1/
> [ 8767.440964] i2p: [4] namelen 10, left 32766
> [ 8767.446417] i2p: [7] path:
> [ 8767.450445] i2p: [4] namelen 2, left 32755
> [ 8767.455733] i2p: [7] path: /
> [ 8767.459834] i2p: [2] p1/
Do I get that right? This is inode 266, which should refer to
./p1/d6/d7/d1f/c41 (as you state below). Already on second iteration,
we're at something named "p1", which shouldn't happen as we construct
the path bottom-up. And those namelen 0 in the following lines shouldn't
happen, either. Furthermore, the path should change on every iteration
(I guess, that might depend on the printk behind, of course).
> [ 8767.463593] i2p: [4] namelen 0, left 32752
> [ 8767.468903] i2p: [7] path:
> [ 8767.472908] i2p: [4] namelen 2, left 32751
> [ 8767.478188] i2p: [7] path: /
> [ 8767.482280] i2p: [2] p1/
> [ 8767.486024] i2p: [4] namelen 0, left 32748
> [ 8767.491293] i2p: [7] path:
> [ 8767.495283] i2p: [4] namelen 2, left 32747
> [ 8767.500587] i2p: [7] path: /
> [ 8767.504680] i2p: [2] p1/
> [ 8767.508430] i2p: [4] namelen 0, left 32744
> [ 8767.513708] i2p: [7] path:
> [ 8767.517702] i2p: [4] namelen 2, left 32743
> [ 8767.522969] i2p: [7] path: /
> [ 8767.527042] i2p: [2] p1/
> [ 8767.530787] i2p: [4] namelen 0, left 32740
> [ 8767.536049] i2p: [7] path:
> [ 8767.539991] i2p: [4] namelen 2, left 32739
> [ 8767.545282] i2p: [7] path: /
> [ 8767.549374] i2p: [2] p1/
> [ 8767.553109] i2p: [4] namelen 0, left 32736
> [ 8767.558377] i2p: [7] path:
> [ 8767.562354] i2p: [4] namelen 2, left 32735
> [ 8767.567615] i2p: [7] path: /
> [ 8767.571713] i2p: [2] p1/
> [ 8767.575443] i2p: [4] namelen 0, left 32732
> [ 8767.580701] i2p: [7] path:
> [ 8767.584678] i2p: [4] namelen 2, left 32731
> [ 8767.589933] i2p: [7] path: /
> [ 8767.594007] i2p: [2] p1/
> [ 8767.597713] i2p: [4] namelen 0, left 32728
> [ 8767.602908] i2p: [7] path:
> [ 8767.606832] i2p: [4] namelen 2, left 32727
> [ 8767.612030] i2p: [7] path: /
> [ 8767.616050] i2p: [2] p1/
> [ 8767.619676] i2p: [4] namelen 0, left 32724
> [ 8767.624873] i2p: [7] path:
> [ 8767.628782] i2p: [4] namelen 2, left 32723
> [ 8767.633970] i2p: [7] path: /
> [ 8767.637962] i2p: [2] p1/
> [ 8767.641639] i2p: [4] namelen 0, left 32720
> [ 8767.646838] i2p: [7] path:
> [ 8767.650757] i2p: [4] namelen 2, left 32719
> [ 8767.655952] i2p: [7] path: /
> [ 8767.659940] i2p: [2] p1/
> [ 8767.663604] i2p: [4] namelen 0, left 32716
> [ 8767.668790] i2p: [7] path:
> [ 8767.672696] i2p: [4] namelen 2, left 32715
> [ 8767.677881] i2p: [7] path: /
> [ 8767.681878] i2p: [2] p1/
> [ 8767.685549] i2p: [4] namelen 0, left 32712
> [ 8767.690742] i2p: [7] path:
> [ 8767.694655] i2p: [4] namelen 2, left 32711
> [ 8767.699843] i2p: [7] path: /
> [ 8767.703840] i2p: [2] p1/
> [ 8767.707520] i2p: [4] namelen 19967, left 32708
> [ 8767.713057] i2p: [7] path:
> [ 8767.716955] BUG: unable to handle kernel NULL pointer dereference at (null)
> [ 8767.720932] IP: [<ffffffffa005f1d2>] read_extent_buffer+0xa2/0x220 [btrfs]
> [ 8767.720932] PGD 77bd0067 PUD 79d35067 PMD 0
> [ 8767.720932] Oops: 0000 [#1] SMP
> [ 8767.720932] CPU 1
> [ 8767.720932] Modules linked in: btrfs loop
> [ 8767.720932]
> [ 8767.720932] Pid: 10859, comm: btrfs-ino2path Not tainted 3.0.0-rc3-default+ #75 Intel Corporation Santa Rosa platform/Matanzas
> [ 8767.720932] RIP: 0010:[<ffffffffa005f1d2>] [<ffffffffa005f1d2>] read_extent_buffer+0xa2/0x220 [btrfs]
> [ 8767.720932] RSP: 0018:ffff880074acbc58 EFLAGS: 00010202
> [ 8767.720932] RAX: 0000000000000000 RBX: 0000000000003deb RCX: 0000000000001000
> [ 8767.720932] RDX: 00000000000001e0 RSI: 0000000000000000 RDI: 0000000000000246
> [ 8767.720932] RBP: ffff880074acbcb8 R08: 0000000000000000 R09: 0000000000000001
> [ 8767.720932] R10: 0000000000000000 R11: 0000000000001c04 R12: 0000000000000002
> [ 8767.720932] R13: 0000000000000000 R14: ffff880079bac1d9 R15: ffff880074acbfd8
> [ 8767.720932] FS: 00007f1399198740(0000) GS:ffff88007de00000(0000) knlGS:0000000000000000
> [ 8767.720932] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> [ 8767.720932] CR2: 0000000000000000 CR3: 0000000074b13000 CR4: 00000000000006e0
> [ 8767.720932] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 8767.720932] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> [ 8767.720932] Process btrfs-ino2path (pid: 10859, threadinfo ffff880074aca000, task ffff880077085340)
> [ 8767.720932] Stack:
> [ 8767.720932] ffffffffa005f282 ffffffff81b21e74 ffff88002f2ef668 0000000000000000
> [ 8767.720932] ffff880073f94dd8 ffff880074acbfd8 ffff8800780bd000 00000000000031c5
> [ 8767.720932] 00000000000031c5 0000000000000104 ffff880073f94dd8 ffff880074132130
> [ 8767.720932] Call Trace:
> [ 8767.720932] [<ffffffffa005f282>] ? read_extent_buffer+0x152/0x220 [btrfs]
> [ 8767.720932] [<ffffffff81b21e74>] ? printk+0x68/0x6c
> [ 8767.720932] [<ffffffffa008924d>] inode_to_path+0x10d/0x2d0 [btrfs]
> [ 8767.720932] [<ffffffffa0089883>] paths_from_inode+0xe3/0x120 [btrfs]
> [ 8767.720932] [<ffffffffa006de9f>] btrfs_ioctl+0xb5f/0xf80 [btrfs]
> [ 8767.720932] [<ffffffff810d3e55>] ? trace_hardirqs_on_caller+0x155/0x1a0
> [ 8767.720932] [<ffffffff81080e4c>] ? finish_task_switch+0x7c/0x110
> [ 8767.720932] [<ffffffff81080e0f>] ? finish_task_switch+0x3f/0x110
> [ 8767.720932] [<ffffffff81193578>] do_vfs_ioctl+0x98/0x570
> [ 8767.720932] [<ffffffff811828be>] ? fget_light+0x33e/0x430
> [ 8767.720932] [<ffffffff81b2ed3a>] ? sysret_check+0x2e/0x69
> [ 8767.720932] [<ffffffff81193a9f>] sys_ioctl+0x4f/0x80
> [ 8767.720932] [<ffffffff81b2ed02>] system_call_fastpath+0x16/0x1b
> [ 8767.720932] Code: 66 0f 1f 84 00 00 00 00 00 48 8b 55 c0 48 8b 42 30 48 89 c6 b9 00 10 00 00 4c 29 e9 48 39 d9 48 0f 47 cb 41 83 87 44 e0 ff ff 01
> [ 8767.720932] 8b 00 48 c1 e8 2d 48 89 c2 48 c1 ea 08 48 8b 3c d5 00 95 f7
> [ 8767.720932] RIP [<ffffffffa005f1d2>] read_extent_buffer+0xa2/0x220 [btrfs]
> [ 8767.720932] RSP <ffff880074acbc58>
> [ 8767.720932] CR2: 0000000000000000
> [ 8768.067994] ---[ end trace b9afe483f6289b6f ]---
>
> Let's see the inode 266 by hand:
>
> dsterba@:/mnt/sda10$ find . -inum 266
> ./p1/d6/d7/d1f/c41
>
> something is wrong ...
>
> The file hierarchy is a leftover from fs_mark runs, directories to depth 9,
> short names.
Sounds like I missed something. There is definitely a bug in the path
construction or lower iteration code I added.
I used a fs_mark created btrfs for my tests as well, but those worked
well. If you could share that file system (image or debug-tree output),
that'd be great help debugging this.
Thanks!
Jan
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH v2 1/7] added helper functions to iterate backrefs
2011-06-21 15:12 ` Jan Schmidt
@ 2011-06-23 8:20 ` Jan Schmidt
0 siblings, 0 replies; 11+ messages in thread
From: Jan Schmidt @ 2011-06-23 8:20 UTC (permalink / raw)
To: linux-btrfs; +Cc: Chris Mason
On 21.06.2011 17:12, Jan Schmidt wrote:
> On 21.06.2011 16:37, David Sterba wrote:
>> [...]
> Something is going wrong here:
>
>> Example:
>> ipath buffer and scratch are 32K, each, ie. the overly sized
>> ref_name_len will fit there:
>>
>> [ 8766.928232] btrfs: ino2name: 266 p1/
>> [ 8767.440964] i2p: [4] namelen 10, left 32766
>> [ 8767.446417] i2p: [7] path:
>> [ 8767.450445] i2p: [4] namelen 2, left 32755
>> [ 8767.455733] i2p: [7] path: /
>> [ 8767.459834] i2p: [2] p1/
>
> Do I get that right? This is inode 266, which should refer to
> ./p1/d6/d7/d1f/c41 (as you state below). Already on second iteration,
> we're at something named "p1", which shouldn't happen as we construct
> the path bottom-up. And those namelen 0 in the following lines shouldn't
> happen, either. Furthermore, the path should change on every iteration
> (I guess, that might depend on the printk behind, of course).
>
>> [ 8767.463593] i2p: [4] namelen 0, left 32752
>> [ 8767.468903] i2p: [7] path:
>> [ 8767.472908] i2p: [4] namelen 2, left 32751
>> [ 8767.478188] i2p: [7] path: /
>> [ 8767.482280] i2p: [2] p1/
>> [ 8767.486024] i2p: [4] namelen 0, left 32748
>> [ 8767.491293] i2p: [7] path:
>> [ 8767.495283] i2p: [4] namelen 2, left 32747
>> [ 8767.500587] i2p: [7] path: /
>> [ 8767.504680] i2p: [2] p1/
>> [ 8767.508430] i2p: [4] namelen 0, left 32744
>> [ 8767.513708] i2p: [7] path:
>> [ 8767.517702] i2p: [4] namelen 2, left 32743
>> [ 8767.522969] i2p: [7] path: /
>> [ 8767.527042] i2p: [2] p1/
>> [ 8767.530787] i2p: [4] namelen 0, left 32740
>> [ 8767.536049] i2p: [7] path:
>> [ 8767.539991] i2p: [4] namelen 2, left 32739
>> [ 8767.545282] i2p: [7] path: /
>> [ 8767.549374] i2p: [2] p1/
>> [ 8767.553109] i2p: [4] namelen 0, left 32736
>> [ 8767.558377] i2p: [7] path:
>> [ 8767.562354] i2p: [4] namelen 2, left 32735
>> [ 8767.567615] i2p: [7] path: /
>> [ 8767.571713] i2p: [2] p1/
>> [ 8767.575443] i2p: [4] namelen 0, left 32732
>> [ 8767.580701] i2p: [7] path:
>> [ 8767.584678] i2p: [4] namelen 2, left 32731
>> [ 8767.589933] i2p: [7] path: /
>> [ 8767.594007] i2p: [2] p1/
>> [ 8767.597713] i2p: [4] namelen 0, left 32728
>> [ 8767.602908] i2p: [7] path:
>> [ 8767.606832] i2p: [4] namelen 2, left 32727
>> [ 8767.612030] i2p: [7] path: /
>> [ 8767.616050] i2p: [2] p1/
>> [ 8767.619676] i2p: [4] namelen 0, left 32724
>> [ 8767.624873] i2p: [7] path:
>> [ 8767.628782] i2p: [4] namelen 2, left 32723
>> [ 8767.633970] i2p: [7] path: /
>> [ 8767.637962] i2p: [2] p1/
>> [ 8767.641639] i2p: [4] namelen 0, left 32720
>> [ 8767.646838] i2p: [7] path:
>> [ 8767.650757] i2p: [4] namelen 2, left 32719
>> [ 8767.655952] i2p: [7] path: /
>> [ 8767.659940] i2p: [2] p1/
>> [ 8767.663604] i2p: [4] namelen 0, left 32716
>> [ 8767.668790] i2p: [7] path:
>> [ 8767.672696] i2p: [4] namelen 2, left 32715
>> [ 8767.677881] i2p: [7] path: /
>> [ 8767.681878] i2p: [2] p1/
>> [ 8767.685549] i2p: [4] namelen 0, left 32712
>> [ 8767.690742] i2p: [7] path:
>> [ 8767.694655] i2p: [4] namelen 2, left 32711
>> [ 8767.699843] i2p: [7] path: /
>> [ 8767.703840] i2p: [2] p1/
>> [ 8767.707520] i2p: [4] namelen 19967, left 32708
>> [ 8767.713057] i2p: [7] path:
>> [ 8767.716955] BUG: unable to handle kernel NULL pointer dereference at (null)
>> [ 8767.720932] IP: [<ffffffffa005f1d2>] read_extent_buffer+0xa2/0x220 [btrfs]
>> [ 8767.720932] PGD 77bd0067 PUD 79d35067 PMD 0
>> [ 8767.720932] Oops: 0000 [#1] SMP
>> [ 8767.720932] CPU 1
>> [ 8767.720932] Modules linked in: btrfs loop
>> [ 8767.720932]
>> [ 8767.720932] Pid: 10859, comm: btrfs-ino2path Not tainted 3.0.0-rc3-default+ #75 Intel Corporation Santa Rosa platform/Matanzas
>> [ 8767.720932] RIP: 0010:[<ffffffffa005f1d2>] [<ffffffffa005f1d2>] read_extent_buffer+0xa2/0x220 [btrfs]
>> [ 8767.720932] RSP: 0018:ffff880074acbc58 EFLAGS: 00010202
>> [ 8767.720932] RAX: 0000000000000000 RBX: 0000000000003deb RCX: 0000000000001000
>> [ 8767.720932] RDX: 00000000000001e0 RSI: 0000000000000000 RDI: 0000000000000246
>> [ 8767.720932] RBP: ffff880074acbcb8 R08: 0000000000000000 R09: 0000000000000001
>> [ 8767.720932] R10: 0000000000000000 R11: 0000000000001c04 R12: 0000000000000002
>> [ 8767.720932] R13: 0000000000000000 R14: ffff880079bac1d9 R15: ffff880074acbfd8
>> [ 8767.720932] FS: 00007f1399198740(0000) GS:ffff88007de00000(0000) knlGS:0000000000000000
>> [ 8767.720932] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
>> [ 8767.720932] CR2: 0000000000000000 CR3: 0000000074b13000 CR4: 00000000000006e0
>> [ 8767.720932] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>> [ 8767.720932] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
>> [ 8767.720932] Process btrfs-ino2path (pid: 10859, threadinfo ffff880074aca000, task ffff880077085340)
>> [ 8767.720932] Stack:
>> [ 8767.720932] ffffffffa005f282 ffffffff81b21e74 ffff88002f2ef668 0000000000000000
>> [ 8767.720932] ffff880073f94dd8 ffff880074acbfd8 ffff8800780bd000 00000000000031c5
>> [ 8767.720932] 00000000000031c5 0000000000000104 ffff880073f94dd8 ffff880074132130
>> [ 8767.720932] Call Trace:
>> [ 8767.720932] [<ffffffffa005f282>] ? read_extent_buffer+0x152/0x220 [btrfs]
>> [ 8767.720932] [<ffffffff81b21e74>] ? printk+0x68/0x6c
>> [ 8767.720932] [<ffffffffa008924d>] inode_to_path+0x10d/0x2d0 [btrfs]
>> [ 8767.720932] [<ffffffffa0089883>] paths_from_inode+0xe3/0x120 [btrfs]
>> [ 8767.720932] [<ffffffffa006de9f>] btrfs_ioctl+0xb5f/0xf80 [btrfs]
>> [ 8767.720932] [<ffffffff810d3e55>] ? trace_hardirqs_on_caller+0x155/0x1a0
>> [ 8767.720932] [<ffffffff81080e4c>] ? finish_task_switch+0x7c/0x110
>> [ 8767.720932] [<ffffffff81080e0f>] ? finish_task_switch+0x3f/0x110
>> [ 8767.720932] [<ffffffff81193578>] do_vfs_ioctl+0x98/0x570
>> [ 8767.720932] [<ffffffff811828be>] ? fget_light+0x33e/0x430
>> [ 8767.720932] [<ffffffff81b2ed3a>] ? sysret_check+0x2e/0x69
>> [ 8767.720932] [<ffffffff81193a9f>] sys_ioctl+0x4f/0x80
>> [ 8767.720932] [<ffffffff81b2ed02>] system_call_fastpath+0x16/0x1b
>> [ 8767.720932] Code: 66 0f 1f 84 00 00 00 00 00 48 8b 55 c0 48 8b 42 30 48 89 c6 b9 00 10 00 00 4c 29 e9 48 39 d9 48 0f 47 cb 41 83 87 44 e0 ff ff 01
>> [ 8767.720932] 8b 00 48 c1 e8 2d 48 89 c2 48 c1 ea 08 48 8b 3c d5 00 95 f7
>> [ 8767.720932] RIP [<ffffffffa005f1d2>] read_extent_buffer+0xa2/0x220 [btrfs]
>> [ 8767.720932] RSP <ffff880074acbc58>
>> [ 8767.720932] CR2: 0000000000000000
>> [ 8768.067994] ---[ end trace b9afe483f6289b6f ]---
>>
>> Let's see the inode 266 by hand:
>>
>> dsterba@:/mnt/sda10$ find . -inum 266
>> ./p1/d6/d7/d1f/c41
>>
>> something is wrong ...
>>
>> The file hierarchy is a leftover from fs_mark runs, directories to depth 9,
>> short names.
>
> Sounds like I missed something. There is definitely a bug in the path
> construction or lower iteration code I added.
>
> I used a fs_mark created btrfs for my tests as well, but those worked
> well. If you could share that file system (image or debug-tree output),
> that'd be great help debugging this.
I got that fs from David (thanks) and it turned out that there was a
silly mistake in iref_to_path() in my code. I used the wrong extent
buffer which worked only as long as INODE_ITEM and INODE_REF resided in
the same leaf.
That's fixed now and I'll send a new version later. This will also
contain a fix to find EXTENT_DATA_REF items that are not inline refs but
are stored separately in the tree (v2 will miss some paths once you have
more than 6 reflinks).
Thanks,
-Jan
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 2/7] scrub: added unverified_errors
2011-06-18 11:45 [PATCH v2 0/7] Btrfs: scrub: print path to corrupted files and trigger nodatasum fixup Jan Schmidt
2011-06-18 11:45 ` [PATCH v2 1/7] added helper functions to iterate backrefs Jan Schmidt
@ 2011-06-18 11:45 ` Jan Schmidt
2011-06-18 11:46 ` [PATCH v2 3/7] scrub: print paths of corrupted files Jan Schmidt
` (4 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Jan Schmidt @ 2011-06-18 11:45 UTC (permalink / raw)
To: chris.mason, linux-btrfs
In normal operation, scrub is reading data sequentially in large portions.
In case of an i/o error, we try to find the corrupted area(s) by issuing
page sized read requests. With this commit we increment the
unverified_errors counter if all of the small size requests succeed.
Userland patches carrying such conspicuous events to the administrator should
already be around.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
---
fs/btrfs/scrub.c | 37 ++++++++++++++++++++++++++-----------
1 files changed, 26 insertions(+), 11 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index d5a4108..00e4e58 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -207,18 +207,25 @@ nomem:
* recheck_error gets called for every page in the bio, even though only
* one may be bad
*/
-static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
+static int scrub_recheck_error(struct scrub_bio *sbio, int ix)
{
+ struct scrub_dev *sdev = sbio->sdev;
+ u64 sector = (sbio->physical + ix * PAGE_SIZE) >> 9;
+
if (sbio->err) {
- if (scrub_fixup_io(READ, sbio->sdev->dev->bdev,
- (sbio->physical + ix * PAGE_SIZE) >> 9,
+ if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, sector,
sbio->bio->bi_io_vec[ix].bv_page) == 0) {
if (scrub_fixup_check(sbio, ix) == 0)
- return;
+ return 0;
}
}
+ spin_lock(&sdev->stat_lock);
+ ++sdev->stat.read_errors;
+ spin_unlock(&sdev->stat_lock);
+
scrub_fixup(sbio, ix);
+ return 1;
}
static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
@@ -388,8 +395,14 @@ static void scrub_checksum(struct btrfs_work *work)
int ret;
if (sbio->err) {
+ ret = 0;
for (i = 0; i < sbio->count; ++i)
- scrub_recheck_error(sbio, i);
+ ret |= scrub_recheck_error(sbio, i);
+ if (!ret) {
+ spin_lock(&sdev->stat_lock);
+ ++sdev->stat.unverified_errors;
+ spin_unlock(&sdev->stat_lock);
+ }
sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
@@ -402,10 +415,6 @@ static void scrub_checksum(struct btrfs_work *work)
bi->bv_offset = 0;
bi->bv_len = PAGE_SIZE;
}
-
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.read_errors;
- spin_unlock(&sdev->stat_lock);
goto out;
}
for (i = 0; i < sbio->count; ++i) {
@@ -426,8 +435,14 @@ static void scrub_checksum(struct btrfs_work *work)
WARN_ON(1);
}
kunmap_atomic(buffer, KM_USER0);
- if (ret)
- scrub_recheck_error(sbio, i);
+ if (ret) {
+ ret = scrub_recheck_error(sbio, i);
+ if (!ret) {
+ spin_lock(&sdev->stat_lock);
+ ++sdev->stat.unverified_errors;
+ spin_unlock(&sdev->stat_lock);
+ }
+ }
}
out:
--
1.7.3.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 3/7] scrub: print paths of corrupted files
2011-06-18 11:45 [PATCH v2 0/7] Btrfs: scrub: print path to corrupted files and trigger nodatasum fixup Jan Schmidt
2011-06-18 11:45 ` [PATCH v2 1/7] added helper functions to iterate backrefs Jan Schmidt
2011-06-18 11:45 ` [PATCH v2 2/7] scrub: added unverified_errors Jan Schmidt
@ 2011-06-18 11:46 ` Jan Schmidt
2011-06-18 11:46 ` [PATCH v2 4/7] scrub: bugfix: mirror_num off by one Jan Schmidt
` (3 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Jan Schmidt @ 2011-06-18 11:46 UTC (permalink / raw)
To: chris.mason, linux-btrfs
While scrubbing, we may encounter various errors. Previously, only a logical
address was printed to the log. Now, all paths belonging to that
address are resolved and printed separately. That should work for hardlinks
as well as reflinks.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
---
fs/btrfs/scrub.c | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 142 insertions(+), 6 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 00e4e58..55b722c 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -23,10 +23,12 @@
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
+#include <linux/ratelimit.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
+#include "backref.h"
/*
* This is only the first step towards a full-features scrub. It reads all
@@ -106,6 +108,19 @@ struct scrub_dev {
spinlock_t stat_lock;
};
+struct scrub_warning {
+ struct btrfs_path *path;
+ u64 extent_item_size;
+ char *scratch_buf;
+ char *msg_buf;
+ const char *errstr;
+ sector_t sector;
+ u64 logical;
+ struct btrfs_device *dev;
+ int msg_bufsize;
+ int scratch_bufsize;
+};
+
static void scrub_free_csums(struct scrub_dev *sdev)
{
while (!list_empty(&sdev->csum_list)) {
@@ -201,6 +216,122 @@ nomem:
return ERR_PTR(-ENOMEM);
}
+static int scrub_print_warning_inode(u64 inum, loff_t offset, void *ctx)
+{
+ u64 isize;
+ int ret;
+ struct extent_buffer *eb;
+ struct btrfs_inode_item *inode_item;
+ struct scrub_warning *swarn = ctx;
+ struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
+ struct inode_fs_paths ipath;
+
+ ret = inode_item_info(inum, 0, fs_info->fs_root, swarn->path);
+ if (ret)
+ goto err;
+ eb = swarn->path->nodes[0];
+ inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
+ struct btrfs_inode_item);
+ btrfs_release_path(swarn->path);
+
+ isize = btrfs_inode_size(eb, inode_item);
+
+ ipath.bytes_left = swarn->msg_bufsize - 1;
+ ipath.dest = swarn->msg_buf;
+ ipath.path = swarn->path;
+ ipath.scratch_buf = swarn->scratch_buf;
+ ipath.scratch_bufsize = swarn->scratch_bufsize;
+ ipath.fs_root = fs_info->fs_root;
+ ipath.eb = eb;
+
+ ret = paths_from_inode(inum, &ipath);
+
+ if (ret >= 0) {
+ printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
+ "%s, sector %llu, inode %llu, offset %llu, "
+ "length %llu, links %u (path:%s)\n", swarn->errstr,
+ swarn->logical, swarn->dev->name,
+ (unsigned long long)swarn->sector, inum, offset,
+ min(isize - offset, (u64)PAGE_SIZE),
+ btrfs_inode_nlink(eb, inode_item), swarn->msg_buf);
+ } else {
+err:
+ printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
+ "%s, sector %llu, inode %llu, offset %llu: path "
+ "resolving failed with ret=%d\n", swarn->errstr,
+ swarn->logical, swarn->dev->name,
+ (unsigned long long)swarn->sector, inum, offset, ret);
+ }
+
+ return 0;
+}
+
+static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
+ int ix)
+{
+ struct btrfs_device *dev = sbio->sdev->dev;
+ struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
+ struct btrfs_path *path;
+ struct btrfs_key found_key;
+ struct extent_buffer *eb;
+ struct btrfs_extent_item *ei;
+ struct scrub_warning swarn;
+ u32 item_size;
+ int ret;
+ u64 ref_root;
+ u8 ref_level;
+ unsigned long ptr = 0;
+ const int bufsize = 4096;
+ loff_t extent_item_offset;
+
+ path = btrfs_alloc_path();
+
+ swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
+ swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
+ swarn.sector = (sbio->physical + ix * PAGE_SIZE) >> 9;
+ swarn.logical = sbio->logical + ix * PAGE_SIZE;
+ swarn.errstr = errstr;
+ swarn.dev = dev;
+ swarn.msg_bufsize = bufsize;
+ swarn.scratch_bufsize = bufsize;
+
+ if (!path || !swarn.scratch_buf || !swarn.msg_buf)
+ goto out;
+
+ ret = data_extent_from_logical(fs_info->extent_root,
+ swarn.logical, path, &found_key);
+ if (ret < 0)
+ goto out;
+
+ extent_item_offset = swarn.logical - found_key.objectid;
+ swarn.extent_item_size = found_key.offset;
+
+ eb = path->nodes[0];
+ ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
+ item_size = btrfs_item_size_nr(eb, path->slots[0]);
+
+ if (ret) {
+ ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
+ &ref_root, &ref_level);
+ printk(KERN_WARNING "%s at logical %llu on dev %s, "
+ "sector %llu: metadata %s (level %d) in tree %llu\n",
+ errstr, swarn.logical, dev->name,
+ (unsigned long long)swarn.sector,
+ ref_level ? "node" : "leaf", ret < 0 ? -1 : ref_level,
+ ret < 0 ? -1 : ref_root);
+ } else {
+ btrfs_release_path(path);
+ swarn.path = path;
+ iterate_extent_inodes(eb, ei, extent_item_offset, item_size,
+ scrub_print_warning_inode, &swarn);
+ }
+
+out:
+ btrfs_free_path(path);
+ kfree(swarn.scratch_buf);
+ kfree(swarn.msg_buf);
+}
+
/*
* scrub_recheck_error gets called when either verification of the page
* failed or the bio failed to read, e.g. with EIO. In the latter case,
@@ -211,6 +342,8 @@ static int scrub_recheck_error(struct scrub_bio *sbio, int ix)
{
struct scrub_dev *sdev = sbio->sdev;
u64 sector = (sbio->physical + ix * PAGE_SIZE) >> 9;
+ static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
if (sbio->err) {
if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, sector,
@@ -218,6 +351,11 @@ static int scrub_recheck_error(struct scrub_bio *sbio, int ix)
if (scrub_fixup_check(sbio, ix) == 0)
return 0;
}
+ if (__ratelimit(&_rs))
+ scrub_print_warning("i/o error", sbio, ix);
+ } else {
+ if (__ratelimit(&_rs))
+ scrub_print_warning("checksum error", sbio, ix);
}
spin_lock(&sdev->stat_lock);
@@ -332,9 +470,8 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
++sdev->stat.corrected_errors;
spin_unlock(&sdev->stat_lock);
- if (printk_ratelimit())
- printk(KERN_ERR "btrfs: fixed up at %llu\n",
- (unsigned long long)logical);
+ printk_ratelimited(KERN_ERR "btrfs: fixed up error at logical %llu\n",
+ (unsigned long long)logical);
return;
uncorrectable:
@@ -343,9 +480,8 @@ uncorrectable:
++sdev->stat.uncorrectable_errors;
spin_unlock(&sdev->stat_lock);
- if (printk_ratelimit())
- printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
- (unsigned long long)logical);
+ printk_ratelimited(KERN_ERR "btrfs: unable to fixup (regular) error at "
+ "logical %llu\n", (unsigned long long)logical);
}
static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
--
1.7.3.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 4/7] scrub: bugfix: mirror_num off by one
2011-06-18 11:45 [PATCH v2 0/7] Btrfs: scrub: print path to corrupted files and trigger nodatasum fixup Jan Schmidt
` (2 preceding siblings ...)
2011-06-18 11:46 ` [PATCH v2 3/7] scrub: print paths of corrupted files Jan Schmidt
@ 2011-06-18 11:46 ` Jan Schmidt
2011-06-18 11:46 ` [PATCH v2 5/7] add mirror_num to extent_read_full_page Jan Schmidt
` (2 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Jan Schmidt @ 2011-06-18 11:46 UTC (permalink / raw)
To: chris.mason, linux-btrfs
Fix the mirror_num determination in scrub_stripe. The rest of the scrub code
did not use mirror_num for anything important and that error went unnoticed.
The nodatasum fixup patch of this set depends on a correct mirror_num.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
---
fs/btrfs/scrub.c | 10 +++++-----
1 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 55b722c..519bf4b 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -915,21 +915,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
offset = map->stripe_len * num;
increment = map->stripe_len * map->num_stripes;
- mirror_num = 0;
+ mirror_num = 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
int factor = map->num_stripes / map->sub_stripes;
offset = map->stripe_len * (num / map->sub_stripes);
increment = map->stripe_len * factor;
- mirror_num = num % map->sub_stripes;
+ mirror_num = num % map->sub_stripes + 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
increment = map->stripe_len;
- mirror_num = num % map->num_stripes;
+ mirror_num = num % map->num_stripes + 1;
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
increment = map->stripe_len;
- mirror_num = num % map->num_stripes;
+ mirror_num = num % map->num_stripes + 1;
} else {
increment = map->stripe_len;
- mirror_num = 0;
+ mirror_num = 1;
}
path = btrfs_alloc_path();
--
1.7.3.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 5/7] add mirror_num to extent_read_full_page
2011-06-18 11:45 [PATCH v2 0/7] Btrfs: scrub: print path to corrupted files and trigger nodatasum fixup Jan Schmidt
` (3 preceding siblings ...)
2011-06-18 11:46 ` [PATCH v2 4/7] scrub: bugfix: mirror_num off by one Jan Schmidt
@ 2011-06-18 11:46 ` Jan Schmidt
2011-06-18 11:46 ` [PATCH v2 6/7] scrub: use int for mirror_num, not u64 Jan Schmidt
2011-06-18 11:46 ` [PATCH v2 7/7] scrub: add fixup code for errors on nodatasum files Jan Schmidt
6 siblings, 0 replies; 11+ messages in thread
From: Jan Schmidt @ 2011-06-18 11:46 UTC (permalink / raw)
To: chris.mason, linux-btrfs
Currently, extent_read_full_page always assumes we are trying to read mirror
0, which generally is the best we can do. To add flexibility, pass it as a
parameter. This will be needed by scrub fixup code.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
---
fs/btrfs/disk-io.c | 2 +-
fs/btrfs/extent_io.c | 6 +++---
fs/btrfs/extent_io.h | 2 +-
fs/btrfs/inode.c | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a203d36..daf1e47 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -874,7 +874,7 @@ static int btree_readpage(struct file *file, struct page *page)
{
struct extent_io_tree *tree;
tree = &BTRFS_I(page->mapping->host)->io_tree;
- return extent_read_full_page(tree, page, btree_get_extent);
+ return extent_read_full_page(tree, page, btree_get_extent, 0);
}
static int btree_releasepage(struct page *page, gfp_t gfp_flags)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b181a94..b78f665 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2111,16 +2111,16 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
}
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent)
+ get_extent_t *get_extent, int mirror_num)
{
struct bio *bio = NULL;
unsigned long bio_flags = 0;
int ret;
- ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
+ ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
&bio_flags);
if (bio)
- ret = submit_one_bio(READ, bio, 0, bio_flags);
+ ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
return ret;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4e8445a..2fef77f 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -177,7 +177,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent);
+ get_extent_t *get_extent, int mirror_num);
int __init extent_io_init(void);
void extent_io_exit(void);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 113913a..938fc10 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6238,7 +6238,7 @@ int btrfs_readpage(struct file *file, struct page *page)
{
struct extent_io_tree *tree;
tree = &BTRFS_I(page->mapping->host)->io_tree;
- return extent_read_full_page(tree, page, btrfs_get_extent);
+ return extent_read_full_page(tree, page, btrfs_get_extent, 0);
}
static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
--
1.7.3.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 6/7] scrub: use int for mirror_num, not u64
2011-06-18 11:45 [PATCH v2 0/7] Btrfs: scrub: print path to corrupted files and trigger nodatasum fixup Jan Schmidt
` (4 preceding siblings ...)
2011-06-18 11:46 ` [PATCH v2 5/7] add mirror_num to extent_read_full_page Jan Schmidt
@ 2011-06-18 11:46 ` Jan Schmidt
2011-06-18 11:46 ` [PATCH v2 7/7] scrub: add fixup code for errors on nodatasum files Jan Schmidt
6 siblings, 0 replies; 11+ messages in thread
From: Jan Schmidt @ 2011-06-18 11:46 UTC (permalink / raw)
To: chris.mason, linux-btrfs
the rest of the code uses int mirror_num, and so should scrub
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
---
fs/btrfs/scrub.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 519bf4b..de918ff 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -71,7 +71,7 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix);
struct scrub_page {
u64 flags; /* extent flags */
u64 generation;
- u64 mirror_num;
+ int mirror_num;
int have_csum;
u8 csum[BTRFS_CSUM_SIZE];
};
@@ -761,7 +761,7 @@ nomem:
}
static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
- u64 physical, u64 flags, u64 gen, u64 mirror_num,
+ u64 physical, u64 flags, u64 gen, int mirror_num,
u8 *csum, int force)
{
struct scrub_bio *sbio;
@@ -858,7 +858,7 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
/* scrub extent tries to collect up to 64 kB for each bio */
static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
- u64 physical, u64 flags, u64 gen, u64 mirror_num)
+ u64 physical, u64 flags, u64 gen, int mirror_num)
{
int ret;
u8 csum[BTRFS_CSUM_SIZE];
@@ -904,7 +904,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
u64 physical;
u64 logical;
u64 generation;
- u64 mirror_num;
+ int mirror_num;
u64 increment = map->stripe_len;
u64 offset;
--
1.7.3.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 7/7] scrub: add fixup code for errors on nodatasum files
2011-06-18 11:45 [PATCH v2 0/7] Btrfs: scrub: print path to corrupted files and trigger nodatasum fixup Jan Schmidt
` (5 preceding siblings ...)
2011-06-18 11:46 ` [PATCH v2 6/7] scrub: use int for mirror_num, not u64 Jan Schmidt
@ 2011-06-18 11:46 ` Jan Schmidt
6 siblings, 0 replies; 11+ messages in thread
From: Jan Schmidt @ 2011-06-18 11:46 UTC (permalink / raw)
To: chris.mason, linux-btrfs
This removes a FIXME comment and introduces the first part of nodatasum
fixup: It gets the corresponding inode for a logical address and triggers a
regular readpage for the corrupted sector.
Once we have on-the-fly error correction our error will be automatically
corrected. The correction code is expected to clear the newly introduced
EXTENT_DAMAGED flag, making scrub report that error as "corrected" instead
of "uncorrectable" eventually.
Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
---
fs/btrfs/extent_io.h | 1 +
fs/btrfs/scrub.c | 185 +++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 178 insertions(+), 8 deletions(-)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 2fef77f..906ea42 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -17,6 +17,7 @@
#define EXTENT_NODATASUM (1 << 10)
#define EXTENT_DO_ACCOUNTING (1 << 11)
#define EXTENT_FIRST_DELALLOC (1 << 12)
+#define EXTENT_DAMAGED (1 << 13)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index de918ff..211606f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -28,6 +28,7 @@
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
+#include "transaction.h"
#include "backref.h"
/*
@@ -95,6 +96,7 @@ struct scrub_dev {
int first_free;
int curr;
atomic_t in_flight;
+ atomic_t fixup_cnt;
spinlock_t list_lock;
wait_queue_head_t list_wait;
u16 csum_size;
@@ -108,6 +110,14 @@ struct scrub_dev {
spinlock_t stat_lock;
};
+struct scrub_fixup_nodatasum {
+ struct scrub_dev *sdev;
+ u64 logical;
+ struct btrfs_root *root;
+ struct btrfs_work work;
+ int mirror_num;
+};
+
struct scrub_warning {
struct btrfs_path *path;
u64 extent_item_size;
@@ -196,12 +206,13 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
if (i != SCRUB_BIOS_PER_DEV-1)
sdev->bios[i]->next_free = i + 1;
- else
+ else
sdev->bios[i]->next_free = -1;
}
sdev->first_free = 0;
sdev->curr = -1;
atomic_set(&sdev->in_flight, 0);
+ atomic_set(&sdev->fixup_cnt, 0);
atomic_set(&sdev->cancel_req, 0);
sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
INIT_LIST_HEAD(&sdev->csum_list);
@@ -216,7 +227,7 @@ nomem:
return ERR_PTR(-ENOMEM);
}
-static int scrub_print_warning_inode(u64 inum, loff_t offset, void *ctx)
+static int scrub_print_warning_inode(u64 inum, u64 offset, void *ctx)
{
u64 isize;
int ret;
@@ -282,7 +293,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
u8 ref_level;
unsigned long ptr = 0;
const int bufsize = 4096;
- loff_t extent_item_offset;
+ u64 extent_item_offset;
path = btrfs_alloc_path();
@@ -332,6 +343,144 @@ out:
kfree(swarn.msg_buf);
}
+static int scrub_fixup_readpage(u64 inum, u64 offset, void *ctx)
+{
+ struct page *page;
+ unsigned long index;
+ struct scrub_fixup_nodatasum *fixup = ctx;
+ int ret;
+ int corrected;
+ struct btrfs_key key;
+ struct inode *inode;
+ u64 end = offset + PAGE_SIZE - 1;
+
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.objectid = inum;
+ key.offset = 0;
+ inode = btrfs_iget(fixup->root->fs_info->sb, &key,
+ fixup->root->fs_info->fs_root, NULL);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ ret = set_extent_bit(&BTRFS_I(inode)->io_tree, offset, end,
+ EXTENT_DAMAGED, 0, NULL, NULL, GFP_NOFS);
+
+ /* set_extent_bit should either succeed or give proper error */
+ WARN_ON(ret > 0);
+ if (ret)
+ return ret < 0 ? ret : -EFAULT;
+
+ index = offset >> PAGE_CACHE_SHIFT;
+
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ if (!page)
+ return -ENOMEM;
+
+ ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
+ btrfs_get_extent, fixup->mirror_num);
+ wait_on_page_locked(page);
+ corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset, end,
+ EXTENT_DAMAGED, 0, NULL);
+
+ if (corrected)
+ WARN_ON(!PageUptodate(page));
+ else
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, end,
+ EXTENT_DAMAGED, 0, 0, NULL, GFP_NOFS);
+
+ put_page(page);
+ iput(inode);
+
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0 && corrected) {
+ /*
+ * we only need to call readpage for one of the inodes belonging
+ * to this extent. so make iterate_extent_inodes stop
+ */
+ return 1;
+ }
+
+ return -EIO;
+}
+
+static void scrub_fixup_nodatasum(struct btrfs_work *work)
+{
+ int ret;
+ struct scrub_fixup_nodatasum *fixup;
+ struct scrub_dev *sdev;
+ struct btrfs_trans_handle *trans = NULL;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_path *path;
+ int uncorrectable = 0;
+
+ fixup = container_of(work, struct scrub_fixup_nodatasum, work);
+ sdev = fixup->sdev;
+ fs_info = fixup->root->fs_info;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ spin_lock(&sdev->stat_lock);
+ ++sdev->stat.malloc_errors;
+ spin_unlock(&sdev->stat_lock);
+ uncorrectable = 1;
+ goto out;
+ }
+
+ trans = btrfs_join_transaction(fixup->root);
+ if (IS_ERR(trans)) {
+ uncorrectable = 1;
+ goto out;
+ }
+
+ /*
+ * the idea is to trigger a regular read through the standard path. we
+ * read a page from the (failed) logical address by specifying the
+ * corresponding copynum of the failed sector. thus, that readpage is
+ * expected to fail.
+ * that is the point where on-the-fly error correction will kick in
+ * (once it's finished) and rewrite the failed sector if a good copy
+ * can be found.
+ */
+ ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
+ path, scrub_fixup_readpage,
+ fixup);
+ if (ret < 0) {
+ uncorrectable = 1;
+ goto out;
+ }
+ WARN_ON(ret != 1);
+
+ spin_lock(&sdev->stat_lock);
+ ++sdev->stat.corrected_errors;
+ spin_unlock(&sdev->stat_lock);
+
+out:
+ if (trans && !IS_ERR(trans))
+ btrfs_end_transaction(trans, fixup->root);
+ if (uncorrectable) {
+ spin_lock(&sdev->stat_lock);
+ ++sdev->stat.uncorrectable_errors;
+ spin_unlock(&sdev->stat_lock);
+ printk_ratelimited(KERN_ERR "btrfs: unable to fixup "
+ "(nodatasum) error at logical %llu\n",
+ fixup->logical);
+ }
+
+ btrfs_free_path(path);
+ kfree(fixup);
+
+ /* see caller why we're pretending to be paused in the scrub counters */
+ mutex_lock(&fs_info->scrub_lock);
+ atomic_dec(&fs_info->scrubs_running);
+ atomic_dec(&fs_info->scrubs_paused);
+ mutex_unlock(&fs_info->scrub_lock);
+ atomic_dec(&sdev->fixup_cnt);
+ wake_up(&fs_info->scrub_pause_wait);
+ wake_up(&sdev->list_wait);
+}
+
/*
* scrub_recheck_error gets called when either verification of the page
* failed or the bio failed to read, e.g. with EIO. In the latter case,
@@ -402,6 +551,7 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct btrfs_multi_bio *multi = NULL;
+ struct scrub_fixup_nodatasum *fixup;
u64 logical = sbio->logical + ix * PAGE_SIZE;
u64 length;
int i;
@@ -410,12 +560,30 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
(sbio->spag[ix].have_csum == 0)) {
+ fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
+ if (!fixup)
+ goto uncorrectable;
+ fixup->sdev = sdev;
+ fixup->logical = logical;
+ fixup->root = fs_info->extent_root;
+ fixup->mirror_num = sbio->spag[ix].mirror_num;
/*
- * nodatasum, don't try to fix anything
- * FIXME: we can do better, open the inode and trigger a
- * writeback
+ * increment scrubs_running to prevent cancel requests from
+ * completing as long as a fixup worker is running. we must also
+ * increment scrubs_paused to prevent deadlocking on pause
+ * requests used for transaction commits (as the worker uses a
+ * transaction context). it is safe to regard the fixup worker
+ * as paused for all matters practical. effectively, we only
+ * avoid cancellation requests from completing.
*/
- goto uncorrectable;
+ mutex_lock(&fs_info->scrub_lock);
+ atomic_inc(&fs_info->scrubs_running);
+ atomic_inc(&fs_info->scrubs_paused);
+ mutex_unlock(&fs_info->scrub_lock);
+ atomic_inc(&sdev->fixup_cnt);
+ fixup->work.func = scrub_fixup_nodatasum;
+ btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work);
+ return;
}
length = PAGE_SIZE;
@@ -1405,7 +1573,8 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
if (!ret)
ret = scrub_enumerate_chunks(sdev, start, end);
- wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
+ wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0 &&
+ atomic_read(&sdev->fixup_cnt) == 0);
atomic_dec(&fs_info->scrubs_running);
wake_up(&fs_info->scrub_pause_wait);
--
1.7.3.4
^ permalink raw reply related [flat|nested] 11+ messages in thread