From: jeffm@suse.com
To: linux-btrfs@vger.kernel.org
Cc: Jeff Mahoney <jeffm@suse.com>
Subject: [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree
Date: Tue, 25 Jul 2017 16:51:32 -0400 [thread overview]
Message-ID: <20170725205138.28376-1-jeffm@suse.com> (raw)
From: Jeff Mahoney <jeffm@suse.com>
For the pathlogical case, like xfstests generic/297 that creates a
large file consisting of one, repeating reflinked extent, fsck can
take hours. The root cause is that calling find_data_backref while
iterating the extent records is an O(n^2) algorithm. For my
example test run, n was 2*2^20 and fsck was at 8 hours and counting.
This patch supplements the list with an rbtree and drops the runtime
of that testcase to about 20 seconds.
A previous version of this patch introduced a regression that would
have corrupted file systems during repair. It was traced to the
compare algorithm honoring ->bytes regardless of whether the
reference had been found and a failure to reinsert nodes after
the target reference was found.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
cmds-check.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 180 insertions(+), 4 deletions(-)
diff --git a/cmds-check.c b/cmds-check.c
index 23adc03..6a04553 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -87,6 +87,7 @@ static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
struct extent_backref {
struct list_head list;
+ struct rb_node node;
unsigned int is_data:1;
unsigned int found_extent_tree:1;
unsigned int full_backref:1;
@@ -99,6 +100,11 @@ static inline struct extent_backref* to_extent_backref(struct list_head *entry)
return list_entry(entry, struct extent_backref, list);
}
+static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
+{
+ return rb_entry(node, struct extent_backref, node);
+}
+
struct data_backref {
struct extent_backref node;
union {
@@ -137,6 +143,51 @@ static inline struct data_backref* to_data_backref(struct extent_backref *back)
return container_of(back, struct data_backref, node);
}
+static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
+{
+ struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+ struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+ struct data_backref *back1 = to_data_backref(ext1);
+ struct data_backref *back2 = to_data_backref(ext2);
+
+ WARN_ON(!ext1->is_data);
+ WARN_ON(!ext2->is_data);
+
+ /* parent and root are a union, so this covers both */
+ if (back1->parent > back2->parent)
+ return 1;
+ if (back1->parent < back2->parent)
+ return -1;
+
+ /* This is a full backref and the parents match. */
+ if (back1->node.full_backref)
+ return 0;
+
+ if (back1->owner > back2->owner)
+ return 1;
+ if (back1->owner < back2->owner)
+ return -1;
+
+ if (back1->offset > back2->offset)
+ return 1;
+ if (back1->offset < back2->offset)
+ return -1;
+
+ if (back1->found_ref && back2->found_ref) {
+ if (back1->disk_bytenr > back2->disk_bytenr)
+ return 1;
+ if (back1->disk_bytenr < back2->disk_bytenr)
+ return -1;
+
+ if (back1->bytes > back2->bytes)
+ return 1;
+ if (back1->bytes < back2->bytes)
+ return -1;
+ }
+
+ return 0;
+}
+
/*
* Much like data_backref, just removed the undetermined members
* and change it to use list_head.
@@ -165,12 +216,54 @@ static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
return container_of(back, struct tree_backref, node);
}
+static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
+{
+ struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+ struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+ struct tree_backref *back1 = to_tree_backref(ext1);
+ struct tree_backref *back2 = to_tree_backref(ext2);
+
+ WARN_ON(ext1->is_data);
+ WARN_ON(ext2->is_data);
+
+ /* parent and root are a union, so this covers both */
+ if (back1->parent > back2->parent)
+ return 1;
+ if (back1->parent < back2->parent)
+ return -1;
+
+ return 0;
+}
+
+static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
+{
+ struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+ struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+
+ if (ext1->is_data > ext2->is_data)
+ return 1;
+
+ if (ext1->is_data < ext2->is_data)
+ return -1;
+
+ if (ext1->full_backref > ext2->full_backref)
+ return 1;
+ if (ext1->full_backref < ext2->full_backref)
+ return -1;
+
+ if (ext1->is_data)
+ return compare_data_backref(node1, node2);
+ else
+ return compare_tree_backref(node1, node2);
+}
+
/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };
struct extent_record {
struct list_head backrefs;
struct list_head dups;
+ struct rb_root backref_tree;
struct list_head list;
struct cache_extent cache;
struct btrfs_disk_key parent_key;
@@ -5051,6 +5144,65 @@ out:
return ret;
}
+static struct tree_backref *find_tree_backref(struct extent_record *rec,
+ u64 parent, u64 root)
+{
+ struct rb_node *node;
+ struct tree_backref *back = NULL;
+ struct tree_backref match = {
+ .node = {
+ .is_data = 0,
+ },
+ };
+
+ if (parent) {
+ match.parent = parent;
+ match.node.full_backref = 1;
+ } else {
+ match.root = root;
+ }
+
+ node = rb_search(&rec->backref_tree, &match.node.node,
+ (rb_compare_keys)compare_extent_backref, NULL);
+ if (node)
+ back = to_tree_backref(rb_node_to_extent_backref(node));
+
+ return back;
+}
+
+static struct data_backref *find_data_backref(struct extent_record *rec,
+ u64 parent, u64 root,
+ u64 owner, u64 offset,
+ int found_ref,
+ u64 disk_bytenr, u64 bytes)
+{
+ struct rb_node *node;
+ struct data_backref *back = NULL;
+ struct data_backref match = {
+ .node = {
+ .is_data = 1,
+ },
+ .owner = owner,
+ .offset = offset,
+ .bytes = bytes,
+ .found_ref = found_ref,
+ .disk_bytenr = disk_bytenr,
+ };
+
+ if (parent) {
+ match.parent = parent;
+ match.node.full_backref = 1;
+ } else {
+ match.root = root;
+ }
+
+ node = rb_search(&rec->backref_tree, &match.node.node,
+ (rb_compare_keys)compare_extent_backref, NULL);
+ if (node)
+ back = to_data_backref(rb_node_to_extent_backref(node));
+
+ return back;
+}
/*
* Iterate all item on the tree and call check_inode_item() to check.
*
@@ -5316,7 +5468,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
goto out;
if (back->is_data) {
dback = to_data_backref(back);
- fprintf(stderr, "Backref %llu %s %llu"
+ fprintf(stderr, "Data backref %llu %s %llu"
" owner %llu offset %llu num_refs %lu"
" not found in extent tree\n",
(unsigned long long)rec->start,
@@ -5330,7 +5482,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
(unsigned long)dback->num_refs);
} else {
tback = to_tree_backref(back);
- fprintf(stderr, "Backref %llu parent %llu"
+ fprintf(stderr, "Tree backref %llu parent %llu"
" root %llu not found in extent tree\n",
(unsigned long long)rec->start,
(unsigned long long)tback->parent,
@@ -5888,6 +6040,7 @@ static int check_block(struct btrfs_root *root,
return ret;
}
+#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
u64 parent, u64 root)
{
@@ -5915,6 +6068,7 @@ static struct tree_backref *find_tree_backref(struct extent_record *rec,
}
return NULL;
}
+#endif
static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
u64 parent, u64 root)
@@ -5936,6 +6090,7 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
return ref;
}
+#if 0
static struct data_backref *find_data_backref(struct extent_record *rec,
u64 parent, u64 root,
u64 owner, u64 offset,
@@ -5972,6 +6127,7 @@ static struct data_backref *find_data_backref(struct extent_record *rec,
}
return NULL;
}
+#endif
static struct data_backref *alloc_data_backref(struct extent_record *rec,
u64 parent, u64 root,
@@ -6088,6 +6244,7 @@ static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
INIT_LIST_HEAD(&rec->backrefs);
INIT_LIST_HEAD(&rec->dups);
INIT_LIST_HEAD(&rec->list);
+ rec->backref_tree = RB_ROOT;
memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
rec->cache.start = tmpl->start;
rec->cache.size = tmpl->nr;
@@ -6220,6 +6377,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
struct tree_backref *back;
struct cache_extent *cache;
int ret;
+ int insert = false;
cache = lookup_cache_extent(extent_cache, bytenr, 1);
if (!cache) {
@@ -6254,6 +6412,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
back = alloc_tree_backref(rec, parent, root);
if (!back)
return -ENOMEM;
+ insert = true;
}
if (found_ref) {
@@ -6275,6 +6434,9 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
}
back->node.found_extent_tree = 1;
}
+ if (insert)
+ WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
+ compare_extent_backref));
check_extent_type(rec);
maybe_free_extent_rec(extent_cache, rec);
return 0;
@@ -6288,6 +6450,7 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
struct data_backref *back;
struct cache_extent *cache;
int ret;
+ int insert = false;
cache = lookup_cache_extent(extent_cache, bytenr, 1);
if (!cache) {
@@ -6327,6 +6490,7 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
back = alloc_data_backref(rec, parent, root, owner, offset,
max_size);
BUG_ON(!back);
+ insert = true;
}
if (found_ref) {
@@ -6335,8 +6499,16 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
BUG_ON(back->bytes != max_size);
back->node.found_ref = 1;
back->found_ref += 1;
- back->bytes = max_size;
- back->disk_bytenr = bytenr;
+ if (back->bytes != max_size || back->disk_bytenr != bytenr) {
+ back->bytes = max_size;
+ back->disk_bytenr = bytenr;
+
+ /* Need to reinsert if not already in the tree */
+ if (!insert) {
+ rb_erase(&back->node.node, &rec->backref_tree);
+ insert = true;
+ }
+ }
rec->refs += 1;
rec->content_checked = 1;
rec->owner_ref_checked = 1;
@@ -6355,6 +6527,10 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
back->num_refs = num_refs;
back->node.found_extent_tree = 1;
}
+ if (insert)
+ WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
+ compare_extent_backref));
+
maybe_free_extent_rec(extent_cache, rec);
return 0;
}
--
2.11.0
next reply other threads:[~2017-07-25 20:51 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-25 20:51 jeffm [this message]
2017-07-25 20:51 ` [PATCH 2/7] btrfs-progs: check: switch to iterating over the backref_tree jeffm
2017-07-25 20:51 ` [PATCH 3/7] btrfs-progs: extent-cache: actually cache extent buffers jeffm
2017-07-26 7:00 ` Nikolay Borisov
2017-07-26 13:21 ` Jeff Mahoney
2017-08-22 15:44 ` David Sterba
2017-07-25 20:51 ` [PATCH 4/7] btrfs-progs: backref: push state tracking into a helper structure jeffm
2017-07-25 20:51 ` [PATCH 5/7] btrfs-progs: backref: add list_first_pref helper jeffm
2017-07-26 7:08 ` Nikolay Borisov
2017-07-26 13:22 ` Jeff Mahoney
2017-07-26 13:25 ` Jeff Mahoney
2017-07-25 20:51 ` [PATCH 6/7] btrfs-progs: backref: use separate list for missing keys jeffm
2017-07-25 20:51 ` [PATCH 7/7] btrfs-progs: backref: use separate list for indirect refs jeffm
2017-09-29 17:21 ` [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree David Sterba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170725205138.28376-1-jeffm@suse.com \
--to=jeffm@suse.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).