From: Liu Bo <bo.li.liu@oracle.com>
To: Edmund Nadolski <enadolski@suse.com>
Cc: dsterba@suse.cz, jeffm@suse.com, linux-btrfs@vger.kernel.org,
lufq.fnst@cn.fujitsu.com
Subject: Re: [PATCH v3 08/13] btrfs: convert prelimary reference tracking to use rbtrees
Date: Fri, 28 Jul 2017 12:25:02 -0600 [thread overview]
Message-ID: <20170728182502.GC15815@localhost.localdomain> (raw)
In-Reply-To: <20170712222011.26705-2-enadolski@suse.com>
On Wed, Jul 12, 2017 at 04:20:06PM -0600, Edmund Nadolski wrote:
> It's been known for a while that the use of multiple lists
> that are periodically merged was an algorithmic problem within
> btrfs. There are several workloads that don't complete in any
> reasonable amount of time (e.g. btrfs/130) and others that cause
> soft lockups.
>
> The solution is to use a set of rbtrees that do insertion merging
> for both indirect and direct refs, with the former converting
> refs into the latter. The result is a btrfs/130 workload that
> used to take several hours now takes about half of that. This
> runtime still isn't acceptable and a future patch will address that
> by moving the rbtrees higher in the stack so the lookups can be
> shared across multiple calls to find_parent_nodes.
>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Thanks,
-liubo
> Signed-off-by: Edmund Nadolski <enadolski@suse.com>
> Signed-off-by: Jeff Mahoney <jeffm@suse.com>
> ---
> fs/btrfs/backref.c | 441 ++++++++++++++++++++++++++++++++++-------------------
> 1 file changed, 284 insertions(+), 157 deletions(-)
>
> diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
> index 6cac5ab..1edb107 100644
> --- a/fs/btrfs/backref.c
> +++ b/fs/btrfs/backref.c
> @@ -26,11 +26,6 @@
> #include "delayed-ref.h"
> #include "locking.h"
>
> -enum merge_mode {
> - MERGE_IDENTICAL_KEYS = 1,
> - MERGE_IDENTICAL_PARENTS,
> -};
> -
> /* Just an arbitrary number so we can be sure this happened */
> #define BACKREF_FOUND_SHARED 6
>
> @@ -129,7 +124,7 @@ static int find_extent_in_eb(const struct extent_buffer *eb,
> * this structure records all encountered refs on the way up to the root
> */
> struct prelim_ref {
> - struct list_head list;
> + struct rb_node rbnode;
> u64 root_id;
> struct btrfs_key key_for_search;
> int level;
> @@ -139,6 +134,18 @@ struct prelim_ref {
> u64 wanted_disk_byte;
> };
>
> +struct preftree {
> + struct rb_root root;
> +};
> +
> +#define PREFTREE_INIT { .root = RB_ROOT }
> +
> +struct preftrees {
> + struct preftree direct; /* BTRFS_SHARED_[DATA|BLOCK]_REF_KEY */
> + struct preftree indirect; /* BTRFS_[TREE_BLOCK|EXTENT_DATA]_REF_KEY */
> + struct preftree indirect_missing_keys;
> +};
> +
> static struct kmem_cache *btrfs_prelim_ref_cache;
>
> int __init btrfs_prelim_ref_init(void)
> @@ -158,6 +165,108 @@ void btrfs_prelim_ref_exit(void)
> kmem_cache_destroy(btrfs_prelim_ref_cache);
> }
>
> +static void free_pref(struct prelim_ref *ref)
> +{
> + kmem_cache_free(btrfs_prelim_ref_cache, ref);
> +}
> +
> +/*
> + * Return 0 when both refs are for the same block (and can be merged).
> + * A -1 return indicates ref1 is a 'lower' block than ref2, while 1
> + * indicates a 'higher' block.
> + */
> +static int prelim_ref_compare(struct prelim_ref *ref1,
> + struct prelim_ref *ref2)
> +{
> + if (ref1->level < ref2->level)
> + return -1;
> + if (ref1->level > ref2->level)
> + return 1;
> + if (ref1->root_id < ref2->root_id)
> + return -1;
> + if (ref1->root_id > ref2->root_id)
> + return 1;
> + if (ref1->key_for_search.type < ref2->key_for_search.type)
> + return -1;
> + if (ref1->key_for_search.type > ref2->key_for_search.type)
> + return 1;
> + if (ref1->key_for_search.objectid < ref2->key_for_search.objectid)
> + return -1;
> + if (ref1->key_for_search.objectid > ref2->key_for_search.objectid)
> + return 1;
> + if (ref1->key_for_search.offset < ref2->key_for_search.offset)
> + return -1;
> + if (ref1->key_for_search.offset > ref2->key_for_search.offset)
> + return 1;
> + if (ref1->parent < ref2->parent)
> + return -1;
> + if (ref1->parent > ref2->parent)
> + return 1;
> +
> + return 0;
> +}
> +
> +/*
> + * Add @newref to the @root rbtree, merging identical refs.
> + *
> + * Callers should assumed that newref has been freed after calling.
> + */
> +static void prelim_ref_insert(struct preftree *preftree,
> + struct prelim_ref *newref)
> +{
> + struct rb_root *root;
> + struct rb_node **p;
> + struct rb_node *parent = NULL;
> + struct prelim_ref *ref;
> + int result;
> +
> + root = &preftree->root;
> + p = &root->rb_node;
> +
> + while (*p) {
> + parent = *p;
> + ref = rb_entry(parent, struct prelim_ref, rbnode);
> + result = prelim_ref_compare(ref, newref);
> + if (result < 0) {
> + p = &(*p)->rb_left;
> + } else if (result > 0) {
> + p = &(*p)->rb_right;
> + } else {
> + /* Identical refs, merge them and free @newref */
> + struct extent_inode_elem *eie = ref->inode_list;
> +
> + while (eie && eie->next)
> + eie = eie->next;
> +
> + if (!eie)
> + ref->inode_list = newref->inode_list;
> + else
> + eie->next = newref->inode_list;
> + ref->count += newref->count;
> + free_pref(newref);
> + return;
> + }
> + }
> +
> + rb_link_node(&newref->rbnode, parent, p);
> + rb_insert_color(&newref->rbnode, root);
> +}
> +
> +/*
> + * Release the entire tree. We don't care about internal consistency so
> + * just free everything and then reset the tree root.
> + */
> +static void prelim_release(struct preftree *preftree)
> +{
> + struct prelim_ref *ref, *next_ref;
> +
> + rbtree_postorder_for_each_entry_safe(ref, next_ref, &preftree->root,
> + rbnode)
> + free_pref(ref);
> +
> + preftree->root = RB_ROOT;
> +}
> +
> /*
> * the rules for all callers of this function are:
> * - obtaining the parent is the goal
> @@ -196,7 +305,7 @@ void btrfs_prelim_ref_exit(void)
> * additional information that's available but not required to find the parent
> * block might help in merging entries to gain some speed.
> */
> -static int add_prelim_ref(struct list_head *head, u64 root_id,
> +static int add_prelim_ref(struct preftree *preftree, u64 root_id,
> const struct btrfs_key *key, int level, u64 parent,
> u64 wanted_disk_byte, int count, gfp_t gfp_mask)
> {
> @@ -243,11 +352,31 @@ static int add_prelim_ref(struct list_head *head, u64 root_id,
> ref->count = count;
> ref->parent = parent;
> ref->wanted_disk_byte = wanted_disk_byte;
> - list_add_tail(&ref->list, head);
> + prelim_ref_insert(preftree, ref);
>
> return 0;
> }
>
> +/* direct refs use root == 0, key == NULL */
> +static int add_direct_ref(struct preftrees *preftrees, int level, u64 parent,
> + u64 wanted_disk_byte, int count, gfp_t gfp_mask)
> +{
> + return add_prelim_ref(&preftrees->direct, 0, NULL, level, parent,
> + wanted_disk_byte, count, gfp_mask);
> +}
> +
> +/* indirect refs use parent == 0 */
> +static int add_indirect_ref(struct preftrees *preftrees, u64 root_id,
> + const struct btrfs_key *key, int level,
> + u64 wanted_disk_byte, int count, gfp_t gfp_mask)
> +{
> + struct preftree *tree = &preftrees->indirect;
> + if (!key)
> + tree = &preftrees->indirect_missing_keys;
> + return add_prelim_ref(tree, root_id, key, level, 0,
> + wanted_disk_byte, count, gfp_mask);
> +}
> +
> static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
> struct ulist *parents, struct prelim_ref *ref,
> int level, u64 time_seq, const u64 *extent_item_pos,
> @@ -429,38 +558,63 @@ unode_aux_to_inode_list(struct ulist_node *node)
> }
>
> /*
> - * resolve all indirect backrefs from the list
> + * We maintain three seperate rbtrees: one for direct refs, one for
> + * indirect refs which have a key, and one for indirect refs which do not
> + * have a key. Each tree does merge on insertion.
> + *
> + * Once all of the references are located, we iterate over the tree of
> + * indirect refs with missing keys. An appropriate key is located and
> + * the ref is moved onto the tree for indirect refs. After all missing
> + * keys are thus located, we iterate over the indirect ref tree, resolve
> + * each reference, and then insert the resolved reference onto the
> + * direct tree (merging there too).
> + *
> + * New backrefs (i.e., for parent nodes) are added to the appropriate
> + * rbtree as they are encountered. The new backrefs are subsequently
> + * resolved as above.
> */
> static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
> struct btrfs_path *path, u64 time_seq,
> - struct list_head *head,
> + struct preftrees *preftrees,
> const u64 *extent_item_pos, u64 total_refs,
> u64 root_objectid)
> {
> int err;
> int ret = 0;
> - struct prelim_ref *ref;
> - struct prelim_ref *ref_safe;
> - struct prelim_ref *new_ref;
> struct ulist *parents;
> struct ulist_node *node;
> struct ulist_iterator uiter;
> + struct rb_node *rnode;
>
> parents = ulist_alloc(GFP_NOFS);
> if (!parents)
> return -ENOMEM;
>
> /*
> - * _safe allows us to insert directly after the current item without
> - * iterating over the newly inserted items.
> - * we're also allowed to re-assign ref during iteration.
> + * We could trade memory usage for performance here by iterating
> + * the tree, allocating new refs for each insertion, and then
> + * freeing the entire indirect tree when we're done. In some test
> + * cases, the tree can grow quite large (~200k objects).
> */
> - list_for_each_entry_safe(ref, ref_safe, head, list) {
> - if (ref->parent) /* already direct */
> - continue;
> - if (ref->count == 0)
> + while ((rnode = rb_first(&preftrees->indirect.root))) {
> + struct prelim_ref *ref;
> +
> + ref = rb_entry(rnode, struct prelim_ref, rbnode);
> + if (WARN(ref->parent,
> + "BUG: direct ref found in indirect tree")) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + rb_erase(&ref->rbnode, &preftrees->indirect.root);
> +
> + if (ref->count == 0) {
> + free_pref(ref);
> continue;
> + }
> +
> if (root_objectid && ref->root_id != root_objectid) {
> + free_pref(ref);
> ret = BACKREF_FOUND_SHARED;
> goto out;
> }
> @@ -472,8 +626,10 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
> * and return directly.
> */
> if (err == -ENOENT) {
> + prelim_ref_insert(&preftrees->direct, ref);
> continue;
> } else if (err) {
> + free_pref(ref);
> ret = err;
> goto out;
> }
> @@ -484,19 +640,26 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
> ref->parent = node ? node->val : 0;
> ref->inode_list = unode_aux_to_inode_list(node);
>
> - /* additional parents require new refs being added here */
> + /* Add a prelim_ref(s) for any other parent(s). */
> while ((node = ulist_next(parents, &uiter))) {
> + struct prelim_ref *new_ref;
> +
> new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
> GFP_NOFS);
> if (!new_ref) {
> + free_pref(ref);
> ret = -ENOMEM;
> goto out;
> }
> memcpy(new_ref, ref, sizeof(*ref));
> new_ref->parent = node->val;
> new_ref->inode_list = unode_aux_to_inode_list(node);
> - list_add(&new_ref->list, &ref->list);
> + prelim_ref_insert(&preftrees->direct, new_ref);
> }
> +
> + /* Now it's a direct ref, put it in the the direct tree */
> + prelim_ref_insert(&preftrees->direct, ref);
> +
> ulist_reinit(parents);
> }
> out:
> @@ -504,44 +667,31 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
> return ret;
> }
>
> -static inline int ref_for_same_block(struct prelim_ref *ref1,
> - struct prelim_ref *ref2)
> -{
> - if (ref1->level != ref2->level)
> - return 0;
> - if (ref1->root_id != ref2->root_id)
> - return 0;
> - if (ref1->key_for_search.type != ref2->key_for_search.type)
> - return 0;
> - if (ref1->key_for_search.objectid != ref2->key_for_search.objectid)
> - return 0;
> - if (ref1->key_for_search.offset != ref2->key_for_search.offset)
> - return 0;
> - if (ref1->parent != ref2->parent)
> - return 0;
> -
> - return 1;
> -}
> -
> /*
> * read tree blocks and add keys where required.
> */
> static int add_missing_keys(struct btrfs_fs_info *fs_info,
> - struct list_head *head)
> + struct preftrees *preftrees)
> {
> struct prelim_ref *ref;
> struct extent_buffer *eb;
> + struct preftree *tree = &preftrees->indirect_missing_keys;
> + struct rb_node *node;
>
> - list_for_each_entry(ref, head, list) {
> - if (ref->parent)
> - continue;
> - if (ref->key_for_search.type)
> - continue;
> + while ((node = rb_first(&tree->root))) {
> + ref = rb_entry(node, struct prelim_ref, rbnode);
> + rb_erase(node, &tree->root);
> +
> + BUG_ON(ref->parent); /* should not be a direct ref */
> + BUG_ON(ref->key_for_search.type);
> BUG_ON(!ref->wanted_disk_byte);
> +
> eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0);
> if (IS_ERR(eb)) {
> + free_pref(ref);
> return PTR_ERR(eb);
> } else if (!extent_buffer_uptodate(eb)) {
> + free_pref(ref);
> free_extent_buffer(eb);
> return -EIO;
> }
> @@ -552,73 +702,31 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
> btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
> btrfs_tree_read_unlock(eb);
> free_extent_buffer(eb);
> + prelim_ref_insert(&preftrees->indirect, ref);
> }
> return 0;
> }
>
> /*
> - * merge backrefs and adjust counts accordingly
> - *
> - * FIXME: For MERGE_IDENTICAL_KEYS, if we add more keys in add_prelim_ref
> - * then we can merge more here. Additionally, we could even add a key
> - * range for the blocks we looked into to merge even more (-> replace
> - * unresolved refs by those having a parent).
> - */
> -static void merge_refs(struct list_head *head, enum merge_mode mode)
> -{
> - struct prelim_ref *pos1;
> -
> - list_for_each_entry(pos1, head, list) {
> - struct prelim_ref *pos2 = pos1, *tmp;
> -
> - list_for_each_entry_safe_continue(pos2, tmp, head, list) {
> - struct prelim_ref *ref1 = pos1, *ref2 = pos2;
> - struct extent_inode_elem *eie;
> -
> - if (!ref_for_same_block(ref1, ref2))
> - continue;
> - if (mode == MERGE_IDENTICAL_KEYS) {
> - if (!ref1->parent && ref2->parent)
> - swap(ref1, ref2);
> - } else {
> - if (ref1->parent != ref2->parent)
> - continue;
> - }
> -
> - eie = ref1->inode_list;
> - while (eie && eie->next)
> - eie = eie->next;
> - if (eie)
> - eie->next = ref2->inode_list;
> - else
> - ref1->inode_list = ref2->inode_list;
> - ref1->count += ref2->count;
> -
> - list_del(&ref2->list);
> - kmem_cache_free(btrfs_prelim_ref_cache, ref2);
> - cond_resched();
> - }
> -
> - }
> -}
> -
> -/*
> * add all currently queued delayed refs from this head whose seq nr is
> * smaller or equal that seq to the list
> */
> static int add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
> - struct list_head *prefs, u64 *total_refs,
> + struct preftrees *preftrees, u64 *total_refs,
> u64 inum)
> {
> struct btrfs_delayed_ref_node *node;
> struct btrfs_delayed_extent_op *extent_op = head->extent_op;
> struct btrfs_key key;
> - struct btrfs_key op_key = {0};
> + struct btrfs_key tmp_op_key;
> + struct btrfs_key *op_key = NULL;
> int sgn;
> int ret = 0;
>
> - if (extent_op && extent_op->update_key)
> - btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
> + if (extent_op && extent_op->update_key) {
> + btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
> + op_key = &tmp_op_key;
> + }
>
> spin_lock(&head->lock);
> list_for_each_entry(node, &head->ref_list, list) {
> @@ -642,24 +750,30 @@ static int add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
> *total_refs += (node->ref_mod * sgn);
> switch (node->type) {
> case BTRFS_TREE_BLOCK_REF_KEY: {
> + /* NORMAL INDIRECT METADATA backref */
> struct btrfs_delayed_tree_ref *ref;
>
> ref = btrfs_delayed_node_to_tree_ref(node);
> - ret = add_prelim_ref(prefs, ref->root, &op_key,
> - ref->level + 1, 0, node->bytenr,
> - node->ref_mod * sgn, GFP_ATOMIC);
> + ret = add_indirect_ref(preftrees, ref->root, &tmp_op_key,
> + ref->level + 1, node->bytenr,
> + node->ref_mod * sgn,
> + GFP_ATOMIC);
> break;
> }
> case BTRFS_SHARED_BLOCK_REF_KEY: {
> + /* SHARED DIRECT METADATA backref */
> struct btrfs_delayed_tree_ref *ref;
>
> ref = btrfs_delayed_node_to_tree_ref(node);
> - ret = add_prelim_ref(prefs, 0, NULL, ref->level + 1,
> +
> + ret = add_direct_ref(preftrees, ref->level + 1,
> ref->parent, node->bytenr,
> - node->ref_mod * sgn, GFP_ATOMIC);
> + node->ref_mod * sgn,
> + GFP_ATOMIC);
> break;
> }
> case BTRFS_EXTENT_DATA_REF_KEY: {
> + /* NORMAL INDIRECT DATA backref */
> struct btrfs_delayed_data_ref *ref;
> ref = btrfs_delayed_node_to_data_ref(node);
>
> @@ -676,17 +790,21 @@ static int add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
> break;
> }
>
> - ret = add_prelim_ref(prefs, ref->root, &key, 0, 0,
> - node->bytenr, node->ref_mod * sgn,
> - GFP_ATOMIC);
> + ret = add_indirect_ref(preftrees, ref->root, &key, 0,
> + node->bytenr,
> + node->ref_mod * sgn,
> + GFP_ATOMIC);
> break;
> }
> case BTRFS_SHARED_DATA_REF_KEY: {
> + /* SHARED DIRECT FULL backref */
> struct btrfs_delayed_data_ref *ref;
>
> ref = btrfs_delayed_node_to_data_ref(node);
> - ret = add_prelim_ref(prefs, 0, NULL, 0, ref->parent,
> - node->bytenr, node->ref_mod * sgn,
> +
> + ret = add_direct_ref(preftrees, 0, ref->parent,
> + node->bytenr,
> + node->ref_mod * sgn,
> GFP_ATOMIC);
> break;
> }
> @@ -704,7 +822,7 @@ static int add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
> * add all inline backrefs for bytenr to the list
> */
> static int add_inline_refs(struct btrfs_path *path, u64 bytenr,
> - int *info_level, struct list_head *prefs,
> + int *info_level, struct preftrees *preftrees,
> u64 *total_refs, u64 inum)
> {
> int ret = 0;
> @@ -760,8 +878,8 @@ static int add_inline_refs(struct btrfs_path *path, u64 bytenr,
>
> switch (type) {
> case BTRFS_SHARED_BLOCK_REF_KEY:
> - ret = add_prelim_ref(prefs, 0, NULL, *info_level + 1,
> - offset, bytenr, 1, GFP_NOFS);
> + ret = add_direct_ref(preftrees, *info_level + 1, offset,
> + bytenr, 1, GFP_NOFS);
> break;
> case BTRFS_SHARED_DATA_REF_KEY: {
> struct btrfs_shared_data_ref *sdref;
> @@ -769,14 +887,15 @@ static int add_inline_refs(struct btrfs_path *path, u64 bytenr,
>
> sdref = (struct btrfs_shared_data_ref *)(iref + 1);
> count = btrfs_shared_data_ref_count(leaf, sdref);
> - ret = add_prelim_ref(prefs, 0, NULL, 0, offset,
> +
> + ret = add_direct_ref(preftrees, 0, offset,
> bytenr, count, GFP_NOFS);
> break;
> }
> case BTRFS_TREE_BLOCK_REF_KEY:
> - ret = add_prelim_ref(prefs, offset, NULL,
> - *info_level + 1, 0,
> - bytenr, 1, GFP_NOFS);
> + ret = add_indirect_ref(preftrees, offset, NULL,
> + *info_level + 1, bytenr, 1,
> + GFP_NOFS);
> break;
> case BTRFS_EXTENT_DATA_REF_KEY: {
> struct btrfs_extent_data_ref *dref;
> @@ -796,8 +915,9 @@ static int add_inline_refs(struct btrfs_path *path, u64 bytenr,
> }
>
> root = btrfs_extent_data_ref_root(leaf, dref);
> - ret = add_prelim_ref(prefs, root, &key, 0, 0,
> - bytenr, count, GFP_NOFS);
> +
> + ret = add_indirect_ref(preftrees, root, &key, 0, bytenr,
> + count, GFP_NOFS);
> break;
> }
> default:
> @@ -816,7 +936,8 @@ static int add_inline_refs(struct btrfs_path *path, u64 bytenr,
> */
> static int add_keyed_refs(struct btrfs_fs_info *fs_info,
> struct btrfs_path *path, u64 bytenr,
> - int info_level, struct list_head *prefs, u64 inum)
> + int info_level, struct preftrees *preftrees,
> + u64 inum)
> {
> struct btrfs_root *extent_root = fs_info->extent_root;
> int ret;
> @@ -846,26 +967,31 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
>
> switch (key.type) {
> case BTRFS_SHARED_BLOCK_REF_KEY:
> - ret = add_prelim_ref(prefs, 0, NULL, info_level + 1,
> - key.offset, bytenr, 1, GFP_NOFS);
> + /* SHARED DIRECT METADATA backref */
> + ret = add_direct_ref(preftrees, info_level + 1,
> + key.offset, bytenr, 1,
> + GFP_NOFS);
> break;
> case BTRFS_SHARED_DATA_REF_KEY: {
> + /* SHARED DIRECT FULL backref */
> struct btrfs_shared_data_ref *sdref;
> int count;
>
> sdref = btrfs_item_ptr(leaf, slot,
> struct btrfs_shared_data_ref);
> count = btrfs_shared_data_ref_count(leaf, sdref);
> - ret = add_prelim_ref(prefs, 0, NULL, 0, key.offset,
> - bytenr, count, GFP_NOFS);
> + ret = add_direct_ref(preftrees, 0, key.offset, bytenr,
> + count, GFP_NOFS);
> break;
> }
> case BTRFS_TREE_BLOCK_REF_KEY:
> - ret = add_prelim_ref(prefs, key.offset, NULL,
> - info_level + 1, 0,
> - bytenr, 1, GFP_NOFS);
> + /* NORMAL INDIRECT METADATA backref */
> + ret = add_indirect_ref(preftrees, key.offset, NULL,
> + info_level + 1, bytenr, 1,
> + GFP_NOFS);
> break;
> case BTRFS_EXTENT_DATA_REF_KEY: {
> + /* NORMAL INDIRECT DATA backref */
> struct btrfs_extent_data_ref *dref;
> int count;
> u64 root;
> @@ -884,8 +1010,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
> }
>
> root = btrfs_extent_data_ref_root(leaf, dref);
> - ret = add_prelim_ref(prefs, root, &key, 0, 0,
> - bytenr, count, GFP_NOFS);
> + ret = add_indirect_ref(preftrees, root, &key, 0, bytenr,
> + count, GFP_NOFS);
> break;
> }
> default:
> @@ -926,14 +1052,16 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
> struct btrfs_delayed_ref_head *head;
> int info_level = 0;
> int ret;
> - struct list_head prefs_delayed;
> - struct list_head prefs;
> struct prelim_ref *ref;
> + struct rb_node *node;
> struct extent_inode_elem *eie = NULL;
> + /* total of both direct AND indirect refs! */
> u64 total_refs = 0;
> -
> - INIT_LIST_HEAD(&prefs);
> - INIT_LIST_HEAD(&prefs_delayed);
> + struct preftrees preftrees = {
> + .direct = PREFTREE_INIT,
> + .indirect = PREFTREE_INIT,
> + .indirect_missing_keys = PREFTREE_INIT
> + };
>
> key.objectid = bytenr;
> key.offset = (u64)-1;
> @@ -996,9 +1124,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
> goto again;
> }
> spin_unlock(&delayed_refs->lock);
> - ret = add_delayed_refs(head, time_seq,
> - &prefs_delayed, &total_refs,
> - inum);
> + ret = add_delayed_refs(head, time_seq, &preftrees,
> + &total_refs, inum);
> mutex_unlock(&head->mutex);
> if (ret)
> goto out;
> @@ -1019,35 +1146,43 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
> (key.type == BTRFS_EXTENT_ITEM_KEY ||
> key.type == BTRFS_METADATA_ITEM_KEY)) {
> ret = add_inline_refs(path, bytenr, &info_level,
> - &prefs, &total_refs, inum);
> + &preftrees, &total_refs, inum);
> if (ret)
> goto out;
> ret = add_keyed_refs(fs_info, path, bytenr, info_level,
> - &prefs, inum);
> + &preftrees, inum);
> if (ret)
> goto out;
> }
> }
> - btrfs_release_path(path);
>
> - list_splice_init(&prefs_delayed, &prefs);
> + btrfs_release_path(path);
>
> - ret = add_missing_keys(fs_info, &prefs);
> + ret = add_missing_keys(fs_info, &preftrees);
> if (ret)
> goto out;
>
> - merge_refs(&prefs, MERGE_IDENTICAL_KEYS);
> + WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root));
>
> - ret = resolve_indirect_refs(fs_info, path, time_seq, &prefs,
> + ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
> extent_item_pos, total_refs,
> root_objectid);
> if (ret)
> goto out;
>
> - merge_refs(&prefs, MERGE_IDENTICAL_PARENTS);
> + WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root));
>
> - while (!list_empty(&prefs)) {
> - ref = list_first_entry(&prefs, struct prelim_ref, list);
> + /*
> + * This walks the tree of merged and resolved refs. Tree blocks are
> + * read in as needed. Unique entries are added to the ulist, and
> + * the list of found roots is updated.
> + *
> + * We release the entire tree in one go before returning.
> + */
> + node = rb_first(&preftrees.direct.root);
> + while (node) {
> + ref = rb_entry(node, struct prelim_ref, rbnode);
> + node = rb_next(&ref->rbnode);
> WARN_ON(ref->count < 0);
> if (roots && ref->count && ref->root_id && ref->parent == 0) {
> if (root_objectid && ref->root_id != root_objectid) {
> @@ -1101,23 +1236,15 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
> }
> eie = NULL;
> }
> - list_del(&ref->list);
> - kmem_cache_free(btrfs_prelim_ref_cache, ref);
> }
>
> out:
> btrfs_free_path(path);
> - while (!list_empty(&prefs)) {
> - ref = list_first_entry(&prefs, struct prelim_ref, list);
> - list_del(&ref->list);
> - kmem_cache_free(btrfs_prelim_ref_cache, ref);
> - }
> - while (!list_empty(&prefs_delayed)) {
> - ref = list_first_entry(&prefs_delayed, struct prelim_ref,
> - list);
> - list_del(&ref->list);
> - kmem_cache_free(btrfs_prelim_ref_cache, ref);
> - }
> +
> + prelim_release(&preftrees.direct);
> + prelim_release(&preftrees.indirect);
> + prelim_release(&preftrees.indirect_missing_keys);
> +
> if (ret < 0)
> free_inode_elem_list(eie);
> return ret;
> --
> 2.10.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2017-07-28 19:27 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-12 22:20 [PATCH v3 00/13] use rbtrees for preliminary backrefs Edmund Nadolski
2017-07-12 22:20 ` [PATCH v3 08/13] btrfs: convert prelimary reference tracking to use rbtrees Edmund Nadolski
2017-07-28 18:25 ` Liu Bo [this message]
2017-07-12 22:20 ` [PATCH v3 09/13] btrfs: add a node counter to each of the rbtrees Edmund Nadolski
2017-07-13 0:51 ` David Sterba
2017-07-12 22:20 ` [PATCH v3 10/13] btrfs: backref, add tracepoints for prelim_ref insertion and merging Edmund Nadolski
2017-07-13 0:18 ` David Sterba
2017-07-12 22:20 ` [PATCH v3 11/13] btrfs: add cond_resched() calls when resolving backrefs Edmund Nadolski
2017-07-13 0:08 ` David Sterba
2017-07-12 22:20 ` [PATCH v3 12/13] btrfs: allow backref search checks for shared extents Edmund Nadolski
2017-07-28 18:41 ` Liu Bo
2017-07-12 22:20 ` [PATCH v3 13/13] btrfs: clean up extraneous computations in add_delayed_refs Edmund Nadolski
2017-07-13 0:08 ` David Sterba
2017-07-28 14:54 ` [PATCH v3 00/13] use rbtrees for preliminary backrefs David Sterba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170728182502.GC15815@localhost.localdomain \
--to=bo.li.liu@oracle.com \
--cc=dsterba@suse.cz \
--cc=enadolski@suse.com \
--cc=jeffm@suse.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=lufq.fnst@cn.fujitsu.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).