linux-btrfs.vger.kernel.org archive mirror
* [PATCH] btrfs: speed up extent_io tree search
@ 2010-04-20  9:21 Shaohua Li
From: Shaohua Li @ 2010-04-20  9:21 UTC
  To: linux-btrfs; +Cc: chris.mason

Searching the extent_io_tree is a frequent operation and takes a lot of CPU
time. We can cache the last found extent_state to skip some full searches. In
my tests, the hit rate ranges from 30% to 70% depending on the workload, which
speeds up the search.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d2d0368..645f00c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -110,6 +110,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
 	spin_lock_init(&tree->lock);
 	spin_lock_init(&tree->buffer_lock);
 	tree->mapping = mapping;
+	tree->cached_state = NULL;
 }
 
 static struct extent_state *alloc_extent_state(gfp_t mask)
@@ -135,6 +136,22 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
 	return state;
 }
 
+static void remove_cached_extent(struct extent_io_tree *tree,
+	struct extent_state *state)
+{
+	if (!tree->cached_state)
+		return;
+	if (tree->cached_state == state)
+		tree->cached_state = NULL;
+}
+
+static void merge_cached_extent(struct extent_io_tree *tree,
+	struct extent_state *first, struct extent_state *last)
+{
+	if (tree->cached_state == first || tree->cached_state == last)
+		tree->cached_state = first;
+}
+
 static void free_extent_state(struct extent_state *state)
 {
 	if (!state)
@@ -188,6 +205,12 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
 	struct rb_node *orig_prev = NULL;
 	struct tree_entry *entry;
 	struct tree_entry *prev_entry = NULL;
+	struct tree_entry *cached_entry =
+				(struct tree_entry *)tree->cached_state;
+
+	if (likely(cached_entry && offset >= cached_entry->start &&
+		offset <= cached_entry->end))
+		return &cached_entry->rb_node;
 
 	while (n) {
 		entry = rb_entry(n, struct tree_entry, rb_node);
@@ -198,8 +221,10 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
 			n = n->rb_left;
 		else if (offset > entry->end)
 			n = n->rb_right;
-		else
+		else {
+			tree->cached_state = (struct extent_state *)entry;
 			return n;
+		}
 	}
 
 	if (prev_ret) {
@@ -313,6 +338,7 @@ static int merge_state(struct extent_io_tree *tree,
 			merge_cb(tree, state, other);
 			state->start = other->start;
 			other->tree = NULL;
+			merge_cached_extent(tree, state, other);
 			rb_erase(&other->rb_node, &tree->state);
 			free_extent_state(other);
 		}
@@ -325,6 +351,7 @@ static int merge_state(struct extent_io_tree *tree,
 			merge_cb(tree, state, other);
 			other->start = state->start;
 			state->tree = NULL;
+			merge_cached_extent(tree, other, state);
 			rb_erase(&state->rb_node, &tree->state);
 			free_extent_state(state);
 			state = NULL;
@@ -473,6 +500,7 @@ static int clear_state_bit(struct extent_io_tree *tree,
 		wake_up(&state->wq);
 	if (delete || state->state == 0) {
 		if (state->tree) {
+			remove_cached_extent(tree, state);
 			clear_state_cb(tree, state, state->state);
 			rb_erase(&state->rb_node, &tree->state);
 			state->tree = NULL;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bbab481..e60b367 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -89,6 +89,7 @@ struct extent_io_tree {
 	spinlock_t lock;
 	spinlock_t buffer_lock;
 	struct extent_io_ops *ops;
+	struct extent_state *cached_state;
 };
 
 struct extent_state {


* Re: [PATCH] btrfs: speed up extent_io tree search
  2010-04-20  9:21 [PATCH] btrfs: speed up extent_io tree search Shaohua Li
@ 2010-04-20 14:39 ` Josef Bacik
From: Josef Bacik @ 2010-04-20 14:39 UTC
  To: Shaohua Li; +Cc: linux-btrfs, chris.mason

On Tue, Apr 20, 2010 at 05:21:58PM +0800, Shaohua Li wrote:
> [patch quoted in full, snipped]

Sorry, I saw this earlier but then forgot about it.  Instead of doing a
per-tree thing, which will end up with misses whenever somebody else searches
the tree for a different offset, you will want to do something like this:

http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-unstable.git;a=commit;h=2ac55d41b5d6bf49e76bc85db5431240617e2f8f

That way _anybody_ who does a search gets a cached state and can skip the
subsequent full searches, instead of the caching only working for the first
caller whose state got cached.  Thanks,

Josef
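
A minimal sketch of the caller-held scheme in the commit Josef links above:
each caller keeps its own extent_state pointer and passes it back in on the
next call, so a search at a different offset by somebody else cannot evict
it. The helper names below follow that commit, but the signatures are
simplified and the calling function is hypothetical.

	/* Hypothetical caller; 'cached' starts out empty. */
	static void lock_unlock_range(struct extent_io_tree *tree,
				      u64 start, u64 end)
	{
		struct extent_state *cached = NULL;

		/* The first call walks the rb-tree and stores the state it
		 * finds in 'cached', taking an extra reference on it. */
		lock_extent_bits(tree, start, end, 0, &cached, GFP_NOFS);

		/* Later calls on the same range can start from 'cached'
		 * instead of the root; this one also drops the reference
		 * once the range is unlocked. */
		unlock_extent_cached(tree, start, end, &cached, GFP_NOFS);
	}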


* Re: [PATCH] btrfs: speed up extent_io tree search
  2010-04-20 14:39 ` Josef Bacik
@ 2010-04-21  1:48   ` Shaohua Li
From: Shaohua Li @ 2010-04-21  1:48 UTC
  To: Josef Bacik; +Cc: linux-btrfs@vger.kernel.org, chris.mason@oracle.com

On Tue, Apr 20, 2010 at 10:39:01PM +0800, Josef Bacik wrote:
> On Tue, Apr 20, 2010 at 05:21:58PM +0800, Shaohua Li wrote:
> > [patch quoted in full, snipped]
> 
> Sorry, I saw this earlier but then forgot about it.  Instead of doing a
> per-tree thing, which will end up with misses whenever somebody else searches
> the tree for a different offset, you will want to do something like this:
>
> http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-unstable.git;a=commit;h=2ac55d41b5d6bf49e76bc85db5431240617e2f8f
>
> That way _anybody_ who does a search gets a cached state and can skip the
> subsequent full searches, instead of the caching only working for the first
> caller whose state got cached.  Thanks,
Hmm, the patch you pointed out is already in upstream, but I still see the
search taking a lot of CPU time.

Thanks,
Shaohua


* Re: [PATCH] btrfs: speed up extent_io tree search
  2010-04-21  1:48   ` Shaohua Li
@ 2010-04-21  2:11     ` Josef Bacik
From: Josef Bacik @ 2010-04-21  2:11 UTC
  To: Shaohua Li
  Cc: Josef Bacik, linux-btrfs@vger.kernel.org, chris.mason@oracle.com

On Wed, Apr 21, 2010 at 09:48:17AM +0800, Shaohua Li wrote:
> On Tue, Apr 20, 2010 at 10:39:01PM +0800, Josef Bacik wrote:
> > On Tue, Apr 20, 2010 at 05:21:58PM +0800, Shaohua Li wrote:
> > > [patch quoted in full, snipped]
> > 
> > Sorry, I saw this earlier but then forgot about it.  Instead of doing a
> > per-tree thing, which will end up with misses whenever somebody else searches
> > the tree for a different offset, you will want to do something like this:
> >
> > http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-unstable.git;a=commit;h=2ac55d41b5d6bf49e76bc85db5431240617e2f8f
> >
> > That way _anybody_ who does a search gets a cached state and can skip the
> > subsequent full searches, instead of the caching only working for the first
> > caller whose state got cached.  Thanks,
> Hmm, the patch you pointed out is already in upstream, but I still see the
> search taking a lot of CPU time.
> 

I've probably missed some places where we could be using cached extent states;
I wasn't terribly thorough when I was checking.  It may be good to instrument
the cases where we come into test/clear/set bits and do not end up using the
cached state, to see where the trouble spots are.  Thanks,

Josef
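
One way to do the instrumentation Josef suggests (a sketch with hypothetical
counter names, not part of any posted patch) is to account, at the top of the
test/clear/set helpers, whether the state the caller passed in would have
satisfied the lookup:

	/* Hypothetical hit/miss counters for the cached-state fast path. */
	static atomic64_t cached_state_hits = ATOMIC64_INIT(0);
	static atomic64_t cached_state_misses = ATOMIC64_INIT(0);

	/* 'cached' is the extent_state the caller passed in (may be NULL),
	 * 'offset' is the start of the range being searched. */
	static inline void account_cached_state(struct extent_state *cached,
						u64 offset)
	{
		if (cached && offset >= cached->start && offset <= cached->end)
			atomic64_inc(&cached_state_hits);
		else
			atomic64_inc(&cached_state_misses);
	}

Reading the counters back (through debugfs, for instance) would show which
call paths rarely arrive with a usable cached state.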


* Re: [PATCH] btrfs: speed up extent_io tree search
  2010-04-21  2:11     ` Josef Bacik
@ 2010-04-21  3:10       ` Shaohua Li
From: Shaohua Li @ 2010-04-21  3:10 UTC
  To: Josef Bacik; +Cc: linux-btrfs@vger.kernel.org, chris.mason@oracle.com

On Wed, Apr 21, 2010 at 10:11:01AM +0800, Josef Bacik wrote:
> On Wed, Apr 21, 2010 at 09:48:17AM +0800, Shaohua Li wrote:
> > On Tue, Apr 20, 2010 at 10:39:01PM +0800, Josef Bacik wrote:
> > > On Tue, Apr 20, 2010 at 05:21:58PM +0800, Shaohua Li wrote:
> > > > [patch quoted in full, snipped]
> > > 
> > > Sorry, I saw this earlier but then forgot about it.  Instead of doing a
> > > per-tree thing, which will end up with misses whenever somebody else searches
> > > the tree for a different offset, you will want to do something like this:
> > >
> > > http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-unstable.git;a=commit;h=2ac55d41b5d6bf49e76bc85db5431240617e2f8f
> > >
> > > That way _anybody_ who does a search gets a cached state and can skip the
> > > subsequent full searches, instead of the caching only working for the first
> > > caller whose state got cached.  Thanks,
> > Hmm, the patch you pointed out is already in upstream, but I still see the
> > search taking a lot of CPU time.
> > 
> 
> I've probably missed some places where we could be using cached extent states;
> I wasn't terribly thorough when I was checking.  It may be good to instrument
> the cases where we come into test/clear/set bits and do not end up using the
> cached state, to see where the trouble spots are.  Thanks,
My test mainly exercises the .readpage/.readpages and .writepage/.writepages
code paths. At first glance, those places do not look easy to convert to your
cached extent_state scheme, but I need to check further anyway.

Thanks,
Shaohua


* Re: [PATCH] btrfs: speed up extent_io tree search
  2010-04-21  3:10       ` Shaohua Li
@ 2010-04-21 13:42         ` Josef Bacik
From: Josef Bacik @ 2010-04-21 13:42 UTC
  To: Shaohua Li
  Cc: Josef Bacik, linux-btrfs@vger.kernel.org, chris.mason@oracle.com

On Wed, Apr 21, 2010 at 11:10:46AM +0800, Shaohua Li wrote:
> On Wed, Apr 21, 2010 at 10:11:01AM +0800, Josef Bacik wrote:
> > On Wed, Apr 21, 2010 at 09:48:17AM +0800, Shaohua Li wrote:
> > > On Tue, Apr 20, 2010 at 10:39:01PM +0800, Josef Bacik wrote:
> > > > On Tue, Apr 20, 2010 at 05:21:58PM +0800, Shaohua Li wrote:
> > > > > [patch quoted in full, snipped]
> > > > 
> > > > Sorry, I saw this earlier but then forgot about it.  Instead of doing a
> > > > per-tree thing, which will end up with misses whenever somebody else searches
> > > > the tree for a different offset, you will want to do something like this:
> > > >
> > > > http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-unstable.git;a=commit;h=2ac55d41b5d6bf49e76bc85db5431240617e2f8f
> > > >
> > > > That way _anybody_ who does a search gets a cached state and can skip the
> > > > subsequent full searches, instead of the caching only working for the first
> > > > caller whose state got cached.  Thanks,
> > > Hmm, the patch you pointed out is already in upstream, but I still see the
> > > search taking a lot of CPU time.
> > > 
> > 
> > I've probably missed some places where we could be using cached extent states;
> > I wasn't terribly thorough when I was checking.  It may be good to instrument
> > the cases where we come into test/clear/set bits and do not end up using the
> > cached state, to see where the trouble spots are.  Thanks,
> My test mainly exercises the .readpage/.readpages and .writepage/.writepages
> code paths. At first glance, those places do not look easy to convert to your
> cached extent_state scheme, but I need to check further anyway.
> 

Hmm, I see your point.  I guess this doesn't mess with anybody who already
uses the cached state stuff, and if it helps out in these more complicated
cases then that's good enough.

Acked-by: Josef Bacik <josef@redhat.com>

Thanks,

Josef

