All of lore.kernel.org
 help / color / mirror / Atom feed
From: Benny Halevy <bhalevy@panasas.com>
To: Zhang Jingwang <zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org>
Cc: linux-nfs@vger.kernel.org, iisaman@netapp.com
Subject: Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order
Date: Wed, 12 May 2010 09:46:43 +0300	[thread overview]
Message-ID: <4BEA4ED3.3010702@panasas.com> (raw)
In-Reply-To: <20100510033610.GA5443-nK6E9TRyOkVSq9BJjBFyUp/QNRX+jHPU@public.gmane.org>

On May. 10, 2010, 6:36 +0300, Zhang Jingwang <zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org> wrote:
> Optimize for sequential writes. Layout infos and tags are organized by
> file offset. When appending data to a file, the whole list is examined,
> which introduces a notable performance decrease.

Looks good to me.

Fred, can you please double check?

Benny

P.S.: Zhang, please note Fred's new email address

> 
> Signed-off-by: Zhang Jingwang <zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org>
> ---
>  fs/nfs/blocklayout/extents.c |  126 +++++++++++++++++++++---------------------
>  1 files changed, 64 insertions(+), 62 deletions(-)
> 
> diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
> index 3c311f2..514f2cc 100644
> --- a/fs/nfs/blocklayout/extents.c
> +++ b/fs/nfs/blocklayout/extents.c
> @@ -66,8 +66,8 @@ static int32_t _find_entry(struct my_tree_t *tree, u64 s)
>  	struct pnfs_inval_tracking *pos;
>  
>  	dprintk("%s(%llu) enter\n", __func__, s);
> -	list_for_each_entry(pos, &tree->mtt_stub, it_link) {
> -		if (pos->it_sector < s)
> +	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
> +		if (pos->it_sector > s)
>  			continue;
>  		else if (pos->it_sector == s)
>  			return pos->it_tags & INTERNAL_MASK;
> @@ -102,8 +102,8 @@ static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
>  	struct pnfs_inval_tracking *pos;
>  
>  	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
> -	list_for_each_entry(pos, &tree->mtt_stub, it_link) {
> -		if (pos->it_sector < s)
> +	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
> +		if (pos->it_sector > s)
>  			continue;
>  		else if (pos->it_sector == s) {
>  			found = 1;
> @@ -125,7 +125,7 @@ static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
>  		}
>  		new->it_sector = s;
>  		new->it_tags = (1 << tag);
> -		list_add_tail(&new->it_link, &pos->it_link);
> +		list_add(&new->it_link, &pos->it_link);
>  		return 1;
>  	}
>  }
> @@ -230,14 +230,14 @@ _range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
>  	u64 expect = 0;
>  
>  	dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
> -	list_for_each_entry(pos, &tree->mtt_stub, it_link) {
> -		if (pos->it_sector < start)
> +	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
> +		if (pos->it_sector >= end)
>  			continue;
>  		if (!expect) {
> -			if ((pos->it_sector == start) &&
> +			if ((pos->it_sector == end - tree->mtt_step_size) &&
>  			    (pos->it_tags & (1 << tag))) {
> -				expect = start + tree->mtt_step_size;
> -				if (expect == end)
> +				expect = pos->it_sector - tree->mtt_step_size;
> +				if (expect < start)
>  					return 1;
>  				continue;
>  			} else {
> @@ -246,8 +246,8 @@ _range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
>  		}
>  		if (pos->it_sector != expect || !(pos->it_tags & (1 << tag)))
>  			return 0;
> -		expect += tree->mtt_step_size;
> -		if (expect == end)
> +		expect -= tree->mtt_step_size;
> +		if (expect < start)
>  			return 1;
>  	}
>  	return 0;
> @@ -594,65 +594,67 @@ add_and_merge_extent(struct pnfs_block_layout *bl,
>  	/* Scan for proper place to insert, extending new to the left
>  	 * as much as possible.
>  	 */
> -	list_for_each_entry_safe(be, tmp, list, be_node) {
> -		if (new->be_f_offset < be->be_f_offset)
> +	list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
> +		if (new->be_f_offset >= be->be_f_offset + be->be_length)
>  			break;
> -		if (end <= be->be_f_offset + be->be_length) {
> -			/* new is a subset of existing be*/
> +		if (new->be_f_offset >= be->be_f_offset) {
> +			if (end <= be->be_f_offset + be->be_length) {
> +				/* new is a subset of existing be*/
> +				if (extents_consistent(be, new)) {
> +					dprintk("%s: new is subset, ignoring\n",
> +						__func__);
> +					put_extent(new);
> +					return 0;
> +				} else {
> +					goto out_err;
> +				}
> +			} else {
> +				/* |<--   be   -->|
> +				 *          |<--   new   -->| */
> +				if (extents_consistent(be, new)) {
> +					/* extend new to fully replace be */
> +					new->be_length += new->be_f_offset -
> +						be->be_f_offset;
> +					new->be_f_offset = be->be_f_offset;
> +					new->be_v_offset = be->be_v_offset;
> +					dprintk("%s: removing %p\n", __func__, be);
> +					list_del(&be->be_node);
> +					put_extent(be);
> +				} else {
> +					goto out_err;
> +				}
> +			}
> +		} else if (end >= be->be_f_offset + be->be_length) {
> +			/* new extent overlap existing be */
>  			if (extents_consistent(be, new)) {
> -				dprintk("%s: new is subset, ignoring\n",
> -					__func__);
> -				put_extent(new);
> -				return 0;
> -			} else
> +				/* extend new to fully replace be */
> +				dprintk("%s: removing %p\n", __func__, be);
> +				list_del(&be->be_node);
> +				put_extent(be);
> +			} else {
>  				goto out_err;
> -		} else if (new->be_f_offset <=
> -				be->be_f_offset + be->be_length) {
> -			/* new overlaps or abuts existing be */
> -			if (extents_consistent(be, new)) {
> +			}
> +		} else if (end > be->be_f_offset) {
> +			/*           |<--   be   -->|
> +			 *|<--   new   -->| */
> +			if (extents_consistent(new, be)) {
>  				/* extend new to fully replace be */
> -				new->be_length += new->be_f_offset -
> -						  be->be_f_offset;
> -				new->be_f_offset = be->be_f_offset;
> -				new->be_v_offset = be->be_v_offset;
> +				new->be_length += be->be_f_offset + be->be_length -
> +					new->be_f_offset - new->be_length;
>  				dprintk("%s: removing %p\n", __func__, be);
>  				list_del(&be->be_node);
>  				put_extent(be);
> -			} else if (new->be_f_offset !=
> -				   be->be_f_offset + be->be_length)
> +			} else {
>  				goto out_err;
> +			}
>  		}
>  	}
>  	/* Note that if we never hit the above break, be will not point to a
>  	 * valid extent.  However, in that case &be->be_node==list.
>  	 */
> -	list_add_tail(&new->be_node, &be->be_node);
> +	list_add(&new->be_node, &be->be_node);
>  	dprintk("%s: inserting new\n", __func__);
>  	print_elist(list);
> -	/* Scan forward for overlaps.  If we find any, extend new and
> -	 * remove the overlapped extent.
> -	 */
> -	be = list_prepare_entry(new, list, be_node);
> -	list_for_each_entry_safe_continue(be, tmp, list, be_node) {
> -		if (end < be->be_f_offset)
> -			break;
> -		/* new overlaps or abuts existing be */
> -		if (extents_consistent(be, new)) {
> -			if (end < be->be_f_offset + be->be_length) {
> -				/* extend new to fully cover be */
> -				end = be->be_f_offset + be->be_length;
> -				new->be_length = end - new->be_f_offset;
> -			}
> -			dprintk("%s: removing %p\n", __func__, be);
> -			list_del(&be->be_node);
> -			put_extent(be);
> -		} else if (end != be->be_f_offset) {
> -			list_del(&new->be_node);
> -			goto out_err;
> -		}
> -	}
> -	dprintk("%s: after merging\n", __func__);
> -	print_elist(list);
>  	/* STUB - The per-list consistency checks have all been done,
>  	 * should now check cross-list consistency.
>  	 */
> @@ -685,10 +687,10 @@ find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
>  		if (ret &&
>  		    (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
>  			break;
> -		list_for_each_entry(be, &bl->bl_extents[i], be_node) {
> -			if (isect < be->be_f_offset)
> +		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
> +			if (isect >= be->be_f_offset + be->be_length)
>  				break;
> -			if (isect < be->be_f_offset + be->be_length) {
> +			if (isect >= be->be_f_offset) {
>  				/* We have found an extent */
>  				dprintk("%s Get %p (%i)\n", __func__, be,
>  					atomic_read(&be->be_refcnt.refcount));
> @@ -721,10 +723,10 @@ find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect)
>  	for (i = 0; i < EXTENT_LISTS; i++) {
>  		if (ret)
>  			break;
> -		list_for_each_entry(be, &bl->bl_extents[i], be_node) {
> -			if (isect < be->be_f_offset)
> +		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
> +			if (isect >= be->be_f_offset + be->be_length)
>  				break;
> -			if (isect < be->be_f_offset + be->be_length) {
> +			if (isect >= be->be_f_offset) {
>  				/* We have found an extent */
>  				dprintk("%s Get %p (%i)\n", __func__, be,
>  					atomic_read(&be->be_refcnt.refcount));


  parent reply	other threads:[~2010-05-12  6:46 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-10  3:36 [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order Zhang Jingwang
     [not found] ` <20100510033610.GA5443-nK6E9TRyOkVSq9BJjBFyUp/QNRX+jHPU@public.gmane.org>
2010-05-12  6:46   ` Benny Halevy [this message]
2010-05-12 20:28     ` J. Bruce Fields
2010-05-17 13:53       ` J. Bruce Fields
2010-05-17 14:24         ` Boaz Harrosh
2010-05-17 14:53           ` J. Bruce Fields
2010-05-17 16:53             ` J. Bruce Fields
2010-05-17 17:22               ` Zhang Jingwang
     [not found]                 ` <AANLkTilUpAHrtHH8pauvYrAuD3rWgj7aDmrTOzrmU-h5-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-05-18 16:20                   ` J. Bruce Fields
2010-05-19  4:56                     ` Tao Guo
     [not found]                       ` <AANLkTik9L15tqpSboBpb9cSTy3hVPLEK487w94pEbLrS-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-05-19 16:36                         ` J. Bruce Fields
2010-05-19 21:38                           ` J. Bruce Fields
2010-05-20  5:44                             ` Tao Guo
2010-05-21 23:00                               ` J. Bruce Fields

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4BEA4ED3.3010702@panasas.com \
    --to=bhalevy@panasas.com \
    --cc=iisaman@netapp.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.