public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Alex Elder <aelder@sgi.com>
To: Dave Chinner <david@fromorbit.com>
Cc: xfs@oss.sgi.com
Subject: Re: [PATCH 04/10] xfs: Sort delayed write buffers before dispatch
Date: Fri, 05 Feb 2010 17:53:34 -0600	[thread overview]
Message-ID: <1265414014.2714.154.camel@doink1> (raw)
In-Reply-To: <1265153104-29680-5-git-send-email-david@fromorbit.com>

On Wed, 2010-02-03 at 10:24 +1100, Dave Chinner wrote:
> Currently when the xfsbufd writes delayed write buffers, it pushes
> them to disk in the order they come off the delayed write list. If
> there are lots of buffers spread widely over the disk, this results
> in overwhelming the elevator sort queues in the block layer and we
> end up losing the possibility of merging adjacent buffers to minimise
> the number of IOs.
> 
> Use the new generic list_sort function to sort the delwri dispatch
> queue before issue to ensure that the buffers are pushed in the most
> friendly order possible to the lower layers.

Looks good.

> Signed-off-by: Dave Chinner <david@fromorbit.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Alex Elder <aelder@sgi.com>

> ---
>  fs/xfs/linux-2.6/xfs_buf.c |   87 ++++++++++++++++++++++++++++++--------------
>  1 files changed, 60 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
> index b306265..4556a4c 100644
> --- a/fs/xfs/linux-2.6/xfs_buf.c
> +++ b/fs/xfs/linux-2.6/xfs_buf.c
> @@ -33,6 +33,7 @@
>  #include <linux/migrate.h>
>  #include <linux/backing-dev.h>
>  #include <linux/freezer.h>
> +#include <linux/list_sort.h>
>  
>  #include "xfs_sb.h"
>  #include "xfs_inum.h"
> @@ -1877,14 +1878,42 @@ xfs_buf_delwri_split(
>  
>  }
>  
> +/*
> + * Compare function is more complex than it needs to be because
> + * the return value is only 32 bits and we are doing comparisons
> + * on 64 bit values
> + */
> +static int
> +xfs_buf_cmp(
> +	void		*priv,
> +	struct list_head *a,
> +	struct list_head *b)
> +{
> +	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
> +	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
> +	xfs_daddr_t		diff;
> +
> +	diff = ap->b_bn - bp->b_bn;
> +	if (diff < 0)
> +		return -1;
> +	if (diff > 0)
> +		return 1;
> +	return 0;
> +}
> +
> +void
> +xfs_buf_delwri_sort(
> +	xfs_buftarg_t	*target,
> +	struct list_head *list)
> +{
> +	list_sort(NULL, list, xfs_buf_cmp);
> +}
> +
>  STATIC int
>  xfsbufd(
>  	void		*data)
>  {
> -	struct list_head tmp;
> -	xfs_buftarg_t	*target = (xfs_buftarg_t *)data;
> -	int		count;
> -	xfs_buf_t	*bp;
> +	xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
>  
>  	current->flags |= PF_MEMALLOC;
>  
> @@ -1893,6 +1922,8 @@ xfsbufd(
>  	do {
>  		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
>  		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
> +		int	count = 0;
> +		struct list_head tmp;
>  
>  		if (unlikely(freezing(current))) {
>  			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
> @@ -1907,11 +1938,10 @@ xfsbufd(
>  		schedule_timeout_interruptible(tout);
>  
>  		xfs_buf_delwri_split(target, &tmp, age);
> -		count = 0;
> +		list_sort(NULL, &tmp, xfs_buf_cmp);
>  		while (!list_empty(&tmp)) {
> -			bp = list_entry(tmp.next, xfs_buf_t, b_list);
> -			ASSERT(target == bp->b_target);
> -
> +			struct xfs_buf *bp;
> +			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
>  			list_del_init(&bp->b_list);
>  			xfs_buf_iostrategy(bp);
>  			count++;
> @@ -1937,42 +1967,45 @@ xfs_flush_buftarg(
>  	xfs_buftarg_t	*target,
>  	int		wait)
>  {
> -	struct list_head tmp;
> -	xfs_buf_t	*bp, *n;
> +	xfs_buf_t	*bp;
>  	int		pincount = 0;
> +	LIST_HEAD(tmp_list);
> +	LIST_HEAD(wait_list);
>  
>  	xfs_buf_runall_queues(xfsconvertd_workqueue);
>  	xfs_buf_runall_queues(xfsdatad_workqueue);
>  	xfs_buf_runall_queues(xfslogd_workqueue);
>  
>  	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
> -	pincount = xfs_buf_delwri_split(target, &tmp, 0);
> +	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
>  
>  	/*
> -	 * Dropped the delayed write list lock, now walk the temporary list
> +	 * Dropped the delayed write list lock, now walk the temporary list.
> +	 * All I/O is issued async and then if we need to wait for completion
> +	 * we do that after issuing all the IO.
>  	 */
> -	list_for_each_entry_safe(bp, n, &tmp, b_list) {
> +	list_sort(NULL, &tmp_list, xfs_buf_cmp);
> +	while (!list_empty(&tmp_list)) {
> +		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
>  		ASSERT(target == bp->b_target);
> -		if (wait)
> +		list_del_init(&bp->b_list);
> +		if (wait) {
>  			bp->b_flags &= ~XBF_ASYNC;
> -		else
> -			list_del_init(&bp->b_list);
> -
> +			list_add(&bp->b_list, &wait_list);
> +		}
>  		xfs_buf_iostrategy(bp);
>  	}
>  
> -	if (wait)
> +	if (wait) {
> +		/* Expedite and wait for IO to complete. */
>  		blk_run_address_space(target->bt_mapping);
> +		while (!list_empty(&wait_list)) {
> +			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
>  
> -	/*
> -	 * Remaining list items must be flushed before returning
> -	 */
> -	while (!list_empty(&tmp)) {
> -		bp = list_entry(tmp.next, xfs_buf_t, b_list);
> -
> -		list_del_init(&bp->b_list);
> -		xfs_iowait(bp);
> -		xfs_buf_relse(bp);
> +			list_del_init(&bp->b_list);
> +			xfs_iowait(bp);
> +			xfs_buf_relse(bp);
> +		}
>  	}
>  
>  	return pincount;



_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  reply	other threads:[~2010-02-05 23:52 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-02-02 23:24 [PATCH 0/10] Delayed write metadata writeback V4 Dave Chinner
2010-02-02 23:24 ` [PATCH 01/10] xfs: Make inode reclaim states explicit Dave Chinner
2010-02-05 19:06   ` Alex Elder
2010-02-06  0:07     ` Dave Chinner
2010-02-02 23:24 ` [PATCH 02/10] xfs: Use delayed write for inodes rather than async V2 Dave Chinner
2010-02-03 11:17   ` Christoph Hellwig
2010-02-05 21:38   ` Alex Elder
2010-02-02 23:24 ` [PATCH 03/10] xfs: Don't issue buffer IO direct from AIL push V2 Dave Chinner
2010-02-05 22:51   ` Alex Elder
2010-02-02 23:24 ` [PATCH 04/10] xfs: Sort delayed write buffers before dispatch Dave Chinner
2010-02-05 23:53   ` Alex Elder [this message]
2010-02-02 23:24 ` [PATCH 05/10] xfs: Use delay write promotion for dquot flushing Dave Chinner
2010-02-05 23:55   ` Alex Elder
2010-02-02 23:25 ` [PATCH 06/10] xfs: kill the unused XFS_QMOPT_* flush flags V2 Dave Chinner
2010-02-03 11:17   ` Christoph Hellwig
2010-02-02 23:25 ` [PATCH 07/10] xfs: remove invalid barrier optimization from xfs_fsync Dave Chinner
2010-02-02 23:25 ` [PATCH 08/10] xfs: move the inode locking outside xfs_fsync() Dave Chinner
2010-02-03 11:29   ` Christoph Hellwig
2010-02-03 23:08     ` Dave Chinner
2010-02-04 16:07       ` Christoph Hellwig
2010-02-02 23:25 ` [PATCH 09/10] xfs: xfs_fs_write_inode() can fail to write inodes synchronously V2 Dave Chinner
2010-02-03 11:27   ` Christoph Hellwig
2010-02-03 18:07     ` bpm
2010-02-03 20:55       ` Christoph Hellwig
2010-02-03 20:56     ` Christoph Hellwig
2010-02-03 23:02       ` Dave Chinner
2010-02-04 17:36         ` Christoph Hellwig
2010-02-02 23:25 ` [PATCH 10/10] xfs: kill xfs_bawrite Dave Chinner
2010-02-03 11:19   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1265414014.2714.154.camel@doink1 \
    --to=aelder@sgi.com \
    --cc=david@fromorbit.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox