From: Alex Elder <aelder@sgi.com>
To: Dave Chinner <david@fromorbit.com>
Cc: xfs@oss.sgi.com
Subject: Re: [PATCH 04/10] xfs: Sort delayed write buffers before dispatch
Date: Fri, 05 Feb 2010 17:53:34 -0600 [thread overview]
Message-ID: <1265414014.2714.154.camel@doink1> (raw)
In-Reply-To: <1265153104-29680-5-git-send-email-david@fromorbit.com>
On Wed, 2010-02-03 at 10:24 +1100, Dave Chinner wrote:
> Currently when the xfsbufd writes delayed write buffers, it pushes
> them to disk in the order they come off the delayed write list. If
> there are lots of buffers spread widely over the disk, this results
> in overwhelming the elevator sort queues in the block layer and we
> end up losing the possibility of merging adjacent buffers to minimise
> the number of IOs.
>
> Use the new generic list_sort function to sort the delwri dispatch
> queue before issue to ensure that the buffers are pushed in the most
> friendly order possible to the lower layers.
Looks good.
> Signed-off-by: Dave Chinner <david@fromorbit.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
> ---
> fs/xfs/linux-2.6/xfs_buf.c | 87 ++++++++++++++++++++++++++++++--------------
> 1 files changed, 60 insertions(+), 27 deletions(-)
>
> diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
> index b306265..4556a4c 100644
> --- a/fs/xfs/linux-2.6/xfs_buf.c
> +++ b/fs/xfs/linux-2.6/xfs_buf.c
> @@ -33,6 +33,7 @@
> #include <linux/migrate.h>
> #include <linux/backing-dev.h>
> #include <linux/freezer.h>
> +#include <linux/list_sort.h>
>
> #include "xfs_sb.h"
> #include "xfs_inum.h"
> @@ -1877,14 +1878,42 @@ xfs_buf_delwri_split(
>
> }
>
> +/*
> + * Compare function is more complex than it needs to be because
> + * the return value is only 32 bits and we are doing comparisons
> + * on 64 bit values
> + */
> +static int
> +xfs_buf_cmp(
> + void *priv,
> + struct list_head *a,
> + struct list_head *b)
> +{
> + struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
> + struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
> + xfs_daddr_t diff;
> +
> + diff = ap->b_bn - bp->b_bn;
> + if (diff < 0)
> + return -1;
> + if (diff > 0)
> + return 1;
> + return 0;
> +}
> +
> +void
> +xfs_buf_delwri_sort(
> + xfs_buftarg_t *target,
> + struct list_head *list)
> +{
> + list_sort(NULL, list, xfs_buf_cmp);
> +}
> +
> STATIC int
> xfsbufd(
> void *data)
> {
> - struct list_head tmp;
> - xfs_buftarg_t *target = (xfs_buftarg_t *)data;
> - int count;
> - xfs_buf_t *bp;
> + xfs_buftarg_t *target = (xfs_buftarg_t *)data;
>
> current->flags |= PF_MEMALLOC;
>
> @@ -1893,6 +1922,8 @@ xfsbufd(
> do {
> long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
> long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
> + int count = 0;
> + struct list_head tmp;
>
> if (unlikely(freezing(current))) {
> set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
> @@ -1907,11 +1938,10 @@ xfsbufd(
> schedule_timeout_interruptible(tout);
>
> xfs_buf_delwri_split(target, &tmp, age);
> - count = 0;
> + list_sort(NULL, &tmp, xfs_buf_cmp);
> while (!list_empty(&tmp)) {
> - bp = list_entry(tmp.next, xfs_buf_t, b_list);
> - ASSERT(target == bp->b_target);
> -
> + struct xfs_buf *bp;
> + bp = list_first_entry(&tmp, struct xfs_buf, b_list);
> list_del_init(&bp->b_list);
> xfs_buf_iostrategy(bp);
> count++;
> @@ -1937,42 +1967,45 @@ xfs_flush_buftarg(
> xfs_buftarg_t *target,
> int wait)
> {
> - struct list_head tmp;
> - xfs_buf_t *bp, *n;
> + xfs_buf_t *bp;
> int pincount = 0;
> + LIST_HEAD(tmp_list);
> + LIST_HEAD(wait_list);
>
> xfs_buf_runall_queues(xfsconvertd_workqueue);
> xfs_buf_runall_queues(xfsdatad_workqueue);
> xfs_buf_runall_queues(xfslogd_workqueue);
>
> set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
> - pincount = xfs_buf_delwri_split(target, &tmp, 0);
> + pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
>
> /*
> - * Dropped the delayed write list lock, now walk the temporary list
> + * Dropped the delayed write list lock, now walk the temporary list.
> + * All I/O is issued async and then if we need to wait for completion
> + * we do that after issuing all the IO.
> */
> - list_for_each_entry_safe(bp, n, &tmp, b_list) {
> + list_sort(NULL, &tmp_list, xfs_buf_cmp);
> + while (!list_empty(&tmp_list)) {
> + bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
> ASSERT(target == bp->b_target);
> - if (wait)
> + list_del_init(&bp->b_list);
> + if (wait) {
> bp->b_flags &= ~XBF_ASYNC;
> - else
> - list_del_init(&bp->b_list);
> -
> + list_add(&bp->b_list, &wait_list);
> + }
> xfs_buf_iostrategy(bp);
> }
>
> - if (wait)
> + if (wait) {
> + /* Expedite and wait for IO to complete. */
> blk_run_address_space(target->bt_mapping);
> + while (!list_empty(&wait_list)) {
> + bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
>
> - /*
> - * Remaining list items must be flushed before returning
> - */
> - while (!list_empty(&tmp)) {
> - bp = list_entry(tmp.next, xfs_buf_t, b_list);
> -
> - list_del_init(&bp->b_list);
> - xfs_iowait(bp);
> - xfs_buf_relse(bp);
> + list_del_init(&bp->b_list);
> + xfs_iowait(bp);
> + xfs_buf_relse(bp);
> + }
> }
>
> return pincount;
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next prev parent reply other threads:[~2010-02-05 23:52 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-02-02 23:24 [PATCH 0/10] Delayed write metadata writeback V4 Dave Chinner
2010-02-02 23:24 ` [PATCH 01/10] xfs: Make inode reclaim states explicit Dave Chinner
2010-02-05 19:06 ` Alex Elder
2010-02-06 0:07 ` Dave Chinner
2010-02-02 23:24 ` [PATCH 02/10] xfs: Use delayed write for inodes rather than async V2 Dave Chinner
2010-02-03 11:17 ` Christoph Hellwig
2010-02-05 21:38 ` Alex Elder
2010-02-02 23:24 ` [PATCH 03/10] xfs: Don't issue buffer IO direct from AIL push V2 Dave Chinner
2010-02-05 22:51 ` Alex Elder
2010-02-02 23:24 ` [PATCH 04/10] xfs: Sort delayed write buffers before dispatch Dave Chinner
2010-02-05 23:53 ` Alex Elder [this message]
2010-02-02 23:24 ` [PATCH 05/10] xfs: Use delay write promotion for dquot flushing Dave Chinner
2010-02-05 23:55 ` Alex Elder
2010-02-02 23:25 ` [PATCH 06/10] xfs: kill the unused XFS_QMOPT_* flush flags V2 Dave Chinner
2010-02-03 11:17 ` Christoph Hellwig
2010-02-02 23:25 ` [PATCH 07/10] xfs: remove invalid barrier optimization from xfs_fsync Dave Chinner
2010-02-02 23:25 ` [PATCH 08/10] xfs: move the inode locking outside xfs_fsync() Dave Chinner
2010-02-03 11:29 ` Christoph Hellwig
2010-02-03 23:08 ` Dave Chinner
2010-02-04 16:07 ` Christoph Hellwig
2010-02-02 23:25 ` [PATCH 09/10] xfs: xfs_fs_write_inode() can fail to write inodes synchronously V2 Dave Chinner
2010-02-03 11:27 ` Christoph Hellwig
2010-02-03 18:07 ` bpm
2010-02-03 20:55 ` Christoph Hellwig
2010-02-03 20:56 ` Christoph Hellwig
2010-02-03 23:02 ` Dave Chinner
2010-02-04 17:36 ` Christoph Hellwig
2010-02-02 23:25 ` [PATCH 10/10] xfs: kill xfs_bawrite Dave Chinner
2010-02-03 11:19 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1265414014.2714.154.camel@doink1 \
--to=aelder@sgi.com \
--cc=david@fromorbit.com \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.