From: Alex Elder <aelder@sgi.com>
To: Dave Chinner <david@fromorbit.com>
Cc: xfs@oss.sgi.com
Subject: Re: [PATCH 04/10] xfs: Sort delayed write buffers before dispatch
Date: Fri, 05 Feb 2010 17:53:34 -0600 [thread overview]
Message-ID: <1265414014.2714.154.camel@doink1> (raw)
In-Reply-To: <1265153104-29680-5-git-send-email-david@fromorbit.com>
On Wed, 2010-02-03 at 10:24 +1100, Dave Chinner wrote:
> Currently when the xfsbufd writes delayed write buffers, it pushes
> them to disk in the order they come off the delayed write list. If
> there are lots of buffers spread widely over the disk, this results
> in overwhelming the elevator sort queues in the block layer and we
> end up losing the possibility of merging adjacent buffers to minimise
> the number of IOs.
>
> Use the new generic list_sort function to sort the delwri dispatch
> queue before issue to ensure that the buffers are pushed in the most
> friendly order possible to the lower layers.
Looks good.
> Signed-off-by: Dave Chinner <david@fromorbit.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
> ---
> fs/xfs/linux-2.6/xfs_buf.c | 87 ++++++++++++++++++++++++++++++--------------
> 1 files changed, 60 insertions(+), 27 deletions(-)
>
> diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
> index b306265..4556a4c 100644
> --- a/fs/xfs/linux-2.6/xfs_buf.c
> +++ b/fs/xfs/linux-2.6/xfs_buf.c
> @@ -33,6 +33,7 @@
> #include <linux/migrate.h>
> #include <linux/backing-dev.h>
> #include <linux/freezer.h>
> +#include <linux/list_sort.h>
>
> #include "xfs_sb.h"
> #include "xfs_inum.h"
> @@ -1877,14 +1878,42 @@ xfs_buf_delwri_split(
>
> }
>
> +/*
> + * Compare function is more complex than it needs to be because
> + * the return value is only 32 bits and we are doing comparisons
> + * on 64 bit values
> + */
> +static int
> +xfs_buf_cmp(
> + void *priv,
> + struct list_head *a,
> + struct list_head *b)
> +{
> + struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
> + struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
> + xfs_daddr_t diff;
> +
> + diff = ap->b_bn - bp->b_bn;
> + if (diff < 0)
> + return -1;
> + if (diff > 0)
> + return 1;
> + return 0;
> +}
> +
> +void
> +xfs_buf_delwri_sort(
> + xfs_buftarg_t *target,
> + struct list_head *list)
> +{
> + list_sort(NULL, list, xfs_buf_cmp);
> +}
> +
> STATIC int
> xfsbufd(
> void *data)
> {
> - struct list_head tmp;
> - xfs_buftarg_t *target = (xfs_buftarg_t *)data;
> - int count;
> - xfs_buf_t *bp;
> + xfs_buftarg_t *target = (xfs_buftarg_t *)data;
>
> current->flags |= PF_MEMALLOC;
>
> @@ -1893,6 +1922,8 @@ xfsbufd(
> do {
> long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
> long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
> + int count = 0;
> + struct list_head tmp;
>
> if (unlikely(freezing(current))) {
> set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
> @@ -1907,11 +1938,10 @@ xfsbufd(
> schedule_timeout_interruptible(tout);
>
> xfs_buf_delwri_split(target, &tmp, age);
> - count = 0;
> + list_sort(NULL, &tmp, xfs_buf_cmp);
> while (!list_empty(&tmp)) {
> - bp = list_entry(tmp.next, xfs_buf_t, b_list);
> - ASSERT(target == bp->b_target);
> -
> + struct xfs_buf *bp;
> + bp = list_first_entry(&tmp, struct xfs_buf, b_list);
> list_del_init(&bp->b_list);
> xfs_buf_iostrategy(bp);
> count++;
> @@ -1937,42 +1967,45 @@ xfs_flush_buftarg(
> xfs_buftarg_t *target,
> int wait)
> {
> - struct list_head tmp;
> - xfs_buf_t *bp, *n;
> + xfs_buf_t *bp;
> int pincount = 0;
> + LIST_HEAD(tmp_list);
> + LIST_HEAD(wait_list);
>
> xfs_buf_runall_queues(xfsconvertd_workqueue);
> xfs_buf_runall_queues(xfsdatad_workqueue);
> xfs_buf_runall_queues(xfslogd_workqueue);
>
> set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
> - pincount = xfs_buf_delwri_split(target, &tmp, 0);
> + pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
>
> /*
> - * Dropped the delayed write list lock, now walk the temporary list
> + * Dropped the delayed write list lock, now walk the temporary list.
> + * All I/O is issued async and then if we need to wait for completion
> + * we do that after issuing all the IO.
> */
> - list_for_each_entry_safe(bp, n, &tmp, b_list) {
> + list_sort(NULL, &tmp_list, xfs_buf_cmp);
> + while (!list_empty(&tmp_list)) {
> + bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
> ASSERT(target == bp->b_target);
> - if (wait)
> + list_del_init(&bp->b_list);
> + if (wait) {
> bp->b_flags &= ~XBF_ASYNC;
> - else
> - list_del_init(&bp->b_list);
> -
> + list_add(&bp->b_list, &wait_list);
> + }
> xfs_buf_iostrategy(bp);
> }
>
> - if (wait)
> + if (wait) {
> + /* Expedite and wait for IO to complete. */
> blk_run_address_space(target->bt_mapping);
> + while (!list_empty(&wait_list)) {
> + bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
>
> - /*
> - * Remaining list items must be flushed before returning
> - */
> - while (!list_empty(&tmp)) {
> - bp = list_entry(tmp.next, xfs_buf_t, b_list);
> -
> - list_del_init(&bp->b_list);
> - xfs_iowait(bp);
> - xfs_buf_relse(bp);
> + list_del_init(&bp->b_list);
> + xfs_iowait(bp);
> + xfs_buf_relse(bp);
> + }
> }
>
> return pincount;
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next prev parent reply other threads:[~2010-02-05 23:52 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-02-02 23:24 [PATCH 0/10] Delayed write metadata writeback V4 Dave Chinner
2010-02-02 23:24 ` [PATCH 01/10] xfs: Make inode reclaim states explicit Dave Chinner
2010-02-05 19:06 ` Alex Elder
2010-02-06 0:07 ` Dave Chinner
2010-02-02 23:24 ` [PATCH 02/10] xfs: Use delayed write for inodes rather than async V2 Dave Chinner
2010-02-03 11:17 ` Christoph Hellwig
2010-02-05 21:38 ` Alex Elder
2010-02-02 23:24 ` [PATCH 03/10] xfs: Don't issue buffer IO direct from AIL push V2 Dave Chinner
2010-02-05 22:51 ` Alex Elder
2010-02-02 23:24 ` [PATCH 04/10] xfs: Sort delayed write buffers before dispatch Dave Chinner
2010-02-05 23:53 ` Alex Elder [this message]
2010-02-02 23:24 ` [PATCH 05/10] xfs: Use delay write promotion for dquot flushing Dave Chinner
2010-02-05 23:55 ` Alex Elder
2010-02-02 23:25 ` [PATCH 06/10] xfs: kill the unused XFS_QMOPT_* flush flags V2 Dave Chinner
2010-02-03 11:17 ` Christoph Hellwig
2010-02-02 23:25 ` [PATCH 07/10] xfs: remove invalid barrier optimization from xfs_fsync Dave Chinner
2010-02-02 23:25 ` [PATCH 08/10] xfs: move the inode locking outside xfs_fsync() Dave Chinner
2010-02-03 11:29 ` Christoph Hellwig
2010-02-03 23:08 ` Dave Chinner
2010-02-04 16:07 ` Christoph Hellwig
2010-02-02 23:25 ` [PATCH 09/10] xfs: xfs_fs_write_inode() can fail to write inodes synchronously V2 Dave Chinner
2010-02-03 11:27 ` Christoph Hellwig
2010-02-03 18:07 ` bpm
2010-02-03 20:55 ` Christoph Hellwig
2010-02-03 20:56 ` Christoph Hellwig
2010-02-03 23:02 ` Dave Chinner
2010-02-04 17:36 ` Christoph Hellwig
2010-02-02 23:25 ` [PATCH 10/10] xfs: kill xfs_bawrite Dave Chinner
2010-02-03 11:19 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1265414014.2714.154.camel@doink1 \
--to=aelder@sgi.com \
--cc=david@fromorbit.com \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox