public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 3/3] xfs: Sort delayed write buffers before dispatch
Date: Tue,  5 Jan 2010 11:04:21 +1100	[thread overview]
Message-ID: <1262649861-28530-4-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1262649861-28530-1-git-send-email-david@fromorbit.com>

Currently when the xfsbufd writes delayed write buffers, it pushes
them to disk in the order they come off the delayed write list. If
there are lots of buffers spread widely over the disk, this results
in overwhelming the elevator sort queues in the block layer and we
end up losing the possibility of merging adjacent buffers to minimise
the number of IOs.

Use the new generic list_sort function to sort the delwri dispatch
queue before issue to ensure that the buffers are pushed in the most
friendly order possible to the lower layers.

Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/linux-2.6/xfs_buf.c |   90 +++++++++++++++++++++++++++++++------------
 1 files changed, 65 insertions(+), 25 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index aaefc33..15c0dea 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,6 +33,7 @@
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
 #include <linux/freezer.h>
+#include <linux/sort.h>
 
 #include "xfs_sb.h"
 #include "xfs_inum.h"
@@ -1644,12 +1645,18 @@ xfs_buf_delwri_promote(
 	spinlock_t	*dwlk = &bp->b_target->bt_delwrite_lock;
 	long		age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
 
-	spin_lock(dwlk);
 	ASSERT(bp->b_flags & XBF_DELWRI);
 	ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-	list_del(&bp->b_list);
-	list_add(&bp->b_list, dwq);
+
+	/*
+	 * Check the buffer age before locking the delayed write queue as we
+	 * don't need to promote buffers that are already past the flush age.
+	 */
+	if (bp->b_queuetime < jiffies - age)
+		return;
 	bp->b_queuetime = jiffies - age;
+	spin_lock(dwlk);
+	list_move(&bp->b_list, dwq);
 	spin_unlock(dwlk);
 }
 
@@ -1723,14 +1730,42 @@ xfs_buf_delwri_split(
 
 }
 
+/*
+ * Compare function is more complex than it needs to be because
+ * the return value is only 32 bits and we are doing comparisons
+ * on 64 bit values
+ */
+int
+xfs_buf_cmp(
+	void		*priv,
+	struct list_head *a,
+	struct list_head *b)
+{
+	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
+	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
+	xfs_daddr_t		diff;
+
+	diff = ap->b_bn - bp->b_bn;
+	if (diff < 0)
+		return -1;
+	if (diff > 0)
+		return 1;
+	return 0;
+}
+
+void
+xfs_buf_delwri_sort(
+	xfs_buftarg_t	*target,
+	struct list_head *list)
+{
+	list_sort(NULL, list, xfs_buf_cmp);
+}
+
 STATIC int
 xfsbufd(
 	void		*data)
 {
-	struct list_head tmp;
-	xfs_buftarg_t	*target = (xfs_buftarg_t *)data;
-	int		count;
-	xfs_buf_t	*bp;
+	xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
 
 	current->flags |= PF_MEMALLOC;
 
@@ -1739,6 +1774,8 @@ xfsbufd(
 	do {
 		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
 		long	tout = age;
+		int	count = 0;
+		struct list_head tmp;
 
 		if (unlikely(freezing(current))) {
 			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1753,11 +1790,10 @@ xfsbufd(
 		schedule_timeout_interruptible(tout);
 
 		xfs_buf_delwri_split(target, &tmp, age);
-		count = 0;
+		xfs_buf_delwri_sort(target, &tmp);
 		while (!list_empty(&tmp)) {
-			bp = list_entry(tmp.next, xfs_buf_t, b_list);
-			ASSERT(target == bp->b_target);
-
+			struct xfs_buf *bp;
+			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
 			list_del_init(&bp->b_list);
 			xfs_buf_iostrategy(bp);
 			count++;
@@ -1783,38 +1819,42 @@ xfs_flush_buftarg(
 	xfs_buftarg_t	*target,
 	int		wait)
 {
-	struct list_head tmp;
-	xfs_buf_t	*bp, *n;
+	xfs_buf_t	*bp;
 	int		pincount = 0;
+	LIST_HEAD(tmp_list);
+	LIST_HEAD(wait_list);
 
 	xfs_buf_runall_queues(xfsconvertd_workqueue);
 	xfs_buf_runall_queues(xfsdatad_workqueue);
 	xfs_buf_runall_queues(xfslogd_workqueue);
 
 	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	pincount = xfs_buf_delwri_split(target, &tmp, 0);
+	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
 
 	/*
-	 * Dropped the delayed write list lock, now walk the temporary list
+	 * Dropped the delayed write list lock, now walk the temporary list.
+	 * All I/O is issued async and then if we need to wait for completion
+	 * we do that after issuing all the IO.
 	 */
-	list_for_each_entry_safe(bp, n, &tmp, b_list) {
+	xfs_buf_delwri_sort(target, &tmp_list);
+	while (!list_empty(&tmp_list)) {
+		struct xfs_buf *bp;
+		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
 		ASSERT(target == bp->b_target);
-		if (wait)
+		list_del_init(&bp->b_list);
+		if (wait) {
 			bp->b_flags &= ~XBF_ASYNC;
-		else
-			list_del_init(&bp->b_list);
-
+			list_add(&bp->b_list, &wait_list);
+		}
 		xfs_buf_iostrategy(bp);
 	}
 
 	if (wait)
 		blk_run_address_space(target->bt_mapping);
 
-	/*
-	 * Remaining list items must be flushed before returning
-	 */
-	while (!list_empty(&tmp)) {
-		bp = list_entry(tmp.next, xfs_buf_t, b_list);
+	/* Now wait for IO to complete if required. */
+	while (!list_empty(&wait_list)) {
+		bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
 
 		list_del_init(&bp->b_list);
 		xfs_iowait(bp);
-- 
1.6.5

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2010-01-05  0:07 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-01-05  0:04 [PATCH 0/3] Kill async inode writeback V2 Dave Chinner
2010-01-05  0:04 ` [PATCH 1/3] xfs: Use delayed write for inodes rather than async Dave Chinner
2010-01-08 10:36   ` Christoph Hellwig
2010-01-08 11:05     ` Dave Chinner
2010-01-08 11:14       ` Christoph Hellwig
2010-01-05  0:04 ` [PATCH 2/3] xfs: Don't issue buffer IO direct from AIL push Dave Chinner
2010-01-08 11:07   ` Christoph Hellwig
2010-01-08 11:15     ` Dave Chinner
2010-01-05  0:04 ` Dave Chinner [this message]
2010-01-08 11:11   ` [PATCH 3/3] xfs: Sort delayed write buffers before dispatch Christoph Hellwig
2010-01-08 11:17     ` Dave Chinner
2010-01-06 18:08 ` [PATCH 0/3] Kill async inode writeback V2 Christoph Hellwig
2010-01-06 22:49   ` Dave Chinner
2010-01-08 10:14     ` Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2010-01-02  3:03 [RFC, PATCH 0/3] Kill async inode writeback Dave Chinner
2010-01-02  3:03 ` [PATCH 3/3] XFS: Sort delayed write buffers before dispatch Dave Chinner
2010-01-02 13:08   ` Andi Kleen
2010-01-02 14:14     ` Dave Chinner
2010-01-04 15:43   ` Christoph Hellwig
2010-01-04 23:53     ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1262649861-28530-4-git-send-email-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox