linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Wu Fengguang <fengguang.wu@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Theodore Tso <tytso@mit.edu>,
	Christoph Hellwig <hch@infradead.org>,
	Dave Chinner <david@fromorbit.com>,
	Chris Mason <chris.mason@oracle.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	"Li Shaohua" <shaohua.li@intel.com>,
	"Myklebust Trond" <Trond.Myklebust@netapp.com>,
	"jens.axboe@oracle.com" <jens.axboe@oracle.com>,
	Jan Kara <jack@suse.cz>, Nick Piggin <npiggin@suse.de>,
	<linux-fsdevel@vger.kernel.org>, David Chinner <dgc@sgi.com>,
	Michael Rubin <mrubin@google.com>,
	Peter Zijlstra <peterz@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>,
	LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 38/45] writeback: introduce queue b_more_io_wait
Date: Wed, 07 Oct 2009 15:38:56 +0800	[thread overview]
Message-ID: <20091007074906.108562976@intel.com> (raw)
In-Reply-To: 20091007073818.318088777@intel.com

[-- Attachment #1: writeback-more_io_wait.patch --]
[-- Type: text/plain, Size: 6145 bytes --]

Introduce the b_more_io_wait queue to park inodes that for some reason
cannot be synced immediately. They will be revisited at the next b_io
scan, or after a 0.1s sleep for sync, or after 5s in the next periodic
writeback.

The new data flow after this patchset:

b_dirty --> b_io --> b_more_io/b_more_io_wait --+
             ^                                  |
	     |                                  |
	     +----------------------------------+

The rationale is to address two issues:
- the 30s delay of redirty_tail() may be too long
- redirty_tail() may update i_dirtied_when; however, we now rely on it
  remaining unchanged for all candidate inodes of sync(). (To avoid extra
  work and livelock, we now exclude any inode from being synced if its
  dirty time is after the sync time.)

Cc: Jan Kara <jack@suse.cz> 
Cc: David Chinner <dgc@sgi.com>
Cc: Michael Rubin <mrubin@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
---
 fs/fs-writeback.c           |   27 ++++++++++++++++-----------
 include/linux/backing-dev.h |    8 +++++---
 mm/backing-dev.c            |   14 +++++++++++---
 3 files changed, 32 insertions(+), 17 deletions(-)

--- linux.orig/fs/fs-writeback.c	2009-10-07 14:31:47.000000000 +0800
+++ linux/fs/fs-writeback.c	2009-10-07 14:32:50.000000000 +0800
@@ -384,6 +384,16 @@ static void requeue_io(struct inode *ino
 	list_move(&inode->i_list, &wb->b_more_io);
 }
 
+/*
+ * The inode should be retried in an opportunistic way.
+ */
+static void requeue_io_wait(struct inode *inode)
+{
+	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+
+	list_move(&inode->i_list, &wb->b_more_io_wait);
+}
+
 static void inode_sync_complete(struct inode *inode)
 {
 	/*
@@ -453,12 +463,14 @@ static void move_expired_inodes(struct l
 /*
  * Queue all expired dirty inodes for io, eldest first:
  * (newly dirtied) => b_dirty inodes
+ *                 => b_more_io_wait inodes
  *                 => b_more_io inodes
  *                 => remaining inodes in b_io => (dequeue for sync)
  */
 static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
 {
 	list_splice_init(&wb->b_more_io, &wb->b_io);
+	list_splice_init(&wb->b_more_io_wait, &wb->b_io);
 	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
 }
 
@@ -860,18 +872,11 @@ static long wb_writeback(struct bdi_writ
 		 */
 		if (nr)
 			continue;
-		/*
-		 * Nothing written. Wait for some inode to
-		 * become available for writeback. Otherwise
-		 * we'll just busyloop.
-		 */
-		spin_lock(&inode_lock);
-		if (!list_empty(&wb->b_more_io))  {
-			inode = list_entry(wb->b_more_io.prev,
-						struct inode, i_list);
-			inode_wait_for_writeback(inode);
+		if (wbc.for_sync && !list_empty(&wb->b_more_io_wait)) {
+			schedule_timeout_interruptible(HZ/10);
+			continue;
 		}
-		spin_unlock(&inode_lock);
+		break;
 	}
 
 	if (args->for_sync)
--- linux.orig/include/linux/backing-dev.h	2009-10-07 14:32:46.000000000 +0800
+++ linux/include/linux/backing-dev.h	2009-10-07 14:32:50.000000000 +0800
@@ -56,6 +56,7 @@ struct bdi_writeback {
 	struct list_head	b_dirty;	/* dirty inodes */
 	struct list_head	b_io;		/* parked for writeback */
 	struct list_head	b_more_io;	/* parked for more writeback */
+	struct list_head	b_more_io_wait;	/* opportunistic retry io */
 };
 
 struct backing_dev_info {
@@ -140,9 +141,10 @@ extern struct list_head bdi_list;
 
 static inline int wb_has_dirty_io(struct bdi_writeback *wb)
 {
-	return !list_empty(&wb->b_dirty) ||
-	       !list_empty(&wb->b_io) ||
-	       !list_empty(&wb->b_more_io);
+	return !list_empty(&wb->b_dirty)	||
+	       !list_empty(&wb->b_io)		||
+	       !list_empty(&wb->b_more_io)	||
+	       !list_empty(&wb->b_more_io_wait);
 }
 
 static inline void __add_bdi_stat(struct backing_dev_info *bdi,
--- linux.orig/mm/backing-dev.c	2009-10-07 14:32:46.000000000 +0800
+++ linux/mm/backing-dev.c	2009-10-07 14:32:50.000000000 +0800
@@ -63,14 +63,17 @@ static int bdi_debug_stats_show(struct s
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
-	unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
+	unsigned long nr_dirty		= 0;
+	unsigned long nr_io		= 0;
+	unsigned long nr_more_io	= 0;
+	unsigned long nr_more_io_wait	= 0;
+	unsigned long nr_wb		= 0;
 	struct inode *inode;
 
 	/*
 	 * inode lock is enough here, the bdi->wb_list is protected by
 	 * RCU on the reader side
 	 */
-	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
 	spin_lock(&inode_lock);
 	list_for_each_entry(wb, &bdi->wb_list, list) {
 		nr_wb++;
@@ -80,6 +83,8 @@ static int bdi_debug_stats_show(struct s
 			nr_io++;
 		list_for_each_entry(inode, &wb->b_more_io, i_list)
 			nr_more_io++;
+		list_for_each_entry(inode, &wb->b_more_io_wait, i_list)
+			nr_more_io_wait++;
 	}
 	spin_unlock(&inode_lock);
 
@@ -98,6 +103,7 @@ static int bdi_debug_stats_show(struct s
 		   "b_dirty:          %8lu\n"
 		   "b_io:             %8lu\n"
 		   "b_more_io:        %8lu\n"
+		   "b_more_io_wait:   %8lu\n"
 		   "bdi_list:         %8u\n"
 		   "state:            %8lx\n"
 		   "wb_mask:          %8lx\n"
@@ -107,7 +113,7 @@ static int bdi_debug_stats_show(struct s
 		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
 		   K(bdi_thresh), K(dirty_thresh), K(background_thresh),
 		   (unsigned long) K(bdi->write_bandwidth),
-		   nr_wb, nr_dirty, nr_io, nr_more_io,
+		   nr_wb, nr_dirty, nr_io, nr_more_io, nr_more_io_wait,
 		   !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
 		   !list_empty(&bdi->wb_list), bdi->wb_cnt);
 #undef K
@@ -264,6 +270,7 @@ static void bdi_wb_init(struct bdi_write
 	INIT_LIST_HEAD(&wb->b_dirty);
 	INIT_LIST_HEAD(&wb->b_io);
 	INIT_LIST_HEAD(&wb->b_more_io);
+	INIT_LIST_HEAD(&wb->b_more_io_wait);
 }
 
 static void bdi_task_init(struct backing_dev_info *bdi,
@@ -688,6 +695,7 @@ void bdi_destroy(struct backing_dev_info
 		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
 		list_splice(&bdi->wb.b_io, &dst->b_io);
 		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
+		list_splice(&bdi->wb.b_more_io_wait, &dst->b_more_io_wait);
 		spin_unlock(&inode_lock);
 	}
 

  parent reply	other threads:[~2009-10-07  7:38 UTC|newest]

Thread overview: 116+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-07  7:38 [PATCH 00/45] some writeback experiments Wu Fengguang
2009-10-07  7:38 ` [PATCH 01/45] writeback: reduce calls to global_page_state in balance_dirty_pages() Wu Fengguang
2009-10-09 15:12   ` Jan Kara
2009-10-09 15:18     ` Peter Zijlstra
2009-10-09 15:47       ` Jan Kara
2009-10-11  2:28         ` Wu Fengguang
2009-10-11  7:44           ` Peter Zijlstra
2009-10-11 10:50             ` Wu Fengguang
2009-10-11 10:58               ` Peter Zijlstra
2009-10-11 11:25               ` Peter Zijlstra
2009-10-12  1:26                 ` Wu Fengguang
2009-10-12  9:07                   ` Peter Zijlstra
2009-10-12  9:24                     ` Wu Fengguang
2009-10-10 21:33     ` Wu Fengguang
2009-10-12 21:18       ` Jan Kara
2009-10-13  3:24         ` Wu Fengguang
2009-10-13  8:41           ` Peter Zijlstra
2009-10-13 18:12           ` Jan Kara
2009-10-13 18:28             ` Peter Zijlstra
2009-10-14  1:38               ` Wu Fengguang
2009-10-14 11:22                 ` Peter Zijlstra
2009-10-17  5:30                   ` Wu Fengguang
2009-10-07  7:38 ` [PATCH 02/45] writeback: reduce calculation of bdi dirty thresholds Wu Fengguang
2009-10-07  7:38 ` [PATCH 03/45] ext4: remove unused parameter wbc from __ext4_journalled_writepage() Wu Fengguang
2009-10-07  7:38 ` [PATCH 04/45] writeback: remove unused nonblocking and congestion checks Wu Fengguang
2009-10-09 15:26   ` Jan Kara
2009-10-10 13:47     ` Wu Fengguang
2009-10-07  7:38 ` [PATCH 05/45] writeback: remove the always false bdi_cap_writeback_dirty() test Wu Fengguang
2009-10-07  7:38 ` [PATCH 06/45] writeback: use larger ratelimit when dirty_exceeded Wu Fengguang
2009-10-07  8:53   ` Peter Zijlstra
2009-10-07  9:17     ` Wu Fengguang
2009-10-07  7:38 ` [PATCH 07/45] writeback: dont redirty tail an inode with dirty pages Wu Fengguang
2009-10-09 15:45   ` Jan Kara
2009-10-07  7:38 ` [PATCH 08/45] writeback: quit on wrap for .range_cyclic (write_cache_pages) Wu Fengguang
2009-10-07  7:38 ` [PATCH 09/45] writeback: quit on wrap for .range_cyclic (pohmelfs) Wu Fengguang
2009-10-07 12:32   ` Evgeniy Polyakov
2009-10-07 14:23     ` Wu Fengguang
2009-10-07  7:38 ` [PATCH 10/45] writeback: quit on wrap for .range_cyclic (btrfs) Wu Fengguang
2009-10-07  7:38 ` [PATCH 11/45] writeback: quit on wrap for .range_cyclic (cifs) Wu Fengguang
2009-10-07  7:38 ` [PATCH 12/45] writeback: quit on wrap for .range_cyclic (ext4) Wu Fengguang
2009-10-07  7:38 ` [PATCH 13/45] writeback: quit on wrap for .range_cyclic (gfs2) Wu Fengguang
2009-10-07  7:38 ` [PATCH 14/45] writeback: quit on wrap for .range_cyclic (afs) Wu Fengguang
2009-10-07  7:38 ` [PATCH 15/45] writeback: fix queue_io() ordering Wu Fengguang
2009-10-07  7:38 ` [PATCH 16/45] writeback: merge for_kupdate and !for_kupdate cases Wu Fengguang
2009-10-07  7:38 ` [PATCH 17/45] writeback: only allow two background writeback works Wu Fengguang
2009-10-07  7:38 ` [PATCH 18/45] writeback: introduce wait queue for balance_dirty_pages() Wu Fengguang
2009-10-08  1:01   ` KAMEZAWA Hiroyuki
2009-10-08  1:58     ` Wu Fengguang
2009-10-08  2:40       ` KAMEZAWA Hiroyuki
2009-10-08  4:01         ` Wu Fengguang
2009-10-08  5:59           ` KAMEZAWA Hiroyuki
2009-10-08  6:07             ` Wu Fengguang
2009-10-08  6:28             ` Wu Fengguang
2009-10-08  6:39               ` KAMEZAWA Hiroyuki
2009-10-08  8:08       ` Peter Zijlstra
2009-10-08  8:11         ` KAMEZAWA Hiroyuki
2009-10-08  8:36         ` Jens Axboe
2009-10-09  2:52           ` [PATCH] writeback: account IO throttling wait as iowait Wu Fengguang
2009-10-09 10:41             ` Jens Axboe
2009-10-09 10:58               ` Wu Fengguang
2009-10-09 11:01                 ` Jens Axboe
2009-10-08  8:05     ` [PATCH 18/45] writeback: introduce wait queue for balance_dirty_pages() Peter Zijlstra
2009-10-07  7:38 ` [PATCH 19/45] writeback: remove the loop in balance_dirty_pages() Wu Fengguang
2009-10-07  7:38 ` [PATCH 20/45] NFS: introduce writeback wait queue Wu Fengguang
2009-10-07  8:53   ` Peter Zijlstra
2009-10-07  9:07     ` Wu Fengguang
2009-10-07  9:15       ` Peter Zijlstra
2009-10-07  9:19         ` Wu Fengguang
2009-10-07  9:17       ` Nick Piggin
2009-10-07  9:52         ` Wu Fengguang
2009-10-07  7:38 ` [PATCH 21/45] writeback: estimate bdi write bandwidth Wu Fengguang
2009-10-07  8:53   ` Peter Zijlstra
2009-10-07  9:39     ` Wu Fengguang
2009-10-07  7:38 ` [PATCH 22/45] writeback: show bdi write bandwidth in debugfs Wu Fengguang
2009-10-07  7:38 ` [PATCH 23/45] writeback: kill space in debugfs item name Wu Fengguang
2009-10-07  7:38 ` [PATCH 24/45] writeback: remove global nr_to_write and use timeout instead Wu Fengguang
2009-10-07  7:38 ` [PATCH 25/45] writeback: convert wbc.nr_to_write to per-file parameter Wu Fengguang
2009-10-07  7:38 ` [PATCH 26/45] block: pass the non-rotational queue flag to backing_dev_info Wu Fengguang
2009-10-07  7:38 ` [PATCH 27/45] writeback: introduce wbc.for_background Wu Fengguang
2009-10-07  7:38 ` [PATCH 28/45] writeback: introduce wbc.nr_segments Wu Fengguang
2009-10-07  7:38 ` [PATCH 29/45] writeback: fix the shmem AOP_WRITEPAGE_ACTIVATE case Wu Fengguang
2009-10-07 11:57   ` Hugh Dickins
2009-10-07 14:00     ` Wu Fengguang
2009-10-07  7:38 ` [PATCH 30/45] vmscan: lumpy pageout Wu Fengguang
2009-10-07  7:38 ` [PATCH 31/45] writeback: sync old inodes first in background writeback Wu Fengguang
2010-07-12  3:01   ` Christoph Hellwig
2010-07-12 15:24     ` Wu Fengguang
2009-10-07  7:38 ` [PATCH 32/45] writeback: update kupdate expire timestamp on each scan of b_io Wu Fengguang
2009-10-07  7:38 ` [PATCH 34/45] writeback: sync livelock - kick background writeback Wu Fengguang
2009-10-07  7:38 ` [PATCH 35/45] writeback: sync livelock - use single timestamp for whole sync work Wu Fengguang
2009-10-07  7:38 ` [PATCH 36/45] writeback: sync livelock - curb dirty speed for inodes to be synced Wu Fengguang
2009-10-07  7:38 ` [PATCH 37/45] writeback: use timestamp to indicate dirty exceeded Wu Fengguang
2009-10-07  7:38 ` Wu Fengguang [this message]
2009-10-07  7:38 ` [PATCH 39/45] writeback: remove wbc.more_io Wu Fengguang
2009-10-07  7:38 ` [PATCH 40/45] writeback: requeue_io_wait() on I_SYNC locked inode Wu Fengguang
2009-10-07  7:38 ` [PATCH 41/45] writeback: requeue_io_wait() on pages_skipped inode Wu Fengguang
2009-10-07  7:39 ` [PATCH 42/45] writeback: requeue_io_wait() on blocked inode Wu Fengguang
2009-10-07  7:39 ` [PATCH 43/45] writeback: requeue_io_wait() on fs redirtied inode Wu Fengguang
2009-10-07  7:39 ` [PATCH 44/45] NFS: remove NFS_INO_FLUSHING lock Wu Fengguang
2009-10-07 13:11   ` Peter Staubach
2009-10-07 13:32     ` Wu Fengguang
2009-10-07 13:59       ` Peter Staubach
2009-10-08  1:44         ` Wu Fengguang
2009-10-07  7:39 ` [PATCH 45/45] btrfs: fix race on syncing the btree inode Wu Fengguang
2009-10-07  8:53 ` [PATCH 00/45] some writeback experiments Peter Zijlstra
2009-10-07 10:17 ` [PATCH 14/45] writeback: quit on wrap for .range_cyclic (afs) David Howells
2009-10-07 10:21   ` Nick Piggin
2009-10-07 10:47     ` Wu Fengguang
2009-10-07 11:23       ` Nick Piggin
2009-10-07 12:21         ` Wu Fengguang
2009-10-07 13:47 ` [PATCH 00/45] some writeback experiments Peter Staubach
2009-10-07 15:18   ` Wu Fengguang
2009-10-08  5:33     ` Wu Fengguang
2009-10-08  5:44       ` Wu Fengguang
2009-10-07 14:26 ` Theodore Tso
2009-10-07 14:45   ` Wu Fengguang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091007074906.108562976@intel.com \
    --to=fengguang.wu@intel.com \
    --cc=Trond.Myklebust@netapp.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=chris.mason@oracle.com \
    --cc=david@fromorbit.com \
    --cc=dgc@sgi.com \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=jens.axboe@oracle.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=mrubin@google.com \
    --cc=npiggin@suse.de \
    --cc=peterz@infradead.org \
    --cc=shaohua.li@intel.com \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).