linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mel@csn.ul.ie>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, Dave Chinner <david@fromorbit.com>,
	Chris Mason <chris.mason@oracle.com>,
	Nick Piggin <npiggin@suse.de>, Rik van Riel <riel@redhat.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Christoph Hellwig <hch@infradead.org>,
	Wu Fengguang <fengguang.wu@intel.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Mel Gorman <mel@csn.ul.ie>
Subject: [PATCH 9/9] writeback: Prioritise dirty inodes encountered by reclaim for background flushing
Date: Wed, 28 Jul 2010 11:27:23 +0100	[thread overview]
Message-ID: <1280312843-11789-10-git-send-email-mel@csn.ul.ie> (raw)
In-Reply-To: <1280312843-11789-1-git-send-email-mel@csn.ul.ie>

It is preferable that as few dirty pages as possible are dispatched for
cleaning from the page reclaim path. When dirty pages are encountered by
page reclaim, this patch marks their inodes for immediate dispatch. When
the background flusher runs, it moves such inodes immediately to the dispatch
queue regardless of inode age.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
---
 fs/fs-writeback.c         |   52 ++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/fs.h        |    5 ++-
 include/linux/writeback.h |    1 +
 mm/vmscan.c               |    6 +++-
 4 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5a3c764..27a8b75 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -221,7 +221,7 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 	LIST_HEAD(tmp);
 	struct list_head *pos, *node;
 	struct super_block *sb = NULL;
-	struct inode *inode;
+	struct inode *inode, *tinode;
 	int do_sb_sort = 0;
 
 	if (wbc->for_kupdate || wbc->for_background) {
@@ -229,6 +229,14 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 		older_than_this = jiffies - expire_interval;
 	}
 
+	/* Move inodes reclaim found at end of LRU to dispatch queue */
+	list_for_each_entry_safe(inode, tinode, delaying_queue, i_list) {
+		if (inode->i_state & I_DIRTY_RECLAIM) {
+			inode->i_state &= ~I_DIRTY_RECLAIM;
+			list_move(&inode->i_list, &tmp);
+		}
+	}
+
 	while (!list_empty(delaying_queue)) {
 		inode = list_entry(delaying_queue->prev, struct inode, i_list);
 		if (expire_interval &&
@@ -906,6 +914,48 @@ void wakeup_flusher_threads(long nr_pages)
 	rcu_read_unlock();
 }
 
+/*
+ * Like wakeup_flusher_threads(), but first set I_DIRTY_RECLAIM on the inode
+ * of each dirty page in page_list that has a mapping other than swapper_space
+ */
+void wakeup_flusher_threads_pages(long nr_pages, struct list_head *page_list)
+{
+	struct page *page;
+	struct address_space *mapping;
+	struct inode *inode;
+
+	list_for_each_entry(page, page_list, lru) {
+		if (!PageDirty(page))
+			continue;
+
+		lock_page(page);
+		mapping = page_mapping(page);
+		if (!mapping || mapping == &swapper_space)
+			goto unlock;
+
+		/*
+		 * Test outside inode_lock first: if the flag is already set,
+		 * taking the lock is a waste. The inode should be pinned by
+		 * the page lock held here.
+		 */
+		inode = page->mapping->host;
+		if (inode->i_state & I_DIRTY_RECLAIM)
+			goto unlock;
+
+		/*
+		 * XXX: Yuck, has to be a way of batching this by not requiring
+		 * 	the page lock to pin the inode
+		 */
+		spin_lock(&inode_lock);
+		inode->i_state |= I_DIRTY_RECLAIM;
+		spin_unlock(&inode_lock);
+unlock:
+		unlock_page(page);
+	}
+
+	wakeup_flusher_threads(nr_pages);
+}
+
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
 	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e29f0ed..8836698 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1585,8 +1585,8 @@ struct super_operations {
 /*
  * Inode state bits.  Protected by inode_lock.
  *
- * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
- * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
+ * Four bits determine the dirty state of the inode, I_DIRTY_SYNC,
+ * I_DIRTY_DATASYNC, I_DIRTY_PAGES and I_DIRTY_RECLAIM.
  *
  * Four bits define the lifetime of an inode.  Initially, inodes are I_NEW,
  * until that flag is cleared.  I_WILL_FREE, I_FREEING and I_CLEAR are set at
@@ -1633,6 +1633,7 @@ struct super_operations {
 #define I_DIRTY_SYNC		1
 #define I_DIRTY_DATASYNC	2
 #define I_DIRTY_PAGES		4
+#define I_DIRTY_RECLAIM		256
 #define __I_NEW			3
 #define I_NEW			(1 << __I_NEW)
 #define I_WILL_FREE		16
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 494edd6..73a4df2 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -64,6 +64,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 		struct writeback_control *wbc);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
 void wakeup_flusher_threads(long nr_pages);
+void wakeup_flusher_threads_pages(long nr_pages, struct list_head *page_list);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b66d1f5..bad1abf 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -901,7 +901,8 @@ keep:
 	 * laptop mode avoiding disk spin-ups
 	 */
 	if (file && nr_dirty_seen && sc->may_writepage)
-		wakeup_flusher_threads(nr_writeback_pages(nr_dirty));
+		wakeup_flusher_threads_pages(nr_writeback_pages(nr_dirty),
+					page_list);
 
 	*nr_still_dirty = nr_dirty;
 	count_vm_events(PGACTIVATE, pgactivate);
@@ -1368,7 +1369,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 				list_add(&page->lru, &putback_list);
 			}
 
-			wakeup_flusher_threads(laptop_mode ? 0 : nr_dirty);
+			wakeup_flusher_threads_pages(laptop_mode ? 0 : nr_dirty,
+								&page_list);
 			congestion_wait(BLK_RW_ASYNC, HZ/10);
 
 			/*
-- 
1.7.1

  parent reply	other threads:[~2010-07-28 10:27 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-28 10:27 [PATCH 0/9] Reduce writeback from page reclaim context V5 Mel Gorman
2010-07-28 10:27 ` [PATCH 1/9] vmscan: tracing: Roll up of patches currently in mmotm Mel Gorman
2010-07-28 10:27 ` [PATCH 2/9] vmscan: tracing: Update trace event to track if page reclaim IO is for anon or file pages Mel Gorman
2010-07-28 10:27 ` [PATCH 3/9] vmscan: tracing: Update post-processing script to distinguish between anon and file IO from page reclaim Mel Gorman
2010-07-28 11:05   ` Christoph Hellwig
2010-07-28 11:19     ` Mel Gorman
2010-07-28 10:27 ` [PATCH 4/9] vmscan: tracing: Correct units in post-processing script Mel Gorman
2010-07-28 10:27 ` [PATCH 5/9] vmscan: Do not writeback filesystem pages in direct reclaim Mel Gorman
2010-07-28 10:27 ` [PATCH 6/9] writeback: Roll up of writeback changes in next-20100722 versus 2.6.35-rc5 Mel Gorman
2010-07-28 10:27 ` [PATCH 7/9] writeback: Roll up of writeback: try to write older pages first Mel Gorman
2010-07-28 10:27 ` [PATCH 8/9] vmscan: Kick flusher threads to clean pages when reclaim is encountering dirty pages Mel Gorman
2010-07-28 10:27 ` Mel Gorman [this message]
2010-07-28 11:08   ` [PATCH 9/9] writeback: Prioritise dirty inodes encountered by reclaim for background flushing Christoph Hellwig
2010-07-28 11:30     ` Mel Gorman
2010-07-29  8:45 ` [PATCH 0/9] Reduce writeback from page reclaim context V5 Christoph Hellwig
2010-08-03  7:34   ` Wu Fengguang
2010-08-03 12:52     ` Jan Kara
2010-08-03 15:04       ` Wu Fengguang
2010-08-03 15:07         ` Wu Fengguang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1280312843-11789-10-git-send-email-mel@csn.ul.ie \
    --to=mel@csn.ul.ie \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=chris.mason@oracle.com \
    --cc=david@fromorbit.com \
    --cc=fengguang.wu@intel.com \
    --cc=hannes@cmpxchg.org \
    --cc=hch@infradead.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=npiggin@suse.de \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).