From: Wu Fengguang <fengguang.wu@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>,
LKML <linux-kernel@vger.kernel.org>
Cc: "linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Chris Mason <chris.mason@oracle.com>, Nick Piggin <npiggin@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>, Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Subject: [PATCH 4/5] writeback: introduce bdi_start_inode_writeback()
Date: Thu, 29 Jul 2010 19:51:46 +0800 [thread overview]
Message-ID: <20100729121423.613727382@intel.com> (raw)
In-Reply-To: 20100729115142.102255590@intel.com
[-- Attachment #1: writeback-bdi_start_inode_writeback.patch --]
[-- Type: text/plain, Size: 5350 bytes --]
This is to transfer dirty pages encountered in page reclaim to the
flusher threads for writeback.
The flusher will piggyback more dirty pages for IO, yielding more
efficient IO.
To avoid memory allocations at page reclaim, a mempool is created.
TODO: more adaptive mempool size.
Background works will be kicked to clean the pages under reclaim ASAP.
TODO: sync_works is temporarily reused for convenience.
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
fs/fs-writeback.c | 69 ++++++++++++++++++++++++++++++++--
include/linux/backing-dev.h | 1
2 files changed, 66 insertions(+), 4 deletions(-)
--- linux-next.orig/fs/fs-writeback.c 2010-07-29 17:13:58.000000000 +0800
+++ linux-next/fs/fs-writeback.c 2010-07-29 17:49:05.000000000 +0800
@@ -35,12 +35,15 @@
struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
+ struct inode *inode;
+ pgoff_t offset;
enum writeback_sync_modes sync_mode;
unsigned long sync_after;
unsigned int for_sync:1;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
+ unsigned int for_reclaim:1;
struct list_head list; /* pending work list */
struct completion *done; /* set if the caller waits */
@@ -61,6 +64,27 @@ struct wb_writeback_work {
*/
int nr_pdflush_threads;
+static mempool_t *wb_work_mempool;
+
+static void *wb_work_alloc(gfp_t gfp_mask, void *pool_data)
+{
+ /*
+ * Only hit the slab allocator when the caller is not itself in
+ * page reclaim (PF_MEMALLOC): bdi_start_inode_writeback() may be
+ * called from there. Returning NULL leaves the request to the
+ * mempool's preallocated elements.
+ */
+ if (!(current->flags & PF_MEMALLOC))
+ return kmalloc(sizeof(struct wb_writeback_work), gfp_mask);
+
+ return NULL;
+}
+
+/*
+ * Create the mempool that backs struct wb_writeback_work allocations.
+ * Returns 0 on success or -ENOMEM if the pool cannot be created.
+ */
+static __init int wb_work_init(void)
+{
+ /* XXX: better number */
+ wb_work_mempool = mempool_create(10240, wb_work_alloc,
+ mempool_kfree, NULL);
+ if (!wb_work_mempool)
+ return -ENOMEM;
+
+ return 0;
+}
+fs_initcall(wb_work_init);
+
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
@@ -80,7 +104,7 @@ static void bdi_queue_work(struct backin
spin_lock(&bdi->wb_lock);
list_add_tail(&work->list, &bdi->work_list);
- if (work->for_sync)
+ if (work->for_sync || work->for_reclaim)
atomic_inc(&bdi->wb.sync_works);
spin_unlock(&bdi->wb_lock);
@@ -109,7 +133,7 @@ __bdi_start_writeback(struct backing_dev
* This is WB_SYNC_NONE writeback, so if allocation fails just
* wakeup the thread for old dirty data writeback
*/
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ work = mempool_alloc(wb_work_mempool, GFP_NOWAIT);
if (!work) {
if (bdi->wb.task) {
trace_writeback_nowork(bdi);
@@ -118,6 +142,7 @@ __bdi_start_writeback(struct backing_dev
return;
}
+ memset(work, 0, sizeof(*work));
work->sync_mode = WB_SYNC_NONE;
work->nr_pages = nr_pages;
work->range_cyclic = range_cyclic;
@@ -156,6 +181,26 @@ void bdi_start_background_writeback(stru
__bdi_start_writeback(bdi, LONG_MAX, true, true);
}
+/**
+ * bdi_start_inode_writeback - queue writeback around one page of an inode
+ * @inode: inode owning the dirty page
+ * @offset: page cache index of the dirty page
+ *
+ * Queues a WB_SYNC_NONE work item marked for_reclaim on the inode's bdi.
+ * The inode reference taken here with igrab() travels with the work item
+ * and is dropped by wb_writeback() once the work has been processed.
+ *
+ * Best effort: nothing is queued if no work item can be allocated or if
+ * the inode cannot be grabbed.
+ */
+void bdi_start_inode_writeback(struct inode *inode, pgoff_t offset)
+{
+ struct wb_writeback_work *work;
+
+ /*
+ * Allocate before igrab(): a failed allocation then holds no inode
+ * reference, so this path never leaks one and never needs an
+ * iput() from a caller that may be in page reclaim.
+ */
+ work = mempool_alloc(wb_work_mempool, GFP_NOWAIT);
+ if (!work)
+ return;
+
+ if (!igrab(inode)) {
+ mempool_free(work, wb_work_mempool);
+ return;
+ }
+
+ memset(work, 0, sizeof(*work));
+ work->sync_mode = WB_SYNC_NONE;
+ work->inode = inode;
+ work->offset = offset;
+ work->for_reclaim = 1;
+
+ bdi_queue_work(inode->i_sb->s_bdi, work);
+}
+
/*
* Redirty an inode: set its when-it-was dirtied timestamp and move it to the
* furthest end of its superblock's dirty-inode list.
@@ -618,6 +663,22 @@ static long wb_writeback(struct bdi_writ
long wrote = 0;
struct inode *inode;
+ if (work->for_reclaim) {
+ struct page *page = find_get_page(work->inode->i_mapping,
+ work->offset);
+ wrote = __filemap_fdatawrite_range( /* XXX: write around */
+ work->inode->i_mapping,
+ work->offset,
+ work->offset + MAX_WRITEBACK_PAGES,
+ WB_SYNC_NONE);
+ if (page && PageWriteback(page))
+ SetPageReclaim(page);
+ if (page)
+ page_cache_release(page);
+ iput(work->inode);
+ return wrote;
+ }
+
if (!wbc.range_cyclic) {
wbc.range_start = 0;
wbc.range_end = LLONG_MAX;
@@ -771,7 +832,7 @@ long wb_do_writeback(struct bdi_writebac
wrote += wb_writeback(wb, work);
- if (work->for_sync)
+ if (work->for_sync || work->for_reclaim)
atomic_dec(&wb->sync_works);
/*
@@ -781,7 +842,7 @@ long wb_do_writeback(struct bdi_writebac
if (work->done)
complete(work->done);
else
- kfree(work);
+ mempool_free(work, wb_work_mempool);
}
/*
--- linux-next.orig/include/linux/backing-dev.h 2010-07-29 17:13:31.000000000 +0800
+++ linux-next/include/linux/backing-dev.h 2010-07-29 17:47:58.000000000 +0800
@@ -108,6 +108,7 @@ void bdi_unregister(struct backing_dev_i
int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
void bdi_start_background_writeback(struct backing_dev_info *bdi);
+void bdi_start_inode_writeback(struct inode *inode, pgoff_t offset);
int bdi_writeback_thread(void *data);
int bdi_has_dirty_io(struct backing_dev_info *bdi);
void bdi_arm_supers_timer(void);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2010-07-29 11:51 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-07-29 11:51 [PATCH 0/5] [RFC] transfer ASYNC vmscan writeback IO to the flusher threads Wu Fengguang
2010-07-29 11:51 ` [PATCH 1/5] writeback: introduce wbc.for_sync to cover the two sync stages Wu Fengguang
2010-07-29 15:04 ` Jan Kara
2010-07-30 5:10 ` Wu Fengguang
2010-07-29 11:51 ` [PATCH 2/5] writeback: stop periodic/background work on seeing sync works Wu Fengguang
2010-07-29 16:20 ` Jan Kara
2010-07-30 4:03 ` Wu Fengguang
2010-08-02 20:51 ` Jan Kara
2010-08-03 3:01 ` Wu Fengguang
2010-08-03 10:55 ` Jan Kara
2010-08-03 12:39 ` Jan Kara
2010-08-03 12:59 ` Wu Fengguang
2010-08-03 13:18 ` Jan Kara
2010-08-03 13:22 ` Wu Fengguang
2010-08-03 13:44 ` Wu Fengguang
2010-08-03 13:48 ` Wu Fengguang
2010-08-03 14:36 ` Wu Fengguang
2010-07-29 11:51 ` [PATCH 3/5] writeback: prevent sync livelock with the sync_after timestamp Wu Fengguang
2010-07-29 15:02 ` Jan Kara
2010-07-30 5:17 ` Wu Fengguang
2010-07-29 11:51 ` Wu Fengguang [this message]
2010-07-29 11:51 ` [PATCH 5/5] vmscan: transfer async file writeback to the flusher Wu Fengguang
2010-07-29 16:09 ` [PATCH 0/5] [RFC] transfer ASYNC vmscan writeback IO to the flusher threads Jan Kara
2010-07-30 5:34 ` Wu Fengguang
2010-07-29 23:23 ` Dave Chinner
2010-07-30 7:58 ` Wu Fengguang
2010-07-30 9:22 ` KOSAKI Motohiro
2010-07-30 12:25 ` Wu Fengguang
2010-07-30 11:12 ` Dave Chinner
2010-07-30 13:18 ` Wu Fengguang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100729121423.613727382@intel.com \
--to=fengguang.wu@intel.com \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).