From: Wu Fengguang <fengguang.wu@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>, Dave Chinner <david@fromorbit.com>,
Wu Fengguang <fengguang.wu@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: <linux-fsdevel@vger.kernel.org>
Subject: [PATCH 1/3] writeback: introduce wbc.for_sync to cover the two sync stages
Date: Sun, 01 May 2011 06:36:06 +0800 [thread overview]
Message-ID: <20110430224552.227301247@intel.com> (raw)
In-Reply-To: 20110430223605.034517922@intel.com
[-- Attachment #1: writeback-for-sync.patch --]
[-- Type: text/plain, Size: 6854 bytes --]
sync(2) is performed in two stages: the WB_SYNC_NONE sync and the
WB_SYNC_ALL sync. Tag both stages with wbc.for_sync for livelock
prevention.
Note that writeback_inodes_sb() is called not only by sync(). All of
its callers are treated the same, because the other callers also need
livelock prevention.
Impacts:
- it changes the order in which pages/inodes are synced to disk. Now, in
the WB_SYNC_NONE stage, writeback won't proceed to the next inode until
it has finished with the current inode.
- this adds a new field to the writeback trace events, which may break
some scripts that parse them.
CC: Jan Kara <jack@suse.cz>
CC: Dave Chinner <david@fromorbit.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
fs/ext4/inode.c | 4 ++--
fs/fs-writeback.c | 10 ++++++----
include/linux/writeback.h | 1 +
include/trace/events/writeback.h | 10 ++++++++--
mm/page-writeback.c | 4 ++--
5 files changed, 19 insertions(+), 10 deletions(-)
--- linux-next.orig/fs/fs-writeback.c 2011-05-01 06:35:15.000000000 +0800
+++ linux-next/fs/fs-writeback.c 2011-05-01 06:35:17.000000000 +0800
@@ -36,6 +36,7 @@ struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
enum writeback_sync_modes sync_mode;
+ unsigned int for_sync:1;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
@@ -645,13 +646,14 @@ static long wb_writeback(struct bdi_writ
struct writeback_control wbc = {
.sync_mode = work->sync_mode,
.older_than_this = NULL,
+ .for_sync = work->for_sync,
.for_kupdate = work->for_kupdate,
.for_background = work->for_background,
.range_cyclic = work->range_cyclic,
};
unsigned long oldest_jif;
long wrote = 0;
- long write_chunk;
+ long write_chunk = MAX_WRITEBACK_PAGES;
struct inode *inode;
if (!wbc.range_cyclic) {
@@ -672,9 +674,7 @@ static long wb_writeback(struct bdi_writ
* (quickly) tag currently dirty pages
* (maybe slowly) sync all tagged pages
*/
- if (wbc.sync_mode == WB_SYNC_NONE)
- write_chunk = MAX_WRITEBACK_PAGES;
- else
+ if (wbc.for_sync)
write_chunk = LONG_MAX;
wbc.wb_start = jiffies; /* livelock avoidance */
@@ -1209,6 +1209,7 @@ void writeback_inodes_sb_nr(struct super
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_NONE,
+ .for_sync = 1,
.done = &done,
.nr_pages = nr,
};
@@ -1286,6 +1287,7 @@ void sync_inodes_sb(struct super_block *
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_ALL,
+ .for_sync = 1,
.nr_pages = LONG_MAX,
.range_cyclic = 0,
.done = &done,
--- linux-next.orig/include/linux/writeback.h 2011-05-01 06:35:16.000000000 +0800
+++ linux-next/include/linux/writeback.h 2011-05-01 06:35:17.000000000 +0800
@@ -46,6 +46,7 @@ struct writeback_control {
unsigned encountered_congestion:1; /* An output: a queue is full */
unsigned for_kupdate:1; /* A kupdate writeback */
unsigned for_background:1; /* A background writeback */
+ unsigned for_sync:1; /* do livelock prevention for sync */
unsigned for_reclaim:1; /* Invoked from the page allocator */
unsigned range_cyclic:1; /* range_start is cyclic */
unsigned more_io:1; /* more io to be dispatched */
--- linux-next.orig/mm/page-writeback.c 2011-05-01 06:35:16.000000000 +0800
+++ linux-next/mm/page-writeback.c 2011-05-01 06:35:17.000000000 +0800
@@ -892,12 +892,12 @@ int write_cache_pages(struct address_spa
range_whole = 1;
cycled = 1; /* ignore range_cyclic tests */
}
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (wbc->for_sync)
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
retry:
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (wbc->for_sync)
tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && (index <= end)) {
--- linux-next.orig/include/trace/events/writeback.h 2011-05-01 06:35:16.000000000 +0800
+++ linux-next/include/trace/events/writeback.h 2011-05-01 06:35:17.000000000 +0800
@@ -17,6 +17,7 @@ DECLARE_EVENT_CLASS(writeback_work_class
__array(char, name, 32)
__field(long, nr_pages)
__field(dev_t, sb_dev)
+ __field(int, for_sync)
__field(int, sync_mode)
__field(int, for_kupdate)
__field(int, range_cyclic)
@@ -26,16 +27,18 @@ DECLARE_EVENT_CLASS(writeback_work_class
strncpy(__entry->name, dev_name(bdi->dev), 32);
__entry->nr_pages = work->nr_pages;
__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
+ __entry->for_sync = work->for_sync;
__entry->sync_mode = work->sync_mode;
__entry->for_kupdate = work->for_kupdate;
__entry->range_cyclic = work->range_cyclic;
__entry->for_background = work->for_background;
),
- TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
+ TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync=%d sync_mode=%d "
"kupdate=%d range_cyclic=%d background=%d",
__entry->name,
MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
__entry->nr_pages,
+ __entry->for_sync,
__entry->sync_mode,
__entry->for_kupdate,
__entry->range_cyclic,
@@ -96,6 +99,7 @@ DECLARE_EVENT_CLASS(wbc_class,
__array(char, name, 32)
__field(long, nr_to_write)
__field(long, pages_skipped)
+ __field(int, for_sync)
__field(int, sync_mode)
__field(int, for_kupdate)
__field(int, for_background)
@@ -111,6 +115,7 @@ DECLARE_EVENT_CLASS(wbc_class,
strncpy(__entry->name, dev_name(bdi->dev), 32);
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
+ __entry->for_sync = wbc->for_sync;
__entry->sync_mode = wbc->sync_mode;
__entry->for_kupdate = wbc->for_kupdate;
__entry->for_background = wbc->for_background;
@@ -123,12 +128,13 @@ DECLARE_EVENT_CLASS(wbc_class,
__entry->range_end = (long)wbc->range_end;
),
- TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
+ TP_printk("bdi %s: towrt=%ld skip=%ld sync=%d mode=%d kupd=%d "
"bgrd=%d reclm=%d cyclic=%d more=%d older=0x%lx "
"start=0x%lx end=0x%lx",
__entry->name,
__entry->nr_to_write,
__entry->pages_skipped,
+ __entry->for_sync,
__entry->sync_mode,
__entry->for_kupdate,
__entry->for_background,
--- linux-next.orig/fs/ext4/inode.c 2011-05-01 06:35:15.000000000 +0800
+++ linux-next/fs/ext4/inode.c 2011-05-01 06:35:17.000000000 +0800
@@ -2741,7 +2741,7 @@ static int write_cache_pages_da(struct a
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (wbc->for_sync)
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
@@ -2975,7 +2975,7 @@ static int ext4_da_writepages(struct add
}
retry:
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (wbc->for_sync)
tag_pages_for_writeback(mapping, index, end);
while (!ret && wbc->nr_to_write > 0) {
next prev parent reply other threads:[~2011-04-30 22:48 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-04-30 22:36 [PATCH 0/3] sync livelock fixes Wu Fengguang
2011-04-30 22:36 ` Wu Fengguang [this message]
2011-05-01 7:46 ` [PATCH 1/3] writeback: introduce wbc.for_sync to cover the two sync stages Dave Chinner
2011-05-02 3:23 ` Wu Fengguang
2011-04-30 22:36 ` [PATCH 2/3] writeback: update dirtied_when for synced inode to prevent livelock Wu Fengguang
2011-04-30 22:36 ` [PATCH 3/3] writeback: avoid extra sync work at enqueue time Wu Fengguang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110430224552.227301247@intel.com \
--to=fengguang.wu@intel.com \
--cc=akpm@linux-foundation.org \
--cc=david@fromorbit.com \
--cc=jack@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.