From: Wu Fengguang <fengguang.wu@intel.com>
To: Christoph Hellwig <hch@infradead.org>
Cc: "linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>,
Jan Kara <jack@suse.cz>, Dave Chinner <david@fromorbit.com>
Subject: Re: xfstests 073 regression
Date: Sun, 31 Jul 2011 19:05:06 +0800 [thread overview]
Message-ID: <20110731110506.GA14810@localhost> (raw)
In-Reply-To: <20110731090916.GA9497@localhost>
> --- linux.orig/fs/fs-writeback.c 2011-07-29 22:14:18.000000000 +0800
> +++ linux/fs/fs-writeback.c 2011-07-31 17:04:25.000000000 +0800
> @@ -618,7 +618,12 @@ static long __writeback_inodes_wb(struct
> struct super_block *sb = inode->i_sb;
>
> if (!grab_super_passive(sb)) {
> - requeue_io(inode, wb);
> + /*
> + * grab_super_passive() may fail consistently due to
> + * s_umount being grabbed by someone else. So redirty
> + * the inode to avoid busy loop.
> + */
> + redirty_tail(inode, wb);
> continue;
> }
> wrote += writeback_sb_inodes(sb, wb, work);
Or we could fix it by moving the inode into b_more_io_wait. This
avoids introducing possible delays to the inode, as well as makes
it possible to eliminate extra sync works by setting the sync works'
work->older_than_this to the sync() _syscall_ time instead of the
current sync work _execution_ time.
Thanks,
Fengguang
---
Subject: writeback: introduce queue b_more_io_wait
Date: Sun Jul 31 18:44:44 CST 2011
Introduce the b_more_io_wait queue to park inodes that for some reason
cannot be synced immediately. They will be enqueued at the next b_io
refill time and won't be busy-retried the way inodes in b_more_io are.
The new data flow after this patchset:
b_dirty --> b_io --> b_more_io/b_more_io_wait --+
^ |
| |
+----------------------------------+
The rationale is to address two issues:
- the 30s max delay of redirty_tail() may be too long
- redirty_tail() may update i_dirtied_when. With b_more_io_wait, we'll
be able to avoid extra sync() works by excluding any inode from being
synced if its dirty time is after the sync() _syscall_ time.
Cc: Jan Kara <jack@suse.cz>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Michael Rubin <mrubin@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
fs/fs-writeback.c | 10 ++++++++++
include/linux/backing-dev.h | 8 +++++---
mm/backing-dev.c | 10 ++++++++--
3 files changed, 23 insertions(+), 5 deletions(-)
--- linux.orig/fs/fs-writeback.c 2011-07-31 18:39:19.000000000 +0800
+++ linux/fs/fs-writeback.c 2011-07-31 19:03:28.000000000 +0800
@@ -220,6 +220,15 @@ static void requeue_io(struct inode *ino
list_move(&inode->i_wb_list, &wb->b_more_io);
}
+/*
+ * The inode should be retried in an opportunistic way.
+ */
+static void requeue_io_wait(struct inode *inode, struct bdi_writeback *wb)
+{
+ assert_spin_locked(&wb->list_lock);
+ list_move(&inode->i_wb_list, &wb->b_more_io_wait);
+}
+
static void inode_sync_complete(struct inode *inode)
{
/*
@@ -307,6 +316,7 @@ static void queue_io(struct bdi_writebac
int moved;
assert_spin_locked(&wb->list_lock);
list_splice_init(&wb->b_more_io, &wb->b_io);
+ list_splice_init(&wb->b_more_io_wait, &wb->b_io);
moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
trace_writeback_queue_io(wb, older_than_this, moved);
}
--- linux.orig/include/linux/backing-dev.h 2011-07-31 18:39:20.000000000 +0800
+++ linux/include/linux/backing-dev.h 2011-07-31 18:42:18.000000000 +0800
@@ -58,6 +58,7 @@ struct bdi_writeback {
struct list_head b_dirty; /* dirty inodes */
struct list_head b_io; /* parked for writeback */
struct list_head b_more_io; /* parked for more writeback */
+ struct list_head b_more_io_wait;/* opportunistic retry io */
spinlock_t list_lock; /* protects the b_* lists */
};
@@ -121,9 +122,10 @@ extern struct list_head bdi_pending_list
static inline int wb_has_dirty_io(struct bdi_writeback *wb)
{
- return !list_empty(&wb->b_dirty) ||
- !list_empty(&wb->b_io) ||
- !list_empty(&wb->b_more_io);
+ return !list_empty(&wb->b_dirty) ||
+ !list_empty(&wb->b_io) ||
+ !list_empty(&wb->b_more_io) ||
+ !list_empty(&wb->b_more_io_wait);
}
static inline void __add_bdi_stat(struct backing_dev_info *bdi,
--- linux.orig/mm/backing-dev.c 2011-07-31 18:39:19.000000000 +0800
+++ linux/mm/backing-dev.c 2011-07-31 18:44:26.000000000 +0800
@@ -74,10 +74,10 @@ static int bdi_debug_stats_show(struct s
unsigned long background_thresh;
unsigned long dirty_thresh;
unsigned long bdi_thresh;
- unsigned long nr_dirty, nr_io, nr_more_io;
+ unsigned long nr_dirty, nr_io, nr_more_io, nr_more_io_wait;
struct inode *inode;
- nr_dirty = nr_io = nr_more_io = 0;
+ nr_dirty = nr_io = nr_more_io = nr_more_io_wait = 0;
spin_lock(&wb->list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
nr_dirty++;
@@ -85,6 +85,8 @@ static int bdi_debug_stats_show(struct s
nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
nr_more_io++;
+ list_for_each_entry(inode, &wb->b_more_io_wait, i_wb_list)
+ nr_more_io_wait++;
spin_unlock(&wb->list_lock);
global_dirty_limits(&background_thresh, &dirty_thresh);
@@ -102,6 +104,7 @@ static int bdi_debug_stats_show(struct s
"b_dirty: %10lu\n"
"b_io: %10lu\n"
"b_more_io: %10lu\n"
+ "b_more_io_wait: %10lu\n"
"bdi_list: %10u\n"
"state: %10lx\n",
(unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
@@ -114,6 +117,7 @@ static int bdi_debug_stats_show(struct s
nr_dirty,
nr_io,
nr_more_io,
+ nr_more_io_wait,
!list_empty(&bdi->bdi_list), bdi->state);
#undef K
@@ -637,6 +641,7 @@ static void bdi_wb_init(struct bdi_write
INIT_LIST_HEAD(&wb->b_dirty);
INIT_LIST_HEAD(&wb->b_io);
INIT_LIST_HEAD(&wb->b_more_io);
+ INIT_LIST_HEAD(&wb->b_more_io_wait);
spin_lock_init(&wb->list_lock);
setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
}
@@ -702,6 +707,7 @@ void bdi_destroy(struct backing_dev_info
list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
list_splice(&bdi->wb.b_io, &dst->b_io);
list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
+ list_splice(&bdi->wb.b_more_io_wait, &dst->b_more_io_wait);
spin_unlock(&bdi->wb.list_lock);
spin_unlock(&dst->list_lock);
}
next prev parent reply other threads:[~2011-07-31 11:05 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-07-28 16:41 xfstests 073 regression Christoph Hellwig
2011-07-29 14:21 ` Wu Fengguang
2011-07-30 13:44 ` Christoph Hellwig
2011-07-31 9:09 ` Wu Fengguang
2011-07-31 11:05 ` Wu Fengguang [this message]
2011-07-31 11:28 ` Dave Chinner
2011-07-31 15:10 ` Wu Fengguang
2011-07-31 15:14 ` [GIT PULL] fix xfstests 073 regression for 3.1-rc1 Wu Fengguang
2011-07-31 23:47 ` xfstests 073 regression Dave Chinner
2011-08-01 0:25 ` Linus Torvalds
2011-08-01 1:28 ` Dave Chinner
2011-08-01 1:40 ` Linus Torvalds
2011-08-01 2:09 ` Dave Chinner
2011-08-01 2:21 ` Linus Torvalds
2011-08-01 5:52 ` Wu Fengguang
2011-08-01 16:44 ` Christoph Hellwig
2011-08-01 11:23 ` Christoph Hellwig
2011-08-01 16:52 ` Christoph Hellwig
2011-08-02 11:44 ` Wu Fengguang
2011-08-02 12:04 ` Christoph Hellwig
2011-08-02 12:04 ` Dave Chinner
2011-08-02 12:16 ` Wu Fengguang
2011-08-02 12:26 ` Wu Fengguang
2011-08-02 12:05 ` Wu Fengguang
2011-08-01 5:24 ` Wu Fengguang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110731110506.GA14810@localhost \
--to=fengguang.wu@intel.com \
--cc=david@fromorbit.com \
--cc=hch@infradead.org \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).