From: Wu Fengguang <fengguang.wu@intel.com>
To: Christoph Hellwig <hch@infradead.org>
Cc: "linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>,
Jan Kara <jack@suse.cz>, Dave Chinner <david@fromorbit.com>
Subject: Re: xfstests 073 regression
Date: Sun, 31 Jul 2011 19:05:06 +0800 [thread overview]
Message-ID: <20110731110506.GA14810@localhost> (raw)
In-Reply-To: <20110731090916.GA9497@localhost>
> --- linux.orig/fs/fs-writeback.c 2011-07-29 22:14:18.000000000 +0800
> +++ linux/fs/fs-writeback.c 2011-07-31 17:04:25.000000000 +0800
> @@ -618,7 +618,12 @@ static long __writeback_inodes_wb(struct
> struct super_block *sb = inode->i_sb;
>
> if (!grab_super_passive(sb)) {
> - requeue_io(inode, wb);
> + /*
> + * grab_super_passive() may fail consistently due to
> + * s_umount being grabbed by someone else. So redirty
> + * the inode to avoid busy loop.
> + */
> + redirty_tail(inode, wb);
> continue;
> }
> wrote += writeback_sb_inodes(sb, wb, work);
Or we could fix it by moving the inode into b_more_io_wait. This
avoids introducing possible delays to the inode, and also makes it
possible to eliminate extra sync work by setting each sync work's
work->older_than_this to the sync() _syscall_ time instead of the
time at which the sync work is _executed_.
Thanks,
Fengguang
---
Subject: writeback: introduce queue b_more_io_wait
Date: Sun Jul 31 18:44:44 CST 2011
Introduce the b_more_io_wait queue to park inodes that for some reason
cannot be synced immediately. They will be enqueued at the next b_io
refill time and won't be busy-retried the way inodes on b_more_io are.
The new data flow after this patchset:
b_dirty --> b_io --> b_more_io/b_more_io_wait --+
             ^                                  |
             |                                  |
             +----------------------------------+
The rationale is to address two issues:
- the 30s max delay of redirty_tail() may be too long
- redirty_tail() may update i_dirtied_when. With b_more_io_wait, we'll
be able to avoid extra sync() work by excluding from the sync any
inode whose dirty time is after the sync() _syscall_ time.
Cc: Jan Kara <jack@suse.cz>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Michael Rubin <mrubin@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
fs/fs-writeback.c | 10 ++++++++++
include/linux/backing-dev.h | 8 +++++---
mm/backing-dev.c | 10 ++++++++--
3 files changed, 23 insertions(+), 5 deletions(-)
--- linux.orig/fs/fs-writeback.c 2011-07-31 18:39:19.000000000 +0800
+++ linux/fs/fs-writeback.c 2011-07-31 19:03:28.000000000 +0800
@@ -220,6 +220,15 @@ static void requeue_io(struct inode *ino
list_move(&inode->i_wb_list, &wb->b_more_io);
}
+/*
+ * The inode should be retried in an opportunistic way.
+ */
+static void requeue_io_wait(struct inode *inode, struct bdi_writeback *wb)
+{
+ assert_spin_locked(&wb->list_lock);
+ list_move(&inode->i_wb_list, &wb->b_more_io_wait);
+}
+
static void inode_sync_complete(struct inode *inode)
{
/*
@@ -307,6 +316,7 @@ static void queue_io(struct bdi_writebac
int moved;
assert_spin_locked(&wb->list_lock);
list_splice_init(&wb->b_more_io, &wb->b_io);
+ list_splice_init(&wb->b_more_io_wait, &wb->b_io);
moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
trace_writeback_queue_io(wb, older_than_this, moved);
}
--- linux.orig/include/linux/backing-dev.h 2011-07-31 18:39:20.000000000 +0800
+++ linux/include/linux/backing-dev.h 2011-07-31 18:42:18.000000000 +0800
@@ -58,6 +58,7 @@ struct bdi_writeback {
struct list_head b_dirty; /* dirty inodes */
struct list_head b_io; /* parked for writeback */
struct list_head b_more_io; /* parked for more writeback */
+ struct list_head b_more_io_wait;/* opportunistic retry io */
spinlock_t list_lock; /* protects the b_* lists */
};
@@ -121,9 +122,10 @@ extern struct list_head bdi_pending_list
static inline int wb_has_dirty_io(struct bdi_writeback *wb)
{
- return !list_empty(&wb->b_dirty) ||
- !list_empty(&wb->b_io) ||
- !list_empty(&wb->b_more_io);
+ return !list_empty(&wb->b_dirty) ||
+ !list_empty(&wb->b_io) ||
+ !list_empty(&wb->b_more_io) ||
+ !list_empty(&wb->b_more_io_wait);
}
static inline void __add_bdi_stat(struct backing_dev_info *bdi,
--- linux.orig/mm/backing-dev.c 2011-07-31 18:39:19.000000000 +0800
+++ linux/mm/backing-dev.c 2011-07-31 18:44:26.000000000 +0800
@@ -74,10 +74,10 @@ static int bdi_debug_stats_show(struct s
unsigned long background_thresh;
unsigned long dirty_thresh;
unsigned long bdi_thresh;
- unsigned long nr_dirty, nr_io, nr_more_io;
+ unsigned long nr_dirty, nr_io, nr_more_io, nr_more_io_wait;
struct inode *inode;
- nr_dirty = nr_io = nr_more_io = 0;
+ nr_dirty = nr_io = nr_more_io = nr_more_io_wait = 0;
spin_lock(&wb->list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
nr_dirty++;
@@ -85,6 +85,8 @@ static int bdi_debug_stats_show(struct s
nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
nr_more_io++;
+ list_for_each_entry(inode, &wb->b_more_io_wait, i_wb_list)
+ nr_more_io_wait++;
spin_unlock(&wb->list_lock);
global_dirty_limits(&background_thresh, &dirty_thresh);
@@ -102,6 +104,7 @@ static int bdi_debug_stats_show(struct s
"b_dirty: %10lu\n"
"b_io: %10lu\n"
"b_more_io: %10lu\n"
+ "b_more_io_wait: %10lu\n"
"bdi_list: %10u\n"
"state: %10lx\n",
(unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
@@ -114,6 +117,7 @@ static int bdi_debug_stats_show(struct s
nr_dirty,
nr_io,
nr_more_io,
+ nr_more_io_wait,
!list_empty(&bdi->bdi_list), bdi->state);
#undef K
@@ -637,6 +641,7 @@ static void bdi_wb_init(struct bdi_write
INIT_LIST_HEAD(&wb->b_dirty);
INIT_LIST_HEAD(&wb->b_io);
INIT_LIST_HEAD(&wb->b_more_io);
+ INIT_LIST_HEAD(&wb->b_more_io_wait);
spin_lock_init(&wb->list_lock);
setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
}
@@ -702,6 +707,7 @@ void bdi_destroy(struct backing_dev_info
list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
list_splice(&bdi->wb.b_io, &dst->b_io);
list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
+ list_splice(&bdi->wb.b_more_io_wait, &dst->b_more_io_wait);
spin_unlock(&bdi->wb.list_lock);
spin_unlock(&dst->list_lock);
}
next prev parent reply other threads:[~2011-07-31 11:05 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-07-28 16:41 xfstests 073 regression Christoph Hellwig
2011-07-29 14:21 ` Wu Fengguang
2011-07-30 13:44 ` Christoph Hellwig
2011-07-31 9:09 ` Wu Fengguang
2011-07-31 11:05 ` Wu Fengguang [this message]
2011-07-31 11:28 ` Dave Chinner
2011-07-31 15:10 ` Wu Fengguang
2011-07-31 15:14 ` [GIT PULL] fix xfstests 073 regression for 3.1-rc1 Wu Fengguang
2011-07-31 23:47 ` xfstests 073 regression Dave Chinner
2011-08-01 0:25 ` Linus Torvalds
2011-08-01 1:28 ` Dave Chinner
2011-08-01 1:40 ` Linus Torvalds
2011-08-01 2:09 ` Dave Chinner
2011-08-01 2:21 ` Linus Torvalds
2011-08-01 5:52 ` Wu Fengguang
2011-08-01 16:44 ` Christoph Hellwig
2011-08-01 11:23 ` Christoph Hellwig
2011-08-01 16:52 ` Christoph Hellwig
2011-08-02 11:44 ` Wu Fengguang
2011-08-02 12:04 ` Christoph Hellwig
2011-08-02 12:04 ` Dave Chinner
2011-08-02 12:16 ` Wu Fengguang
2011-08-02 12:26 ` Wu Fengguang
2011-08-02 12:05 ` Wu Fengguang
2011-08-01 5:24 ` Wu Fengguang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110731110506.GA14810@localhost \
--to=fengguang.wu@intel.com \
--cc=david@fromorbit.com \
--cc=hch@infradead.org \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.