From: Jan Kara <jack@suse.cz>
To: Wu Fengguang <fengguang.wu@intel.com>
Cc: Christoph Hellwig <hch@infradead.org>,
linux-fsdevel@vger.kernel.org, Jan Kara <jack@suse.cz>
Subject: [PATCH 6/7] writeback: Refactor writeback_single_inode()
Date: Tue, 20 Mar 2012 23:56:30 +0100 [thread overview]
Message-ID: <1332284191-21076-7-git-send-email-jack@suse.cz> (raw)
In-Reply-To: <1332284191-21076-1-git-send-email-jack@suse.cz>
The code in writeback_single_inode() is relatively complex. The list requeing
logic makes sense only for flusher thread but not really for sync_inode() or
write_inode_now() callers. Also when we want to get rid of inode references
held by flusher thread, we will need a special I_SYNC handling there.
So separate part of writeback_single_inode() which does the real writeback work
into __writeback_single_inode() and make writeback_single_inode() do only stuff
necessary for callers writing only one inode, moving the special list handling
into writeback_sb_inodes(). As a sideeffect this fixes a possible race where we
could skip some inode during sync(2) because other writer refiled it from b_io
to b_dirty list. Also I_SYNC handling is moved into the callers of
__writeback_single_inode() to make locking easier.
Signed-off-by: Jan Kara <jack@suse.cz>
---
fs/fs-writeback.c | 137 +++++++++++++++++++++++++++++++----------------------
1 files changed, 81 insertions(+), 56 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 27f7191..6f5f930 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -355,6 +355,15 @@ static void inode_wb_requeue(struct inode *inode, struct bdi_writeback *wb,
(wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
inode->dirtied_when = jiffies;
+ if (wbc->pages_skipped) {
+ /*
+ * writeback is not making progress due to locked
+ * buffers. Skip this inode for now.
+ */
+ redirty_tail(inode, wb);
+ return;
+ }
+
if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
/*
* We didn't write back all the pages. nfs_writepages()
@@ -387,46 +396,20 @@ static void inode_wb_requeue(struct inode *inode, struct bdi_writeback *wb,
}
/*
- * Write out an inode's dirty pages. Called under wb->list_lock and
- * inode->i_lock. Either the caller has an active reference on the inode or
- * the inode has I_WILL_FREE set.
- *
- * If `wait' is set, wait on the writeout.
- *
- * The whole writeout design is quite complex and fragile. We want to avoid
- * starvation of particular inodes when others are being redirtied, prevent
- * livelocks, etc.
+ * Write out an inode and its dirty pages. Do not update the writeback list
+ * linkage. That is left to the caller. The caller is also responsible for
+ * setting I_SYNC flag and calling inode_sync_complete() to clear it.
*/
static int
-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
- struct writeback_control *wbc)
+__writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+ struct writeback_control *wbc)
{
struct address_space *mapping = inode->i_mapping;
long nr_to_write = wbc->nr_to_write;
unsigned dirty;
int ret;
- assert_spin_locked(&inode->i_lock);
-
- if (!atomic_read(&inode->i_count))
- WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
- else
- WARN_ON(inode->i_state & I_WILL_FREE);
-
- if (inode->i_state & I_SYNC) {
- if (wbc->sync_mode != WB_SYNC_ALL)
- return 0;
- /*
- * It's a data-integrity sync. We must wait.
- */
- inode_wait_for_writeback(inode);
- }
-
- BUG_ON(inode->i_state & I_SYNC);
-
- /* Set I_SYNC, reset I_DIRTY_PAGES */
- inode->i_state |= I_SYNC;
- spin_unlock(&inode->i_lock);
+ WARN_ON(!(inode->i_state & I_SYNC));
ret = do_writepages(mapping, wbc);
@@ -459,12 +442,65 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
if (ret == 0)
ret = err;
}
+ trace_writeback_single_inode(inode, wbc, nr_to_write);
+ return ret;
+}
+
+/*
+ * Write out an inode's dirty pages. Either the caller has an active reference
+ * on the inode or the inode has I_WILL_FREE set.
+ *
+ * This function is designed to be called for writing back one inode which
+ * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
+ * and does more profound writeback list handling in writeback_sb_inodes().
+ */
+static int
+writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+ struct writeback_control *wbc)
+{
+ int ret = 0;
+
+ spin_lock(&inode->i_lock);
+ if (!atomic_read(&inode->i_count))
+ WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
+ else
+ WARN_ON(inode->i_state & I_WILL_FREE);
+
+ if (inode->i_state & I_SYNC) {
+ if (wbc->sync_mode != WB_SYNC_ALL)
+ goto out;
+ /*
+ * It's a data-integrity sync. We must wait.
+ */
+ inode_wait_for_writeback(inode);
+ }
+ BUG_ON(inode->i_state & I_SYNC);
+ /*
+ * Skip inode if it is clean. We don't want to mess with writeback
+ * lists in this function since flusher thread may be doing for example
+ * sync in parallel and if we move the inode, it could get skipped. So
+ * here we make sure inode is on some writeback list and leave it there
+ * unless we have completely cleaned the inode.
+ */
+ if (!(inode->i_state & I_DIRTY))
+ goto out;
+ inode->i_state |= I_SYNC;
+ spin_unlock(&inode->i_lock);
+
+ ret = __writeback_single_inode(inode, wb, wbc);
spin_lock(&wb->list_lock);
spin_lock(&inode->i_lock);
- inode_wb_requeue(inode, wb, wbc);
+ /*
+ * If inode is clean, remove it from writeback lists. Otherwise don't
+ * touch it. See comment above for explanation.
+ */
+ if (!(inode->i_state & I_DIRTY))
+ list_del_init(&inode->i_wb_list);
+ spin_unlock(&wb->list_lock);
inode_sync_complete(inode);
- trace_writeback_single_inode(inode, wbc, nr_to_write);
+out:
+ spin_unlock(&inode->i_lock);
return ret;
}
@@ -576,23 +612,24 @@ static long writeback_sb_inodes(struct super_block *sb,
spin_unlock(&wb->list_lock);
__iget(inode);
+ if (inode->i_state & I_SYNC)
+ inode_wait_for_writeback(inode);
+ inode->i_state |= I_SYNC;
+ spin_unlock(&inode->i_lock);
write_chunk = writeback_chunk_size(wb->bdi, work);
wbc.nr_to_write = write_chunk;
wbc.pages_skipped = 0;
- writeback_single_inode(inode, wb, &wbc);
+ __writeback_single_inode(inode, wb, &wbc);
work->nr_pages -= write_chunk - wbc.nr_to_write;
wrote += write_chunk - wbc.nr_to_write;
+ spin_lock(&wb->list_lock);
+ spin_lock(&inode->i_lock);
if (!(inode->i_state & I_DIRTY))
wrote++;
- if (wbc.pages_skipped) {
- /*
- * writeback is not making progress due to locked
- * buffers. Skip this inode for now.
- */
- redirty_tail(inode, wb);
- }
+ inode_wb_requeue(inode, wb, &wbc);
+ inode_sync_complete(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&wb->list_lock);
iput(inode);
@@ -1328,7 +1365,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
int write_inode_now(struct inode *inode, int sync)
{
struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
- int ret;
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
@@ -1340,11 +1376,7 @@ int write_inode_now(struct inode *inode, int sync)
wbc.nr_to_write = 0;
might_sleep();
- spin_lock(&inode->i_lock);
- ret = writeback_single_inode(inode, wb, &wbc);
- spin_unlock(&inode->i_lock);
- spin_unlock(&wb->list_lock);
- return ret;
+ return writeback_single_inode(inode, wb, &wbc);
}
EXPORT_SYMBOL(write_inode_now);
@@ -1361,14 +1393,7 @@ EXPORT_SYMBOL(write_inode_now);
*/
int sync_inode(struct inode *inode, struct writeback_control *wbc)
{
- struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
- int ret;
-
- spin_lock(&inode->i_lock);
- ret = writeback_single_inode(inode, wb, wbc);
- spin_unlock(&inode->i_lock);
- spin_unlock(&wb->list_lock);
- return ret;
+ return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
}
EXPORT_SYMBOL(sync_inode);
--
1.7.1
next prev parent reply other threads:[~2012-03-20 22:56 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-03-20 22:56 [PATCH 0/7 v2] writeback: Avoid iput() from flusher thread Jan Kara
2012-03-20 22:56 ` [PATCH 1/7] writeback: Move clearing of I_SYNC into inode_sync_complete() Jan Kara
2012-04-30 14:36 ` Christoph Hellwig
2012-04-30 21:30 ` Jan Kara
2012-03-20 22:56 ` [PATCH 2/7] writeback: Move requeueing when I_SYNC set to writeback_sb_inodes() Jan Kara
2012-04-30 14:38 ` Christoph Hellwig
2012-03-20 22:56 ` [PATCH 3/7] writeback: Move I_DIRTY_PAGES handling Jan Kara
2012-03-22 2:41 ` Fengguang Wu
2012-03-22 8:35 ` Jan Kara
2012-03-28 3:11 ` Fengguang Wu
2012-03-28 15:12 ` Christoph Hellwig
2012-04-30 14:39 ` Christoph Hellwig
2012-04-30 14:41 ` Christoph Hellwig
2012-04-30 21:21 ` Jan Kara
2012-03-20 22:56 ` [PATCH 4/7] writeback: Separate inode requeueing after writeback Jan Kara
2012-04-30 14:43 ` Christoph Hellwig
2012-04-30 21:42 ` Jan Kara
2012-03-20 22:56 ` [PATCH 5/7] writeback: Remove wb->list_lock from writeback_single_inode() Jan Kara
2012-04-30 14:44 ` Christoph Hellwig
2012-03-20 22:56 ` Jan Kara [this message]
2012-03-20 23:35 ` [PATCH 6/7] writeback: Refactor writeback_single_inode() Dave Chinner
2012-03-21 10:03 ` Jan Kara
2012-04-30 14:46 ` Christoph Hellwig
2012-03-20 22:56 ` [PATCH 7/7] writeback: Avoid iput() from flusher thread Jan Kara
2012-03-20 23:50 ` Dave Chinner
2012-03-21 10:25 ` Jan Kara
2012-03-22 3:03 ` Fengguang Wu
2012-03-22 6:27 ` Dave Chinner
2012-03-22 9:50 ` Jan Kara
2012-03-22 3:01 ` Fengguang Wu
2012-04-30 15:30 ` Christoph Hellwig
2012-04-30 21:58 ` Jan Kara
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1332284191-21076-7-git-send-email-jack@suse.cz \
--to=jack@suse.cz \
--cc=fengguang.wu@intel.com \
--cc=hch@infradead.org \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).