All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <jens.axboe@oracle.com>
To: Christoph Hellwig <hch@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Linux Kernel <linux-kernel@vger.kernel.org>,
	akpm@linux-foundation.org
Subject: Re: merging the per-bdi writeback patchset
Date: Tue, 23 Jun 2009 14:20:09 +0200	[thread overview]
Message-ID: <20090623122009.GC31415@kernel.dk> (raw)
In-Reply-To: <20090623115224.GB31415@kernel.dk>

On Tue, Jun 23 2009, Jens Axboe wrote:
> On Tue, Jun 23 2009, Christoph Hellwig wrote:
> > On Tue, Jun 23, 2009 at 01:12:10PM +0200, Jens Axboe wrote:
> > > > Last time we discussed this you said you're happy with 2.6.32.  I really
> > > > want to take a more detailed look and put that on the not so urgent list
> > > > because ou didn't seem to rush for .31.  So my vote goes for waiting a
> > > > bit longer.
> > > 
> > > Yeah, 2.6.32 works for me too, .31 would have been nice though so I
> > > don't have to carry it around anymore. But either is fine, if you and
> > > Andrew want more time to review this stuff, then lets just settle for
> > > .32.
> > 
> > Yes, I'd really prefer more time.  I also expect to come up with some
> > more changes in that area.  Your patch makes the differences between
> > kupdate and pdflush-stye writeback look even more ugly then it already
> > is, so I want to see i there's some nicer way to handle it.  I also want
> 
> Good point, should be easy enough to fold the two together.

Something like the below, untested except for checking that it compiles.
So this doesn't dual-path the two reasons we wake up to flush data
anymore, rather it just flushes kupdated style if we didn't have any
work to do (or did any work).

 fs-writeback.c |  111 +++++++++++++++----------------------------------
 1 file changed, 36 insertions(+), 75 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d589db3..c5996e5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -269,8 +269,18 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
  */
 #define MAX_WRITEBACK_PAGES     1024
 
+static inline bool over_bground_thresh(void)
+{
+	unsigned long background_thresh, dirty_thresh;
+
+	get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
+
+	return (global_page_state(NR_FILE_DIRTY) +
+		global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
+}
+
 /*
- * Periodic writeback of "old" data.
+ * Retrieve work items queued, or perform periodic writeback of "old" data.
  *
  * Define "old": the first time one of an inode's pages is dirtied, we mark the
  * dirtying-time in the inode's address_space.  So this periodic writeback code
@@ -284,61 +294,26 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
  * older_than_this takes precedence over nr_to_write.  So we'll only write back
  * all dirty pages if they are all attached to "old" mappings.
  */
-static long wb_kupdated(struct bdi_writeback *wb)
-{
-	unsigned long oldest_jif;
-	long nr_to_write, wrote = 0;
-	struct writeback_control wbc = {
-		.bdi			= wb->bdi,
-		.sync_mode		= WB_SYNC_NONE,
-		.older_than_this	= &oldest_jif,
-		.nr_to_write		= 0,
-		.for_kupdate		= 1,
-		.range_cyclic		= 1,
-	};
-
-	oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
-
-	nr_to_write = global_page_state(NR_FILE_DIRTY) +
-			global_page_state(NR_UNSTABLE_NFS) +
-			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
-
-	while (nr_to_write > 0) {
-		wbc.more_io = 0;
-		wbc.encountered_congestion = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-		generic_sync_wb_inodes(wb, NULL, &wbc);
-		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-		if (wbc.nr_to_write > 0)
-			break;	/* All the old data is written */
-		nr_to_write -= MAX_WRITEBACK_PAGES;
-	}
-
-	return wrote;
-}
-
-static inline bool over_bground_thresh(void)
-{
-	unsigned long background_thresh, dirty_thresh;
-
-	get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
-
-	return (global_page_state(NR_FILE_DIRTY) +
-		global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
-}
-
-static long __wb_writeback(struct bdi_writeback *wb, long nr_pages,
-			   struct super_block *sb,
-			   enum writeback_sync_modes sync_mode)
+static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
+			 struct super_block *sb,
+			 enum writeback_sync_modes sync_mode, int for_kupdate)
 {
 	struct writeback_control wbc = {
 		.bdi			= wb->bdi,
 		.sync_mode		= sync_mode,
 		.older_than_this	= NULL,
+		.for_kupdate		= for_kupdate,
 		.range_cyclic		= 1,
 	};
+	unsigned long oldest_jif;
 	long wrote = 0;
 
+	if (wbc.for_kupdate) {
+		wbc.older_than_this = &oldest_jif;
+		oldest_jif = jiffies -
+				msecs_to_jiffies(dirty_expire_interval * 10);
+	}
+
 	for (;;) {
 		if (sync_mode == WB_SYNC_NONE && nr_pages <= 0 &&
 		    !over_bground_thresh())
@@ -355,7 +330,7 @@ static long __wb_writeback(struct bdi_writeback *wb, long nr_pages,
 		 * If we ran out of stuff to write, bail unless more_io got set
 		 */
 		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
-			if (wbc.more_io)
+			if (wbc.more_io && !wbc.for_kupdate)
 				continue;
 			break;
 		}
@@ -390,17 +365,18 @@ static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
 /*
  * Retrieve work items and do the writeback they describe
  */
-static long wb_writeback(struct bdi_writeback *wb, int force_wait)
+long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 {
 	struct backing_dev_info *bdi = wb->bdi;
 	struct bdi_work *work;
-	long wrote = 0;
+	long nr_pages, wrote = 0;
 
 	while ((work = get_next_work_item(bdi, wb)) != NULL) {
 		struct super_block *sb = bdi_work_sb(work);
-		long nr_pages = work->nr_pages;
 		enum writeback_sync_modes sync_mode;
 
+		nr_pages = work->nr_pages;
+
 		/*
 		 * Override sync mode, in case we must wait for completion
 		 */
@@ -416,7 +392,7 @@ static long wb_writeback(struct bdi_writeback *wb, int force_wait)
 		if (sync_mode == WB_SYNC_NONE)
 			wb_clear_pending(wb, work);
 
-		wrote += __wb_writeback(wb, nr_pages, sb, sync_mode);
+		wrote += wb_writeback(wb, nr_pages, sb, sync_mode, 0);
 
 		/*
 		 * This is a data integrity writeback, so only do the
@@ -426,31 +402,16 @@ static long wb_writeback(struct bdi_writeback *wb, int force_wait)
 			wb_clear_pending(wb, work);
 	}
 
-	return wrote;
-}
-
-/*
- * This will be inlined in bdi_writeback_task() once we get rid of any
- * dirty inodes on the default_backing_dev_info
- */
-long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
-{
-	long wrote;
-
 	/*
-	 * We get here in two cases:
-	 *
-	 *  schedule_timeout() returned because the dirty writeback
-	 *  interval has elapsed. If that happens, the work item list
-	 *  will be empty and we will proceed to do kupdated style writeout.
-	 *
-	 *  Someone called bdi_start_writeback(), which put one/more work
-	 *  items on the work_list. Process those.
+	 * Check for periodic writeback, kupdated() style
 	 */
-	if (list_empty(&wb->bdi->work_list))
-		wrote = wb_kupdated(wb);
-	else
-		wrote = wb_writeback(wb, force_wait);
+	if (!wrote) {
+		nr_pages = global_page_state(NR_FILE_DIRTY) +
+				global_page_state(NR_UNSTABLE_NFS) +
+				(inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+		wrote = wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1);
+	}
 
 	return wrote;
 }

-- 
Jens Axboe


      reply	other threads:[~2009-06-23 12:20 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-06-23  8:11 merging the per-bdi writeback patchset Jens Axboe
2009-06-23  8:48 ` Andrew Morton
2009-06-23  8:55   ` Jens Axboe
2009-06-23 10:28     ` KOSAKI Motohiro
2009-06-23 10:56       ` Jens Axboe
2009-06-23 11:19         ` KOSAKI Motohiro
2009-06-23 11:28           ` Jens Axboe
2009-06-23 11:44             ` KOSAKI Motohiro
2009-06-23 15:01     ` Andrew Morton
2009-06-24 10:04       ` Jens Axboe
2009-06-23 11:09 ` Christoph Hellwig
2009-06-23 11:12   ` Jens Axboe
2009-06-23 11:17     ` Christoph Hellwig
2009-06-23 11:52       ` Jens Axboe
2009-06-23 12:20         ` Jens Axboe [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090623122009.GC31415@kernel.dk \
    --to=jens.axboe@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=hch@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.