linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jens Axboe <jens.axboe@oracle.com>
To: Dmitry Monakhov <dmonakhov@openvz.org>
Cc: linux-ext4@vger.kernel.org, chris.mason@oracle.com
Subject: Re: [Bug 15906] serious performance regression in "umount" on ext4 over LVM
Date: Wed, 5 May 2010 10:40:56 +0200	[thread overview]
Message-ID: <20100505084056.GZ27497@kernel.dk> (raw)
In-Reply-To: <87ljbyhhxe.fsf@openvz.org>

On Wed, May 05 2010, Dmitry Monakhov wrote:
> bugzilla-daemon@bugzilla.kernel.org writes:
> 
> Hi Jens,
> 
> Just FYI, we have found a regression which was caused by your famous
> writeback patch 03ba3782e8dcc5b0e1efe440d33084f066e38cae.
> I'm not allowed to add you to CC in BZ, that's why I wrote this mail.

You need to use axboe@kernel.dk as that is what I use there.

> Before the patch __sync_filesystem() called writeback_single_inode()
> directly, but now it is called indirectly (from the flush-X:X task),
> which requires the super_block in question to be pinned.
> But it is impossible to pin this SB on umount because we already
> hold s_umount sem for write, so effectively we have already pinned that SB.
> So my proposal is to treat umount similarly to WB_SYNC_ALL, and skip
> the pinning stage.

Hmm, I see, yes that is a bug. How about adding a WB_SYNC_NONE_PINNED or
something to that effect, which acts like WB_SYNC_NONE but the caller is
required to hold the s_umount already (like WB_SYNC_ALL)?

Something like the below. It compiles, but is not tested.

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4b37f7c..4327465 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -245,19 +245,20 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
  * @bdi: the backing device to write from
  * @sb: write inodes from this super_block
  * @nr_pages: the number of pages to write
+ * @sb_locked: caller already holds sb umount sem.
  *
  * Description:
  *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
  *   started when this function returns, we make no guarentees on
- *   completion. Caller need not hold sb s_umount semaphore.
+ *   completion. Caller specifies whether sb umount sem is held already or not.
  *
  */
 void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-			 long nr_pages)
+			 long nr_pages, int sb_locked)
 {
 	struct wb_writeback_args args = {
 		.sb		= sb,
-		.sync_mode	= WB_SYNC_NONE,
+		.sync_mode	= sb_locked ? WB_SYNC_NONE_PIN : WB_SYNC_NONE,
 		.nr_pages	= nr_pages,
 		.range_cyclic	= 1,
 	};
@@ -577,7 +578,8 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
 	/*
 	 * Caller must already hold the ref for this
 	 */
-	if (wbc->sync_mode == WB_SYNC_ALL) {
+	if (wbc->sync_mode == WB_SYNC_ALL ||
+	    wbc->sync_mode == WB_SYNC_NONE_PIN) {
 		WARN_ON(!rwsem_is_locked(&sb->s_umount));
 		return SB_NOT_PINNED;
 	}
@@ -1183,6 +1185,18 @@ static void wait_sb_inodes(struct super_block *sb)
 	iput(old_inode);
 }
 
+static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
+{
+	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+	long nr_to_write;
+
+	nr_to_write = nr_dirty + nr_unstable +
+			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+	bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
+}
+
 /**
  * writeback_inodes_sb	-	writeback dirty inodes from given super_block
  * @sb: the superblock
@@ -1194,18 +1208,23 @@ static void wait_sb_inodes(struct super_block *sb)
  */
 void writeback_inodes_sb(struct super_block *sb)
 {
-	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
-	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
-	long nr_to_write;
-
-	nr_to_write = nr_dirty + nr_unstable +
-			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
-
-	bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
+	__writeback_inodes_sb(sb, 0);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
 /**
+ * writeback_inodes_sb_locked	- writeback dirty inodes from given super_block
+ * @sb: the superblock
+ *
+ * Like writeback_inodes_sb(), except the caller already holds the
+ * sb umount sem.
+ */
+void writeback_inodes_sb_locked(struct super_block *sb)
+{
+	__writeback_inodes_sb(sb, 1);
+}
+
+/**
  * writeback_inodes_sb_if_idle	-	start writeback if none underway
  * @sb: the superblock
  *
diff --git a/fs/sync.c b/fs/sync.c
index 92b2281..de6a441 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	if (wait)
 		sync_inodes_sb(sb);
 	else
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb_locked(sb);
 
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, wait);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index bd0e3c6..90e677a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -103,7 +103,7 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-				long nr_pages);
+				long nr_pages, int sb_locked);
 int bdi_writeback_task(struct bdi_writeback *wb);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 36520de..3cd39b0 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -19,6 +19,7 @@ extern struct list_head inode_unused;
 enum writeback_sync_modes {
 	WB_SYNC_NONE,	/* Don't wait on anything */
 	WB_SYNC_ALL,	/* Wait on every mapping */
+	WB_SYNC_NONE_PIN,	/* Like WB_SYNC_NONE, but s_umount held */
 };
 
 /*
@@ -73,6 +74,7 @@ struct writeback_control {
 struct bdi_writeback;
 int inode_wait(void *);
 void writeback_inodes_sb(struct super_block *);
+void writeback_inodes_sb_locked(struct super_block *);
 int writeback_inodes_sb_if_idle(struct super_block *);
 void sync_inodes_sb(struct super_block *);
 void writeback_inodes_wbc(struct writeback_control *wbc);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0b19943..49d3508 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	    (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
 			       + global_page_state(NR_UNSTABLE_NFS))
 					  > background_thresh)))
-		bdi_start_writeback(bdi, NULL, 0);
+		bdi_start_writeback(bdi, NULL, 0, 0);
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)

-- 
Jens Axboe


  reply	other threads:[~2010-05-05  8:40 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-04 20:36 [Bug 15906] New: serious performance regression in "umount" on ext4 over LVM bugzilla-daemon
2010-05-04 20:37 ` [Bug 15906] " bugzilla-daemon
2010-05-04 20:41 ` bugzilla-daemon
2010-05-04 20:52 ` bugzilla-daemon
2010-05-04 21:05 ` bugzilla-daemon
2010-05-04 21:11 ` bugzilla-daemon
2010-05-04 21:12 ` bugzilla-daemon
2010-05-04 21:34 ` bugzilla-daemon
2010-05-04 21:48 ` bugzilla-daemon
2010-05-05  0:28 ` bugzilla-daemon
2010-05-05  3:18 ` bugzilla-daemon
2010-05-05  4:00 ` bugzilla-daemon
2010-05-05  4:46 ` bugzilla-daemon
2010-05-05  7:28 ` bugzilla-daemon
2010-05-05  8:27   ` Dmitry Monakhov
2010-05-05  8:40     ` Jens Axboe [this message]
2010-05-05  9:06       ` Dmitry Monakhov
2010-05-05  7:30 ` bugzilla-daemon
2010-05-05 19:44 ` bugzilla-daemon
2010-05-05 20:34 ` [Bug 15906] performance regression in "umount" of filesystems using barriers bugzilla-daemon
2010-05-05 21:36   ` Greg Freemyer
2010-05-05 20:34 ` bugzilla-daemon
2010-05-05 21:36 ` bugzilla-daemon
2010-05-05 21:55   ` Greg Freemyer
2010-05-06  3:49 ` bugzilla-daemon
2010-05-06  7:09 ` bugzilla-daemon
2010-05-06  7:10 ` bugzilla-daemon
2010-05-07  9:47 ` bugzilla-daemon
2010-05-08 14:30 ` bugzilla-daemon
2010-05-17  3:28 ` bugzilla-daemon
2010-05-17  3:44 ` bugzilla-daemon
2010-05-17 10:52 ` bugzilla-daemon
2010-05-19 18:57 ` bugzilla-daemon
2010-05-19 19:12 ` bugzilla-daemon
2010-05-21  1:05 ` bugzilla-daemon
2010-05-21  2:07 ` bugzilla-daemon
2010-05-21  6:08 ` bugzilla-daemon
2010-05-22  4:38 ` bugzilla-daemon
2010-05-26 16:44 ` bugzilla-daemon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100505084056.GZ27497@kernel.dk \
    --to=jens.axboe@oracle.com \
    --cc=chris.mason@oracle.com \
    --cc=dmonakhov@openvz.org \
    --cc=linux-ext4@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).