All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mike Waychison <mikew@google.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [PATCH v1 5/8] Parallelize iput()
Date: Fri, 16 Jan 2009 18:30:02 -0800	[thread overview]
Message-ID: <20090117023002.20425.93106.stgit@crlf.corp.google.com> (raw)
In-Reply-To: <20090117022936.20425.43248.stgit@crlf.corp.google.com>

Now that we have deferred batching of iputs, we can parallelize the
finalization of inodes to minimize the grabbing of inode_lock.

iput actually grabs the inode_lock twice, as it needs to transition the state of
the inode into a finalizing state (I_FREEING, I_WILL_FREE) before it can start
flushing the inode out.  It then needs to grab the lock again to unhash it
before releasing the lock and freeing the inode.

This patch converts the drop_inode callbacks in the VFS to a four-phase
approach (start freeing, deleting, unhashing, destroying).  I opted to
introduce a phase enum and have the callbacks switch on it rather than
creating three new callbacks each.

Signed-off-by: Mike Waychison <mikew@google.com>
---

 fs/inode.c         |  219 +++++++++++++++++++++++++++++++++++++---------------
 include/linux/fs.h |   13 ++-
 2 files changed, 167 insertions(+), 65 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 9aa574d..e19c6ea 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1151,71 +1151,87 @@ EXPORT_SYMBOL(remove_inode_hash);
  * I_FREEING is set so that no-one will take a new reference to the inode while
  * it is being deleted.
  */
-void generic_delete_inode(struct inode *inode)
+int generic_delete_inode(struct inode *inode, enum drop_inode_phase phase)
 {
 	const struct super_operations *op = inode->i_sb->s_op;
 
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
-	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
-	spin_unlock(&inode_lock);
-
-	security_inode_delete(inode);
-
-	if (op->delete_inode) {
-		void (*delete)(struct inode *) = op->delete_inode;
-		if (!is_bad_inode(inode))
-			DQUOT_INIT(inode);
-		/* Filesystems implementing their own
-		 * s_op->delete_inode are required to call
-		 * truncate_inode_pages and clear_inode()
-		 * internally */
-		delete(inode);
-	} else {
-		truncate_inode_pages(&inode->i_data, 0);
-		clear_inode(inode);
+	switch (phase) {
+	case DIP_START_FREEING:
+		list_del_init(&inode->i_list);
+		list_del_init(&inode->i_sb_list);
+		inode->i_state |= I_FREEING;
+		inodes_stat.nr_inodes--;
+		break;
+	case DIP_DELETING:
+		security_inode_delete(inode);
+		if (op->delete_inode) {
+			void (*delete)(struct inode *) = op->delete_inode;
+			if (!is_bad_inode(inode))
+				DQUOT_INIT(inode);
+			/* Filesystems implementing their own
+			 * s_op->delete_inode are required to call
+			 * truncate_inode_pages and clear_inode()
+			 * internally */
+			delete(inode);
+		} else {
+			truncate_inode_pages(&inode->i_data, 0);
+			clear_inode(inode);
+		}
+		break;
+	case DIP_UNHASHING:
+		hlist_del_init(&inode->i_hash);
+		break;
+	case DIP_DESTROYING:
+		wake_up_inode(inode);
+		BUG_ON(inode->i_state != I_CLEAR);
+		destroy_inode(inode);
+		break;
 	}
-	spin_lock(&inode_lock);
-	hlist_del_init(&inode->i_hash);
-	spin_unlock(&inode_lock);
-	wake_up_inode(inode);
-	BUG_ON(inode->i_state != I_CLEAR);
-	destroy_inode(inode);
+	return 0;
 }
 
 EXPORT_SYMBOL(generic_delete_inode);
 
-static void generic_forget_inode(struct inode *inode)
+static int generic_forget_inode(struct inode *inode,
+				enum drop_inode_phase phase)
 {
 	struct super_block *sb = inode->i_sb;
 
-	if (!hlist_unhashed(&inode->i_hash)) {
-		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-			list_move(&inode->i_list, &inode_unused);
-		inodes_stat.nr_unused++;
-		if (sb->s_flags & MS_ACTIVE) {
-			spin_unlock(&inode_lock);
-			return;
+	switch (phase) {
+	case DIP_START_FREEING:
+		if (!hlist_unhashed(&inode->i_hash)) {
+			if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+				list_move(&inode->i_list, &inode_unused);
+			inodes_stat.nr_unused++;
+			if (sb->s_flags & MS_ACTIVE)
+				return 1;
+			inode->i_state |= I_WILL_FREE;
 		}
-		inode->i_state |= I_WILL_FREE;
-		spin_unlock(&inode_lock);
-		write_inode_now(inode, 1);
-		spin_lock(&inode_lock);
-		inode->i_state &= ~I_WILL_FREE;
-		inodes_stat.nr_unused--;
-		hlist_del_init(&inode->i_hash);
+		break;
+	case DIP_DELETING:
+		if (!hlist_unhashed(&inode->i_hash))
+			write_inode_now(inode, 1);
+		break;
+	case DIP_UNHASHING:
+		if (!hlist_unhashed(&inode->i_hash)) {
+			inode->i_state &= ~I_WILL_FREE;
+			inodes_stat.nr_unused--;
+			hlist_del_init(&inode->i_hash);
+		}
+		list_del_init(&inode->i_list);
+		list_del_init(&inode->i_sb_list);
+		inode->i_state |= I_FREEING;
+		inodes_stat.nr_inodes--;
+		break;
+	case DIP_DESTROYING:
+		if (inode->i_data.nrpages)
+			truncate_inode_pages(&inode->i_data, 0);
+		clear_inode(inode);
+		wake_up_inode(inode);
+		destroy_inode(inode);
+		break;
 	}
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
-	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
-	spin_unlock(&inode_lock);
-	if (inode->i_data.nrpages)
-		truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
-	wake_up_inode(inode);
-	destroy_inode(inode);
+	return 0;
 }
 
 /*
@@ -1223,16 +1239,34 @@ static void generic_forget_inode(struct inode *inode)
  * inode when the usage count drops to zero, and
  * i_nlink is zero.
  */
-void generic_drop_inode(struct inode *inode)
+int generic_drop_inode(struct inode *inode, enum drop_inode_phase phase)
 {
 	if (!inode->i_nlink)
-		generic_delete_inode(inode);
+		return generic_delete_inode(inode, phase);
 	else
-		generic_forget_inode(inode);
+		return generic_forget_inode(inode, phase);
 }
 
 EXPORT_SYMBOL_GPL(generic_drop_inode);
 
+
+typedef int (*drop_op_t)(struct inode *inode, enum drop_inode_phase phase);
+static drop_op_t drop_op(struct inode *inode)
+{
+	const struct super_operations *op = inode->i_sb->s_op;
+	drop_op_t drop = generic_drop_inode;
+
+	if (op && op->drop_inode)
+		drop = op->drop_inode;
+	return drop;
+}
+
+static int drop_inode_phase(struct inode *inode, enum drop_inode_phase phase)
+{
+	drop_op_t drop = drop_op(inode);
+	return drop(inode, phase);
+}
+
 /*
  * Called when we're dropping the last reference
  * to an inode. 
@@ -1246,12 +1280,16 @@ EXPORT_SYMBOL_GPL(generic_drop_inode);
  */
 static inline void iput_final(struct inode *inode)
 {
-	const struct super_operations *op = inode->i_sb->s_op;
-	void (*drop)(struct inode *) = generic_drop_inode;
+	drop_op_t drop = drop_op(inode);
 
-	if (op && op->drop_inode)
-		drop = op->drop_inode;
-	drop(inode);
+	if (drop(inode, DIP_START_FREEING))
+		return;
+	spin_unlock(&inode_lock);
+	drop(inode, DIP_DELETING);
+	spin_lock(&inode_lock);
+	drop(inode, DIP_UNHASHING);
+	spin_unlock(&inode_lock);
+	drop(inode, DIP_DESTROYING);
 }
 
 static void real_iput(struct inode *inode)
@@ -1320,9 +1358,66 @@ static void add_pending_iput(struct postponed_inodes *ppi, struct inode *inode)
 
 static void process_postponed_inodes(struct postponed_inodes *ppi)
 {
+	struct inode *inode;
 	unsigned i;
-	for (i = 0; i < ppi->nr; i++)
-		real_iput(ppi->inodes[i]);
+
+	/*
+	 * Parallel iput is done in four phases:
+	 *
+	 * A) Removing the inode from sb lists and marking the inode as
+	 *    I_FREEING under inode_lock.
+	 * B) Deleting/truncating/clearing the inode with no lock.
+	 * C) Unhashing the inode under inode_lock.
+	 * D) Issuing a wakeup on the inode and destroying.
+	 */
+
+	/*
+	 * Phase (A) Removing inodes from the super_blocks.  We do this under
+	 * lock and may discover inodes that aren't unused anymore.
+	 *
+	 * Phase (A) may be cancelled by the fs, in which case we just forget
+	 * we were looking at the inode.
+	 */
+	spin_lock(&inode_lock);
+	for (i = 0; i < ppi->nr; i++) {
+		int ret;
+		inode = ppi->inodes[i];
+		if (!atomic_dec_and_test(&inode->i_count)) {
+			/* Someone is using this guy. */
+			ppi->inodes[i] = NULL;
+			continue;
+		}
+		ret = drop_inode_phase(inode, DIP_START_FREEING);
+		if (ret)
+			ppi->inodes[i] = NULL;
+	}
+	spin_unlock(&inode_lock);
+
+	/* Phase (B).  Deleting of the inode, which may incur writeout. */
+	for (i = 0; i < ppi->nr; i++) {
+		inode = ppi->inodes[i];
+		if (!inode)
+			continue;
+		drop_inode_phase(inode, DIP_DELETING);
+	}
+
+	/* Phase (C). Unhashing of the inode. */
+	spin_lock(&inode_lock);
+	for (i = 0; i < ppi->nr; i++) {
+		inode = ppi->inodes[i];
+		if (!inode)
+			continue;
+		drop_inode_phase(inode, DIP_UNHASHING);
+	}
+	spin_unlock(&inode_lock);
+
+	/* Phase (D).  Destroying of the in-core inode. */
+	for (i = 0; i < ppi->nr; i++) {
+		inode = ppi->inodes[i];
+		if (!inode)
+			continue;
+		drop_inode_phase(inode, DIP_DESTROYING);
+	}
 }
 
 static void release_postponed_inodes(struct postponed_inodes *ppi)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index aa90620..a7822cd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1374,13 +1374,20 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
 extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
 		unsigned long, loff_t *);
 
+enum drop_inode_phase {
+	DIP_START_FREEING,
+	DIP_DELETING,
+	DIP_UNHASHING,
+	DIP_DESTROYING,
+};
+
 struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
 
    	void (*dirty_inode) (struct inode *);
 	int (*write_inode) (struct inode *, int);
-	void (*drop_inode) (struct inode *);
+	int (*drop_inode) (struct inode *, enum drop_inode_phase phase);
 	void (*delete_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
 	void (*write_super) (struct super_block *);
@@ -1907,8 +1914,8 @@ extern void iput(struct inode *);
 extern struct inode * igrab(struct inode *);
 extern ino_t iunique(struct super_block *, ino_t);
 extern int inode_needs_sync(struct inode *inode);
-extern void generic_delete_inode(struct inode *inode);
-extern void generic_drop_inode(struct inode *inode);
+extern int generic_delete_inode(struct inode *inode, enum drop_inode_phase);
+extern int generic_drop_inode(struct inode *inode, enum drop_inode_phase);
 extern void iput_drain_cpu(void);
 extern void iput_drain_all(void);
 


  parent reply	other threads:[~2009-01-17  2:33 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-01-17  2:29 [PATCH v1 0/8] Deferred dput() and iput() -- reducing lock contention Mike Waychison
2009-01-17  2:29 ` [PATCH v1 1/8] Deferred batching of dput() Mike Waychison
2009-01-17 10:15   ` Evgeniy Polyakov
2009-01-20 20:07     ` Mike Waychison
2009-01-17  2:29 ` [PATCH v1 2/8] Parallel dput() Mike Waychison
2009-01-17  2:29 ` [PATCH v1 3/8] Deferred batching of iput() Mike Waychison
2009-01-17 10:18   ` Evgeniy Polyakov
2009-01-20 20:07     ` Mike Waychison
2009-01-17  2:29 ` [PATCH v1 4/8] Fixing iput() called from put_super path Mike Waychison
2009-01-17  2:30 ` Mike Waychison [this message]
2009-01-17  2:30 ` [PATCH v1 6/8] hugetlbfs drop_inode update Mike Waychison
2009-01-17  2:30 ` [PATCH v1 7/8] Make drop_caches flush pending dput()s and iput()s Mike Waychison
2009-01-17  2:30 ` [PATCH v1 8/8] Make the sync path drain dentries and inodes Mike Waychison
2009-01-17  7:04 ` [PATCH v1 0/8] Deferred dput() and iput() -- reducing lock contention Eric Dumazet
2009-01-20 20:00   ` Mike Waychison
2009-01-20 20:00     ` Mike Waychison
2009-01-17  8:12 ` Dave Chinner
2009-01-20 19:01   ` Mike Waychison
2009-01-29  2:09   ` Mike Waychison
2009-01-21  5:52 ` Andi Kleen
2009-01-21  6:22   ` Mike Waychison
2009-01-21  8:48     ` Andi Kleen
2009-01-21 17:28       ` Mike Waychison

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090117023002.20425.93106.stgit@crlf.corp.google.com \
    --to=mikew@google.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.