All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: Guo Chao <yan@linux.vnet.ibm.com>
Cc: viro@zeniv.linux.org.uk, dchinner@redhat.com, hch@infradead.org,
	jack@suse.cz, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [RFC v4 Patch 0/4] fs/inode.c: optimization for inode lock usage
Date: Mon, 24 Sep 2012 18:26:54 +1000	[thread overview]
Message-ID: <20120924082654.GG20960@dastard> (raw)
In-Reply-To: <20120924070852.GA10280@yanx>

On Mon, Sep 24, 2012 at 03:08:52PM +0800, Guo Chao wrote:
> On Mon, Sep 24, 2012 at 04:28:12PM +1000, Dave Chinner wrote:
> > > Ah, this is intended to be a code clean patchset actually. I thought these
> > > locks are redundant in an obvious and trivial manner. If, on the contrary, 
> > > they are such tricky, then never mind :) Thanks for your patient.
> > 
> > The RCU conversion is actually trivial - everything is already set
> > up for it to be done, and is simpler than this patch set. It pretty
> > much is simply replacing all the read side inode_hash_lock pairs
> > with rcu_read_lock()/rcu_read_unlock() pairs. Like I said, if you
> > want to clean up this code, then RCU traversals are the conversion
> > to make.
> >
> 
> Thanks for your suggestion. Though I doubt it's such trivial, I will try this
> after a little investigation.

Probably best to start with the patch below - it's run under heavy
concurrent load on ext4 with a working set of inodes about 20x
larger than can fit in memory for the past hour....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

fs: Use RCU lookups for inode cache

From: Dave Chinner <dchinner@redhat.com>

Convert inode cache lookups to be protected by RCU locking rather
than the global inode_hash_lock. This will improve scalability of
inode lookup intensive workloads.

Smoke tested w/ ext4 on concurrent fsmark/lookup/unlink workloads
over 50 million or so inodes...

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/inode.c |   74 ++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 42 insertions(+), 32 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index ac8d904..2e92674 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -464,7 +464,7 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
 
 	spin_lock(&inode_hash_lock);
 	spin_lock(&inode->i_lock);
-	hlist_add_head(&inode->i_hash, b);
+	hlist_add_head_rcu(&inode->i_hash, b);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_hash_lock);
 }
@@ -480,7 +480,7 @@ void __remove_inode_hash(struct inode *inode)
 {
 	spin_lock(&inode_hash_lock);
 	spin_lock(&inode->i_lock);
-	hlist_del_init(&inode->i_hash);
+	hlist_del_init_rcu(&inode->i_hash);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_hash_lock);
 }
@@ -783,14 +783,19 @@ static void __wait_on_freeing_inode(struct inode *inode);
 static struct inode *find_inode(struct super_block *sb,
 				struct hlist_head *head,
 				int (*test)(struct inode *, void *),
-				void *data)
+				void *data, bool locked)
 {
 	struct hlist_node *node;
 	struct inode *inode = NULL;
 
 repeat:
-	hlist_for_each_entry(inode, node, head, i_hash) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(inode, node, head, i_hash) {
 		spin_lock(&inode->i_lock);
+		if (inode_unhashed(inode)) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
 		if (inode->i_sb != sb) {
 			spin_unlock(&inode->i_lock);
 			continue;
@@ -800,13 +805,20 @@ repeat:
 			continue;
 		}
 		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
+			rcu_read_unlock();
+			if (locked)
+				spin_unlock(&inode_hash_lock);
 			__wait_on_freeing_inode(inode);
+			if (locked)
+				spin_lock(&inode_hash_lock);
 			goto repeat;
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
+		rcu_read_unlock();
 		return inode;
 	}
+	rcu_read_unlock();
 	return NULL;
 }
 
@@ -815,14 +827,20 @@ repeat:
  * iget_locked for details.
  */
 static struct inode *find_inode_fast(struct super_block *sb,
-				struct hlist_head *head, unsigned long ino)
+				     struct hlist_head *head,
+				     unsigned long ino, bool locked)
 {
 	struct hlist_node *node;
 	struct inode *inode = NULL;
 
 repeat:
-	hlist_for_each_entry(inode, node, head, i_hash) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(inode, node, head, i_hash) {
 		spin_lock(&inode->i_lock);
+		if (inode_unhashed(inode)) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
 		if (inode->i_ino != ino) {
 			spin_unlock(&inode->i_lock);
 			continue;
@@ -832,13 +850,20 @@ repeat:
 			continue;
 		}
 		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
+			rcu_read_unlock();
+			if (locked)
+				spin_unlock(&inode_hash_lock);
 			__wait_on_freeing_inode(inode);
+			if (locked)
+				spin_lock(&inode_hash_lock);
 			goto repeat;
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
+		rcu_read_unlock();
 		return inode;
 	}
+	rcu_read_unlock();
 	return NULL;
 }
 
@@ -995,9 +1020,7 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 	struct inode *inode;
 
-	spin_lock(&inode_hash_lock);
-	inode = find_inode(sb, head, test, data);
-	spin_unlock(&inode_hash_lock);
+	inode = find_inode(sb, head, test, data, false);
 
 	if (inode) {
 		wait_on_inode(inode);
@@ -1009,8 +1032,7 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
 		struct inode *old;
 
 		spin_lock(&inode_hash_lock);
-		/* We released the lock, so.. */
-		old = find_inode(sb, head, test, data);
+		old = find_inode(sb, head, test, data, true);
 		if (!old) {
 			if (set(inode, data))
 				goto set_failed;
@@ -1065,9 +1087,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
 
-	spin_lock(&inode_hash_lock);
-	inode = find_inode_fast(sb, head, ino);
-	spin_unlock(&inode_hash_lock);
+	inode = find_inode_fast(sb, head, ino, false);
 	if (inode) {
 		wait_on_inode(inode);
 		return inode;
@@ -1078,8 +1098,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
 		struct inode *old;
 
 		spin_lock(&inode_hash_lock);
-		/* We released the lock, so.. */
-		old = find_inode_fast(sb, head, ino);
+		old = find_inode_fast(sb, head, ino, true);
 		if (!old) {
 			inode->i_ino = ino;
 			spin_lock(&inode->i_lock);
@@ -1122,14 +1141,15 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino)
 	struct hlist_node *node;
 	struct inode *inode;
 
-	spin_lock(&inode_hash_lock);
+	rcu_read_lock();
 	hlist_for_each_entry(inode, node, b, i_hash) {
-		if (inode->i_ino == ino && inode->i_sb == sb) {
-			spin_unlock(&inode_hash_lock);
+		if (inode->i_ino == ino && inode->i_sb == sb &&
+		    !inode_unhashed(inode)) {
+			rcu_read_unlock();
 			return 0;
 		}
 	}
-	spin_unlock(&inode_hash_lock);
+	rcu_read_unlock();
 
 	return 1;
 }
@@ -1210,13 +1230,8 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
-	struct inode *inode;
-
-	spin_lock(&inode_hash_lock);
-	inode = find_inode(sb, head, test, data);
-	spin_unlock(&inode_hash_lock);
 
-	return inode;
+	return find_inode(sb, head, test, data, false);
 }
 EXPORT_SYMBOL(ilookup5_nowait);
 
@@ -1261,10 +1276,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
 
-	spin_lock(&inode_hash_lock);
-	inode = find_inode_fast(sb, head, ino);
-	spin_unlock(&inode_hash_lock);
-
+	inode = find_inode_fast(sb, head, ino, false);
 	if (inode)
 		wait_on_inode(inode);
 	return inode;
@@ -1711,10 +1723,8 @@ static void __wait_on_freeing_inode(struct inode *inode)
 	wq = bit_waitqueue(&inode->i_state, __I_NEW);
 	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_hash_lock);
 	schedule();
 	finish_wait(wq, &wait.wait);
-	spin_lock(&inode_hash_lock);
 }
 
 static __initdata unsigned long ihash_entries;

  reply	other threads:[~2012-09-24  8:26 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-21  9:31 [RFC v4 Patch 0/4] fs/inode.c: optimization for inode lock usage Guo Chao
2012-09-21  9:31 ` [PATCH 1/4] fs/inode.c: do not take i_lock on newly allocated inode Guo Chao
2012-09-21  9:31 ` [PATCH 2/4] fs/inode.c: do not take i_lock in __(insert|remove)_inode_hash Guo Chao
2012-09-21  9:31 ` [PATCH 3/4] fs/inode.c: do not take i_lock when identify an inode Guo Chao
2012-09-21  9:31 ` [PATCH 4/4] fs/inode.c: always take i_lock before calling filesystem's test() method Guo Chao
2012-09-21 12:17 ` [RFC v4 Patch 0/4] fs/inode.c: optimization for inode lock usage Matthew Wilcox
2012-09-21 22:49 ` Dave Chinner
2012-09-24  2:42   ` Guo Chao
2012-09-24  4:23     ` Dave Chinner
2012-09-24  6:12       ` Guo Chao
2012-09-24  6:28         ` Dave Chinner
2012-09-24  7:08           ` Guo Chao
2012-09-24  8:26             ` Dave Chinner [this message]
2012-09-25  8:59               ` Guo Chao
2012-09-26  0:54                 ` Dave Chinner
2012-09-27  8:41                   ` Guo Chao
2012-09-27 11:51                     ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120924082654.GG20960@dastard \
    --to=david@fromorbit.com \
    --cc=dchinner@redhat.com \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=yan@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.