linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: npiggin@kernel.dk
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	npiggin@kernel.dk
Subject: [patch 06/14] fs: icache lock inode hash
Date: Fri, 22 Oct 2010 00:08:35 +1100	[thread overview]
Message-ID: <20101021131016.533198770@kernel.dk> (raw)
In-Reply-To: 20101021130829.442910807@kernel.dk

[-- Attachment #1: fs-inode_lock-scale-2.patch --]
[-- Type: text/plain, Size: 9658 bytes --]

Add a new lock, inode_hash_lock, to protect the inode hash table lists.

[note: inode_lock can't be lifted much further here, because hash lookups
tend to involve insertions, etc., onto other data structures]

Signed-off-by: Nick Piggin <npiggin@kernel.dk>

---
 fs/inode.c |   88 +++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 75 insertions(+), 13 deletions(-)

Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c	2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c	2010-10-21 23:50:44.000000000 +1100
@@ -39,11 +39,14 @@
  *   i_sb_list
  * sb_inode_list_lock protects:
  *   s_inodes, i_sb_list
+ * inode_hash_lock protects:
+ *   inode hash table, i_hash
  *
  * Ordering:
  * inode_lock
  *   i_lock
  *     sb_inode_list_lock
+ *     inode_hash_lock
  */
 /*
  * This is needed for the following functions:
@@ -104,6 +107,7 @@ static struct hlist_head *inode_hashtabl
  */
 DEFINE_SPINLOCK(inode_lock);
 DEFINE_SPINLOCK(sb_inode_list_lock);
+static DEFINE_SPINLOCK(inode_hash_lock);
 
 /*
  * iprune_sem provides exclusion between the kswapd or try_to_free_pages
@@ -390,7 +394,9 @@ static void dispose_list(struct list_hea
 
 		spin_lock(&inode_lock);
 		spin_lock(&inode->i_lock);
+		spin_lock(&inode_hash_lock);
 		hlist_del_init(&inode->i_hash);
+		spin_unlock(&inode_hash_lock);
 		spin_lock(&sb_inode_list_lock);
 		list_del_init(&inode->i_sb_list);
 		spin_unlock(&sb_inode_list_lock);
@@ -615,7 +621,12 @@ static struct inode *find_inode(struct s
 			continue;
 		if (!test(inode, data))
 			continue;
-		spin_lock(&inode->i_lock);
+		if (!spin_trylock(&inode->i_lock)) {
+			spin_unlock(&inode_hash_lock);
+			cpu_relax();
+			spin_lock(&inode_hash_lock);
+			goto repeat;
+		}
 		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
@@ -641,7 +652,12 @@ static struct inode *find_inode_fast(str
 			continue;
 		if (inode->i_sb != sb)
 			continue;
-		spin_lock(&inode->i_lock);
+		if (!spin_trylock(&inode->i_lock)) {
+			spin_unlock(&inode_hash_lock);
+			cpu_relax();
+			spin_lock(&inode_hash_lock);
+			goto repeat;
+		}
 		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
@@ -670,8 +686,11 @@ __inode_add_to_lists(struct super_block
 	spin_lock(&sb_inode_list_lock);
 	list_add(&inode->i_sb_list, &sb->s_inodes);
 	spin_unlock(&sb_inode_list_lock);
-	if (head)
+	if (head) {
+		spin_lock(&inode_hash_lock);
 		hlist_add_head(&inode->i_hash, head);
+		spin_unlock(&inode_hash_lock);
+	}
 }
 
 /**
@@ -790,15 +809,18 @@ static struct inode *get_new_inode(struc
 		struct inode *old;
 
 		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
 		old = find_inode(sb, head, test, data);
 		if (!old) {
 			if (set(inode, data))
 				goto set_failed;
 
-			spin_lock(&inode->i_lock);
+			BUG_ON(!spin_trylock(&inode->i_lock));
 			inode->i_state = I_NEW;
-			__inode_add_to_lists(sb, head, inode);
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode_hash_lock);
+			__inode_add_to_lists(sb, NULL, inode);
 			spin_unlock(&inode->i_lock);
 			spin_unlock(&inode_lock);
 
@@ -814,6 +836,7 @@ static struct inode *get_new_inode(struc
 		 * allocated.
 		 */
 		inode_get_ilock(old);
+		spin_unlock(&inode_hash_lock);
 		spin_unlock(&old->i_lock);
 		spin_unlock(&inode_lock);
 		destroy_inode(inode);
@@ -823,6 +846,7 @@ static struct inode *get_new_inode(struc
 	return inode;
 
 set_failed:
+	spin_unlock(&inode_hash_lock);
 	spin_unlock(&inode_lock);
 	destroy_inode(inode);
 	return NULL;
@@ -842,13 +866,16 @@ static struct inode *get_new_inode_fast(
 		struct inode *old;
 
 		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
 		old = find_inode_fast(sb, head, ino);
 		if (!old) {
-			spin_lock(&inode->i_lock);
+			BUG_ON(!spin_trylock(&inode->i_lock));
 			inode->i_ino = ino;
 			inode->i_state = I_NEW;
-			__inode_add_to_lists(sb, head, inode);
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode_hash_lock);
+			__inode_add_to_lists(sb, NULL, inode);
 			spin_unlock(&inode->i_lock);
 			spin_unlock(&inode_lock);
 
@@ -864,6 +891,7 @@ static struct inode *get_new_inode_fast(
 		 * allocated.
 		 */
 		inode_get_ilock(old);
+		spin_unlock(&inode_hash_lock);
 		spin_unlock(&old->i_lock);
 		spin_unlock(&inode_lock);
 		destroy_inode(inode);
@@ -900,15 +928,17 @@ ino_t iunique(struct super_block *sb, in
 	ino_t res;
 
 	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
 	do {
 		if (counter <= max_reserved)
 			counter = max_reserved + 1;
 		res = counter++;
 		head = inode_hashtable + hash(sb, res);
 		inode = find_inode_fast(sb, head, res);
-		if (inode)
-			spin_unlock(&inode->i_lock);
 	} while (inode != NULL);
+	spin_unlock(&inode_hash_lock);
+	if (inode)
+		spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_lock);
 
 	return res;
@@ -963,15 +993,18 @@ static struct inode *ifind(struct super_
 	struct inode *inode;
 
 	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
 	inode = find_inode(sb, head, test, data);
 	if (inode) {
 		inode_get_ilock(inode);
+		spin_unlock(&inode_hash_lock);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&inode_lock);
 		if (likely(wait))
 			wait_on_inode(inode);
 		return inode;
 	}
+	spin_unlock(&inode_hash_lock);
 	spin_unlock(&inode_lock);
 	return NULL;
 }
@@ -997,14 +1030,17 @@ static struct inode *ifind_fast(struct s
 	struct inode *inode;
 
 	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
 	inode = find_inode_fast(sb, head, ino);
 	if (inode) {
 		inode_get_ilock(inode);
+		spin_unlock(&inode_hash_lock);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&inode_lock);
 		wait_on_inode(inode);
 		return inode;
 	}
+	spin_unlock(&inode_hash_lock);
 	spin_unlock(&inode_lock);
 	return NULL;
 }
@@ -1169,26 +1205,34 @@ int insert_inode_locked(struct inode *in
 		struct hlist_node *node;
 		struct inode *old = NULL;
 		spin_lock(&inode_lock);
+lock_again:
+		spin_lock(&inode_hash_lock);
 		hlist_for_each_entry(old, node, head, i_hash) {
 			if (old->i_ino != ino)
 				continue;
 			if (old->i_sb != sb)
 				continue;
-			spin_lock(&old->i_lock);
+			if (!spin_trylock(&old->i_lock)) {
+				spin_unlock(&inode_hash_lock);
+				cpu_relax();
+				goto lock_again;
+			}
 			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
 				spin_unlock(&old->i_lock);
 				continue;
 			}
 			goto found_old;
 		}
-		spin_lock(&inode->i_lock);
+		BUG_ON(!spin_trylock(&inode->i_lock)); /* XXX: init locked */
 		hlist_add_head(&inode->i_hash, head);
+		spin_unlock(&inode_hash_lock);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&inode_lock);
 		return 0;
 
 found_old:
 		inode_get_ilock(old);
+		spin_unlock(&inode_hash_lock);
 		spin_unlock(&old->i_lock);
 		spin_unlock(&inode_lock);
 		wait_on_inode(old);
@@ -1214,26 +1258,34 @@ int insert_inode_locked4(struct inode *i
 		struct inode *old = NULL;
 
 		spin_lock(&inode_lock);
+lock_again:
+		spin_lock(&inode_hash_lock);
 		hlist_for_each_entry(old, node, head, i_hash) {
 			if (old->i_sb != sb)
 				continue;
 			if (!test(old, data))
 				continue;
-			spin_lock(&old->i_lock);
+			if (!spin_trylock(&old->i_lock)) {
+				spin_unlock(&inode_hash_lock);
+				cpu_relax();
+				goto lock_again;
+			}
 			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
 				spin_unlock(&old->i_lock);
 				continue;
 			}
 			goto found_old;
 		}
-		spin_lock(&inode->i_lock);
+		BUG_ON(!spin_trylock(&inode->i_lock)); /* XXX: init locked */
 		hlist_add_head(&inode->i_hash, head);
+		spin_unlock(&inode_hash_lock);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&inode_lock);
 		return 0;
 
 found_old:
 		inode_get_ilock(old);
+		spin_unlock(&inode_hash_lock);
 		spin_unlock(&old->i_lock);
 		spin_unlock(&inode_lock);
 		wait_on_inode(old);
@@ -1259,7 +1311,9 @@ void __insert_inode_hash(struct inode *i
 	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
 	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
+	spin_lock(&inode_hash_lock);
 	hlist_add_head(&inode->i_hash, head);
+	spin_unlock(&inode_hash_lock);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_lock);
 }
@@ -1275,7 +1329,9 @@ void remove_inode_hash(struct inode *ino
 {
 	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
+	spin_lock(&inode_hash_lock);
 	hlist_del_init(&inode->i_hash);
+	spin_unlock(&inode_hash_lock);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_lock);
 }
@@ -1338,7 +1394,9 @@ static void iput_final(struct inode *ino
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state &= ~I_WILL_FREE;
 		inodes_stat.nr_unused--;
+		spin_lock(&inode_hash_lock);
 		hlist_del_init(&inode->i_hash);
+		spin_unlock(&inode_hash_lock);
 	}
 	list_del_init(&inode->i_list);
 	spin_lock(&sb_inode_list_lock);
@@ -1352,7 +1410,9 @@ static void iput_final(struct inode *ino
 	evict(inode);
 	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
+	spin_lock(&inode_hash_lock);
 	hlist_del_init(&inode->i_hash);
+	spin_unlock(&inode_hash_lock);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_lock);
 	wake_up_inode(inode);
@@ -1573,11 +1633,13 @@ static void __wait_on_freeing_inode(stru
 	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
 	wq = bit_waitqueue(&inode->i_state, __I_NEW);
 	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+	spin_unlock(&inode_hash_lock);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_lock);
 	schedule();
 	finish_wait(wq, &wait.wait);
 	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
 }
 
 static __initdata unsigned long ihash_entries;

  parent reply	other threads:[~2010-10-21 13:08 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-21 13:08 [patch 00/14] reworked minimal inode_lock breaking series npiggin
2010-10-21 13:08 ` [patch 01/14] fs: icache begin inode_lock lock breaking npiggin
2010-10-21 13:08 ` [patch 02/14] fs: icache lock i_count npiggin
2010-10-21 13:08 ` [patch 03/14] fs: icache lock inodes icache state npiggin
2010-10-21 13:08 ` [patch 04/14] fs: icache unmount code cleanup npiggin
2010-10-21 13:08 ` [patch 05/14] fs: icache lock s_inodes list npiggin
2010-10-21 13:08 ` npiggin [this message]
2010-10-21 13:08 ` [patch 07/14] fs: icache lock lru/writeback lists npiggin
2010-10-21 13:08 ` [patch 08/14] fs: icache make nr_inodes and nr_unused atomic npiggin
2010-10-21 13:08 ` [patch 09/14] fs: inode atomic last_ino, iunique lock npiggin
2010-10-21 13:08 ` [patch 10/14] fs: icache remove inode_lock npiggin
2010-10-21 13:08 ` [patch 11/14] fs: icache factor hash lock into functions npiggin
2010-10-21 13:08 ` [patch 12/14] fs: icache lazy inode lru npiggin
2010-10-21 13:08 ` [patch 13/14] fs: icache split IO and LRU lists npiggin
2010-10-21 15:28   ` Christoph Lameter
2010-10-22  0:00     ` Nick Piggin
2010-10-22  1:05       ` Nick Piggin
2010-10-21 13:08 ` [patch 14/14] fs: icache split writeback and lru locks npiggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101021131016.533198770@kernel.dk \
    --to=npiggin@kernel.dk \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).