public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Al Viro <viro@ZenIV.linux.org.uk>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Chris Mason <clm@fb.com>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	David Howells <dhowells@redhat.com>,
	elena.reshetova@intel.com, ishkamiel@gmail.com,
	dwindsor@gmail.com, gregkh@linuxfoundation.org,
	peterz@infradead.org
Subject: [RFC][PATCH 08/10] fs: Do RCU versions for find_inode()
Date: Fri, 24 Feb 2017 16:43:37 +0100	[thread overview]
Message-ID: <20170224162044.413149048@infradead.org> (raw)
In-Reply-To: 20170224154329.478276481@infradead.org

[-- Attachment #1: peterz-fs-inode-5.patch --]
[-- Type: text/plain, Size: 7450 bytes --]

Now that i_count is a proper reference count, such that 0 means free
or freeing, and all .destroy_inode methods use RCU to free inodes, we
can trivially convert the inode hash to RCU and do RCU lookups.

So provide RCU variants of find_inode() and find_inode_fast(). In case
we do hit an inode with i_count==0, we fall back to the old code that
does a __wait_on_freeing_inode().

_However_, this makes the situation with using i_count() for decisions
far worse: the few sites in the evict/free path seem safe, but the
rest are up for grabs.

Even if the rest of the sites were OK before, they probably no longer
are and need help.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 fs/inode.c |  104 ++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 76 insertions(+), 28 deletions(-)

--- a/fs/inode.c
+++ b/fs/inode.c
@@ -471,7 +471,7 @@ void __insert_inode_hash(struct inode *i
 
 	spin_lock(&inode_hash_lock);
 	spin_lock(&inode->i_lock);
-	hlist_add_head(&inode->i_hash, b);
+	hlist_add_head_rcu(&inode->i_hash, b);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_hash_lock);
 }
@@ -487,7 +487,7 @@ void __remove_inode_hash(struct inode *i
 {
 	spin_lock(&inode_hash_lock);
 	spin_lock(&inode->i_lock);
-	hlist_del_init(&inode->i_hash);
+	hlist_del_init_rcu(&inode->i_hash);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_hash_lock);
 }
@@ -777,15 +777,15 @@ long prune_icache_sb(struct super_block
 }
 
 static void __wait_on_freeing_inode(struct inode *inode);
-/*
- * Called with the inode lock held.
- */
-static struct inode *find_inode(struct super_block *sb,
+
+static struct inode *__find_inode(struct super_block *sb,
 				struct hlist_head *head,
 				int (*test)(struct inode *, void *),
 				void *data)
 {
-	struct inode *inode = NULL;
+	struct inode *inode;
+
+	lockdep_assert_held(&inode_hash_lock);
 
 repeat:
 	hlist_for_each_entry(inode, head, i_hash) {
@@ -805,14 +805,44 @@ static struct inode *find_inode(struct s
 	return NULL;
 }
 
+static struct inode *find_inode(struct super_block *sb,
+				struct hlist_head *head,
+				int (*test)(struct inode *, void *),
+				void *data)
+{
+	struct inode *inode;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(inode, head, i_hash) {
+		if (inode->i_sb != sb)
+			continue;
+		if (!test(inode, data))
+			continue;
+		if (atomic_inc_not_zero(&inode->i_count))
+			goto out_unlock;
+		goto slow;
+	}
+	inode = NULL;
+out_unlock:
+	rcu_read_unlock();
+	return inode;
+
+slow:
+	rcu_read_unlock();
+	spin_lock(&inode_hash_lock);
+	inode = __find_inode(sb, head, test, data);
+	spin_unlock(&inode_hash_lock);
+	return inode;
+}
+
 /*
- * find_inode_fast is the fast path version of find_inode, see the comment at
+ * __find_inode_fast is the fast path version of __find_inode, see the comment at
  * iget_locked for details.
  */
-static struct inode *find_inode_fast(struct super_block *sb,
-				struct hlist_head *head, unsigned long ino)
+static struct inode *__find_inode_fast(struct super_block *sb,
+		struct hlist_head *head, unsigned long ino)
 {
-	struct inode *inode = NULL;
+	struct inode *inode;
 
 	lockdep_assert_held(&inode_hash_lock);
 
@@ -834,6 +864,34 @@ static struct inode *find_inode_fast(str
 	return NULL;
 }
 
+static struct inode *find_inode_fast(struct super_block *sb,
+		struct hlist_head *head, unsigned long ino)
+{
+	struct inode *inode;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(inode, head, i_hash) {
+		if (inode->i_ino != ino)
+			continue;
+		if (inode->i_sb != sb)
+			continue;
+		if (atomic_inc_not_zero(&inode->i_count))
+			goto out_unlock;
+		goto slow;
+	}
+	inode = NULL;
+out_unlock:
+	rcu_read_unlock();
+	return inode;
+
+slow:
+	rcu_read_unlock();
+	spin_lock(&inode_hash_lock);
+	inode = __find_inode_fast(sb, head, ino);
+	spin_unlock(&inode_hash_lock);
+	return inode;
+}
+
 /*
  * Each cpu owns a range of LAST_INO_BATCH numbers.
  * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
@@ -1026,10 +1084,7 @@ struct inode *iget5_locked(struct super_
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 	struct inode *inode;
 again:
-	spin_lock(&inode_hash_lock);
 	inode = find_inode(sb, head, test, data);
-	spin_unlock(&inode_hash_lock);
-
 	if (inode) {
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
@@ -1045,14 +1100,14 @@ struct inode *iget5_locked(struct super_
 
 		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
-		old = find_inode(sb, head, test, data);
+		old = __find_inode(sb, head, test, data);
 		if (!old) {
 			if (set(inode, data))
 				goto set_failed;
 
 			spin_lock(&inode->i_lock);
 			inode->i_state = I_NEW;
-			hlist_add_head(&inode->i_hash, head);
+			hlist_add_head_rcu(&inode->i_hash, head);
 			spin_unlock(&inode->i_lock);
 			inode_sb_list_add(inode);
 			spin_unlock(&inode_hash_lock);
@@ -1104,9 +1159,7 @@ struct inode *iget_locked(struct super_b
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
 again:
-	spin_lock(&inode_hash_lock);
 	inode = find_inode_fast(sb, head, ino);
-	spin_unlock(&inode_hash_lock);
 	if (inode) {
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
@@ -1122,12 +1175,12 @@ struct inode *iget_locked(struct super_b
 
 		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
-		old = find_inode_fast(sb, head, ino);
+		old = __find_inode_fast(sb, head, ino);
 		if (!old) {
 			inode->i_ino = ino;
 			spin_lock(&inode->i_lock);
 			inode->i_state = I_NEW;
-			hlist_add_head(&inode->i_hash, head);
+			hlist_add_head_rcu(&inode->i_hash, head);
 			spin_unlock(&inode->i_lock);
 			inode_sb_list_add(inode);
 			spin_unlock(&inode_hash_lock);
@@ -1258,9 +1311,7 @@ struct inode *ilookup5_nowait(struct sup
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 	struct inode *inode;
 
-	spin_lock(&inode_hash_lock);
 	inode = find_inode(sb, head, test, data);
-	spin_unlock(&inode_hash_lock);
 
 	return inode;
 }
@@ -1313,10 +1364,7 @@ struct inode *ilookup(struct super_block
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
 again:
-	spin_lock(&inode_hash_lock);
 	inode = find_inode_fast(sb, head, ino);
-	spin_unlock(&inode_hash_lock);
-
 	if (inode) {
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
@@ -1345,7 +1393,7 @@ EXPORT_SYMBOL(ilookup);
  * the inode_hash_lock spinlock held.
  *
  * This is a even more generalized version of ilookup5() when the
- * function must never block --- find_inode() can block in
+ * function must never block --- __find_inode() can block in
  * __wait_on_freeing_inode() --- or when the caller can not increment
  * the reference count because the resulting iput() might cause an
  * inode eviction.  The tradeoff is that the @match funtion must be
@@ -1402,7 +1450,7 @@ int insert_inode_locked(struct inode *in
 		if (likely(!old)) {
 			spin_lock(&inode->i_lock);
 			inode->i_state |= I_NEW;
-			hlist_add_head(&inode->i_hash, head);
+			hlist_add_head_rcu(&inode->i_hash, head);
 			spin_unlock(&inode->i_lock);
 			spin_unlock(&inode_hash_lock);
 			return 0;
@@ -1445,7 +1493,7 @@ int insert_inode_locked4(struct inode *i
 		if (likely(!old)) {
 			spin_lock(&inode->i_lock);
 			inode->i_state |= I_NEW;
-			hlist_add_head(&inode->i_hash, head);
+			hlist_add_head_rcu(&inode->i_hash, head);
 			spin_unlock(&inode->i_lock);
 			spin_unlock(&inode_hash_lock);
 			return 0;

  parent reply	other threads:[~2017-02-24 18:45 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-24 15:43 [RFC][PATCH 00/10] On inode::i_count and the usage vs reference count issue Peter Zijlstra
2017-02-24 15:43 ` [RFC][PATCH 01/10] fs: Use lockdep_assert_held() instead of comments Peter Zijlstra
2017-02-24 15:43 ` [RFC][PATCH 02/10] fs: Avoid looking at i_count without i_lock held Peter Zijlstra
     [not found]   ` <CA+55aFxLw8FXf61rsGYDjA1tS=joDeaF7OSgaepLWwcz4zt=dg@mail.gmail.com>
2017-02-24 17:06     ` Peter Zijlstra
2017-02-24 15:43 ` [RFC][PATCH 03/10] fs: Introduce i_count() Peter Zijlstra
2017-02-24 15:43 ` [RFC][PATCH 04/10] fs: Restructure iput() Peter Zijlstra
2017-02-24 15:43 ` [RFC][PATCH 05/10] fs: Remove iput_final() Peter Zijlstra
2017-02-24 15:43 ` [RFC][PATCH 06/10] fs: Rework i_count Peter Zijlstra
2017-02-24 20:49   ` Al Viro
2017-02-24 15:43 ` [RFC][PATCH 07/10] orangefs: Use RCU for destroy_inode Peter Zijlstra
2017-02-24 20:52   ` Al Viro
2017-02-24 23:00     ` Mike Marshall
2017-02-25 20:31       ` Mike Marshall
2017-02-27  0:34         ` Mike Marshall
2017-02-27  1:20           ` Linus Torvalds
2017-02-27  8:44           ` David Howells
2017-02-27 14:44             ` Mike Marshall
2017-02-24 15:43 ` Peter Zijlstra [this message]
2017-02-24 15:43 ` [RFC][PATCH 09/10] locking/refcount: Provide refcount_dec_unless() Peter Zijlstra
2017-02-27  9:28   ` Reshetova, Elena
2017-02-24 15:43 ` [RFC][PATCH 10/10] fs: Convert i_count over to refcount_t Peter Zijlstra
2017-02-24 16:43 ` [RFC][PATCH 00/10] On inode::i_count and the usage vs reference count issue Christoph Hellwig
2017-02-24 17:07   ` Peter Zijlstra
2017-02-24 20:59   ` David Windsor
     [not found] ` <CA+55aFy1bNbsX_3T-s_EUwTP-r_SmJJMvB3=-2nffehFVP=EdQ@mail.gmail.com>
     [not found]   ` <CA+55aFz0DbAGZ8gc+s35nm1N5frXjK_NOh7QzuSfZeJbjsT6Sg@mail.gmail.com>
     [not found]     ` <CA+55aFyR8wkHps5_AqUqzx8MDMNxRZZ7+MYH9g=ZCUi=4Oey8w@mail.gmail.com>
2017-02-24 19:24       ` Fwd: " Linus Torvalds
2017-02-24 20:42 ` Al Viro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170224162044.413149048@infradead.org \
    --to=peterz@infradead.org \
    --cc=clm@fb.com \
    --cc=dhowells@redhat.com \
    --cc=dwindsor@gmail.com \
    --cc=elena.reshetova@intel.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=ishkamiel@gmail.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox