All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
To: Dave Chinner <david@fromorbit.com>
Cc: viro@ZenIV.linux.org.uk, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 3/3] fs: rcu protect inode hash lookups
Date: Tue, 16 Nov 2010 15:56:42 -0800	[thread overview]
Message-ID: <20101116235642.GM2503@linux.vnet.ibm.com> (raw)
In-Reply-To: <1288589624-15251-4-git-send-email-david@fromorbit.com>

On Mon, Nov 01, 2010 at 04:33:44PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Now that inodes are using RCU freeing, we can walk the hash lists
> using RCU protection during lookups. Convert all the hash list
> operations to use RCU-based operators and drop the inode_hash_lock
> around pure lookup operations.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/inode.c |   89 ++++++++++++++++++++++++++++++++++++++---------------------
>  1 files changed, 57 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 106ec7a..6bead3d 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -50,11 +50,12 @@
>   *   inode->i_lock
>   *
>   * inode_hash_lock
> - *   inode_sb_list_lock
> - *   inode->i_lock
> + *   rcu_read_lock
> + *     inode_sb_list_lock
> + *     inode->i_lock
>   *
>   * iunique_lock
> - *   inode_hash_lock
> + *   rcu_read_lock
>   */
> 
>  /*
> @@ -413,7 +414,7 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
> 
>  	spin_lock(&inode_hash_lock);
>  	spin_lock(&inode->i_lock);
> -	hlist_add_head(&inode->i_hash, b);
> +	hlist_add_head_rcu(&inode->i_hash, b);
>  	spin_unlock(&inode->i_lock);
>  	spin_unlock(&inode_hash_lock);
>  }
> @@ -429,7 +430,7 @@ void remove_inode_hash(struct inode *inode)
>  {
>  	spin_lock(&inode_hash_lock);
>  	spin_lock(&inode->i_lock);
> -	hlist_del_init(&inode->i_hash);
> +	hlist_del_init_rcu(&inode->i_hash);
>  	spin_unlock(&inode->i_lock);
>  	spin_unlock(&inode_hash_lock);
>  }
> @@ -741,26 +742,38 @@ static void __wait_on_freeing_inode(struct inode *inode);
>  static struct inode *find_inode(struct super_block *sb,
>  				struct hlist_head *head,
>  				int (*test)(struct inode *, void *),
> -				void *data)
> +				void *data, bool locked)
>  {
>  	struct hlist_node *node;
>  	struct inode *inode = NULL;
> 
>  repeat:
> +	rcu_read_lock();
>  	hlist_for_each_entry(inode, node, head, i_hash) {

The above needs to be hlist_for_each_entry_rcu(), correct?

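This is needed even in the SLAB_DESTROY_BY_RCU case, because you
are still inserting elements into this list concurrently with
readers traversing it.  The _rcu iterator fetches each ->next pointer
with rcu_dereference()-style loads, which pair with the
rcu_assign_pointer() in hlist_add_head_rcu() so that a reader never
sees a newly added node before its fields are initialized.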

That said, this seems to be replaced by hlist_bl_for_each_entry()
in your git tree with the caller doing hlist_bl_lock(), though it is
entirely possible that I am looking at the wrong branch.  I feel
slow and late to the party...  ;-)

							Thanx, Paul

>  		if (inode->i_sb != sb)
>  			continue;
>  		if (!test(inode, data))
>  			continue;
>  		spin_lock(&inode->i_lock);
> +		if (inode_unhashed(inode)) {
> +			spin_unlock(&inode->i_lock);
> +			continue;
> +		}
>  		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
> +			rcu_read_unlock();
> +			if (locked)
> +				spin_unlock(&inode_hash_lock);
>  			__wait_on_freeing_inode(inode);
> +			if (locked)
> +				spin_lock(&inode_hash_lock);
>  			goto repeat;
>  		}
>  		__iget(inode);
>  		spin_unlock(&inode->i_lock);
> +		rcu_read_unlock();
>  		return inode;
>  	}
> +	rcu_read_unlock();
>  	return NULL;
>  }
> 
> @@ -769,26 +782,39 @@ repeat:
>   * iget_locked for details.
>   */
>  static struct inode *find_inode_fast(struct super_block *sb,
> -				struct hlist_head *head, unsigned long ino)
> +				struct hlist_head *head, unsigned long ino,
> +				bool locked)
>  {
>  	struct hlist_node *node;
>  	struct inode *inode = NULL;
> 
>  repeat:
> +	rcu_read_lock();
>  	hlist_for_each_entry(inode, node, head, i_hash) {
>  		if (inode->i_ino != ino)
>  			continue;
>  		if (inode->i_sb != sb)
>  			continue;
>  		spin_lock(&inode->i_lock);
> +		if (inode_unhashed(inode)) {
> +			spin_unlock(&inode->i_lock);
> +			continue;
> +		}
>  		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
> +			rcu_read_unlock();
> +			if (locked)
> +				spin_unlock(&inode_hash_lock);
>  			__wait_on_freeing_inode(inode);
> +			if (locked)
> +				spin_lock(&inode_hash_lock);
>  			goto repeat;
>  		}
>  		__iget(inode);
>  		spin_unlock(&inode->i_lock);
> +		rcu_read_unlock();
>  		return inode;
>  	}
> +	rcu_read_unlock();
>  	return NULL;
>  }
> 
> @@ -913,14 +939,14 @@ static struct inode *get_new_inode(struct super_block *sb,
> 
>  		spin_lock(&inode_hash_lock);
>  		/* We released the lock, so.. */
> -		old = find_inode(sb, head, test, data);
> +		old = find_inode(sb, head, test, data, true);
>  		if (!old) {
>  			if (set(inode, data))
>  				goto set_failed;
> 
>  			spin_lock(&inode->i_lock);
>  			inode->i_state = I_NEW;
> -			hlist_add_head(&inode->i_hash, head);
> +			hlist_add_head_rcu(&inode->i_hash, head);
>  			spin_unlock(&inode->i_lock);
>  			inode_sb_list_add(inode);
>  			spin_unlock(&inode_hash_lock);
> @@ -964,12 +990,12 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
> 
>  		spin_lock(&inode_hash_lock);
>  		/* We released the lock, so.. */
> -		old = find_inode_fast(sb, head, ino);
> +		old = find_inode_fast(sb, head, ino, true);
>  		if (!old) {
>  			inode->i_ino = ino;
>  			spin_lock(&inode->i_lock);
>  			inode->i_state = I_NEW;
> -			hlist_add_head(&inode->i_hash, head);
> +			hlist_add_head_rcu(&inode->i_hash, head);
>  			spin_unlock(&inode->i_lock);
>  			inode_sb_list_add(inode);
>  			spin_unlock(&inode_hash_lock);
> @@ -1006,15 +1032,22 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino)
>  	struct hlist_node *node;
>  	struct inode *inode;
> 
> -	spin_lock(&inode_hash_lock);
> -	hlist_for_each_entry(inode, node, b, i_hash) {
> -		if (inode->i_ino == ino && inode->i_sb == sb) {
> -			spin_unlock(&inode_hash_lock);
> -			return 0;
> +	rcu_read_lock();
> +	hlist_for_each_entry_rcu(inode, node, b, i_hash) {
> +		if (inode->i_ino != ino)
> +			continue;
> +		if (inode->i_sb != sb)
> +			continue;
> +		spin_lock(&inode->i_lock);
> +		if (inode_unhashed(inode)) {
> +			spin_unlock(&inode->i_lock);
> +			continue;
>  		}
> +		spin_unlock(&inode->i_lock);
> +		rcu_read_unlock();
> +		return 0;
>  	}
> -	spin_unlock(&inode_hash_lock);
> -
> +	rcu_read_unlock();
>  	return 1;
>  }
> 
> @@ -1099,15 +1132,12 @@ static struct inode *ifind(struct super_block *sb,
>  {
>  	struct inode *inode;
> 
> -	spin_lock(&inode_hash_lock);
> -	inode = find_inode(sb, head, test, data);
> +	inode = find_inode(sb, head, test, data, false);
>  	if (inode) {
> -		spin_unlock(&inode_hash_lock);
>  		if (likely(wait))
>  			wait_on_inode(inode);
>  		return inode;
>  	}
> -	spin_unlock(&inode_hash_lock);
>  	return NULL;
>  }
> 
> @@ -1131,14 +1161,11 @@ static struct inode *ifind_fast(struct super_block *sb,
>  {
>  	struct inode *inode;
> 
> -	spin_lock(&inode_hash_lock);
> -	inode = find_inode_fast(sb, head, ino);
> +	inode = find_inode_fast(sb, head, ino, false);
>  	if (inode) {
> -		spin_unlock(&inode_hash_lock);
>  		wait_on_inode(inode);
>  		return inode;
>  	}
> -	spin_unlock(&inode_hash_lock);
>  	return NULL;
>  }
> 
> @@ -1301,7 +1328,7 @@ int insert_inode_locked(struct inode *inode)
>  		struct hlist_node *node;
>  		struct inode *old = NULL;
>  		spin_lock(&inode_hash_lock);
> -		hlist_for_each_entry(old, node, head, i_hash) {
> +		hlist_for_each_entry_rcu(old, node, head, i_hash) {
>  			if (old->i_ino != ino)
>  				continue;
>  			if (old->i_sb != sb)
> @@ -1316,7 +1343,7 @@ int insert_inode_locked(struct inode *inode)
>  		if (likely(!node)) {
>  			spin_lock(&inode->i_lock);
>  			inode->i_state |= I_NEW;
> -			hlist_add_head(&inode->i_hash, head);
> +			hlist_add_head_rcu(&inode->i_hash, head);
>  			spin_unlock(&inode->i_lock);
>  			spin_unlock(&inode_hash_lock);
>  			return 0;
> @@ -1345,7 +1372,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
>  		struct inode *old = NULL;
> 
>  		spin_lock(&inode_hash_lock);
> -		hlist_for_each_entry(old, node, head, i_hash) {
> +		hlist_for_each_entry_rcu(old, node, head, i_hash) {
>  			if (old->i_sb != sb)
>  				continue;
>  			if (!test(old, data))
> @@ -1360,7 +1387,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
>  		if (likely(!node)) {
>  			spin_lock(&inode->i_lock);
>  			inode->i_state |= I_NEW;
> -			hlist_add_head(&inode->i_hash, head);
> +			hlist_add_head_rcu(&inode->i_hash, head);
>  			spin_unlock(&inode->i_lock);
>  			spin_unlock(&inode_hash_lock);
>  			return 0;
> @@ -1646,10 +1673,8 @@ static void __wait_on_freeing_inode(struct inode *inode)
>  	wq = bit_waitqueue(&inode->i_state, __I_NEW);
>  	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
>  	spin_unlock(&inode->i_lock);
> -	spin_unlock(&inode_hash_lock);
>  	schedule();
>  	finish_wait(wq, &wait.wait);
> -	spin_lock(&inode_hash_lock);
>  }
> 
>  static __initdata unsigned long ihash_entries;
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

      parent reply	other threads:[~2010-11-16 23:56 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-01  5:33 fs: inode freeing and hash lookup via RCU Dave Chinner
2010-11-01  5:33 ` [PATCH 1/3] fs: pull inode->i_lock up out of writeback_single_inode Dave Chinner
2010-11-01  5:33 ` [PATCH 2/3] fs: Use RCU freeing of inodes via SLAB_DESTROY_BY_RCU Dave Chinner
2010-11-01 15:31   ` Christoph Hellwig
2010-11-01  5:33 ` [PATCH 3/3] fs: rcu protect inode hash lookups Dave Chinner
2010-11-01  9:38   ` Eric Dumazet
2010-11-01  9:38     ` Eric Dumazet
2010-11-01 13:44     ` Dave Chinner
2010-11-01 13:44       ` Dave Chinner
2010-11-01 15:29       ` Eric Dumazet
2010-11-01 15:29         ` Eric Dumazet
2010-11-02  0:01         ` Dave Chinner
2010-11-02  0:01           ` Dave Chinner
2010-11-02  4:46           ` Eric Dumazet
2010-11-02  4:46             ` Eric Dumazet
2010-11-02 12:11           ` Paul E. McKenney
2010-11-02 12:11             ` Paul E. McKenney
2010-11-16 23:56   ` Paul E. McKenney [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101116235642.GM2503@linux.vnet.ibm.com \
    --to=paulmck@linux.vnet.ibm.com \
    --cc=david@fromorbit.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.