public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Kirill Korotaev <dev@sw.ru>
To: Jan Blunck <jblunck@suse.de>
Cc: viro@zeniv.linux.org.uk, linux-kernel@vger.kernel.org,
	Andrew Morton <akpm@osdl.org>,
	olh@suse.de
Subject: Re: [PATCH] shrink_dcache_parent() races against shrink_dcache_memory()
Date: Mon, 23 Jan 2006 11:07:48 +0300	[thread overview]
Message-ID: <43D48ED4.3010306@sw.ru> (raw)
In-Reply-To: <20060120203645.GF24401@hasse.suse.de>

Jan,

1. this patch doesn't fix the whole problem. iput() after sb free is 
still possible. So busy inodes after umount too.
2. it has big problems with locking...

comments below inside.

Kirill

> Kirill Korotaev <dev@sw.ru> discovered a race between shrink_dcache_parent()
> and shrink_dcache_memory(). That one is based on dput() is calling
> dentry_iput() too early and therefore is giving up the dcache_lock. This leads
> to the situation that the parent dentry might be still referenced although all
> childs are already dead. This parent is ignore by a concurrent select_parent()
> call which might be the reason for busy inode after umount failures.
> 
> This is from Kirill's original patch:
> 
> CPU 1                    CPU 2
> ~~~~~                    ~~~~~
> umount /dev/sda1
> generic_shutdown_super   shrink_dcache_memory
> shrink_dcache_parent     prune_one_dentry
> select_parent            dput     <<<< child is dead, locks are released,
>                                        but parent is still referenced!!! >>>>
> skip dentry->parent,
> since it's d_count > 0
> 
> message: BUSY inodes after umount...
>                                   <<< parent is left on dentry_unused list,
>                                       referencing freed super block >>>
> 
> This patch is introducing dput_locked() which is doing all the dput work
> except of freeing up the dentry's inode and memory itself. Therefore, when the
> dcache_lock is given up, all the reference counts of the parents are correct.
> prune_one_dentry() must also use the dput_locked version and free up the
> inodes and the memory of the parents later. Otherwise we have an incorrect
> reference count on the parents of the dentry to prune.
> 
> Signed-off-by: Jan Blunck <jblunck@suse.de>
> ---
> 
> 
> ------------------------------------------------------------------------
> 
>  fs/dcache.c |   76 ++++++++++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 54 insertions(+), 22 deletions(-)
> 
> Index: linux-2.6/fs/dcache.c
> ===================================================================
> --- linux-2.6.orig/fs/dcache.c
> +++ linux-2.6/fs/dcache.c
> @@ -143,21 +143,18 @@ static void dentry_iput(struct dentry * 
>   * no dcache lock, please.
>   */
>  
> -void dput(struct dentry *dentry)
> +static void dput_locked(struct dentry *dentry, struct list_head *list)
>  {
>  	if (!dentry)
>  		return;
>  
> -repeat:
> -	if (atomic_read(&dentry->d_count) == 1)
> -		might_sleep();
> -	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
> +	if (!atomic_dec_and_test(&dentry->d_count))
>  		return;
>  
> +repeat:
>  	spin_lock(&dentry->d_lock);
>  	if (atomic_read(&dentry->d_count)) {
>  		spin_unlock(&dentry->d_lock);
> -		spin_unlock(&dcache_lock);
>  		return;
>  	}
>  
> @@ -177,32 +174,54 @@ repeat:
>    		dentry_stat.nr_unused++;
>    	}
>   	spin_unlock(&dentry->d_lock);
> -	spin_unlock(&dcache_lock);
>  	return;
>  
>  unhash_it:
>  	__d_drop(dentry);
>  
>  kill_it: {
> -		struct dentry *parent;
> -
>  		/* If dentry was on d_lru list
>  		 * delete it from there
>  		 */
>    		if (!list_empty(&dentry->d_lru)) {
> -  			list_del(&dentry->d_lru);
> +  			list_del_init(&dentry->d_lru);
>    			dentry_stat.nr_unused--;
>    		}
>    		list_del(&dentry->d_u.d_child);
>  		dentry_stat.nr_dentry--;	/* For d_free, below */
> -		/*drops the locks, at that point nobody can reach this dentry */
> -		dentry_iput(dentry);
> -		parent = dentry->d_parent;
> -		d_free(dentry);
> -		if (dentry == parent)
> +		/* at this point nobody can reach this dentry */
> +		list_add(&dentry->d_lru, list);
> +		spin_unlock(&dentry->d_lock);
> +		if (dentry == dentry->d_parent)
>  			return;
> -		dentry = parent;
> -		goto repeat;
> +		dentry = dentry->d_parent;
> +		if (atomic_dec_and_test(&dentry->d_count))
> +			goto repeat;
<<<< I would prefer to have "goto repeat" as it was before...
> +		/* out */
> +	}
> +}
> +
> +void dput(struct dentry *dentry)
> +{
> +	LIST_HEAD(free_list);
> +
> +	if (!dentry)
> +		return;
> +
> +	if (atomic_add_unless(&dentry->d_count, -1, 1))
> +		return;
<<<< I would better introduce __dput_locked() w/o atomic_dec_and_test() 
instead of using this atomic_add_unless()...
<<<< For me it looks like an obfuscation of a code, which must be clean 
and tidy.
> +
> +	spin_lock(&dcache_lock);
> +	dput_locked(dentry, &free_list);
> +	spin_unlock(&dcache_lock);
> +
<<<< 1. locking here is totally broken... spin_unlock() in dentry_iput()
<<<< 2. it doesn't help the situation I wrote to you,
<<<<    since iput() can be done on inode _after_ sb freeing...
> +	if (!list_empty(&free_list)) {
> +		struct dentry *dentry, *p;
> +		list_for_each_entry_safe(dentry, p, &free_list, d_lru) {
> +			list_del(&dentry->d_lru);
> +			dentry_iput(dentry);
> +			d_free(dentry);
> +		}
>  	}
>  }
>  
> @@ -364,16 +383,29 @@ restart:
>   */
>  static inline void prune_one_dentry(struct dentry * dentry)
>  {
> -	struct dentry * parent;
> +	LIST_HEAD(free_list);
>  
>  	__d_drop(dentry);
>  	list_del(&dentry->d_u.d_child);
>  	dentry_stat.nr_dentry--;	/* For d_free, below */
> -	dentry_iput(dentry);
> -	parent = dentry->d_parent;
> +
> +	/* dput the parent here before we release dcache_lock */
> +	if (dentry != dentry->d_parent)
> +		dput_locked(dentry->d_parent, &free_list);
> +
> +	dentry_iput(dentry);		/* drop locks */
<<<< comment 2) from dput()
>  	d_free(dentry);
> -	if (parent != dentry)
> -		dput(parent);
> +
> +	if (!list_empty(&free_list)) {
> +		struct dentry *tmp, *p;
> +
> +		list_for_each_entry_safe(tmp, p, &free_list, d_lru) {
> +			list_del(&tmp->d_lru);
> +			dentry_iput(tmp);
<<<< comment 1) from dput()
> +			d_free(tmp);
> +		}
> +	}
> +
>  	spin_lock(&dcache_lock);
>  }
>  



  parent reply	other threads:[~2006-01-23  8:06 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-01-20 20:36 [PATCH] shrink_dcache_parent() races against shrink_dcache_memory() Jan Blunck
2006-01-23  5:22 ` Andrew Morton
2006-01-23  8:12   ` Kirill Korotaev
2006-01-23 15:13   ` Jan Blunck
2006-01-23  8:07 ` Kirill Korotaev [this message]
2006-01-23 15:57   ` Jan Blunck
2006-01-24  5:54     ` Balbir Singh
2006-01-24  9:48       ` Kirill Korotaev
2006-01-24 11:10         ` Balbir Singh
2006-01-24 17:18           ` Kirill Korotaev
2006-01-25  7:03             ` Balbir Singh
2006-01-30 12:03   ` Jan Blunck
2006-01-30 14:38     ` Balbir Singh
2006-01-30 14:54       ` Jan Blunck
2006-01-30 15:02         ` Kirill Korotaev
2006-01-30 15:25           ` Jan Blunck
2006-01-30 15:31             ` Kirill Korotaev
2006-01-30 14:42     ` Kirill Korotaev
2006-01-30 14:58       ` Jan Blunck
2006-01-30 15:59         ` Kirill Korotaev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=43D48ED4.3010306@sw.ru \
    --to=dev@sw.ru \
    --cc=akpm@osdl.org \
    --cc=jblunck@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=olh@suse.de \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox