All of lore.kernel.org
 help / color / mirror / Atom feed
* [Ocfs2-devel] [PATCH] ocfs2/dlm: remove potential deadlock -V2
@ 2010-06-21 13:43 Wengang Wang
  2010-06-25  1:53 ` Wengang Wang
  2010-07-19 10:11 ` Wengang Wang
  0 siblings, 2 replies; 6+ messages in thread
From: Wengang Wang @ 2010-06-21 13:43 UTC (permalink / raw)
  To: ocfs2-devel

When we need to take both dlm_domain_lock and dlm->spinlock, we should take
them in order of: dlm_domain_lock then dlm->spinlock.

There is pathes disobey this order. That is calling dlm_lockres_put() with
dlm->spinlock held in dlm_run_purge_list. dlm_lockres_put() calls dlm_put() at
the ref and dlm_put() locks on dlm_domain_lock.

Fix:
In dlm_run_purge_list, if it could the last ref on the lockres, unlock
dlm->spinlock before calling dlm_lockres_put() and lock it back after
dlm_lockres_put().

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
---
 fs/ocfs2/dlm/dlmthread.c |   27 +++++++++++++++++++--------
 1 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index d4f73ca..b1bd624 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -152,6 +152,7 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 	spin_unlock(&dlm->spinlock);
 }
 
+/* returns 1 if the lockres is removed from purge list, 0 otherwise */
 static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 			     struct dlm_lock_resource *res)
 {
@@ -217,7 +218,9 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 		     res, master);
 		list_del_init(&res->purge);
 		spin_unlock(&res->spinlock);
+		/* not the last ref, dlm_run_purge_list() holds another */
 		dlm_lockres_put(res);
+		ret = 1;
 		dlm->purge_count--;
 	} else
 		spin_unlock(&res->spinlock);
@@ -232,13 +235,13 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 		spin_unlock(&res->spinlock);
 		wake_up(&res->wq);
 	}
-	return 0;
+	return ret;
 }
 
 static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 			       int purge_now)
 {
-	unsigned int run_max, unused;
+	unsigned int run_max, unused, removed;
 	unsigned long purge_jiffies;
 	struct dlm_lock_resource *lockres;
 
@@ -280,13 +283,21 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 
 		/* This may drop and reacquire the dlm spinlock if it
 		 * has to do migration. */
-		if (dlm_purge_lockres(dlm, lockres))
-			BUG();
-
+		removed = dlm_purge_lockres(dlm, lockres);
+		/* If the lockres is removed from purge list, this could be
+		 * the last ref. Unlock dlm->spinlock to avoid deadlock
+		 * --dlm_lockres_put() does spin_lock on dlm_domain_lock on the
+		 * last ref while there, in other path, could be lock requests
+		 * in normal order: dlm_domain_lock then dlm->spinlock.
+		 */
+		if (removed)
+			spin_unlock(&dlm->spinlock);
 		dlm_lockres_put(lockres);
-
-		/* Avoid adding any scheduling latencies */
-		cond_resched_lock(&dlm->spinlock);
+		if (removed)
+			spin_lock(&dlm->spinlock);
+		else
+			/* Avoid adding any scheduling latencies */
+			cond_resched_lock(&dlm->spinlock);
 	}
 
 	spin_unlock(&dlm->spinlock);
-- 
1.6.6.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2/dlm: remove potential deadlock -V2
  2010-06-21 13:43 [Ocfs2-devel] [PATCH] ocfs2/dlm: remove potential deadlock -V2 Wengang Wang
@ 2010-06-25  1:53 ` Wengang Wang
  2010-07-05  9:59   ` Wengang Wang
  2010-07-19 10:11 ` Wengang Wang
  1 sibling, 1 reply; 6+ messages in thread
From: Wengang Wang @ 2010-06-25  1:53 UTC (permalink / raw)
  To: ocfs2-devel

Hi,

Any comment on this?

regards,
wengang.
On 10-06-21 21:43, Wengang Wang wrote:
> When we need to take both dlm_domain_lock and dlm->spinlock, we should take
> them in order of: dlm_domain_lock then dlm->spinlock.
> 
> There is pathes disobey this order. That is calling dlm_lockres_put() with
> dlm->spinlock held in dlm_run_purge_list. dlm_lockres_put() calls dlm_put() at
> the ref and dlm_put() locks on dlm_domain_lock.
> 
> Fix:
> In dlm_run_purge_list, if it could the last ref on the lockres, unlock
> dlm->spinlock before calling dlm_lockres_put() and lock it back after
> dlm_lockres_put().
> 
> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
> ---
>  fs/ocfs2/dlm/dlmthread.c |   27 +++++++++++++++++++--------
>  1 files changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
> index d4f73ca..b1bd624 100644
> --- a/fs/ocfs2/dlm/dlmthread.c
> +++ b/fs/ocfs2/dlm/dlmthread.c
> @@ -152,6 +152,7 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
>  	spin_unlock(&dlm->spinlock);
>  }
>  
> +/* returns 1 if the lockres is removed from purge list, 0 otherwise */
>  static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>  			     struct dlm_lock_resource *res)
>  {
> @@ -217,7 +218,9 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>  		     res, master);
>  		list_del_init(&res->purge);
>  		spin_unlock(&res->spinlock);
> +		/* not the last ref, dlm_run_purge_list() holds another */
>  		dlm_lockres_put(res);
> +		ret = 1;
>  		dlm->purge_count--;
>  	} else
>  		spin_unlock(&res->spinlock);
> @@ -232,13 +235,13 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>  		spin_unlock(&res->spinlock);
>  		wake_up(&res->wq);
>  	}
> -	return 0;
> +	return ret;
>  }
>  
>  static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>  			       int purge_now)
>  {
> -	unsigned int run_max, unused;
> +	unsigned int run_max, unused, removed;
>  	unsigned long purge_jiffies;
>  	struct dlm_lock_resource *lockres;
>  
> @@ -280,13 +283,21 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>  
>  		/* This may drop and reacquire the dlm spinlock if it
>  		 * has to do migration. */
> -		if (dlm_purge_lockres(dlm, lockres))
> -			BUG();
> -
> +		removed = dlm_purge_lockres(dlm, lockres);
> +		/* If the lockres is removed from purge list, this could be
> +		 * the last ref. Unlock dlm->spinlock to avoid deadlock
> +		 * --dlm_lockres_put() does spin_lock on dlm_domain_lock on the
> +		 * last ref while there, in other path, could be lock requests
> +		 * in normal order: dlm_domain_lock then dlm->spinlock.
> +		 */
> +		if (removed)
> +			spin_unlock(&dlm->spinlock);
>  		dlm_lockres_put(lockres);
> -
> -		/* Avoid adding any scheduling latencies */
> -		cond_resched_lock(&dlm->spinlock);
> +		if (removed)
> +			spin_lock(&dlm->spinlock);
> +		else
> +			/* Avoid adding any scheduling latencies */
> +			cond_resched_lock(&dlm->spinlock);
>  	}
>  
>  	spin_unlock(&dlm->spinlock);
> -- 
> 1.6.6.1
> 
> 
> _______________________________________________
> Ocfs2-devel mailing list
> Ocfs2-devel at oss.oracle.com
> http://oss.oracle.com/mailman/listinfo/ocfs2-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2/dlm: remove potential deadlock -V2
  2010-06-25  1:53 ` Wengang Wang
@ 2010-07-05  9:59   ` Wengang Wang
  0 siblings, 0 replies; 6+ messages in thread
From: Wengang Wang @ 2010-07-05  9:59 UTC (permalink / raw)
  To: ocfs2-devel

Any comment on this?

On 10-06-25 09:53, Wengang Wang wrote:
> Hi,
> 
> Any comment on this?
> 
> regards,
> wengang.
> On 10-06-21 21:43, Wengang Wang wrote:
> > When we need to take both dlm_domain_lock and dlm->spinlock, we should take
> > them in order of: dlm_domain_lock then dlm->spinlock.
> > 
> > There is pathes disobey this order. That is calling dlm_lockres_put() with
> > dlm->spinlock held in dlm_run_purge_list. dlm_lockres_put() calls dlm_put() at
> > the ref and dlm_put() locks on dlm_domain_lock.
> > 
> > Fix:
> > In dlm_run_purge_list, if it could the last ref on the lockres, unlock
> > dlm->spinlock before calling dlm_lockres_put() and lock it back after
> > dlm_lockres_put().
> > 
> > Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
> > ---
> >  fs/ocfs2/dlm/dlmthread.c |   27 +++++++++++++++++++--------
> >  1 files changed, 19 insertions(+), 8 deletions(-)
> > 
> > diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
> > index d4f73ca..b1bd624 100644
> > --- a/fs/ocfs2/dlm/dlmthread.c
> > +++ b/fs/ocfs2/dlm/dlmthread.c
> > @@ -152,6 +152,7 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
> >  	spin_unlock(&dlm->spinlock);
> >  }
> >  
> > +/* returns 1 if the lockres is removed from purge list, 0 otherwise */
> >  static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> >  			     struct dlm_lock_resource *res)
> >  {
> > @@ -217,7 +218,9 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> >  		     res, master);
> >  		list_del_init(&res->purge);
> >  		spin_unlock(&res->spinlock);
> > +		/* not the last ref, dlm_run_purge_list() holds another */
> >  		dlm_lockres_put(res);
> > +		ret = 1;
> >  		dlm->purge_count--;
> >  	} else
> >  		spin_unlock(&res->spinlock);
> > @@ -232,13 +235,13 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> >  		spin_unlock(&res->spinlock);
> >  		wake_up(&res->wq);
> >  	}
> > -	return 0;
> > +	return ret;
> >  }
> >  
> >  static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> >  			       int purge_now)
> >  {
> > -	unsigned int run_max, unused;
> > +	unsigned int run_max, unused, removed;
> >  	unsigned long purge_jiffies;
> >  	struct dlm_lock_resource *lockres;
> >  
> > @@ -280,13 +283,21 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> >  
> >  		/* This may drop and reacquire the dlm spinlock if it
> >  		 * has to do migration. */
> > -		if (dlm_purge_lockres(dlm, lockres))
> > -			BUG();
> > -
> > +		removed = dlm_purge_lockres(dlm, lockres);
> > +		/* If the lockres is removed from purge list, this could be
> > +		 * the last ref. Unlock dlm->spinlock to avoid deadlock
> > +		 * --dlm_lockres_put() does spin_lock on dlm_domain_lock on the
> > +		 * last ref while there, in other path, could be lock requests
> > +		 * in normal order: dlm_domain_lock then dlm->spinlock.
> > +		 */
> > +		if (removed)
> > +			spin_unlock(&dlm->spinlock);
> >  		dlm_lockres_put(lockres);
> > -
> > -		/* Avoid adding any scheduling latencies */
> > -		cond_resched_lock(&dlm->spinlock);
> > +		if (removed)
> > +			spin_lock(&dlm->spinlock);
> > +		else
> > +			/* Avoid adding any scheduling latencies */
> > +			cond_resched_lock(&dlm->spinlock);
> >  	}
> >  
> >  	spin_unlock(&dlm->spinlock);
> > -- 
> > 1.6.6.1
> > 
> > 
> > _______________________________________________
> > Ocfs2-devel mailing list
> > Ocfs2-devel at oss.oracle.com
> > http://oss.oracle.com/mailman/listinfo/ocfs2-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2/dlm: remove potential deadlock -V2
  2010-06-21 13:43 [Ocfs2-devel] [PATCH] ocfs2/dlm: remove potential deadlock -V2 Wengang Wang
  2010-06-25  1:53 ` Wengang Wang
@ 2010-07-19 10:11 ` Wengang Wang
  2010-07-19 23:14   ` Sunil Mushran
  1 sibling, 1 reply; 6+ messages in thread
From: Wengang Wang @ 2010-07-19 10:11 UTC (permalink / raw)
  To: ocfs2-devel

Any comment?

On 10-06-21 21:43, Wengang Wang wrote:
> When we need to take both dlm_domain_lock and dlm->spinlock, we should take
> them in order of: dlm_domain_lock then dlm->spinlock.
> 
> There is pathes disobey this order. That is calling dlm_lockres_put() with
> dlm->spinlock held in dlm_run_purge_list. dlm_lockres_put() calls dlm_put() at
> the ref and dlm_put() locks on dlm_domain_lock.
> 
> Fix:
> In dlm_run_purge_list, if it could the last ref on the lockres, unlock
> dlm->spinlock before calling dlm_lockres_put() and lock it back after
> dlm_lockres_put().
> 
> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
> ---
>  fs/ocfs2/dlm/dlmthread.c |   27 +++++++++++++++++++--------
>  1 files changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
> index d4f73ca..b1bd624 100644
> --- a/fs/ocfs2/dlm/dlmthread.c
> +++ b/fs/ocfs2/dlm/dlmthread.c
> @@ -152,6 +152,7 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
>  	spin_unlock(&dlm->spinlock);
>  }
>  
> +/* returns 1 if the lockres is removed from purge list, 0 otherwise */
>  static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>  			     struct dlm_lock_resource *res)
>  {
> @@ -217,7 +218,9 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>  		     res, master);
>  		list_del_init(&res->purge);
>  		spin_unlock(&res->spinlock);
> +		/* not the last ref, dlm_run_purge_list() holds another */
>  		dlm_lockres_put(res);
> +		ret = 1;
>  		dlm->purge_count--;
>  	} else
>  		spin_unlock(&res->spinlock);
> @@ -232,13 +235,13 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>  		spin_unlock(&res->spinlock);
>  		wake_up(&res->wq);
>  	}
> -	return 0;
> +	return ret;
>  }
>  
>  static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>  			       int purge_now)
>  {
> -	unsigned int run_max, unused;
> +	unsigned int run_max, unused, removed;
>  	unsigned long purge_jiffies;
>  	struct dlm_lock_resource *lockres;
>  
> @@ -280,13 +283,21 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>  
>  		/* This may drop and reacquire the dlm spinlock if it
>  		 * has to do migration. */
> -		if (dlm_purge_lockres(dlm, lockres))
> -			BUG();
> -
> +		removed = dlm_purge_lockres(dlm, lockres);
> +		/* If the lockres is removed from purge list, this could be
> +		 * the last ref. Unlock dlm->spinlock to avoid deadlock
> +		 * --dlm_lockres_put() does spin_lock on dlm_domain_lock on the
> +		 * last ref while there, in other path, could be lock requests
> +		 * in normal order: dlm_domain_lock then dlm->spinlock.
> +		 */
> +		if (removed)
> +			spin_unlock(&dlm->spinlock);
>  		dlm_lockres_put(lockres);
> -
> -		/* Avoid adding any scheduling latencies */
> -		cond_resched_lock(&dlm->spinlock);
> +		if (removed)
> +			spin_lock(&dlm->spinlock);
> +		else
> +			/* Avoid adding any scheduling latencies */
> +			cond_resched_lock(&dlm->spinlock);
>  	}
>  
>  	spin_unlock(&dlm->spinlock);
> -- 
> 1.6.6.1
> 
> 
> _______________________________________________
> Ocfs2-devel mailing list
> Ocfs2-devel at oss.oracle.com
> http://oss.oracle.com/mailman/listinfo/ocfs2-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2/dlm: remove potential deadlock -V2
  2010-07-19 10:11 ` Wengang Wang
@ 2010-07-19 23:14   ` Sunil Mushran
  2010-07-20  3:28     ` Wengang Wang
  0 siblings, 1 reply; 6+ messages in thread
From: Sunil Mushran @ 2010-07-19 23:14 UTC (permalink / raw)
  To: ocfs2-devel

A better (and simpler) fix would be to not dlm_grab() in dlm_init_lockres().
Then we can remove the corresponding dlm_put() in dlm_lockres_release().
That grab is not required because we don't call dlm_unregister_domain()
based on refcount.

On 07/19/2010 03:11 AM, Wengang Wang wrote:
> Any comment?
>
> On 10-06-21 21:43, Wengang Wang wrote:
>    
>> When we need to take both dlm_domain_lock and dlm->spinlock, we should take
>> them in order of: dlm_domain_lock then dlm->spinlock.
>>
>> There is pathes disobey this order. That is calling dlm_lockres_put() with
>> dlm->spinlock held in dlm_run_purge_list. dlm_lockres_put() calls dlm_put() at
>> the ref and dlm_put() locks on dlm_domain_lock.
>>
>> Fix:
>> In dlm_run_purge_list, if it could the last ref on the lockres, unlock
>> dlm->spinlock before calling dlm_lockres_put() and lock it back after
>> dlm_lockres_put().
>>
>> Signed-off-by: Wengang Wang<wen.gang.wang@oracle.com>
>> ---
>>   fs/ocfs2/dlm/dlmthread.c |   27 +++++++++++++++++++--------
>>   1 files changed, 19 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
>> index d4f73ca..b1bd624 100644
>> --- a/fs/ocfs2/dlm/dlmthread.c
>> +++ b/fs/ocfs2/dlm/dlmthread.c
>> @@ -152,6 +152,7 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
>>   	spin_unlock(&dlm->spinlock);
>>   }
>>
>> +/* returns 1 if the lockres is removed from purge list, 0 otherwise */
>>   static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>>   			     struct dlm_lock_resource *res)
>>   {
>> @@ -217,7 +218,9 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>>   		     res, master);
>>   		list_del_init(&res->purge);
>>   		spin_unlock(&res->spinlock);
>> +		/* not the last ref, dlm_run_purge_list() holds another */
>>   		dlm_lockres_put(res);
>> +		ret = 1;
>>   		dlm->purge_count--;
>>   	} else
>>   		spin_unlock(&res->spinlock);
>> @@ -232,13 +235,13 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>>   		spin_unlock(&res->spinlock);
>>   		wake_up(&res->wq);
>>   	}
>> -	return 0;
>> +	return ret;
>>   }
>>
>>   static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>>   			       int purge_now)
>>   {
>> -	unsigned int run_max, unused;
>> +	unsigned int run_max, unused, removed;
>>   	unsigned long purge_jiffies;
>>   	struct dlm_lock_resource *lockres;
>>
>> @@ -280,13 +283,21 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>>
>>   		/* This may drop and reacquire the dlm spinlock if it
>>   		 * has to do migration. */
>> -		if (dlm_purge_lockres(dlm, lockres))
>> -			BUG();
>> -
>> +		removed = dlm_purge_lockres(dlm, lockres);
>> +		/* If the lockres is removed from purge list, this could be
>> +		 * the last ref. Unlock dlm->spinlock to avoid deadlock
>> +		 * --dlm_lockres_put() does spin_lock on dlm_domain_lock on the
>> +		 * last ref while there, in other path, could be lock requests
>> +		 * in normal order: dlm_domain_lock then dlm->spinlock.
>> +		 */
>> +		if (removed)
>> +			spin_unlock(&dlm->spinlock);
>>   		dlm_lockres_put(lockres);
>> -
>> -		/* Avoid adding any scheduling latencies */
>> -		cond_resched_lock(&dlm->spinlock);
>> +		if (removed)
>> +			spin_lock(&dlm->spinlock);
>> +		else
>> +			/* Avoid adding any scheduling latencies */
>> +			cond_resched_lock(&dlm->spinlock);
>>   	}
>>
>>   	spin_unlock(&dlm->spinlock);
>> -- 
>> 1.6.6.1
>>
>>
>> _______________________________________________
>> Ocfs2-devel mailing list
>> Ocfs2-devel at oss.oracle.com
>> http://oss.oracle.com/mailman/listinfo/ocfs2-devel
>>      
> _______________________________________________
> Ocfs2-devel mailing list
> Ocfs2-devel at oss.oracle.com
> http://oss.oracle.com/mailman/listinfo/ocfs2-devel
>    

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2/dlm: remove potential deadlock -V2
  2010-07-19 23:14   ` Sunil Mushran
@ 2010-07-20  3:28     ` Wengang Wang
  0 siblings, 0 replies; 6+ messages in thread
From: Wengang Wang @ 2010-07-20  3:28 UTC (permalink / raw)
  To: ocfs2-devel

Very cool fix!

I will post the patch after test.

regards,
wengang.
On 10-07-19 16:14, Sunil Mushran wrote:
> A better (and simpler) fix would be to not dlm_grab() in dlm_init_lockres().
> Then we can remove the corresponding dlm_put() in dlm_lockres_release().
> That grab is not required because we don't call dlm_unregister_domain()
> based on refcount.
> 
> On 07/19/2010 03:11 AM, Wengang Wang wrote:
> >Any comment?
> >
> >On 10-06-21 21:43, Wengang Wang wrote:
> >>When we need to take both dlm_domain_lock and dlm->spinlock, we should take
> >>them in order of: dlm_domain_lock then dlm->spinlock.
> >>
> >>There is pathes disobey this order. That is calling dlm_lockres_put() with
> >>dlm->spinlock held in dlm_run_purge_list. dlm_lockres_put() calls dlm_put() at
> >>the ref and dlm_put() locks on dlm_domain_lock.
> >>
> >>Fix:
> >>In dlm_run_purge_list, if it could the last ref on the lockres, unlock
> >>dlm->spinlock before calling dlm_lockres_put() and lock it back after
> >>dlm_lockres_put().
> >>
> >>Signed-off-by: Wengang Wang<wen.gang.wang@oracle.com>
> >>---
> >>  fs/ocfs2/dlm/dlmthread.c |   27 +++++++++++++++++++--------
> >>  1 files changed, 19 insertions(+), 8 deletions(-)
> >>
> >>diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
> >>index d4f73ca..b1bd624 100644
> >>--- a/fs/ocfs2/dlm/dlmthread.c
> >>+++ b/fs/ocfs2/dlm/dlmthread.c
> >>@@ -152,6 +152,7 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
> >>  	spin_unlock(&dlm->spinlock);
> >>  }
> >>
> >>+/* returns 1 if the lockres is removed from purge list, 0 otherwise */
> >>  static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> >>  			     struct dlm_lock_resource *res)
> >>  {
> >>@@ -217,7 +218,9 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> >>  		     res, master);
> >>  		list_del_init(&res->purge);
> >>  		spin_unlock(&res->spinlock);
> >>+		/* not the last ref, dlm_run_purge_list() holds another */
> >>  		dlm_lockres_put(res);
> >>+		ret = 1;
> >>  		dlm->purge_count--;
> >>  	} else
> >>  		spin_unlock(&res->spinlock);
> >>@@ -232,13 +235,13 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> >>  		spin_unlock(&res->spinlock);
> >>  		wake_up(&res->wq);
> >>  	}
> >>-	return 0;
> >>+	return ret;
> >>  }
> >>
> >>  static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> >>  			       int purge_now)
> >>  {
> >>-	unsigned int run_max, unused;
> >>+	unsigned int run_max, unused, removed;
> >>  	unsigned long purge_jiffies;
> >>  	struct dlm_lock_resource *lockres;
> >>
> >>@@ -280,13 +283,21 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> >>
> >>  		/* This may drop and reacquire the dlm spinlock if it
> >>  		 * has to do migration. */
> >>-		if (dlm_purge_lockres(dlm, lockres))
> >>-			BUG();
> >>-
> >>+		removed = dlm_purge_lockres(dlm, lockres);
> >>+		/* If the lockres is removed from purge list, this could be
> >>+		 * the last ref. Unlock dlm->spinlock to avoid deadlock
> >>+		 * --dlm_lockres_put() does spin_lock on dlm_domain_lock on the
> >>+		 * last ref while there, in other path, could be lock requests
> >>+		 * in normal order: dlm_domain_lock then dlm->spinlock.
> >>+		 */
> >>+		if (removed)
> >>+			spin_unlock(&dlm->spinlock);
> >>  		dlm_lockres_put(lockres);
> >>-
> >>-		/* Avoid adding any scheduling latencies */
> >>-		cond_resched_lock(&dlm->spinlock);
> >>+		if (removed)
> >>+			spin_lock(&dlm->spinlock);
> >>+		else
> >>+			/* Avoid adding any scheduling latencies */
> >>+			cond_resched_lock(&dlm->spinlock);
> >>  	}
> >>
> >>  	spin_unlock(&dlm->spinlock);
> >>-- 
> >>1.6.6.1
> >>
> >>
> >>_______________________________________________
> >>Ocfs2-devel mailing list
> >>Ocfs2-devel at oss.oracle.com
> >>http://oss.oracle.com/mailman/listinfo/ocfs2-devel
> >_______________________________________________
> >Ocfs2-devel mailing list
> >Ocfs2-devel at oss.oracle.com
> >http://oss.oracle.com/mailman/listinfo/ocfs2-devel
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-07-20  3:28 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-06-21 13:43 [Ocfs2-devel] [PATCH] ocfs2/dlm: remove potential deadlock -V2 Wengang Wang
2010-06-25  1:53 ` Wengang Wang
2010-07-05  9:59   ` Wengang Wang
2010-07-19 10:11 ` Wengang Wang
2010-07-19 23:14   ` Sunil Mushran
2010-07-20  3:28     ` Wengang Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.