ocfs2-devel.oss.oracle.com archive mirror
 help / color / mirror / Atom feed
* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
@ 2010-06-22 17:33 Srinivas Eeda
  2010-06-22 23:22 ` Sunil Mushran
  0 siblings, 1 reply; 10+ messages in thread
From: Srinivas Eeda @ 2010-06-22 17:33 UTC (permalink / raw)
  To: ocfs2-devel

There are two problems in dlm_run_purgelist

1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
the same lockres instead of trying the next lockres.

2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
spinlock is reacquired but in this window lockres can get reused. This leads
to BUG.

This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
 next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
lockres spinlock protecting it from getting reused.

Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
---
 fs/ocfs2/dlm/dlmthread.c |   76 ++++++++++++++++++++--------------------------
 1 files changed, 33 insertions(+), 43 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1f..cb74689 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -152,45 +152,26 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 	spin_unlock(&dlm->spinlock);
 }
 
-static int dlm_purge_lockres(struct dlm_ctxt *dlm,
+static void dlm_purge_lockres(struct dlm_ctxt *dlm,
 			     struct dlm_lock_resource *res)
 {
 	int master;
 	int ret = 0;
 
-	spin_lock(&res->spinlock);
-	if (!__dlm_lockres_unused(res)) {
-		mlog(0, "%s:%.*s: tried to purge but not unused\n",
-		     dlm->name, res->lockname.len, res->lockname.name);
-		__dlm_print_one_lock_resource(res);
-		spin_unlock(&res->spinlock);
-		BUG();
-	}
-
-	if (res->state & DLM_LOCK_RES_MIGRATING) {
-		mlog(0, "%s:%.*s: Delay dropref as this lockres is "
-		     "being remastered\n", dlm->name, res->lockname.len,
-		     res->lockname.name);
-		/* Re-add the lockres to the end of the purge list */
-		if (!list_empty(&res->purge)) {
-			list_del_init(&res->purge);
-			list_add_tail(&res->purge, &dlm->purge_list);
-		}
-		spin_unlock(&res->spinlock);
-		return 0;
-	}
+	assert_spin_locked(&dlm->spinlock);
+	assert_spin_locked(&res->spinlock);
 
 	master = (res->owner == dlm->node_num);
 
 	if (!master)
 		res->state |= DLM_LOCK_RES_DROPPING_REF;
-	spin_unlock(&res->spinlock);
 
 	mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
 	     res->lockname.name, master);
 
 	if (!master) {
 		/* drop spinlock...  retake below */
+		spin_unlock(&res->spinlock);
 		spin_unlock(&dlm->spinlock);
 
 		spin_lock(&res->spinlock);
@@ -208,31 +189,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 		mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
 		     dlm->name, res->lockname.len, res->lockname.name, ret);
 		spin_lock(&dlm->spinlock);
+		spin_lock(&res->spinlock);
 	}
 
-	spin_lock(&res->spinlock);
 	if (!list_empty(&res->purge)) {
 		mlog(0, "removing lockres %.*s:%p from purgelist, "
 		     "master = %d\n", res->lockname.len, res->lockname.name,
 		     res, master);
 		list_del_init(&res->purge);
-		spin_unlock(&res->spinlock);
 		dlm_lockres_put(res);
 		dlm->purge_count--;
-	} else
-		spin_unlock(&res->spinlock);
+	}
 
-	__dlm_unhash_lockres(res);
+	if (__dlm_lockres_unused(res))
+		__dlm_unhash_lockres(res);
+	else {
+		mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+		     dlm->name, res->lockname.len, res->lockname.name);
+		__dlm_print_one_lock_resource(res);
+		BUG();
+	}
 
 	/* lockres is not in the hash now.  drop the flag and wake up
 	 * any processes waiting in dlm_get_lock_resource. */
 	if (!master) {
-		spin_lock(&res->spinlock);
 		res->state &= ~DLM_LOCK_RES_DROPPING_REF;
 		spin_unlock(&res->spinlock);
 		wake_up(&res->wq);
-	}
-	return 0;
+	} else
+		spin_unlock(&res->spinlock);
 }
 
 static void dlm_run_purge_list(struct dlm_ctxt *dlm,
@@ -251,17 +236,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 		lockres = list_entry(dlm->purge_list.next,
 				     struct dlm_lock_resource, purge);
 
-		/* Status of the lockres *might* change so double
-		 * check. If the lockres is unused, holding the dlm
-		 * spinlock will prevent people from getting and more
-		 * refs on it -- there's no need to keep the lockres
-		 * spinlock. */
 		spin_lock(&lockres->spinlock);
-		unused = __dlm_lockres_unused(lockres);
-		spin_unlock(&lockres->spinlock);
-
-		if (!unused)
-			continue;
 
 		purge_jiffies = lockres->last_used +
 			msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
@@ -273,15 +248,30 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 			 * in tail order, we can stop at the first
 			 * unpurgable resource -- anyone added after
 			 * him will have a greater last_used value */
+			spin_unlock(&lockres->spinlock);
 			break;
 		}
 
+		/* Status of the lockres *might* change so double
+		 * check. If the lockres is unused, holding the dlm
+		 * spinlock will prevent people from getting and more
+		 * refs on it. */
+		unused = __dlm_lockres_unused(lockres);
+		if (!unused ||
+		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+			mlog(0, "lockres %s:%.*s: is in use or "
+			     "being remastered\n", dlm->name,
+			     lockres->lockname.len, lockres->lockname.name);
+			list_move_tail(&dlm->purge_list, &lockres->purge);
+			spin_unlock(&lockres->spinlock);
+			continue;
+		}
+
 		dlm_lockres_get(lockres);
 
 		/* This may drop and reacquire the dlm spinlock if it
 		 * has to do migration. */
-		if (dlm_purge_lockres(dlm, lockres))
-			BUG();
+		dlm_purge_lockres(dlm, lockres);
 
 		dlm_lockres_put(lockres);
 
-- 
1.5.6.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
  2010-06-22 17:33 [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3 Srinivas Eeda
@ 2010-06-22 23:22 ` Sunil Mushran
  0 siblings, 0 replies; 10+ messages in thread
From: Sunil Mushran @ 2010-06-22 23:22 UTC (permalink / raw)
  To: ocfs2-devel

Comments inlined. Getting there. Some more cleanups.

On 06/22/2010 10:33 AM, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist
>
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
>
> 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
> spinlock is reacquired but in this window lockres can get reused. This leads
> to BUG.
>
> This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
>   next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
> lockres spinlock protecting it from getting reused.
>
> Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
> ---
>   fs/ocfs2/dlm/dlmthread.c |   76 ++++++++++++++++++++--------------------------
>   1 files changed, 33 insertions(+), 43 deletions(-)
>
> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
> index 11a6d1f..cb74689 100644
> --- a/fs/ocfs2/dlm/dlmthread.c
> +++ b/fs/ocfs2/dlm/dlmthread.c
> @@ -152,45 +152,26 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
>   	spin_unlock(&dlm->spinlock);
>   }
>
> -static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> +static void dlm_purge_lockres(struct dlm_ctxt *dlm,
>   			     struct dlm_lock_resource *res)
>   {
>   	int master;
>   	int ret = 0;
>
> -	spin_lock(&res->spinlock);
> -	if (!__dlm_lockres_unused(res)) {
> -		mlog(0, "%s:%.*s: tried to purge but not unused\n",
> -		     dlm->name, res->lockname.len, res->lockname.name);
> -		__dlm_print_one_lock_resource(res);
> -		spin_unlock(&res->spinlock);
> -		BUG();
> -	}
> -
> -	if (res->state & DLM_LOCK_RES_MIGRATING) {
> -		mlog(0, "%s:%.*s: Delay dropref as this lockres is "
> -		     "being remastered\n", dlm->name, res->lockname.len,
> -		     res->lockname.name);
> -		/* Re-add the lockres to the end of the purge list */
> -		if (!list_empty(&res->purge)) {
> -			list_del_init(&res->purge);
> -			list_add_tail(&res->purge, &dlm->purge_list);
> -		}
> -		spin_unlock(&res->spinlock);
> -		return 0;
> -	}
> +	assert_spin_locked(&dlm->spinlock);
> +	assert_spin_locked(&res->spinlock);
>
>   	master = (res->owner == dlm->node_num);
>
>   	if (!master)
>   		res->state |= DLM_LOCK_RES_DROPPING_REF;
> -	spin_unlock(&res->spinlock);
>
>   	mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
>   	     res->lockname.name, master);
>
>   	if (!master) {
>   		/* drop spinlock...  retake below */
> +		spin_unlock(&res->spinlock);
>   		spin_unlock(&dlm->spinlock);
>
>   		spin_lock(&res->spinlock);
>    

This can do with more cleanup.

+	assert_spin_locked(&dlm->spinlock);
+	assert_spin_locked(&res->spinlock);

  	master = (res->owner == dlm->node_num);

- 	if (!master)
- 		res->state |= DLM_LOCK_RES_DROPPING_REF;
-	spin_unlock(&res->spinlock);

  	mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
  	     res->lockname.name, master);

  	if (!master) {
+ 		res->state |= DLM_LOCK_RES_DROPPING_REF;
- 		/* drop spinlock...  retake below */
+		spin_unlock(&res->spinlock);
  		spin_unlock(&dlm->spinlock);

  		spin_lock(&res->spinlock);


> @@ -208,31 +189,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>   		mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
>   		     dlm->name, res->lockname.len, res->lockname.name, ret);
>   		spin_lock(&dlm->spinlock);
> +		spin_lock(&res->spinlock);
>   	}
>
> -	spin_lock(&res->spinlock);
>   	if (!list_empty(&res->purge)) {
>   		mlog(0, "removing lockres %.*s:%p from purgelist, "
>   		     "master = %d\n", res->lockname.len, res->lockname.name,
>   		     res, master);
>   		list_del_init(&res->purge);
> -		spin_unlock(&res->spinlock);
>   		dlm_lockres_put(res);
>   		dlm->purge_count--;
> -	} else
> -		spin_unlock(&res->spinlock);
> +	}
>
> -	__dlm_unhash_lockres(res);
> +	if (__dlm_lockres_unused(res))
> +		__dlm_unhash_lockres(res);
> +	else {
> +		mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
> +		     dlm->name, res->lockname.len, res->lockname.name);
> +		__dlm_print_one_lock_resource(res);
> +		BUG();
> +	}
>    

Blocks that BUG should be separated if possible. Improves readability.

+	if (!__dlm_lockres_unused(res)) {
+		mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+		     dlm->name, res->lockname.len, res->lockname.name);
+		__dlm_print_one_lock_resource(res);
+		BUG();
+	}

	__dlm_unhash_lockres(res);


>
>   	/* lockres is not in the hash now.  drop the flag and wake up
>   	 * any processes waiting in dlm_get_lock_resource. */
>   	if (!master) {
> -		spin_lock(&res->spinlock);
>   		res->state &= ~DLM_LOCK_RES_DROPPING_REF;
>   		spin_unlock(&res->spinlock);
>   		wake_up(&res->wq);
> -	}
> -	return 0;
> +	} else
> +		spin_unlock(&res->spinlock);
>   }
>
>   static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> @@ -251,17 +236,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>   		lockres = list_entry(dlm->purge_list.next,
>   				     struct dlm_lock_resource, purge);
>
> -		/* Status of the lockres *might* change so double
> -		 * check. If the lockres is unused, holding the dlm
> -		 * spinlock will prevent people from getting and more
> -		 * refs on it -- there's no need to keep the lockres
> -		 * spinlock. */
>   		spin_lock(&lockres->spinlock);
> -		unused = __dlm_lockres_unused(lockres);
> -		spin_unlock(&lockres->spinlock);
> -
> -		if (!unused)
> -			continue;
>
>   		purge_jiffies = lockres->last_used +
>   			msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
> @@ -273,15 +248,30 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>   			 * in tail order, we can stop at the first
>   			 * unpurgable resource -- anyone added after
>   			 * him will have a greater last_used value */
> +			spin_unlock(&lockres->spinlock);
>   			break;
>   		}
>
> +		/* Status of the lockres *might* change so double
> +		 * check. If the lockres is unused, holding the dlm
> +		 * spinlock will prevent people from getting and more
> +		 * refs on it. */
> +		unused = __dlm_lockres_unused(lockres);
> +		if (!unused ||
> +		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
> +			mlog(0, "lockres %s:%.*s: is in use or "
> +			     "being remastered\n", dlm->name,
> +			     lockres->lockname.len, lockres->lockname.name);
> +			list_move_tail(&dlm->purge_list, &lockres->purge);
> +			spin_unlock(&lockres->spinlock);
> +			continue;
> +		}
> +
>   		dlm_lockres_get(lockres);
>
>   		/* This may drop and reacquire the dlm spinlock if it
>   		 * has to do migration. */
> -		if (dlm_purge_lockres(dlm, lockres))
> -			BUG();
> +		dlm_purge_lockres(dlm, lockres);
>
>   		dlm_lockres_put(lockres);
>
>    

This comment is dated. We don't migrate in purge_lockres.

- 		/* This may drop and reacquire the dlm spinlock if it
- 		 * has to do migration. */
-		if (dlm_purge_lockres(dlm, lockres))
-			BUG();
+		dlm_purge_lockres(dlm, lockres);

  		dlm_lockres_put(lockres);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
@ 2010-06-23  5:48 Srinivas Eeda
  2010-06-23 17:00 ` Sunil Mushran
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Srinivas Eeda @ 2010-06-23  5:48 UTC (permalink / raw)
  To: ocfs2-devel

There are two problems in dlm_run_purgelist

1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
the same lockres instead of trying the next lockres.

2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
spinlock is reacquired but in this window lockres can get reused. This leads
to BUG.

This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
 next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
lockres spinlock protecting it from getting reused.

Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
---
 fs/ocfs2/dlm/dlmthread.c |   79 +++++++++++++++++++--------------------------
 1 files changed, 33 insertions(+), 46 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1f..6822f9a 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 	spin_unlock(&dlm->spinlock);
 }
 
-static int dlm_purge_lockres(struct dlm_ctxt *dlm,
+static void dlm_purge_lockres(struct dlm_ctxt *dlm,
 			     struct dlm_lock_resource *res)
 {
 	int master;
 	int ret = 0;
 
-	spin_lock(&res->spinlock);
-	if (!__dlm_lockres_unused(res)) {
-		mlog(0, "%s:%.*s: tried to purge but not unused\n",
-		     dlm->name, res->lockname.len, res->lockname.name);
-		__dlm_print_one_lock_resource(res);
-		spin_unlock(&res->spinlock);
-		BUG();
-	}
-
-	if (res->state & DLM_LOCK_RES_MIGRATING) {
-		mlog(0, "%s:%.*s: Delay dropref as this lockres is "
-		     "being remastered\n", dlm->name, res->lockname.len,
-		     res->lockname.name);
-		/* Re-add the lockres to the end of the purge list */
-		if (!list_empty(&res->purge)) {
-			list_del_init(&res->purge);
-			list_add_tail(&res->purge, &dlm->purge_list);
-		}
-		spin_unlock(&res->spinlock);
-		return 0;
-	}
+	assert_spin_locked(&dlm->spinlock);
+	assert_spin_locked(&res->spinlock);
 
 	master = (res->owner == dlm->node_num);
 
-	if (!master)
-		res->state |= DLM_LOCK_RES_DROPPING_REF;
-	spin_unlock(&res->spinlock);
 
 	mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
 	     res->lockname.name, master);
 
 	if (!master) {
+		res->state |= DLM_LOCK_RES_DROPPING_REF;
 		/* drop spinlock...  retake below */
+		spin_unlock(&res->spinlock);
 		spin_unlock(&dlm->spinlock);
 
 		spin_lock(&res->spinlock);
@@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 		mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
 		     dlm->name, res->lockname.len, res->lockname.name, ret);
 		spin_lock(&dlm->spinlock);
+		spin_lock(&res->spinlock);
 	}
 
-	spin_lock(&res->spinlock);
 	if (!list_empty(&res->purge)) {
 		mlog(0, "removing lockres %.*s:%p from purgelist, "
 		     "master = %d\n", res->lockname.len, res->lockname.name,
 		     res, master);
 		list_del_init(&res->purge);
-		spin_unlock(&res->spinlock);
 		dlm_lockres_put(res);
 		dlm->purge_count--;
-	} else
-		spin_unlock(&res->spinlock);
+	}
+
+	if (!__dlm_lockres_unused) {
+		mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+		     dlm->name, res->lockname.len, res->lockname.name);
+		__dlm_print_one_lock_resource(res);
+		BUG();
+	}
 
 	__dlm_unhash_lockres(res);
 
 	/* lockres is not in the hash now.  drop the flag and wake up
 	 * any processes waiting in dlm_get_lock_resource. */
 	if (!master) {
-		spin_lock(&res->spinlock);
 		res->state &= ~DLM_LOCK_RES_DROPPING_REF;
 		spin_unlock(&res->spinlock);
 		wake_up(&res->wq);
-	}
-	return 0;
+	} else
+		spin_unlock(&res->spinlock);
 }
 
 static void dlm_run_purge_list(struct dlm_ctxt *dlm,
@@ -251,17 +235,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 		lockres = list_entry(dlm->purge_list.next,
 				     struct dlm_lock_resource, purge);
 
-		/* Status of the lockres *might* change so double
-		 * check. If the lockres is unused, holding the dlm
-		 * spinlock will prevent people from getting and more
-		 * refs on it -- there's no need to keep the lockres
-		 * spinlock. */
 		spin_lock(&lockres->spinlock);
-		unused = __dlm_lockres_unused(lockres);
-		spin_unlock(&lockres->spinlock);
-
-		if (!unused)
-			continue;
 
 		purge_jiffies = lockres->last_used +
 			msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
@@ -273,15 +247,28 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 			 * in tail order, we can stop at the first
 			 * unpurgable resource -- anyone added after
 			 * him will have a greater last_used value */
+			spin_unlock(&lockres->spinlock);
 			break;
 		}
 
+		/* Status of the lockres *might* change so double
+		 * check. If the lockres is unused, holding the dlm
+		 * spinlock will prevent people from getting and more
+		 * refs on it. */
+		unused = __dlm_lockres_unused(lockres);
+		if (!unused ||
+		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+			mlog(0, "lockres %s:%.*s: is in use or "
+			     "being remastered\n", dlm->name,
+			     lockres->lockname.len, lockres->lockname.name);
+			list_move_tail(&dlm->purge_list, &lockres->purge);
+			spin_unlock(&lockres->spinlock);
+			continue;
+		}
+
 		dlm_lockres_get(lockres);
 
-		/* This may drop and reacquire the dlm spinlock if it
-		 * has to do migration. */
-		if (dlm_purge_lockres(dlm, lockres))
-			BUG();
+		dlm_purge_lockres(dlm, lockres);
 
 		dlm_lockres_put(lockres);
 
-- 
1.5.6.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
  2010-06-23  5:48 Srinivas Eeda
@ 2010-06-23 17:00 ` Sunil Mushran
  2010-06-23 17:25   ` Srinivas Eeda
  2010-06-26 10:42 ` Wengang Wang
  2010-07-12 18:21 ` Joel Becker
  2 siblings, 1 reply; 10+ messages in thread
From: Sunil Mushran @ 2010-06-23 17:00 UTC (permalink / raw)
  To: ocfs2-devel

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>


On 06/22/2010 10:48 PM, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist
>
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
>
> 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
> spinlock is reacquired but in this window lockres can get reused. This leads
> to BUG.
>
> This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
>   next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
> lockres spinlock protecting it from getting reused.
>
> Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
> ---
>   fs/ocfs2/dlm/dlmthread.c |   79 +++++++++++++++++++--------------------------
>   1 files changed, 33 insertions(+), 46 deletions(-)
>
> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
> index 11a6d1f..6822f9a 100644
> --- a/fs/ocfs2/dlm/dlmthread.c
> +++ b/fs/ocfs2/dlm/dlmthread.c
> @@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
>   	spin_unlock(&dlm->spinlock);
>   }
>
> -static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> +static void dlm_purge_lockres(struct dlm_ctxt *dlm,
>   			     struct dlm_lock_resource *res)
>   {
>   	int master;
>   	int ret = 0;
>
> -	spin_lock(&res->spinlock);
> -	if (!__dlm_lockres_unused(res)) {
> -		mlog(0, "%s:%.*s: tried to purge but not unused\n",
> -		     dlm->name, res->lockname.len, res->lockname.name);
> -		__dlm_print_one_lock_resource(res);
> -		spin_unlock(&res->spinlock);
> -		BUG();
> -	}
> -
> -	if (res->state & DLM_LOCK_RES_MIGRATING) {
> -		mlog(0, "%s:%.*s: Delay dropref as this lockres is "
> -		     "being remastered\n", dlm->name, res->lockname.len,
> -		     res->lockname.name);
> -		/* Re-add the lockres to the end of the purge list */
> -		if (!list_empty(&res->purge)) {
> -			list_del_init(&res->purge);
> -		list_add_tail(&res->purge, &dlm->purge_list);
> -		}
> -		spin_unlock(&res->spinlock);
> -		return 0;
> -	}
> +	assert_spin_locked(&dlm->spinlock);
> +	assert_spin_locked(&res->spinlock);
>
>   	master = (res->owner == dlm->node_num);
>
> -	if (!master)
> -		res->state |= DLM_LOCK_RES_DROPPING_REF;
> -	spin_unlock(&res->spinlock);
>
>   	mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
>   	     res->lockname.name, master);
>
>   	if (!master) {
> +		res->state |= DLM_LOCK_RES_DROPPING_REF;
>   		/* drop spinlock...  retake below */
> +		spin_unlock(&res->spinlock);
>   		spin_unlock(&dlm->spinlock);
>
>   		spin_lock(&res->spinlock);
> @@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>   		mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
>   		     dlm->name, res->lockname.len, res->lockname.name, ret);
>   		spin_lock(&dlm->spinlock);
> +		spin_lock(&res->spinlock);
>   	}
>
> -	spin_lock(&res->spinlock);
>   	if (!list_empty(&res->purge)) {
>   		mlog(0, "removing lockres %.*s:%p from purgelist, "
>   		     "master = %d\n", res->lockname.len, res->lockname.name,
>   		     res, master);
>   		list_del_init(&res->purge);
> -		spin_unlock(&res->spinlock);
>   		dlm_lockres_put(res);
>   		dlm->purge_count--;
> -	} else
> -		spin_unlock(&res->spinlock);
> +	}
> +
> +	if (!__dlm_lockres_unused) {
> +		mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
> +		     dlm->name, res->lockname.len, res->lockname.name);
> +		__dlm_print_one_lock_resource(res);
> +		BUG();
> +	}
>
>   	__dlm_unhash_lockres(res);
>
>   	/* lockres is not in the hash now.  drop the flag and wake up
>   	 * any processes waiting in dlm_get_lock_resource. */
>   	if (!master) {
> -		spin_lock(&res->spinlock);
>   		res->state &= ~DLM_LOCK_RES_DROPPING_REF;
>   		spin_unlock(&res->spinlock);
>   		wake_up(&res->wq);
> -	}
> -	return 0;
> +	} else
> +		spin_unlock(&res->spinlock);
>   }
>
>   static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> @@ -251,17 +235,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>   		lockres = list_entry(dlm->purge_list.next,
>   				     struct dlm_lock_resource, purge);
>
> -		/* Status of the lockres *might* change so double
> -		 * check. If the lockres is unused, holding the dlm
> -		 * spinlock will prevent people from getting and more
> -		 * refs on it -- there's no need to keep the lockres
> -		 * spinlock. */
>   		spin_lock(&lockres->spinlock);
> -		unused = __dlm_lockres_unused(lockres);
> -		spin_unlock(&lockres->spinlock);
> -
> -		if (!unused)
> -			continue;
>
>   		purge_jiffies = lockres->last_used +
>   			msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
> @@ -273,15 +247,28 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>   			 * in tail order, we can stop at the first
>   			 * unpurgable resource -- anyone added after
>   			 * him will have a greater last_used value */
> +			spin_unlock(&lockres->spinlock);
>   			break;
>   		}
>
> +		/* Status of the lockres *might* change so double
> +		 * check. If the lockres is unused, holding the dlm
> +		 * spinlock will prevent people from getting and more
> +		 * refs on it. */
> +		unused = __dlm_lockres_unused(lockres);
> +		if (!unused ||
> +		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
> +			mlog(0, "lockres %s:%.*s: is in use or "
> +			     "being remastered\n", dlm->name,
> +			     lockres->lockname.len, lockres->lockname.name);
> +			list_move_tail(&dlm->purge_list, &lockres->purge);
> +			spin_unlock(&lockres->spinlock);
> +			continue;
> +		}
> +
>   		dlm_lockres_get(lockres);
>
> -		/* This may drop and reacquire the dlm spinlock if it
> -		 * has to do migration. */
> -		if (dlm_purge_lockres(dlm, lockres))
> -			BUG();
> +		dlm_purge_lockres(dlm, lockres);
>
>   		dlm_lockres_put(lockres);
>
>    

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
  2010-06-23 17:00 ` Sunil Mushran
@ 2010-06-23 17:25   ` Srinivas Eeda
  0 siblings, 0 replies; 10+ messages in thread
From: Srinivas Eeda @ 2010-06-23 17:25 UTC (permalink / raw)
  To: ocfs2-devel

Sunil, Joel, Wengang. Thanks for reviewing the patch and your comments.

On 6/23/2010 10:00 AM, Sunil Mushran wrote:
> Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
>
>
> On 06/22/2010 10:48 PM, Srinivas Eeda wrote:
>> There are two problems in dlm_run_purgelist
>>
>> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying 
>> to purge
>> the same lockres instead of trying the next lockres.
>>
>> 2. When a lockres is found unused, dlm_run_purgelist releases lockres 
>> spinlock
>> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
>> spinlock is reacquired but in this window lockres can get reused. 
>> This leads
>> to BUG.
>>
>> This patch modifies dlm_run_purgelist to skip lockres if it's in use 
>> and purge
>>   next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before 
>> releasing the
>> lockres spinlock protecting it from getting reused.
>>
>> Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
>> ---
>>   fs/ocfs2/dlm/dlmthread.c |   79 
>> +++++++++++++++++++--------------------------
>>   1 files changed, 33 insertions(+), 46 deletions(-)
>>
>> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
>> index 11a6d1f..6822f9a 100644
>> --- a/fs/ocfs2/dlm/dlmthread.c
>> +++ b/fs/ocfs2/dlm/dlmthread.c
>> @@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
>>       spin_unlock(&dlm->spinlock);
>>   }
>>
>> -static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>> +static void dlm_purge_lockres(struct dlm_ctxt *dlm,
>>                    struct dlm_lock_resource *res)
>>   {
>>       int master;
>>       int ret = 0;
>>
>> -    spin_lock(&res->spinlock);
>> -    if (!__dlm_lockres_unused(res)) {
>> -        mlog(0, "%s:%.*s: tried to purge but not unused\n",
>> -             dlm->name, res->lockname.len, res->lockname.name);
>> -        __dlm_print_one_lock_resource(res);
>> -        spin_unlock(&res->spinlock);
>> -        BUG();
>> -    }
>> -
>> -    if (res->state & DLM_LOCK_RES_MIGRATING) {
>> -        mlog(0, "%s:%.*s: Delay dropref as this lockres is "
>> -             "being remastered\n", dlm->name, res->lockname.len,
>> -             res->lockname.name);
>> -        /* Re-add the lockres to the end of the purge list */
>> -        if (!list_empty(&res->purge)) {
>> -            list_del_init(&res->purge);
>> -            list_add_tail(&res->purge, &dlm->purge_list);
>> -        }
>> -        spin_unlock(&res->spinlock);
>> -        return 0;
>> -    }
>> +    assert_spin_locked(&dlm->spinlock);
>> +    assert_spin_locked(&res->spinlock);
>>
>>       master = (res->owner == dlm->node_num);
>>
>> -    if (!master)
>> -        res->state |= DLM_LOCK_RES_DROPPING_REF;
>> -    spin_unlock(&res->spinlock);
>>
>>       mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
>>            res->lockname.name, master);
>>
>>       if (!master) {
>> +        res->state |= DLM_LOCK_RES_DROPPING_REF;
>>           /* drop spinlock...  retake below */
>> +        spin_unlock(&res->spinlock);
>>           spin_unlock(&dlm->spinlock);
>>
>>           spin_lock(&res->spinlock);
>> @@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>>           mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
>>                dlm->name, res->lockname.len, res->lockname.name, ret);
>>           spin_lock(&dlm->spinlock);
>> +        spin_lock(&res->spinlock);
>>       }
>>
>> -    spin_lock(&res->spinlock);
>>       if (!list_empty(&res->purge)) {
>>           mlog(0, "removing lockres %.*s:%p from purgelist, "
>>                "master = %d\n", res->lockname.len, res->lockname.name,
>>                res, master);
>>           list_del_init(&res->purge);
>> -        spin_unlock(&res->spinlock);
>>           dlm_lockres_put(res);
>>           dlm->purge_count--;
>> -    } else
>> -        spin_unlock(&res->spinlock);
>> +    }
>> +
>> +    if (!__dlm_lockres_unused) {
>> +        mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
>> +             dlm->name, res->lockname.len, res->lockname.name);
>> +        __dlm_print_one_lock_resource(res);
>> +        BUG();
>> +    }
>>
>>       __dlm_unhash_lockres(res);
>>
>>       /* lockres is not in the hash now.  drop the flag and wake up
>>        * any processes waiting in dlm_get_lock_resource. */
>>       if (!master) {
>> -        spin_lock(&res->spinlock);
>>           res->state &= ~DLM_LOCK_RES_DROPPING_REF;
>>           spin_unlock(&res->spinlock);
>>           wake_up(&res->wq);
>> -    }
>> -    return 0;
>> +    } else
>> +        spin_unlock(&res->spinlock);
>>   }
>>
>>   static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>> @@ -251,17 +235,7 @@ static void dlm_run_purge_list(struct dlm_ctxt 
>> *dlm,
>>           lockres = list_entry(dlm->purge_list.next,
>>                        struct dlm_lock_resource, purge);
>>
>> -        /* Status of the lockres *might* change so double
>> -         * check. If the lockres is unused, holding the dlm
>> -         * spinlock will prevent people from getting and more
>> -         * refs on it -- there's no need to keep the lockres
>> -         * spinlock. */
>>           spin_lock(&lockres->spinlock);
>> -        unused = __dlm_lockres_unused(lockres);
>> -        spin_unlock(&lockres->spinlock);
>> -
>> -        if (!unused)
>> -            continue;
>>
>>           purge_jiffies = lockres->last_used +
>>               msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
>> @@ -273,15 +247,28 @@ static void dlm_run_purge_list(struct dlm_ctxt 
>> *dlm,
>>                * in tail order, we can stop at the first
>>                * unpurgable resource -- anyone added after
>>                * him will have a greater last_used value */
>> +            spin_unlock(&lockres->spinlock);
>>               break;
>>           }
>>
>> +        /* Status of the lockres *might* change so double
>> +         * check. If the lockres is unused, holding the dlm
>> +         * spinlock will prevent people from getting and more
>> +         * refs on it. */
>> +        unused = __dlm_lockres_unused(lockres);
>> +        if (!unused ||
>> +            (lockres->state & DLM_LOCK_RES_MIGRATING)) {
>> +            mlog(0, "lockres %s:%.*s: is in use or "
>> +                 "being remastered\n", dlm->name,
>> +                 lockres->lockname.len, lockres->lockname.name);
>> +            list_move_tail(&dlm->purge_list, &lockres->purge);
>> +            spin_unlock(&lockres->spinlock);
>> +            continue;
>> +        }
>> +
>>           dlm_lockres_get(lockres);
>>
>> -        /* This may drop and reacquire the dlm spinlock if it
>> -         * has to do migration. */
>> -        if (dlm_purge_lockres(dlm, lockres))
>> -            BUG();
>> +        dlm_purge_lockres(dlm, lockres);
>>
>>           dlm_lockres_put(lockres);
>>
>>    
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
  2010-06-23  5:48 Srinivas Eeda
  2010-06-23 17:00 ` Sunil Mushran
@ 2010-06-26 10:42 ` Wengang Wang
  2010-07-12 18:21 ` Joel Becker
  2 siblings, 0 replies; 10+ messages in thread
From: Wengang Wang @ 2010-06-26 10:42 UTC (permalink / raw)
  To: ocfs2-devel

A typo in your patch.

On 10-06-22 22:48, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist
> 
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
> 
> 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
> spinlock is reacquired but in this window lockres can get reused. This leads
> to BUG.
> 
> This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
>  next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
> lockres spinlock protecting it from getting reused.
> 
> Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
> ---
>  fs/ocfs2/dlm/dlmthread.c |   79 +++++++++++++++++++--------------------------
>  1 files changed, 33 insertions(+), 46 deletions(-)
> 
> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
> index 11a6d1f..6822f9a 100644
> --- a/fs/ocfs2/dlm/dlmthread.c
> +++ b/fs/ocfs2/dlm/dlmthread.c
> @@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
>  	spin_unlock(&dlm->spinlock);
>  }
>  
> -static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> +static void dlm_purge_lockres(struct dlm_ctxt *dlm,
>  			     struct dlm_lock_resource *res)
>  {
>  	int master;
>  	int ret = 0;
>  
> -	spin_lock(&res->spinlock);
> -	if (!__dlm_lockres_unused(res)) {
> -		mlog(0, "%s:%.*s: tried to purge but not unused\n",
> -		     dlm->name, res->lockname.len, res->lockname.name);
> -		__dlm_print_one_lock_resource(res);
> -		spin_unlock(&res->spinlock);
> -		BUG();
> -	}
> -
> -	if (res->state & DLM_LOCK_RES_MIGRATING) {
> -		mlog(0, "%s:%.*s: Delay dropref as this lockres is "
> -		     "being remastered\n", dlm->name, res->lockname.len,
> -		     res->lockname.name);
> -		/* Re-add the lockres to the end of the purge list */
> -		if (!list_empty(&res->purge)) {
> -			list_del_init(&res->purge);
> -			list_add_tail(&res->purge, &dlm->purge_list);
> -		}
> -		spin_unlock(&res->spinlock);
> -		return 0;
> -	}
> +	assert_spin_locked(&dlm->spinlock);
> +	assert_spin_locked(&res->spinlock);
>  
>  	master = (res->owner == dlm->node_num);
>  
> -	if (!master)
> -		res->state |= DLM_LOCK_RES_DROPPING_REF;
> -	spin_unlock(&res->spinlock);
>  
>  	mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
>  	     res->lockname.name, master);
>  
>  	if (!master) {
> +		res->state |= DLM_LOCK_RES_DROPPING_REF;
>  		/* drop spinlock...  retake below */
> +		spin_unlock(&res->spinlock);
>  		spin_unlock(&dlm->spinlock);
>  
>  		spin_lock(&res->spinlock);
> @@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>  		mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
>  		     dlm->name, res->lockname.len, res->lockname.name, ret);
>  		spin_lock(&dlm->spinlock);
> +		spin_lock(&res->spinlock);
>  	}
>  
> -	spin_lock(&res->spinlock);
>  	if (!list_empty(&res->purge)) {
>  		mlog(0, "removing lockres %.*s:%p from purgelist, "
>  		     "master = %d\n", res->lockname.len, res->lockname.name,
>  		     res, master);
>  		list_del_init(&res->purge);
> -		spin_unlock(&res->spinlock);
>  		dlm_lockres_put(res);
>  		dlm->purge_count--;
> -	} else
> -		spin_unlock(&res->spinlock);
> +	}
> +
> +	if (!__dlm_lockres_unused) {

should not be
+       if (!__dlm_lockres_unused(res)) {

regards,
wengang.

> +		mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
> +		     dlm->name, res->lockname.len, res->lockname.name);
> +		__dlm_print_one_lock_resource(res);
> +		BUG();
> +	}
>  
>  	__dlm_unhash_lockres(res);
>  

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
  2010-06-23  5:48 Srinivas Eeda
  2010-06-23 17:00 ` Sunil Mushran
  2010-06-26 10:42 ` Wengang Wang
@ 2010-07-12 18:21 ` Joel Becker
  2010-07-12 22:24   ` Srinivas Eeda
  2 siblings, 1 reply; 10+ messages in thread
From: Joel Becker @ 2010-07-12 18:21 UTC (permalink / raw)
  To: ocfs2-devel

On Tue, Jun 22, 2010 at 10:48:28PM -0700, Srinivas Eeda wrote:
> +	if (!__dlm_lockres_unused) {
> +		mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
> +		     dlm->name, res->lockname.len, res->lockname.name);
> +		__dlm_print_one_lock_resource(res);
> +		BUG();
> +	}

/build/jlbec/linux-2.6/working/fs/ocfs2/dlm/dlmthread.c: In function 'dlm_purge_lockres':
/build/jlbec/linux-2.6/working/fs/ocfs2/dlm/dlmthread.c:203: warning: the address of '__dlm_lockres_unused' will always evaluate as 'true'

	Was this even tested?  I'm leaving this patch out of 'fixes'
until corrected and tested.

Joel

-- 

"Reader, suppose you were an idiot.  And suppose you were a member of
 Congress.  But I repeat myself."
	- Mark Twain

Joel Becker
Consulting Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
@ 2010-07-12 22:16 Srinivas Eeda
  2010-07-12 22:39 ` Joel Becker
  0 siblings, 1 reply; 10+ messages in thread
From: Srinivas Eeda @ 2010-07-12 22:16 UTC (permalink / raw)
  To: ocfs2-devel

There are two problems in dlm_run_purgelist

1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
the same lockres instead of trying the next lockres.

2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
spinlock is reacquired but in this window lockres can get reused. This leads
to BUG.

This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
 next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
lockres spinlock protecting it from getting reused.

Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
Signed-off-by: Sunil Mushran<sunil.mushran@oracle.com>
---
 fs/ocfs2/dlm/dlmthread.c |   79 +++++++++++++++++++--------------------------
 1 files changed, 33 insertions(+), 46 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1f..76ab4aa 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 	spin_unlock(&dlm->spinlock);
 }
 
-static int dlm_purge_lockres(struct dlm_ctxt *dlm,
+static void dlm_purge_lockres(struct dlm_ctxt *dlm,
 			     struct dlm_lock_resource *res)
 {
 	int master;
 	int ret = 0;
 
-	spin_lock(&res->spinlock);
-	if (!__dlm_lockres_unused(res)) {
-		mlog(0, "%s:%.*s: tried to purge but not unused\n",
-		     dlm->name, res->lockname.len, res->lockname.name);
-		__dlm_print_one_lock_resource(res);
-		spin_unlock(&res->spinlock);
-		BUG();
-	}
-
-	if (res->state & DLM_LOCK_RES_MIGRATING) {
-		mlog(0, "%s:%.*s: Delay dropref as this lockres is "
-		     "being remastered\n", dlm->name, res->lockname.len,
-		     res->lockname.name);
-		/* Re-add the lockres to the end of the purge list */
-		if (!list_empty(&res->purge)) {
-			list_del_init(&res->purge);
-			list_add_tail(&res->purge, &dlm->purge_list);
-		}
-		spin_unlock(&res->spinlock);
-		return 0;
-	}
+	assert_spin_locked(&dlm->spinlock);
+	assert_spin_locked(&res->spinlock);
 
 	master = (res->owner == dlm->node_num);
 
-	if (!master)
-		res->state |= DLM_LOCK_RES_DROPPING_REF;
-	spin_unlock(&res->spinlock);
 
 	mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
 	     res->lockname.name, master);
 
 	if (!master) {
+		res->state |= DLM_LOCK_RES_DROPPING_REF;
 		/* drop spinlock...  retake below */
+		spin_unlock(&res->spinlock);
 		spin_unlock(&dlm->spinlock);
 
 		spin_lock(&res->spinlock);
@@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 		mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
 		     dlm->name, res->lockname.len, res->lockname.name, ret);
 		spin_lock(&dlm->spinlock);
+		spin_lock(&res->spinlock);
 	}
 
-	spin_lock(&res->spinlock);
 	if (!list_empty(&res->purge)) {
 		mlog(0, "removing lockres %.*s:%p from purgelist, "
 		     "master = %d\n", res->lockname.len, res->lockname.name,
 		     res, master);
 		list_del_init(&res->purge);
-		spin_unlock(&res->spinlock);
 		dlm_lockres_put(res);
 		dlm->purge_count--;
-	} else
-		spin_unlock(&res->spinlock);
+	}
+
+	if (!__dlm_lockres_unused(res)) {
+		mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+		     dlm->name, res->lockname.len, res->lockname.name);
+		__dlm_print_one_lock_resource(res);
+		BUG();
+	}
 
 	__dlm_unhash_lockres(res);
 
 	/* lockres is not in the hash now.  drop the flag and wake up
 	 * any processes waiting in dlm_get_lock_resource. */
 	if (!master) {
-		spin_lock(&res->spinlock);
 		res->state &= ~DLM_LOCK_RES_DROPPING_REF;
 		spin_unlock(&res->spinlock);
 		wake_up(&res->wq);
-	}
-	return 0;
+	} else
+		spin_unlock(&res->spinlock);
 }
 
 static void dlm_run_purge_list(struct dlm_ctxt *dlm,
@@ -251,17 +235,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 		lockres = list_entry(dlm->purge_list.next,
 				     struct dlm_lock_resource, purge);
 
-		/* Status of the lockres *might* change so double
-		 * check. If the lockres is unused, holding the dlm
-		 * spinlock will prevent people from getting and more
-		 * refs on it -- there's no need to keep the lockres
-		 * spinlock. */
 		spin_lock(&lockres->spinlock);
-		unused = __dlm_lockres_unused(lockres);
-		spin_unlock(&lockres->spinlock);
-
-		if (!unused)
-			continue;
 
 		purge_jiffies = lockres->last_used +
 			msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
@@ -273,15 +247,28 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 			 * in tail order, we can stop at the first
 			 * unpurgable resource -- anyone added after
 			 * him will have a greater last_used value */
+			spin_unlock(&lockres->spinlock);
 			break;
 		}
 
+		/* Status of the lockres *might* change so double
+		 * check. If the lockres is unused, holding the dlm
+		 * spinlock will prevent people from getting and more
+		 * refs on it. */
+		unused = __dlm_lockres_unused(lockres);
+		if (!unused ||
+		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+			mlog(0, "lockres %s:%.*s: is in use or "
+			     "being remastered\n", dlm->name,
+			     lockres->lockname.len, lockres->lockname.name);
+			list_move_tail(&dlm->purge_list, &lockres->purge);
+			spin_unlock(&lockres->spinlock);
+			continue;
+		}
+
 		dlm_lockres_get(lockres);
 
-		/* This may drop and reacquire the dlm spinlock if it
-		 * has to do migration. */
-		if (dlm_purge_lockres(dlm, lockres))
-			BUG();
+		dlm_purge_lockres(dlm, lockres);
 
 		dlm_lockres_put(lockres);
 
-- 
1.5.6.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
  2010-07-12 18:21 ` Joel Becker
@ 2010-07-12 22:24   ` Srinivas Eeda
  0 siblings, 0 replies; 10+ messages in thread
From: Srinivas Eeda @ 2010-07-12 22:24 UTC (permalink / raw)
  To: ocfs2-devel

Joel Becker wrote:
> On Tue, Jun 22, 2010 at 10:48:28PM -0700, Srinivas Eeda wrote:
>   
>> +	if (!__dlm_lockres_unused) {
>> +		mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
>> +		     dlm->name, res->lockname.len, res->lockname.name);
>> +		__dlm_print_one_lock_resource(res);
>> +		BUG();
>> +	}
>>     
>
> /build/jlbec/linux-2.6/working/fs/ocfs2/dlm/dlmthread.c: In function 'dlm_purge_lockres':
> /build/jlbec/linux-2.6/working/fs/ocfs2/dlm/dlmthread.c:203: warning: the address of '__dlm_lockres_unused' will always evaluate as 'true'
>
> 	Was this even tested?  I'm leaving this patch out of 'fixes'
> until corrected and tested.
>   
Sorry, I had the typo while making the review changes. I ran the usual 
tests but that didn't catch this problem. I should have paid more 
attention to the build log.

I made the change and tested it, will send you the modified patch
> Joel
>
>   

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
  2010-07-12 22:16 Srinivas Eeda
@ 2010-07-12 22:39 ` Joel Becker
  0 siblings, 0 replies; 10+ messages in thread
From: Joel Becker @ 2010-07-12 22:39 UTC (permalink / raw)
  To: ocfs2-devel

On Mon, Jul 12, 2010 at 03:16:28PM -0700, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist
> 
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
> 
> 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
> spinlock is reacquired but in this window lockres can get reused. This leads
> to BUG.
> 
> This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
>  next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
> lockres spinlock protecting it from getting reused.
> 
> Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
> Signed-off-by: Sunil Mushran<sunil.mushran@oracle.com>

	Has this been tested with a testcase that would fail before the
change?
	Also, don't add SoB lines for kernel patches.  You can add
Acked-by for Sunil, but SoB only works for the chain that goes upstream.

Joel

-- 

"War doesn't determine who's right; war determines who's left."

Joel Becker
Consulting Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2010-07-12 22:39 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-06-22 17:33 [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3 Srinivas Eeda
2010-06-22 23:22 ` Sunil Mushran
  -- strict thread matches above, loose matches on Subject: below --
2010-06-23  5:48 Srinivas Eeda
2010-06-23 17:00 ` Sunil Mushran
2010-06-23 17:25   ` Srinivas Eeda
2010-06-26 10:42 ` Wengang Wang
2010-07-12 18:21 ` Joel Becker
2010-07-12 22:24   ` Srinivas Eeda
2010-07-12 22:16 Srinivas Eeda
2010-07-12 22:39 ` Joel Becker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).