* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
@ 2010-06-22 17:33 Srinivas Eeda
2010-06-22 23:22 ` Sunil Mushran
0 siblings, 1 reply; 10+ messages in thread
From: Srinivas Eeda @ 2010-06-22 17:33 UTC (permalink / raw)
To: ocfs2-devel
There are two problems in dlm_run_purgelist
1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
the same lockres instead of trying the next lockres.
2. When a lockres is found unused, dlm_run_purgelist releases the lockres spinlock
before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres. The
spinlock is reacquired, but in this window the lockres can get reused. This leads
to a BUG.
This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
lockres spinlock protecting it from getting reused.
Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
---
fs/ocfs2/dlm/dlmthread.c | 76 ++++++++++++++++++++--------------------------
1 files changed, 33 insertions(+), 43 deletions(-)
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1f..cb74689 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -152,45 +152,26 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
spin_unlock(&dlm->spinlock);
}
-static int dlm_purge_lockres(struct dlm_ctxt *dlm,
+static void dlm_purge_lockres(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
int master;
int ret = 0;
- spin_lock(&res->spinlock);
- if (!__dlm_lockres_unused(res)) {
- mlog(0, "%s:%.*s: tried to purge but not unused\n",
- dlm->name, res->lockname.len, res->lockname.name);
- __dlm_print_one_lock_resource(res);
- spin_unlock(&res->spinlock);
- BUG();
- }
-
- if (res->state & DLM_LOCK_RES_MIGRATING) {
- mlog(0, "%s:%.*s: Delay dropref as this lockres is "
- "being remastered\n", dlm->name, res->lockname.len,
- res->lockname.name);
- /* Re-add the lockres to the end of the purge list */
- if (!list_empty(&res->purge)) {
- list_del_init(&res->purge);
- list_add_tail(&res->purge, &dlm->purge_list);
- }
- spin_unlock(&res->spinlock);
- return 0;
- }
+ assert_spin_locked(&dlm->spinlock);
+ assert_spin_locked(&res->spinlock);
master = (res->owner == dlm->node_num);
if (!master)
res->state |= DLM_LOCK_RES_DROPPING_REF;
- spin_unlock(&res->spinlock);
mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
res->lockname.name, master);
if (!master) {
/* drop spinlock... retake below */
+ spin_unlock(&res->spinlock);
spin_unlock(&dlm->spinlock);
spin_lock(&res->spinlock);
@@ -208,31 +189,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
dlm->name, res->lockname.len, res->lockname.name, ret);
spin_lock(&dlm->spinlock);
+ spin_lock(&res->spinlock);
}
- spin_lock(&res->spinlock);
if (!list_empty(&res->purge)) {
mlog(0, "removing lockres %.*s:%p from purgelist, "
"master = %d\n", res->lockname.len, res->lockname.name,
res, master);
list_del_init(&res->purge);
- spin_unlock(&res->spinlock);
dlm_lockres_put(res);
dlm->purge_count--;
- } else
- spin_unlock(&res->spinlock);
+ }
- __dlm_unhash_lockres(res);
+ if (__dlm_lockres_unused(res))
+ __dlm_unhash_lockres(res);
+ else {
+ mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+ dlm->name, res->lockname.len, res->lockname.name);
+ __dlm_print_one_lock_resource(res);
+ BUG();
+ }
/* lockres is not in the hash now. drop the flag and wake up
* any processes waiting in dlm_get_lock_resource. */
if (!master) {
- spin_lock(&res->spinlock);
res->state &= ~DLM_LOCK_RES_DROPPING_REF;
spin_unlock(&res->spinlock);
wake_up(&res->wq);
- }
- return 0;
+ } else
+ spin_unlock(&res->spinlock);
}
static void dlm_run_purge_list(struct dlm_ctxt *dlm,
@@ -251,17 +236,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
lockres = list_entry(dlm->purge_list.next,
struct dlm_lock_resource, purge);
- /* Status of the lockres *might* change so double
- * check. If the lockres is unused, holding the dlm
- * spinlock will prevent people from getting and more
- * refs on it -- there's no need to keep the lockres
- * spinlock. */
spin_lock(&lockres->spinlock);
- unused = __dlm_lockres_unused(lockres);
- spin_unlock(&lockres->spinlock);
-
- if (!unused)
- continue;
purge_jiffies = lockres->last_used +
msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
@@ -273,15 +248,30 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
* in tail order, we can stop at the first
* unpurgable resource -- anyone added after
* him will have a greater last_used value */
+ spin_unlock(&lockres->spinlock);
break;
}
+ /* Status of the lockres *might* change so double
+ * check. If the lockres is unused, holding the dlm
+ * spinlock will prevent people from getting and more
+ * refs on it. */
+ unused = __dlm_lockres_unused(lockres);
+ if (!unused ||
+ (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+ mlog(0, "lockres %s:%.*s: is in use or "
+ "being remastered\n", dlm->name,
+ lockres->lockname.len, lockres->lockname.name);
+ list_move_tail(&dlm->purge_list, &lockres->purge);
+ spin_unlock(&lockres->spinlock);
+ continue;
+ }
+
dlm_lockres_get(lockres);
/* This may drop and reacquire the dlm spinlock if it
* has to do migration. */
- if (dlm_purge_lockres(dlm, lockres))
- BUG();
+ dlm_purge_lockres(dlm, lockres);
dlm_lockres_put(lockres);
--
1.5.6.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
2010-06-22 17:33 [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3 Srinivas Eeda
@ 2010-06-22 23:22 ` Sunil Mushran
0 siblings, 0 replies; 10+ messages in thread
From: Sunil Mushran @ 2010-06-22 23:22 UTC (permalink / raw)
To: ocfs2-devel
Comments are inlined below. Getting there — just some more cleanups needed.
On 06/22/2010 10:33 AM, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist
>
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
>
> 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
> spinlock is reacquired but in this window lockres can get reused. This leads
> to BUG.
>
> This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
> next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
> lockres spinlock protecting it from getting reused.
>
> Signed-off-by: Srinivas Eeda<srinivas.eeda@oracle.com>
> ---
> fs/ocfs2/dlm/dlmthread.c | 76 ++++++++++++++++++++--------------------------
> 1 files changed, 33 insertions(+), 43 deletions(-)
>
> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
> index 11a6d1f..cb74689 100644
> --- a/fs/ocfs2/dlm/dlmthread.c
> +++ b/fs/ocfs2/dlm/dlmthread.c
> @@ -152,45 +152,26 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
> spin_unlock(&dlm->spinlock);
> }
>
> -static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> +static void dlm_purge_lockres(struct dlm_ctxt *dlm,
> struct dlm_lock_resource *res)
> {
> int master;
> int ret = 0;
>
> - spin_lock(&res->spinlock);
> - if (!__dlm_lockres_unused(res)) {
> - mlog(0, "%s:%.*s: tried to purge but not unused\n",
> - dlm->name, res->lockname.len, res->lockname.name);
> - __dlm_print_one_lock_resource(res);
> - spin_unlock(&res->spinlock);
> - BUG();
> - }
> -
> - if (res->state& DLM_LOCK_RES_MIGRATING) {
> - mlog(0, "%s:%.*s: Delay dropref as this lockres is "
> - "being remastered\n", dlm->name, res->lockname.len,
> - res->lockname.name);
> - /* Re-add the lockres to the end of the purge list */
> - if (!list_empty(&res->purge)) {
> - list_del_init(&res->purge);
> - list_add_tail(&res->purge,&dlm->purge_list);
> - }
> - spin_unlock(&res->spinlock);
> - return 0;
> - }
> + assert_spin_locked(&dlm->spinlock);
> + assert_spin_locked(&res->spinlock);
>
> master = (res->owner == dlm->node_num);
>
> if (!master)
> res->state |= DLM_LOCK_RES_DROPPING_REF;
> - spin_unlock(&res->spinlock);
>
> mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
> res->lockname.name, master);
>
> if (!master) {
> /* drop spinlock... retake below */
> + spin_unlock(&res->spinlock);
> spin_unlock(&dlm->spinlock);
>
> spin_lock(&res->spinlock);
>
This could do with some more cleanup.
+ assert_spin_locked(&dlm->spinlock);
+ assert_spin_locked(&res->spinlock);
master = (res->owner == dlm->node_num);
- if (!master)
- res->state |= DLM_LOCK_RES_DROPPING_REF;
- spin_unlock(&res->spinlock);
mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
res->lockname.name, master);
if (!master) {
+ res->state |= DLM_LOCK_RES_DROPPING_REF;
- /* drop spinlock... retake below */
+ spin_unlock(&res->spinlock);
spin_unlock(&dlm->spinlock);
spin_lock(&res->spinlock);
> @@ -208,31 +189,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
> dlm->name, res->lockname.len, res->lockname.name, ret);
> spin_lock(&dlm->spinlock);
> + spin_lock(&res->spinlock);
> }
>
> - spin_lock(&res->spinlock);
> if (!list_empty(&res->purge)) {
> mlog(0, "removing lockres %.*s:%p from purgelist, "
> "master = %d\n", res->lockname.len, res->lockname.name,
> res, master);
> list_del_init(&res->purge);
> - spin_unlock(&res->spinlock);
> dlm_lockres_put(res);
> dlm->purge_count--;
> - } else
> - spin_unlock(&res->spinlock);
> + }
>
> - __dlm_unhash_lockres(res);
> + if (__dlm_lockres_unused(res))
> + __dlm_unhash_lockres(res);
> + else {
> + mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
> + dlm->name, res->lockname.len, res->lockname.name);
> + __dlm_print_one_lock_resource(res);
> + BUG();
> + }
>
Blocks that BUG should be separated if possible. Improves readability.
+ if (!__dlm_lockres_unused(res)) {
+ mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+ dlm->name, res->lockname.len, res->lockname.name);
+ __dlm_print_one_lock_resource(res);
+ BUG();
+ }
__dlm_unhash_lockres(res);
>
> /* lockres is not in the hash now. drop the flag and wake up
> * any processes waiting in dlm_get_lock_resource. */
> if (!master) {
> - spin_lock(&res->spinlock);
> res->state&= ~DLM_LOCK_RES_DROPPING_REF;
> spin_unlock(&res->spinlock);
> wake_up(&res->wq);
> - }
> - return 0;
> + } else
> + spin_unlock(&res->spinlock);
> }
>
> static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> @@ -251,17 +236,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> lockres = list_entry(dlm->purge_list.next,
> struct dlm_lock_resource, purge);
>
> - /* Status of the lockres *might* change so double
> - * check. If the lockres is unused, holding the dlm
> - * spinlock will prevent people from getting and more
> - * refs on it -- there's no need to keep the lockres
> - * spinlock. */
> spin_lock(&lockres->spinlock);
> - unused = __dlm_lockres_unused(lockres);
> - spin_unlock(&lockres->spinlock);
> -
> - if (!unused)
> - continue;
>
> purge_jiffies = lockres->last_used +
> msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
> @@ -273,15 +248,30 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> * in tail order, we can stop at the first
> * unpurgable resource -- anyone added after
> * him will have a greater last_used value */
> + spin_unlock(&lockres->spinlock);
> break;
> }
>
> + /* Status of the lockres *might* change so double
> + * check. If the lockres is unused, holding the dlm
> + * spinlock will prevent people from getting and more
> + * refs on it. */
> + unused = __dlm_lockres_unused(lockres);
> + if (!unused ||
> + (lockres->state& DLM_LOCK_RES_MIGRATING)) {
> + mlog(0, "lockres %s:%.*s: is in use or "
> + "being remastered\n", dlm->name,
> + lockres->lockname.len, lockres->lockname.name);
> + list_move_tail(&dlm->purge_list,&lockres->purge);
> + spin_unlock(&lockres->spinlock);
> + continue;
> + }
> +
> dlm_lockres_get(lockres);
>
> /* This may drop and reacquire the dlm spinlock if it
> * has to do migration. */
> - if (dlm_purge_lockres(dlm, lockres))
> - BUG();
> + dlm_purge_lockres(dlm, lockres);
>
> dlm_lockres_put(lockres);
>
>
This comment is dated. We don't migrate in purge_lockres.
- /* This may drop and reacquire the dlm spinlock if it
- * has to do migration. */
- if (dlm_purge_lockres(dlm, lockres))
- BUG();
+ dlm_purge_lockres(dlm, lockres);
dlm_lockres_put(lockres);
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
@ 2010-06-23 5:48 Srinivas Eeda
2010-06-23 17:00 ` Sunil Mushran
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: Srinivas Eeda @ 2010-06-23 5:48 UTC (permalink / raw)
To: ocfs2-devel
There are two problems in dlm_run_purgelist
1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
the same lockres instead of trying the next lockres.
2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
spinlock is reacquired but in this window lockres can get reused. This leads
to BUG.
This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
lockres spinlock protecting it from getting reused.
Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
---
fs/ocfs2/dlm/dlmthread.c | 79 +++++++++++++++++++--------------------------
1 files changed, 33 insertions(+), 46 deletions(-)
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1f..6822f9a 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
spin_unlock(&dlm->spinlock);
}
-static int dlm_purge_lockres(struct dlm_ctxt *dlm,
+static void dlm_purge_lockres(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
int master;
int ret = 0;
- spin_lock(&res->spinlock);
- if (!__dlm_lockres_unused(res)) {
- mlog(0, "%s:%.*s: tried to purge but not unused\n",
- dlm->name, res->lockname.len, res->lockname.name);
- __dlm_print_one_lock_resource(res);
- spin_unlock(&res->spinlock);
- BUG();
- }
-
- if (res->state & DLM_LOCK_RES_MIGRATING) {
- mlog(0, "%s:%.*s: Delay dropref as this lockres is "
- "being remastered\n", dlm->name, res->lockname.len,
- res->lockname.name);
- /* Re-add the lockres to the end of the purge list */
- if (!list_empty(&res->purge)) {
- list_del_init(&res->purge);
- list_add_tail(&res->purge, &dlm->purge_list);
- }
- spin_unlock(&res->spinlock);
- return 0;
- }
+ assert_spin_locked(&dlm->spinlock);
+ assert_spin_locked(&res->spinlock);
master = (res->owner == dlm->node_num);
- if (!master)
- res->state |= DLM_LOCK_RES_DROPPING_REF;
- spin_unlock(&res->spinlock);
mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
res->lockname.name, master);
if (!master) {
+ res->state |= DLM_LOCK_RES_DROPPING_REF;
/* drop spinlock... retake below */
+ spin_unlock(&res->spinlock);
spin_unlock(&dlm->spinlock);
spin_lock(&res->spinlock);
@@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
dlm->name, res->lockname.len, res->lockname.name, ret);
spin_lock(&dlm->spinlock);
+ spin_lock(&res->spinlock);
}
- spin_lock(&res->spinlock);
if (!list_empty(&res->purge)) {
mlog(0, "removing lockres %.*s:%p from purgelist, "
"master = %d\n", res->lockname.len, res->lockname.name,
res, master);
list_del_init(&res->purge);
- spin_unlock(&res->spinlock);
dlm_lockres_put(res);
dlm->purge_count--;
- } else
- spin_unlock(&res->spinlock);
+ }
+
+ if (!__dlm_lockres_unused) {
+ mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+ dlm->name, res->lockname.len, res->lockname.name);
+ __dlm_print_one_lock_resource(res);
+ BUG();
+ }
__dlm_unhash_lockres(res);
/* lockres is not in the hash now. drop the flag and wake up
* any processes waiting in dlm_get_lock_resource. */
if (!master) {
- spin_lock(&res->spinlock);
res->state &= ~DLM_LOCK_RES_DROPPING_REF;
spin_unlock(&res->spinlock);
wake_up(&res->wq);
- }
- return 0;
+ } else
+ spin_unlock(&res->spinlock);
}
static void dlm_run_purge_list(struct dlm_ctxt *dlm,
@@ -251,17 +235,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
lockres = list_entry(dlm->purge_list.next,
struct dlm_lock_resource, purge);
- /* Status of the lockres *might* change so double
- * check. If the lockres is unused, holding the dlm
- * spinlock will prevent people from getting and more
- * refs on it -- there's no need to keep the lockres
- * spinlock. */
spin_lock(&lockres->spinlock);
- unused = __dlm_lockres_unused(lockres);
- spin_unlock(&lockres->spinlock);
-
- if (!unused)
- continue;
purge_jiffies = lockres->last_used +
msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
@@ -273,15 +247,28 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
* in tail order, we can stop at the first
* unpurgable resource -- anyone added after
* him will have a greater last_used value */
+ spin_unlock(&lockres->spinlock);
break;
}
+ /* Status of the lockres *might* change so double
+ * check. If the lockres is unused, holding the dlm
+ * spinlock will prevent people from getting and more
+ * refs on it. */
+ unused = __dlm_lockres_unused(lockres);
+ if (!unused ||
+ (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+ mlog(0, "lockres %s:%.*s: is in use or "
+ "being remastered\n", dlm->name,
+ lockres->lockname.len, lockres->lockname.name);
+ list_move_tail(&dlm->purge_list, &lockres->purge);
+ spin_unlock(&lockres->spinlock);
+ continue;
+ }
+
dlm_lockres_get(lockres);
- /* This may drop and reacquire the dlm spinlock if it
- * has to do migration. */
- if (dlm_purge_lockres(dlm, lockres))
- BUG();
+ dlm_purge_lockres(dlm, lockres);
dlm_lockres_put(lockres);
--
1.5.6.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
2010-06-23 5:48 Srinivas Eeda
@ 2010-06-23 17:00 ` Sunil Mushran
2010-06-23 17:25 ` Srinivas Eeda
2010-06-26 10:42 ` Wengang Wang
2010-07-12 18:21 ` Joel Becker
2 siblings, 1 reply; 10+ messages in thread
From: Sunil Mushran @ 2010-06-23 17:00 UTC (permalink / raw)
To: ocfs2-devel
Signed-off-by: Sunil Mushran<sunil.mushran@oracle.com>
On 06/22/2010 10:48 PM, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist
>
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
>
> 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
> spinlock is reacquired but in this window lockres can get reused. This leads
> to BUG.
>
> This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
> next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
> lockres spinlock protecting it from getting reused.
>
> Signed-off-by: Srinivas Eeda<srinivas.eeda@oracle.com>
> ---
> fs/ocfs2/dlm/dlmthread.c | 79 +++++++++++++++++++--------------------------
> 1 files changed, 33 insertions(+), 46 deletions(-)
>
> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
> index 11a6d1f..6822f9a 100644
> --- a/fs/ocfs2/dlm/dlmthread.c
> +++ b/fs/ocfs2/dlm/dlmthread.c
> @@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
> spin_unlock(&dlm->spinlock);
> }
>
> -static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> +static void dlm_purge_lockres(struct dlm_ctxt *dlm,
> struct dlm_lock_resource *res)
> {
> int master;
> int ret = 0;
>
> - spin_lock(&res->spinlock);
> - if (!__dlm_lockres_unused(res)) {
> - mlog(0, "%s:%.*s: tried to purge but not unused\n",
> - dlm->name, res->lockname.len, res->lockname.name);
> - __dlm_print_one_lock_resource(res);
> - spin_unlock(&res->spinlock);
> - BUG();
> - }
> -
> - if (res->state& DLM_LOCK_RES_MIGRATING) {
> - mlog(0, "%s:%.*s: Delay dropref as this lockres is "
> - "being remastered\n", dlm->name, res->lockname.len,
> - res->lockname.name);
> - /* Re-add the lockres to the end of the purge list */
> - if (!list_empty(&res->purge)) {
> - list_del_init(&res->purge);
> - list_add_tail(&res->purge,&dlm->purge_list);
> - }
> - spin_unlock(&res->spinlock);
> - return 0;
> - }
> + assert_spin_locked(&dlm->spinlock);
> + assert_spin_locked(&res->spinlock);
>
> master = (res->owner == dlm->node_num);
>
> - if (!master)
> - res->state |= DLM_LOCK_RES_DROPPING_REF;
> - spin_unlock(&res->spinlock);
>
> mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
> res->lockname.name, master);
>
> if (!master) {
> + res->state |= DLM_LOCK_RES_DROPPING_REF;
> /* drop spinlock... retake below */
> + spin_unlock(&res->spinlock);
> spin_unlock(&dlm->spinlock);
>
> spin_lock(&res->spinlock);
> @@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
> dlm->name, res->lockname.len, res->lockname.name, ret);
> spin_lock(&dlm->spinlock);
> + spin_lock(&res->spinlock);
> }
>
> - spin_lock(&res->spinlock);
> if (!list_empty(&res->purge)) {
> mlog(0, "removing lockres %.*s:%p from purgelist, "
> "master = %d\n", res->lockname.len, res->lockname.name,
> res, master);
> list_del_init(&res->purge);
> - spin_unlock(&res->spinlock);
> dlm_lockres_put(res);
> dlm->purge_count--;
> - } else
> - spin_unlock(&res->spinlock);
> + }
> +
> + if (!__dlm_lockres_unused) {
> + mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
> + dlm->name, res->lockname.len, res->lockname.name);
> + __dlm_print_one_lock_resource(res);
> + BUG();
> + }
>
> __dlm_unhash_lockres(res);
>
> /* lockres is not in the hash now. drop the flag and wake up
> * any processes waiting in dlm_get_lock_resource. */
> if (!master) {
> - spin_lock(&res->spinlock);
> res->state&= ~DLM_LOCK_RES_DROPPING_REF;
> spin_unlock(&res->spinlock);
> wake_up(&res->wq);
> - }
> - return 0;
> + } else
> + spin_unlock(&res->spinlock);
> }
>
> static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> @@ -251,17 +235,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> lockres = list_entry(dlm->purge_list.next,
> struct dlm_lock_resource, purge);
>
> - /* Status of the lockres *might* change so double
> - * check. If the lockres is unused, holding the dlm
> - * spinlock will prevent people from getting and more
> - * refs on it -- there's no need to keep the lockres
> - * spinlock. */
> spin_lock(&lockres->spinlock);
> - unused = __dlm_lockres_unused(lockres);
> - spin_unlock(&lockres->spinlock);
> -
> - if (!unused)
> - continue;
>
> purge_jiffies = lockres->last_used +
> msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
> @@ -273,15 +247,28 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
> * in tail order, we can stop at the first
> * unpurgable resource -- anyone added after
> * him will have a greater last_used value */
> + spin_unlock(&lockres->spinlock);
> break;
> }
>
> + /* Status of the lockres *might* change so double
> + * check. If the lockres is unused, holding the dlm
> + * spinlock will prevent people from getting and more
> + * refs on it. */
> + unused = __dlm_lockres_unused(lockres);
> + if (!unused ||
> + (lockres->state& DLM_LOCK_RES_MIGRATING)) {
> + mlog(0, "lockres %s:%.*s: is in use or "
> + "being remastered\n", dlm->name,
> + lockres->lockname.len, lockres->lockname.name);
> + list_move_tail(&dlm->purge_list,&lockres->purge);
> + spin_unlock(&lockres->spinlock);
> + continue;
> + }
> +
> dlm_lockres_get(lockres);
>
> - /* This may drop and reacquire the dlm spinlock if it
> - * has to do migration. */
> - if (dlm_purge_lockres(dlm, lockres))
> - BUG();
> + dlm_purge_lockres(dlm, lockres);
>
> dlm_lockres_put(lockres);
>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
2010-06-23 17:00 ` Sunil Mushran
@ 2010-06-23 17:25 ` Srinivas Eeda
0 siblings, 0 replies; 10+ messages in thread
From: Srinivas Eeda @ 2010-06-23 17:25 UTC (permalink / raw)
To: ocfs2-devel
Sunil, Joel, Wengang. Thanks for reviewing the patch and your comments.
On 6/23/2010 10:00 AM, Sunil Mushran wrote:
> Signed-off-by: Sunil Mushran<sunil.mushran@oracle.com>
>
>
> On 06/22/2010 10:48 PM, Srinivas Eeda wrote:
>> There are two problems in dlm_run_purgelist
>>
>> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying
>> to purge
>> the same lockres instead of trying the next lockres.
>>
>> 2. When a lockres is found unused, dlm_run_purgelist releases lockres
>> spinlock
>> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
>> spinlock is reacquired but in this window lockres can get reused.
>> This leads
>> to BUG.
>>
>> This patch modifies dlm_run_purgelist to skip lockres if it's in use
>> and purge
>> next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before
>> releasing the
>> lockres spinlock protecting it from getting reused.
>>
>> Signed-off-by: Srinivas Eeda<srinivas.eeda@oracle.com>
>> ---
>> fs/ocfs2/dlm/dlmthread.c | 79
>> +++++++++++++++++++--------------------------
>> 1 files changed, 33 insertions(+), 46 deletions(-)
>>
>> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
>> index 11a6d1f..6822f9a 100644
>> --- a/fs/ocfs2/dlm/dlmthread.c
>> +++ b/fs/ocfs2/dlm/dlmthread.c
>> @@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
>> spin_unlock(&dlm->spinlock);
>> }
>>
>> -static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>> +static void dlm_purge_lockres(struct dlm_ctxt *dlm,
>> struct dlm_lock_resource *res)
>> {
>> int master;
>> int ret = 0;
>>
>> - spin_lock(&res->spinlock);
>> - if (!__dlm_lockres_unused(res)) {
>> - mlog(0, "%s:%.*s: tried to purge but not unused\n",
>> - dlm->name, res->lockname.len, res->lockname.name);
>> - __dlm_print_one_lock_resource(res);
>> - spin_unlock(&res->spinlock);
>> - BUG();
>> - }
>> -
>> - if (res->state& DLM_LOCK_RES_MIGRATING) {
>> - mlog(0, "%s:%.*s: Delay dropref as this lockres is "
>> - "being remastered\n", dlm->name, res->lockname.len,
>> - res->lockname.name);
>> - /* Re-add the lockres to the end of the purge list */
>> - if (!list_empty(&res->purge)) {
>> - list_del_init(&res->purge);
>> - list_add_tail(&res->purge,&dlm->purge_list);
>> - }
>> - spin_unlock(&res->spinlock);
>> - return 0;
>> - }
>> + assert_spin_locked(&dlm->spinlock);
>> + assert_spin_locked(&res->spinlock);
>>
>> master = (res->owner == dlm->node_num);
>>
>> - if (!master)
>> - res->state |= DLM_LOCK_RES_DROPPING_REF;
>> - spin_unlock(&res->spinlock);
>>
>> mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
>> res->lockname.name, master);
>>
>> if (!master) {
>> + res->state |= DLM_LOCK_RES_DROPPING_REF;
>> /* drop spinlock... retake below */
>> + spin_unlock(&res->spinlock);
>> spin_unlock(&dlm->spinlock);
>>
>> spin_lock(&res->spinlock);
>> @@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
>> mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
>> dlm->name, res->lockname.len, res->lockname.name, ret);
>> spin_lock(&dlm->spinlock);
>> + spin_lock(&res->spinlock);
>> }
>>
>> - spin_lock(&res->spinlock);
>> if (!list_empty(&res->purge)) {
>> mlog(0, "removing lockres %.*s:%p from purgelist, "
>> "master = %d\n", res->lockname.len, res->lockname.name,
>> res, master);
>> list_del_init(&res->purge);
>> - spin_unlock(&res->spinlock);
>> dlm_lockres_put(res);
>> dlm->purge_count--;
>> - } else
>> - spin_unlock(&res->spinlock);
>> + }
>> +
>> + if (!__dlm_lockres_unused) {
>> + mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
>> + dlm->name, res->lockname.len, res->lockname.name);
>> + __dlm_print_one_lock_resource(res);
>> + BUG();
>> + }
>>
>> __dlm_unhash_lockres(res);
>>
>> /* lockres is not in the hash now. drop the flag and wake up
>> * any processes waiting in dlm_get_lock_resource. */
>> if (!master) {
>> - spin_lock(&res->spinlock);
>> res->state&= ~DLM_LOCK_RES_DROPPING_REF;
>> spin_unlock(&res->spinlock);
>> wake_up(&res->wq);
>> - }
>> - return 0;
>> + } else
>> + spin_unlock(&res->spinlock);
>> }
>>
>> static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>> @@ -251,17 +235,7 @@ static void dlm_run_purge_list(struct dlm_ctxt
>> *dlm,
>> lockres = list_entry(dlm->purge_list.next,
>> struct dlm_lock_resource, purge);
>>
>> - /* Status of the lockres *might* change so double
>> - * check. If the lockres is unused, holding the dlm
>> - * spinlock will prevent people from getting and more
>> - * refs on it -- there's no need to keep the lockres
>> - * spinlock. */
>> spin_lock(&lockres->spinlock);
>> - unused = __dlm_lockres_unused(lockres);
>> - spin_unlock(&lockres->spinlock);
>> -
>> - if (!unused)
>> - continue;
>>
>> purge_jiffies = lockres->last_used +
>> msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
>> @@ -273,15 +247,28 @@ static void dlm_run_purge_list(struct dlm_ctxt
>> *dlm,
>> * in tail order, we can stop at the first
>> * unpurgable resource -- anyone added after
>> * him will have a greater last_used value */
>> + spin_unlock(&lockres->spinlock);
>> break;
>> }
>>
>> + /* Status of the lockres *might* change so double
>> + * check. If the lockres is unused, holding the dlm
>> + * spinlock will prevent people from getting and more
>> + * refs on it. */
>> + unused = __dlm_lockres_unused(lockres);
>> + if (!unused ||
>> + (lockres->state& DLM_LOCK_RES_MIGRATING)) {
>> + mlog(0, "lockres %s:%.*s: is in use or "
>> + "being remastered\n", dlm->name,
>> + lockres->lockname.len, lockres->lockname.name);
>> + list_move_tail(&dlm->purge_list,&lockres->purge);
>> + spin_unlock(&lockres->spinlock);
>> + continue;
>> + }
>> +
>> dlm_lockres_get(lockres);
>>
>> - /* This may drop and reacquire the dlm spinlock if it
>> - * has to do migration. */
>> - if (dlm_purge_lockres(dlm, lockres))
>> - BUG();
>> + dlm_purge_lockres(dlm, lockres);
>>
>> dlm_lockres_put(lockres);
>>
>>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
2010-06-23 5:48 Srinivas Eeda
2010-06-23 17:00 ` Sunil Mushran
@ 2010-06-26 10:42 ` Wengang Wang
2010-07-12 18:21 ` Joel Becker
2 siblings, 0 replies; 10+ messages in thread
From: Wengang Wang @ 2010-06-26 10:42 UTC (permalink / raw)
To: ocfs2-devel
A typo in your patch.
On 10-06-22 22:48, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist
>
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
>
> 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
> spinlock is reacquired but in this window lockres can get reused. This leads
> to BUG.
>
> This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
> next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
> lockres spinlock protecting it from getting reused.
>
> Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
> ---
> fs/ocfs2/dlm/dlmthread.c | 79 +++++++++++++++++++--------------------------
> 1 files changed, 33 insertions(+), 46 deletions(-)
>
> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
> index 11a6d1f..6822f9a 100644
> --- a/fs/ocfs2/dlm/dlmthread.c
> +++ b/fs/ocfs2/dlm/dlmthread.c
> @@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
> spin_unlock(&dlm->spinlock);
> }
>
> -static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> +static void dlm_purge_lockres(struct dlm_ctxt *dlm,
> struct dlm_lock_resource *res)
> {
> int master;
> int ret = 0;
>
> - spin_lock(&res->spinlock);
> - if (!__dlm_lockres_unused(res)) {
> - mlog(0, "%s:%.*s: tried to purge but not unused\n",
> - dlm->name, res->lockname.len, res->lockname.name);
> - __dlm_print_one_lock_resource(res);
> - spin_unlock(&res->spinlock);
> - BUG();
> - }
> -
> - if (res->state & DLM_LOCK_RES_MIGRATING) {
> - mlog(0, "%s:%.*s: Delay dropref as this lockres is "
> - "being remastered\n", dlm->name, res->lockname.len,
> - res->lockname.name);
> - /* Re-add the lockres to the end of the purge list */
> - if (!list_empty(&res->purge)) {
> - list_del_init(&res->purge);
> - list_add_tail(&res->purge, &dlm->purge_list);
> - }
> - spin_unlock(&res->spinlock);
> - return 0;
> - }
> + assert_spin_locked(&dlm->spinlock);
> + assert_spin_locked(&res->spinlock);
>
> master = (res->owner == dlm->node_num);
>
> - if (!master)
> - res->state |= DLM_LOCK_RES_DROPPING_REF;
> - spin_unlock(&res->spinlock);
>
> mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
> res->lockname.name, master);
>
> if (!master) {
> + res->state |= DLM_LOCK_RES_DROPPING_REF;
> /* drop spinlock... retake below */
> + spin_unlock(&res->spinlock);
> spin_unlock(&dlm->spinlock);
>
> spin_lock(&res->spinlock);
> @@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
> mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
> dlm->name, res->lockname.len, res->lockname.name, ret);
> spin_lock(&dlm->spinlock);
> + spin_lock(&res->spinlock);
> }
>
> - spin_lock(&res->spinlock);
> if (!list_empty(&res->purge)) {
> mlog(0, "removing lockres %.*s:%p from purgelist, "
> "master = %d\n", res->lockname.len, res->lockname.name,
> res, master);
> list_del_init(&res->purge);
> - spin_unlock(&res->spinlock);
> dlm_lockres_put(res);
> dlm->purge_count--;
> - } else
> - spin_unlock(&res->spinlock);
> + }
> +
> + if (!__dlm_lockres_unused) {
should not be
+ if (!__dlm_lockres_unused(res)) {
regards,
wengang.
> + mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
> + dlm->name, res->lockname.len, res->lockname.name);
> + __dlm_print_one_lock_resource(res);
> + BUG();
> + }
>
> __dlm_unhash_lockres(res);
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
2010-06-23 5:48 Srinivas Eeda
2010-06-23 17:00 ` Sunil Mushran
2010-06-26 10:42 ` Wengang Wang
@ 2010-07-12 18:21 ` Joel Becker
2010-07-12 22:24 ` Srinivas Eeda
2 siblings, 1 reply; 10+ messages in thread
From: Joel Becker @ 2010-07-12 18:21 UTC (permalink / raw)
To: ocfs2-devel
On Tue, Jun 22, 2010 at 10:48:28PM -0700, Srinivas Eeda wrote:
> + if (!__dlm_lockres_unused) {
> + mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
> + dlm->name, res->lockname.len, res->lockname.name);
> + __dlm_print_one_lock_resource(res);
> + BUG();
> + }
/build/jlbec/linux-2.6/working/fs/ocfs2/dlm/dlmthread.c: In function ‘dlm_purge_lockres’:
/build/jlbec/linux-2.6/working/fs/ocfs2/dlm/dlmthread.c:203: warning: the address of ‘__dlm_lockres_unused’ will always evaluate as ‘true’
Was this even tested? I'm leaving this patch out of 'fixes'
until corrected and tested.
Joel
--
"Reader, suppose you were an idiot. And suppose you were a member of
Congress. But I repeat myself."
- Mark Twain
Joel Becker
Consulting Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
@ 2010-07-12 22:16 Srinivas Eeda
2010-07-12 22:39 ` Joel Becker
0 siblings, 1 reply; 10+ messages in thread
From: Srinivas Eeda @ 2010-07-12 22:16 UTC (permalink / raw)
To: ocfs2-devel
There are two problems in dlm_run_purgelist
1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
the same lockres instead of trying the next lockres.
2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
spinlock is reacquired but in this window lockres can get reused. This leads
to BUG.
This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
lockres spinlock protecting it from getting reused.
Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
---
fs/ocfs2/dlm/dlmthread.c | 79 +++++++++++++++++++--------------------------
1 files changed, 33 insertions(+), 46 deletions(-)
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1f..76ab4aa 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
spin_unlock(&dlm->spinlock);
}
-static int dlm_purge_lockres(struct dlm_ctxt *dlm,
+static void dlm_purge_lockres(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
int master;
int ret = 0;
- spin_lock(&res->spinlock);
- if (!__dlm_lockres_unused(res)) {
- mlog(0, "%s:%.*s: tried to purge but not unused\n",
- dlm->name, res->lockname.len, res->lockname.name);
- __dlm_print_one_lock_resource(res);
- spin_unlock(&res->spinlock);
- BUG();
- }
-
- if (res->state & DLM_LOCK_RES_MIGRATING) {
- mlog(0, "%s:%.*s: Delay dropref as this lockres is "
- "being remastered\n", dlm->name, res->lockname.len,
- res->lockname.name);
- /* Re-add the lockres to the end of the purge list */
- if (!list_empty(&res->purge)) {
- list_del_init(&res->purge);
- list_add_tail(&res->purge, &dlm->purge_list);
- }
- spin_unlock(&res->spinlock);
- return 0;
- }
+ assert_spin_locked(&dlm->spinlock);
+ assert_spin_locked(&res->spinlock);
master = (res->owner == dlm->node_num);
- if (!master)
- res->state |= DLM_LOCK_RES_DROPPING_REF;
- spin_unlock(&res->spinlock);
mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
res->lockname.name, master);
if (!master) {
+ res->state |= DLM_LOCK_RES_DROPPING_REF;
/* drop spinlock... retake below */
+ spin_unlock(&res->spinlock);
spin_unlock(&dlm->spinlock);
spin_lock(&res->spinlock);
@@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
dlm->name, res->lockname.len, res->lockname.name, ret);
spin_lock(&dlm->spinlock);
+ spin_lock(&res->spinlock);
}
- spin_lock(&res->spinlock);
if (!list_empty(&res->purge)) {
mlog(0, "removing lockres %.*s:%p from purgelist, "
"master = %d\n", res->lockname.len, res->lockname.name,
res, master);
list_del_init(&res->purge);
- spin_unlock(&res->spinlock);
dlm_lockres_put(res);
dlm->purge_count--;
- } else
- spin_unlock(&res->spinlock);
+ }
+
+ if (!__dlm_lockres_unused(res)) {
+ mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+ dlm->name, res->lockname.len, res->lockname.name);
+ __dlm_print_one_lock_resource(res);
+ BUG();
+ }
__dlm_unhash_lockres(res);
/* lockres is not in the hash now. drop the flag and wake up
* any processes waiting in dlm_get_lock_resource. */
if (!master) {
- spin_lock(&res->spinlock);
res->state &= ~DLM_LOCK_RES_DROPPING_REF;
spin_unlock(&res->spinlock);
wake_up(&res->wq);
- }
- return 0;
+ } else
+ spin_unlock(&res->spinlock);
}
static void dlm_run_purge_list(struct dlm_ctxt *dlm,
@@ -251,17 +235,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
lockres = list_entry(dlm->purge_list.next,
struct dlm_lock_resource, purge);
- /* Status of the lockres *might* change so double
- * check. If the lockres is unused, holding the dlm
- * spinlock will prevent people from getting and more
- * refs on it -- there's no need to keep the lockres
- * spinlock. */
spin_lock(&lockres->spinlock);
- unused = __dlm_lockres_unused(lockres);
- spin_unlock(&lockres->spinlock);
-
- if (!unused)
- continue;
purge_jiffies = lockres->last_used +
msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
@@ -273,15 +247,28 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
* in tail order, we can stop at the first
* unpurgable resource -- anyone added after
* him will have a greater last_used value */
+ spin_unlock(&lockres->spinlock);
break;
}
+ /* Status of the lockres *might* change so double
+ * check. If the lockres is unused, holding the dlm
+ * spinlock will prevent people from getting and more
+ * refs on it. */
+ unused = __dlm_lockres_unused(lockres);
+ if (!unused ||
+ (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+ mlog(0, "lockres %s:%.*s: is in use or "
+ "being remastered\n", dlm->name,
+ lockres->lockname.len, lockres->lockname.name);
+ list_move_tail(&dlm->purge_list, &lockres->purge);
+ spin_unlock(&lockres->spinlock);
+ continue;
+ }
+
dlm_lockres_get(lockres);
- /* This may drop and reacquire the dlm spinlock if it
- * has to do migration. */
- if (dlm_purge_lockres(dlm, lockres))
- BUG();
+ dlm_purge_lockres(dlm, lockres);
dlm_lockres_put(lockres);
--
1.5.6.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
2010-07-12 18:21 ` Joel Becker
@ 2010-07-12 22:24 ` Srinivas Eeda
0 siblings, 0 replies; 10+ messages in thread
From: Srinivas Eeda @ 2010-07-12 22:24 UTC (permalink / raw)
To: ocfs2-devel
Joel Becker wrote:
> On Tue, Jun 22, 2010 at 10:48:28PM -0700, Srinivas Eeda wrote:
>
>> + if (!__dlm_lockres_unused) {
>> + mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
>> + dlm->name, res->lockname.len, res->lockname.name);
>> + __dlm_print_one_lock_resource(res);
>> + BUG();
>> + }
>>
>
> /build/jlbec/linux-2.6/working/fs/ocfs2/dlm/dlmthread.c: In function ‘dlm_purge_lockres’:
> /build/jlbec/linux-2.6/working/fs/ocfs2/dlm/dlmthread.c:203: warning: the address of ‘__dlm_lockres_unused’ will always evaluate as ‘true’
>
> Was this even tested? I'm leaving this patch out of 'fixes'
> until corrected and tested.
>
Sorry, I had the typo while making the review changes. I ran the usual
tests but that didn't catch this problem. I should have paid more
attention to the build log.
I made the change and tested it, will send you the modified patch
> Joel
>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3
2010-07-12 22:16 Srinivas Eeda
@ 2010-07-12 22:39 ` Joel Becker
0 siblings, 0 replies; 10+ messages in thread
From: Joel Becker @ 2010-07-12 22:39 UTC (permalink / raw)
To: ocfs2-devel
On Mon, Jul 12, 2010 at 03:16:28PM -0700, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist
>
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
>
> 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock
> before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres.
> spinlock is reacquired but in this window lockres can get reused. This leads
> to BUG.
>
> This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge
> next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the
> lockres spinlock protecting it from getting reused.
>
> Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
> Signed-off-by: Sunil Mushran<sunil.mushran@oracle.com>
Has this been tested with a testcase that would fail before the
change?
Also, don't add SoB lines for kernel patches. You can add
Acked-by for Sunil, but SoB only works for the chain that goes upstream.
Joel
--
"War doesn't determine who's right; war determines who's left."
Joel Becker
Consulting Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2010-07-12 22:39 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-06-22 17:33 [Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist(bug 9094491) - rev3 Srinivas Eeda
2010-06-22 23:22 ` Sunil Mushran
-- strict thread matches above, loose matches on Subject: below --
2010-06-23 5:48 Srinivas Eeda
2010-06-23 17:00 ` Sunil Mushran
2010-06-23 17:25 ` Srinivas Eeda
2010-06-26 10:42 ` Wengang Wang
2010-07-12 18:21 ` Joel Becker
2010-07-12 22:24 ` Srinivas Eeda
2010-07-12 22:16 Srinivas Eeda
2010-07-12 22:39 ` Joel Becker
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).