From: Wengang Wang <wen.gang.wang@oracle.com>
To: ocfs2-devel@oss.oracle.com
Subject: [Ocfs2-devel] Deadlock in DLM code still there
Date: Fri, 14 May 2010 22:50:54 +0800 [thread overview]
Message-ID: <20100514145054.GA3431@laptop.oracle.com> (raw)
In-Reply-To: <20100513194320.GA28367@quack.suse.cz>
Hi,
On 10-05-13 21:43, Jan Kara wrote:
> Hi,
>
> in http://www.mail-archive.com/ocfs2-devel at oss.oracle.com/msg03188.html
> (more than an year ago) I've reported a lock inversion between dlm->ast_lock
> and res->spinlock. The deadlock seems to be still there in 2.6.34-rc7:
>
> =======================================================
> [ INFO: possible circular locking dependency detected ]
> 2.6.34-rc7-xen #4
> -------------------------------------------------------
> dlm_thread/2001 is trying to acquire lock:
> (&(&dlm->ast_lock)->rlock){+.+...}, at: [<ffffffffa0119785>] dlm_queue_bast+0x55/0x1e0 [ocfs2_dlm]
>
> but task is already holding lock:
> (&(&res->spinlock)->rlock){+.+...}, at: [<ffffffffa010452d>] dlm_thread+0x7cd/0x17f0 [ocfs2_dlm]
>
> which lock already depends on the new lock.
>
>
> the existing dependency chain (in reverse order) is:
>
> -> #1 (&(&res->spinlock)->rlock){+.+...}:
> [<ffffffff810746bf>] __lock_acquire+0x109f/0x1720
> [<ffffffff81074da9>] lock_acquire+0x69/0x90
> [<ffffffff81328c6c>] _raw_spin_lock+0x2c/0x40
> [<ffffffff8117e158>] _atomic_dec_and_lock+0x78/0xa0
> [<ffffffffa010ebb9>] dlm_lockres_release_ast+0x29/0xb0 [ocfs2_dlm]
> [<ffffffffa0104e41>] dlm_thread+0x10e1/0x17f0 [ocfs2_dlm]
> [<ffffffff81060e1e>] kthread+0x8e/0xa0
> [<ffffffff8100bda4>] kernel_thread_helper+0x4/0x10
>
> -> #0 (&(&dlm->ast_lock)->rlock){+.+...}:
> [<ffffffff81074b18>] __lock_acquire+0x14f8/0x1720
> [<ffffffff81074da9>] lock_acquire+0x69/0x90
> [<ffffffff81328c6c>] _raw_spin_lock+0x2c/0x40
> [<ffffffffa0119785>] dlm_queue_bast+0x55/0x1e0 [ocfs2_dlm]
> [<ffffffffa010494f>] dlm_thread+0xbef/0x17f0 [ocfs2_dlm]
> [<ffffffff81060e1e>] kthread+0x8e/0xa0
> [<ffffffff8100bda4>] kernel_thread_helper+0x4/0x10
The code path is simple:
in dlm_thread()
677 spin_lock(&res->spinlock);
.......
714 /* called while holding lockres lock */
715 dlm_shuffle_lists(dlm, res);
716 res->state &= ~DLM_LOCK_RES_DIRTY;
717 spin_unlock(&res->spinlock);
In dlm_shuffle_lists(), dlm_queue_bast()/dlm_queue_ast() are called.
In the later two functions, dlm->ast_lock is required.
Since we can't release res->spinlock, we have to take dlm->ast_lock
just before taking the res->spinlock(line 677) and release it after
res->spinlock is released. And use __dlm_queue_bast()/__dlm_queue_ast(),
the nolock version, in dlm_shuffle_lists().
There are no too many locks on a lockres, I think there is no performance
harm.
Comments?
Attached the patch(not well tested yet, will be tested if this approach is good).
At least with this patch Jan can continue with his test :P
Regards,
wengang.
-------------- next part --------------
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 12d5eb7..992b0ae 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -88,7 +88,7 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
return 0;
}
-static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
+void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
mlog_entry_void();
@@ -145,7 +145,7 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
}
-static void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
+void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
mlog_entry_void();
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 0102be3..453d955 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -904,6 +904,8 @@ void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
+void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
+void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void dlm_do_local_ast(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1f..d4f73ca 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -309,6 +309,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
* spinlock, and because we know that it is not migrating/
* recovering/in-progress, it is fine to reserve asts and
* basts right before queueing them all throughout */
+ assert_spin_locked(&dlm->ast_lock);
assert_spin_locked(&res->spinlock);
BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
DLM_LOCK_RES_RECOVERING|
@@ -337,7 +338,7 @@ converting:
/* queue the BAST if not already */
if (lock->ml.highest_blocked == LKM_IVMODE) {
__dlm_lockres_reserve_ast(res);
- dlm_queue_bast(dlm, lock);
+ __dlm_queue_bast(dlm, lock);
}
/* update the highest_blocked if needed */
if (lock->ml.highest_blocked < target->ml.convert_type)
@@ -355,7 +356,7 @@ converting:
can_grant = 0;
if (lock->ml.highest_blocked == LKM_IVMODE) {
__dlm_lockres_reserve_ast(res);
- dlm_queue_bast(dlm, lock);
+ __dlm_queue_bast(dlm, lock);
}
if (lock->ml.highest_blocked < target->ml.convert_type)
lock->ml.highest_blocked =
@@ -383,7 +384,7 @@ converting:
spin_unlock(&target->spinlock);
__dlm_lockres_reserve_ast(res);
- dlm_queue_ast(dlm, target);
+ __dlm_queue_ast(dlm, target);
/* go back and check for more */
goto converting;
}
@@ -402,7 +403,7 @@ blocked:
can_grant = 0;
if (lock->ml.highest_blocked == LKM_IVMODE) {
__dlm_lockres_reserve_ast(res);
- dlm_queue_bast(dlm, lock);
+ __dlm_queue_bast(dlm, lock);
}
if (lock->ml.highest_blocked < target->ml.type)
lock->ml.highest_blocked = target->ml.type;
@@ -418,7 +419,7 @@ blocked:
can_grant = 0;
if (lock->ml.highest_blocked == LKM_IVMODE) {
__dlm_lockres_reserve_ast(res);
- dlm_queue_bast(dlm, lock);
+ __dlm_queue_bast(dlm, lock);
}
if (lock->ml.highest_blocked < target->ml.type)
lock->ml.highest_blocked = target->ml.type;
@@ -444,7 +445,7 @@ blocked:
spin_unlock(&target->spinlock);
__dlm_lockres_reserve_ast(res);
- dlm_queue_ast(dlm, target);
+ __dlm_queue_ast(dlm, target);
/* go back and check for more */
goto converting;
}
@@ -674,6 +675,7 @@ static int dlm_thread(void *data)
/* lockres can be re-dirtied/re-added to the
* dirty_list in this gap, but that is ok */
+ spin_lock(&dlm->ast_lock);
spin_lock(&res->spinlock);
if (res->owner != dlm->node_num) {
__dlm_print_one_lock_resource(res);
@@ -694,6 +696,7 @@ static int dlm_thread(void *data)
/* move it to the tail and keep going */
res->state &= ~DLM_LOCK_RES_DIRTY;
spin_unlock(&res->spinlock);
+ spin_unlock(&dlm->ast_lock);
mlog(0, "delaying list shuffling for in-"
"progress lockres %.*s, state=%d\n",
res->lockname.len, res->lockname.name,
@@ -715,6 +718,7 @@ static int dlm_thread(void *data)
dlm_shuffle_lists(dlm, res);
res->state &= ~DLM_LOCK_RES_DIRTY;
spin_unlock(&res->spinlock);
+ spin_unlock(&dlm->ast_lock);
dlm_lockres_calc_usage(dlm, res);
next prev parent reply other threads:[~2010-05-14 14:50 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-13 19:43 [Ocfs2-devel] Deadlock in DLM code still there Jan Kara
2010-05-13 22:25 ` Sunil Mushran
2010-05-14 14:50 ` Wengang Wang [this message]
2010-05-14 18:30 ` Sunil Mushran
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100514145054.GA3431@laptop.oracle.com \
--to=wen.gang.wang@oracle.com \
--cc=ocfs2-devel@oss.oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.