* [PATCH] jfs: possible deadlocks - continue
@ 2006-06-04 15:44 Evgeniy Dushistov
2006-06-04 21:49 ` Dave Kleikamp
0 siblings, 1 reply; 4+ messages in thread
From: Evgeniy Dushistov @ 2006-06-04 15:44 UTC (permalink / raw)
To: Dave Kleikamp; +Cc: jfs-discussion, linux-kernel, linux-fsdevel
For some reasons my post about "possible deadlocks"
didn't appear in jfs-discussion@lists.sourceforge.net.
>====================================
>[ BUG: possible deadlock detected! ]
>------------------------------------
>mount/5587 is trying to acquire lock:
> (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
>
>but task is already holding lock:
> (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
>
>which could potentially lead to deadlocks!
>
>other info that might help us debug this:
>2 locks held by mount/5587:
> #0: (&inode->i_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> #1: (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
>
>stack backtrace:
> [<c0103095>] show_trace+0x16/0x19
> [<c0103562>] dump_stack+0x1a/0x1f
> [<c012ddd7>] __lockdep_acquire+0x6c6/0x907
> [<c012e063>] lockdep_acquire+0x4b/0x63
> [<c02f6f0c>] __mutex_lock_slowpath+0xa4/0x21c
> [<c02f7096>] mutex_lock+0x12/0x15
> [<c01b99be>] jfs_create+0x90/0x2b8
> [<c0161016>] vfs_create+0x91/0xda
> [<c0163939>] open_namei+0x15a/0x5b0
> [<c015326c>] do_filp_open+0x22/0x39
> [<c01541a8>] do_sys_open+0x40/0xbc
> [<c015424d>] sys_open+0x13/0x15
> [<c02f875d>] sysenter_past_esp+0x56/0x8d
I should add that this happened during boot, when root jfs
file system become from ro->rw
I look at code, and see that
1)locks wasn't release in the opposite order in which
they were taken
2)in jfs_rename we lock new_ip, and in "error path" we didn't unlock it
3)I see strange expression: "! !"
May be this worth to fix?
Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
---
Index: linux-2.6.16/fs/jfs/namei.c
===================================================================
--- linux-2.6.16.orig/fs/jfs/namei.c
+++ linux-2.6.16/fs/jfs/namei.c
@@ -165,8 +165,8 @@ static int jfs_create(struct inode *dip,
out3:
txEnd(tid);
- mutex_unlock(&JFS_IP(dip)->commit_mutex);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
+ mutex_unlock(&JFS_IP(dip)->commit_mutex);
if (rc) {
free_ea_wmap(ip);
ip->i_nlink = 0;
@@ -300,8 +300,8 @@ static int jfs_mkdir(struct inode *dip,
out3:
txEnd(tid);
- mutex_unlock(&JFS_IP(dip)->commit_mutex);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
+ mutex_unlock(&JFS_IP(dip)->commit_mutex);
if (rc) {
free_ea_wmap(ip);
ip->i_nlink = 0;
@@ -384,8 +384,8 @@ static int jfs_rmdir(struct inode *dip,
if (rc == -EIO)
txAbort(tid, 1);
txEnd(tid);
- mutex_unlock(&JFS_IP(dip)->commit_mutex);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
+ mutex_unlock(&JFS_IP(dip)->commit_mutex);
goto out2;
}
@@ -422,8 +422,8 @@ static int jfs_rmdir(struct inode *dip,
txEnd(tid);
- mutex_unlock(&JFS_IP(dip)->commit_mutex);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
+ mutex_unlock(&JFS_IP(dip)->commit_mutex);
/*
* Truncating the directory index table is not guaranteed. It
@@ -503,8 +503,8 @@ static int jfs_unlink(struct inode *dip,
if (rc == -EIO)
txAbort(tid, 1); /* Marks FS Dirty */
txEnd(tid);
- mutex_unlock(&JFS_IP(dip)->commit_mutex);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
+ mutex_unlock(&JFS_IP(dip)->commit_mutex);
IWRITE_UNLOCK(ip);
goto out1;
}
@@ -527,8 +527,8 @@ static int jfs_unlink(struct inode *dip,
if ((new_size = commitZeroLink(tid, ip)) < 0) {
txAbort(tid, 1); /* Marks FS Dirty */
txEnd(tid);
- mutex_unlock(&JFS_IP(dip)->commit_mutex);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
+ mutex_unlock(&JFS_IP(dip)->commit_mutex);
IWRITE_UNLOCK(ip);
rc = new_size;
goto out1;
@@ -556,9 +556,8 @@ static int jfs_unlink(struct inode *dip,
txEnd(tid);
- mutex_unlock(&JFS_IP(dip)->commit_mutex);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
-
+ mutex_unlock(&JFS_IP(dip)->commit_mutex);
while (new_size && (rc == 0)) {
tid = txBegin(dip->i_sb, 0);
@@ -847,8 +846,8 @@ static int jfs_link(struct dentry *old_d
out:
txEnd(tid);
- mutex_unlock(&JFS_IP(dir)->commit_mutex);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
+ mutex_unlock(&JFS_IP(dir)->commit_mutex);
jfs_info("jfs_link: rc:%d", rc);
return rc;
@@ -1037,8 +1036,8 @@ static int jfs_symlink(struct inode *dip
out3:
txEnd(tid);
- mutex_unlock(&JFS_IP(dip)->commit_mutex);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
+ mutex_unlock(&JFS_IP(dip)->commit_mutex);
if (rc) {
free_ea_wmap(ip);
ip->i_nlink = 0;
@@ -1160,10 +1159,11 @@ static int jfs_rename(struct inode *old_
if (S_ISDIR(new_ip->i_mode)) {
new_ip->i_nlink--;
if (new_ip->i_nlink) {
- mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
- mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
+ mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
if (old_dir != new_dir)
mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
+ mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
+ mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
if (!S_ISDIR(old_ip->i_mode) && new_ip)
IWRITE_UNLOCK(new_ip);
jfs_error(new_ip->i_sb,
@@ -1281,13 +1281,12 @@ static int jfs_rename(struct inode *old_
out4:
txEnd(tid);
-
- mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
- mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
- if (old_dir != new_dir)
- mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
if (new_ip)
mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
+ if (old_dir != new_dir)
+ mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
+ mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
+ mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
while (new_size && (rc == 0)) {
tid = txBegin(new_ip->i_sb, 0);
Index: linux-2.6.16/fs/jfs/jfs_txnmgr.c
===================================================================
--- linux-2.6.16.orig/fs/jfs/jfs_txnmgr.c
+++ linux-2.6.16/fs/jfs/jfs_txnmgr.c
@@ -2944,7 +2944,7 @@ int jfs_sync(void *arg)
* Inode is being freed
*/
list_del_init(&jfs_ip->anon_inode_list);
- } else if (! !mutex_trylock(&jfs_ip->commit_mutex)) {
+ } else if (mutex_trylock(&jfs_ip->commit_mutex)) {
/*
* inode will be removed from anonymous list
* when it is committed
--
/Evgeniy
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] jfs: possible deadlocks - continue
2006-06-04 15:44 [PATCH] jfs: possible deadlocks - continue Evgeniy Dushistov
@ 2006-06-04 21:49 ` Dave Kleikamp
2006-06-05 5:00 ` Evgeniy Dushistov
0 siblings, 1 reply; 4+ messages in thread
From: Dave Kleikamp @ 2006-06-04 21:49 UTC (permalink / raw)
To: Evgeniy Dushistov; +Cc: jfs-discussion, linux-kernel, linux-fsdevel
On Sun, 2006-06-04 at 19:44 +0400, Evgeniy Dushistov wrote:
> For some reasons my post about "possible deadlocks"
> didn't appear in jfs-discussion@lists.sourceforge.net.
Nothings showing up there. I guess it's a sourceforge problem.
> >====================================
> >[ BUG: possible deadlock detected! ]
> >------------------------------------
> >mount/5587 is trying to acquire lock:
> > (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> >
> >but task is already holding lock:
> > (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> >
> >which could potentially lead to deadlocks!
> >
> >other info that might help us debug this:
> >2 locks held by mount/5587:
> > #0: (&inode->i_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> > #1: (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> >
> >stack backtrace:
> > [<c0103095>] show_trace+0x16/0x19
> > [<c0103562>] dump_stack+0x1a/0x1f
> > [<c012ddd7>] __lockdep_acquire+0x6c6/0x907
> > [<c012e063>] lockdep_acquire+0x4b/0x63
> > [<c02f6f0c>] __mutex_lock_slowpath+0xa4/0x21c
> > [<c02f7096>] mutex_lock+0x12/0x15
> > [<c01b99be>] jfs_create+0x90/0x2b8
> > [<c0161016>] vfs_create+0x91/0xda
> > [<c0163939>] open_namei+0x15a/0x5b0
> > [<c015326c>] do_filp_open+0x22/0x39
> > [<c01541a8>] do_sys_open+0x40/0xbc
> > [<c015424d>] sys_open+0x13/0x15
> > [<c02f875d>] sysenter_past_esp+0x56/0x8d
>
> I should add that this happened during boot, when root jfs
> file system become from ro->rw
>
> I look at code, and see that
> 1)locks wasn't release in the opposite order in which
> they were taken
Why does this matter?
> 2)in jfs_rename we lock new_ip, and in "error path" we didn't unlock it
Good catch! This isn't related to the warning, but it's potentially
worse.
> 3)I see strange expression: "! !"
I hadn't noticed this. It was introduced when changing from semaphores
to mutexes.
>
> May be this worth to fix?
2 & 3 for sure. I don't see the need for fixing 1.
I think the warning needs to be fixed by introducing mutex_lock_nested
in some places. I'll take a look at it.
Thanks,
Shaggy
--
David Kleikamp
IBM Linux Technology Center
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] jfs: possible deadlocks - continue
2006-06-04 21:49 ` Dave Kleikamp
@ 2006-06-05 5:00 ` Evgeniy Dushistov
2006-06-05 11:57 ` Dave Kleikamp
0 siblings, 1 reply; 4+ messages in thread
From: Evgeniy Dushistov @ 2006-06-05 5:00 UTC (permalink / raw)
To: Dave Kleikamp, Ingo Molnar; +Cc: jfs-discussion, linux-kernel, linux-fsdevel
On Sun, Jun 04, 2006 at 04:49:56PM -0500, Dave Kleikamp wrote:
> On Sun, 2006-06-04 at 19:44 +0400, Evgeniy Dushistov wrote:
> > I should add that this happened during boot, when root jfs
> > file system become from ro->rw
> >
> > I look at code, and see that
> > 1)locks wasn't release in the opposite order in which
> > they were taken
>
> Why does this matter?
>
Like "3" it make code more understandable,
reader of code don't have to think why
code looks like:
lock A, lock B unlock A, unlock B
while a normal practice in kernel:
lock A, lock B unlock B unlock A
the goal of change just simplicity and clearness,
and nothing more.
> I think the warning needs to be fixed by introducing mutex_lock_nested
> in some places. I'll take a look at it.
>
To avoid incomprehension, previous patch just make code more
understandable and unlock mutex on "error path", it doesn't fix
this warning and I have no idea why it happend:
====================================
[ BUG: possible deadlock detected! ]
------------------------------------
mount/5587 is trying to acquire lock:
(&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
but task is already holding lock:
(&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
which could potentially lead to deadlocks!
other info that might help us debug this:
2 locks held by mount/5587:
#0: (&inode->i_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
#1: (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
stack backtrace:
[<c0103095>] show_trace+0x16/0x19
[<c0103562>] dump_stack+0x1a/0x1f
[<c012ddd7>] __lockdep_acquire+0x6c6/0x907
[<c012e063>] lockdep_acquire+0x4b/0x63
[<c02f6f0c>] __mutex_lock_slowpath+0xa4/0x21c
[<c02f7096>] mutex_lock+0x12/0x15
[<c01b99be>] jfs_create+0x90/0x2b8
[<c0161016>] vfs_create+0x91/0xda
[<c0163939>] open_namei+0x15a/0x5b0
[<c015326c>] do_filp_open+0x22/0x39
[<c01541a8>] do_sys_open+0x40/0xbc
[<c015424d>] sys_open+0x13/0x15
[<c02f875d>] sysenter_past_esp+0x56/0x8d
--
/Evgeniy
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] jfs: possible deadlocks - continue
2006-06-05 5:00 ` Evgeniy Dushistov
@ 2006-06-05 11:57 ` Dave Kleikamp
0 siblings, 0 replies; 4+ messages in thread
From: Dave Kleikamp @ 2006-06-05 11:57 UTC (permalink / raw)
To: Evgeniy Dushistov
Cc: Ingo Molnar, jfs-discussion, linux-kernel, linux-fsdevel
On Mon, 2006-06-05 at 09:00 +0400, Evgeniy Dushistov wrote:
> On Sun, Jun 04, 2006 at 04:49:56PM -0500, Dave Kleikamp wrote:
> > On Sun, 2006-06-04 at 19:44 +0400, Evgeniy Dushistov wrote:
> > > I should add that this happened during boot, when root jfs
> > > file system become from ro->rw
> > >
> > > I look at code, and see that
> > > 1)locks wasn't release in the opposite order in which
> > > they were taken
> >
> > Why does this matter?
> >
> Like "3" it make code more understandable,
> reader of code don't have to think why
> code looks like:
> lock A, lock B unlock A, unlock B
> while a normal practice in kernel:
> lock A, lock B unlock B unlock A
> the goal of change just simplicity and clearness,
> and nothing more.
Well, when the locks are released at one place, I don't buy that it make
the code any easier to understand. However, I understand that it makes
it easier for the validator to do its job, so I'm happy with all three
parts of the patch. I'll push it to the -mm tree.
>
> > I think the warning needs to be fixed by introducing mutex_lock_nested
> > in some places. I'll take a look at it.
> >
> To avoid incomprehension, previous patch just make code more
> understandable and unlock mutex on "error path", it doesn't fix
> this warning and I have no idea why it happend:
I understand the warning. jfs is taking the same mutex (commit_mutex)
on two inodes, which has the potential to for two threads to deadlock.
I believe the order the locks are taken is safe (always taking the
parent first). In fact, I think any operations involving more than one
inode are already protected by i_mutex on the parents. I'm sure I can
fix it with mutex_lock_nested.
> ====================================
> [ BUG: possible deadlock detected! ]
> ------------------------------------
> mount/5587 is trying to acquire lock:
> (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
>
> but task is already holding lock:
> (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
>
> which could potentially lead to deadlocks!
>
> other info that might help us debug this:
> 2 locks held by mount/5587:
> #0: (&inode->i_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> #1: (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
Thanks,
Shaggy
--
David Kleikamp
IBM Linux Technology Center
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2006-06-05 11:57 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-06-04 15:44 [PATCH] jfs: possible deadlocks - continue Evgeniy Dushistov
2006-06-04 21:49 ` Dave Kleikamp
2006-06-05 5:00 ` Evgeniy Dushistov
2006-06-05 11:57 ` Dave Kleikamp
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).