linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] jfs: possible deadlocks - continue
@ 2006-06-04 15:44 Evgeniy Dushistov
  2006-06-04 21:49 ` Dave Kleikamp
  0 siblings, 1 reply; 4+ messages in thread
From: Evgeniy Dushistov @ 2006-06-04 15:44 UTC (permalink / raw)
  To: Dave Kleikamp; +Cc: jfs-discussion, linux-kernel, linux-fsdevel

For some reasons my post about "possible deadlocks"
didn't appear in jfs-discussion@lists.sourceforge.net.

>====================================
>[ BUG: possible deadlock detected! ]
>------------------------------------
>mount/5587 is trying to acquire lock:
> (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
>
>but task is already holding lock:
> (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
>
>which could potentially lead to deadlocks!
>
>other info that might help us debug this:
>2 locks held by mount/5587:
> #0:  (&inode->i_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> #1:  (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
>
>stack backtrace:
> [<c0103095>] show_trace+0x16/0x19
> [<c0103562>] dump_stack+0x1a/0x1f
> [<c012ddd7>] __lockdep_acquire+0x6c6/0x907
> [<c012e063>] lockdep_acquire+0x4b/0x63
> [<c02f6f0c>] __mutex_lock_slowpath+0xa4/0x21c
> [<c02f7096>] mutex_lock+0x12/0x15
> [<c01b99be>] jfs_create+0x90/0x2b8
> [<c0161016>] vfs_create+0x91/0xda
> [<c0163939>] open_namei+0x15a/0x5b0
> [<c015326c>] do_filp_open+0x22/0x39
> [<c01541a8>] do_sys_open+0x40/0xbc
> [<c015424d>] sys_open+0x13/0x15
> [<c02f875d>] sysenter_past_esp+0x56/0x8d

I should add that this happened during boot, when root jfs
file system become from ro->rw

I look at code, and see that
1)locks wasn't release in the opposite order in which
they were taken
2)in jfs_rename we lock new_ip, and in "error path" we didn't unlock it
3)I see strange expression: "! !"

May be this worth to fix?

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>

---

Index: linux-2.6.16/fs/jfs/namei.c
===================================================================
--- linux-2.6.16.orig/fs/jfs/namei.c
+++ linux-2.6.16/fs/jfs/namei.c
@@ -165,8 +165,8 @@ static int jfs_create(struct inode *dip,
 
       out3:
 	txEnd(tid);
-	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
@@ -300,8 +300,8 @@ static int jfs_mkdir(struct inode *dip, 
 
       out3:
 	txEnd(tid);
-	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
@@ -384,8 +384,8 @@ static int jfs_rmdir(struct inode *dip, 
 		if (rc == -EIO)
 			txAbort(tid, 1);
 		txEnd(tid);
-		mutex_unlock(&JFS_IP(dip)->commit_mutex);
 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
+		mutex_unlock(&JFS_IP(dip)->commit_mutex);
 
 		goto out2;
 	}
@@ -422,8 +422,8 @@ static int jfs_rmdir(struct inode *dip, 
 
 	txEnd(tid);
 
-	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 
 	/*
 	 * Truncating the directory index table is not guaranteed.  It
@@ -503,8 +503,8 @@ static int jfs_unlink(struct inode *dip,
 		if (rc == -EIO)
 			txAbort(tid, 1);	/* Marks FS Dirty */
 		txEnd(tid);
-		mutex_unlock(&JFS_IP(dip)->commit_mutex);
 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
+		mutex_unlock(&JFS_IP(dip)->commit_mutex);
 		IWRITE_UNLOCK(ip);
 		goto out1;
 	}
@@ -527,8 +527,8 @@ static int jfs_unlink(struct inode *dip,
 		if ((new_size = commitZeroLink(tid, ip)) < 0) {
 			txAbort(tid, 1);	/* Marks FS Dirty */
 			txEnd(tid);
-			mutex_unlock(&JFS_IP(dip)->commit_mutex);
 			mutex_unlock(&JFS_IP(ip)->commit_mutex);
+			mutex_unlock(&JFS_IP(dip)->commit_mutex);
 			IWRITE_UNLOCK(ip);
 			rc = new_size;
 			goto out1;
@@ -556,9 +556,8 @@ static int jfs_unlink(struct inode *dip,
 
 	txEnd(tid);
 
-	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
-
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 
 	while (new_size && (rc == 0)) {
 		tid = txBegin(dip->i_sb, 0);
@@ -847,8 +846,8 @@ static int jfs_link(struct dentry *old_d
       out:
 	txEnd(tid);
 
-	mutex_unlock(&JFS_IP(dir)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(dir)->commit_mutex);
 
 	jfs_info("jfs_link: rc:%d", rc);
 	return rc;
@@ -1037,8 +1036,8 @@ static int jfs_symlink(struct inode *dip
 
       out3:
 	txEnd(tid);
-	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
@@ -1160,10 +1159,11 @@ static int jfs_rename(struct inode *old_
 		if (S_ISDIR(new_ip->i_mode)) {
 			new_ip->i_nlink--;
 			if (new_ip->i_nlink) {
-				mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
-				mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
+				mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
 				if (old_dir != new_dir)
 					mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
+				mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
+				mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
 				if (!S_ISDIR(old_ip->i_mode) && new_ip)
 					IWRITE_UNLOCK(new_ip);
 				jfs_error(new_ip->i_sb,
@@ -1281,13 +1281,12 @@ static int jfs_rename(struct inode *old_
 
       out4:
 	txEnd(tid);
-
-	mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
-	mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
-	if (old_dir != new_dir)
-		mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
 	if (new_ip)
 		mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
+	if (old_dir != new_dir)
+		mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
+	mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
 
 	while (new_size && (rc == 0)) {
 		tid = txBegin(new_ip->i_sb, 0);
Index: linux-2.6.16/fs/jfs/jfs_txnmgr.c
===================================================================
--- linux-2.6.16.orig/fs/jfs/jfs_txnmgr.c
+++ linux-2.6.16/fs/jfs/jfs_txnmgr.c
@@ -2944,7 +2944,7 @@ int jfs_sync(void *arg)
 				 * Inode is being freed
 				 */
 				list_del_init(&jfs_ip->anon_inode_list);
-			} else if (! !mutex_trylock(&jfs_ip->commit_mutex)) {
+			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
 				/*
 				 * inode will be removed from anonymous list
 				 * when it is committed

-- 
/Evgeniy


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] jfs: possible deadlocks - continue
  2006-06-04 15:44 [PATCH] jfs: possible deadlocks - continue Evgeniy Dushistov
@ 2006-06-04 21:49 ` Dave Kleikamp
  2006-06-05  5:00   ` Evgeniy Dushistov
  0 siblings, 1 reply; 4+ messages in thread
From: Dave Kleikamp @ 2006-06-04 21:49 UTC (permalink / raw)
  To: Evgeniy Dushistov; +Cc: jfs-discussion, linux-kernel, linux-fsdevel

On Sun, 2006-06-04 at 19:44 +0400, Evgeniy Dushistov wrote:
> For some reasons my post about "possible deadlocks"
> didn't appear in jfs-discussion@lists.sourceforge.net.

Nothings showing up there.  I guess it's a sourceforge problem.

> >====================================
> >[ BUG: possible deadlock detected! ]
> >------------------------------------
> >mount/5587 is trying to acquire lock:
> > (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> >
> >but task is already holding lock:
> > (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> >
> >which could potentially lead to deadlocks!
> >
> >other info that might help us debug this:
> >2 locks held by mount/5587:
> > #0:  (&inode->i_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> > #1:  (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> >
> >stack backtrace:
> > [<c0103095>] show_trace+0x16/0x19
> > [<c0103562>] dump_stack+0x1a/0x1f
> > [<c012ddd7>] __lockdep_acquire+0x6c6/0x907
> > [<c012e063>] lockdep_acquire+0x4b/0x63
> > [<c02f6f0c>] __mutex_lock_slowpath+0xa4/0x21c
> > [<c02f7096>] mutex_lock+0x12/0x15
> > [<c01b99be>] jfs_create+0x90/0x2b8
> > [<c0161016>] vfs_create+0x91/0xda
> > [<c0163939>] open_namei+0x15a/0x5b0
> > [<c015326c>] do_filp_open+0x22/0x39
> > [<c01541a8>] do_sys_open+0x40/0xbc
> > [<c015424d>] sys_open+0x13/0x15
> > [<c02f875d>] sysenter_past_esp+0x56/0x8d
> 
> I should add that this happened during boot, when root jfs
> file system become from ro->rw
> 
> I look at code, and see that
> 1)locks wasn't release in the opposite order in which
> they were taken

Why does this matter?

> 2)in jfs_rename we lock new_ip, and in "error path" we didn't unlock it

Good catch!  This isn't related to the warning, but it's potentially
worse.

> 3)I see strange expression: "! !"

I hadn't noticed this.  It was introduced when changing from semaphores
to mutexes.

> 
> May be this worth to fix?

2 & 3 for sure.  I don't see the need for fixing 1.

I think the warning needs to be fixed by introducing mutex_lock_nested
in some places.  I'll take a look at it.

Thanks,
Shaggy
-- 
David Kleikamp
IBM Linux Technology Center


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] jfs: possible deadlocks - continue
  2006-06-04 21:49 ` Dave Kleikamp
@ 2006-06-05  5:00   ` Evgeniy Dushistov
  2006-06-05 11:57     ` Dave Kleikamp
  0 siblings, 1 reply; 4+ messages in thread
From: Evgeniy Dushistov @ 2006-06-05  5:00 UTC (permalink / raw)
  To: Dave Kleikamp, Ingo Molnar; +Cc: jfs-discussion, linux-kernel, linux-fsdevel

On Sun, Jun 04, 2006 at 04:49:56PM -0500, Dave Kleikamp wrote:
> On Sun, 2006-06-04 at 19:44 +0400, Evgeniy Dushistov wrote:
> > I should add that this happened during boot, when root jfs
> > file system become from ro->rw
> > 
> > I look at code, and see that
> > 1)locks wasn't release in the opposite order in which
> > they were taken
> 
> Why does this matter?
> 
Like "3" it make code more understandable,
reader of code don't have to think why
code looks like:
lock A, lock B unlock A, unlock B
while a normal practice in kernel:
lock A, lock B unlock B unlock A
the goal of change just simplicity and clearness,
and nothing more.


> I think the warning needs to be fixed by introducing mutex_lock_nested
> in some places.  I'll take a look at it.
> 
To avoid incomprehension, previous patch just make code more
understandable and unlock mutex on "error path", it doesn't fix
this warning and I have no idea why it happend:

====================================
[ BUG: possible deadlock detected! ]
------------------------------------
mount/5587 is trying to acquire lock:
 (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15

but task is already holding lock:
 (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15

which could potentially lead to deadlocks!

other info that might help us debug this:
2 locks held by mount/5587:
 #0:  (&inode->i_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
 #1:  (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15

stack backtrace:
 [<c0103095>] show_trace+0x16/0x19
 [<c0103562>] dump_stack+0x1a/0x1f
 [<c012ddd7>] __lockdep_acquire+0x6c6/0x907
 [<c012e063>] lockdep_acquire+0x4b/0x63
 [<c02f6f0c>] __mutex_lock_slowpath+0xa4/0x21c
 [<c02f7096>] mutex_lock+0x12/0x15
 [<c01b99be>] jfs_create+0x90/0x2b8
 [<c0161016>] vfs_create+0x91/0xda
 [<c0163939>] open_namei+0x15a/0x5b0
 [<c015326c>] do_filp_open+0x22/0x39
 [<c01541a8>] do_sys_open+0x40/0xbc
 [<c015424d>] sys_open+0x13/0x15
 [<c02f875d>] sysenter_past_esp+0x56/0x8d

-- 
/Evgeniy


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] jfs: possible deadlocks - continue
  2006-06-05  5:00   ` Evgeniy Dushistov
@ 2006-06-05 11:57     ` Dave Kleikamp
  0 siblings, 0 replies; 4+ messages in thread
From: Dave Kleikamp @ 2006-06-05 11:57 UTC (permalink / raw)
  To: Evgeniy Dushistov
  Cc: Ingo Molnar, jfs-discussion, linux-kernel, linux-fsdevel

On Mon, 2006-06-05 at 09:00 +0400, Evgeniy Dushistov wrote:
> On Sun, Jun 04, 2006 at 04:49:56PM -0500, Dave Kleikamp wrote:
> > On Sun, 2006-06-04 at 19:44 +0400, Evgeniy Dushistov wrote:
> > > I should add that this happened during boot, when root jfs
> > > file system become from ro->rw
> > > 
> > > I look at code, and see that
> > > 1)locks wasn't release in the opposite order in which
> > > they were taken
> > 
> > Why does this matter?
> > 
> Like "3" it make code more understandable,
> reader of code don't have to think why
> code looks like:
> lock A, lock B unlock A, unlock B
> while a normal practice in kernel:
> lock A, lock B unlock B unlock A
> the goal of change just simplicity and clearness,
> and nothing more.

Well, when the locks are released at one place, I don't buy that it make
the code any easier to understand.  However, I understand that it makes
it easier for the validator to do its job, so I'm happy with all three
parts of the patch.  I'll push it to the -mm tree.

> 
> > I think the warning needs to be fixed by introducing mutex_lock_nested
> > in some places.  I'll take a look at it.
> > 
> To avoid incomprehension, previous patch just make code more
> understandable and unlock mutex on "error path", it doesn't fix
> this warning and I have no idea why it happend:

I understand the warning.  jfs is taking the same mutex (commit_mutex)
on two inodes, which has the potential to for two threads to deadlock.
I believe the order the locks are taken is safe (always taking the
parent first).  In fact, I think any operations involving more than one
inode are already protected by i_mutex on the parents.  I'm sure I can
fix it with mutex_lock_nested.

> ====================================
> [ BUG: possible deadlock detected! ]
> ------------------------------------
> mount/5587 is trying to acquire lock:
>  (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> 
> but task is already holding lock:
>  (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
> 
> which could potentially lead to deadlocks!
> 
> other info that might help us debug this:
> 2 locks held by mount/5587:
>  #0:  (&inode->i_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15
>  #1:  (&jfs_ip->commit_mutex){--..}, at: [<c02f7096>] mutex_lock+0x12/0x15

Thanks,
Shaggy
-- 
David Kleikamp
IBM Linux Technology Center


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2006-06-05 11:57 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-06-04 15:44 [PATCH] jfs: possible deadlocks - continue Evgeniy Dushistov
2006-06-04 21:49 ` Dave Kleikamp
2006-06-05  5:00   ` Evgeniy Dushistov
2006-06-05 11:57     ` Dave Kleikamp

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).