* [Cluster-devel] [GFS2] Put back semaphore to avoid umount problem
@ 2007-01-26 9:08 Steven Whitehouse
From: Steven Whitehouse @ 2007-01-26 9:08 UTC (permalink / raw)
To: cluster-devel.redhat.com
From b99f08ce80721ea5ad8013ac6aed8c9a1dd517b7 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 26 Jan 2007 09:02:09 -0500
Subject: [PATCH] [GFS2] Put back semaphore to avoid umount problem
Dave Teigland fixed this bug a while back, but I managed to mistakenly
remove the semaphore during later development. It is required to avoid
the list of inodes changing during an invalidate_inodes call. I have
made it an rwsem since the read side will be taken frequently during
normal filesystem operation. The write side will only be taken during
umount of the file system.
Also, the bug only triggers when using the DLM lock manager, and even
then only under certain conditions, since it is timing related.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: David Teigland <teigland@redhat.com>
---
fs/gfs2/glock.c | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c070ede..679aa25 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -20,6 +20,7 @@ #include <linux/gfs2_ondisk.h>
#include <linux/list.h>
#include <linux/lm_interface.h>
#include <linux/wait.h>
+#include <linux/rwsem.h>
#include <asm/uaccess.h>
#include "gfs2.h"
@@ -45,6 +46,7 @@ static int dump_glock(struct gfs2_glock
static int dump_inode(struct gfs2_inode *ip);
static void gfs2_glock_xmote_th(struct gfs2_holder *gh);
static void gfs2_glock_drop_th(struct gfs2_glock *gl);
+static DECLARE_RWSEM(gfs2_umount_flush_sem);
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
@@ -190,6 +192,7 @@ int gfs2_glock_put(struct gfs2_glock *gl
int rv = 0;
struct gfs2_sbd *sdp = gl->gl_sbd;
+ down_read(&gfs2_umount_flush_sem);
write_lock(gl_lock_addr(gl->gl_hash));
if (atomic_dec_and_test(&gl->gl_ref)) {
hlist_del(&gl->gl_list);
@@ -207,6 +210,7 @@ int gfs2_glock_put(struct gfs2_glock *gl
}
write_unlock(gl_lock_addr(gl->gl_hash));
out:
+ up_read(&gfs2_umount_flush_sem);
return rv;
}
@@ -1828,7 +1832,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd
t = jiffies;
}
+ down_write(&gfs2_umount_flush_sem);
invalidate_inodes(sdp->sd_vfs);
+ up_write(&gfs2_umount_flush_sem);
msleep(10);
}
}
--
1.4.1
* [Cluster-devel] [GFS2] Put back semaphore to avoid umount problem
From: Steven Whitehouse @ 2007-01-26 10:36 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
Hmm. This doesn't seem to be quite the thing:
[ 123.148471] kobject dm-3: registering. parent: <NULL>, set: gfs2
[ 123.148621] kobject_uevent_env
[ 123.148626] fill_kobj_path: path = '/fs/gfs2/dm-3'
[ 123.148925] GFS2: fsid=: Trying to join cluster "lock_nolock", "dm-3"
[ 123.148973] GFS2: fsid=dm-3.0: Joined cluster. Now mounting FS...
[ 123.364244] GFS2: fsid=dm-3.0: jid=0, already locked for use
[ 123.364251] GFS2: fsid=dm-3.0: jid=0: Looking at journal...
[ 123.414919] GFS2: fsid=dm-3.0: jid=0: Done
[ 123.476489] kobject_uevent_env
[ 123.476498] fill_kobj_path: path = '/block/dm-3'
[60501.642323] kobject_uevent_env
[60501.642337] fill_kobj_path: path = '/block/dm-3'
[60509.936916]
[60509.936919] =======================================================
[60509.936939] [ INFO: possible circular locking dependency detected ]
[60509.936952] 2.6.20-rc5 #128
[60509.936962] -------------------------------------------------------
[60509.936977] umount/7340 is trying to acquire lock:
[60509.936989] (iprune_mutex){--..}, at: [<ffffffff80533f48>] mutex_lock+0x25/0x29
[60509.937032]
[60509.937035] but task is already holding lock:
[60509.937057] (gfs2_umount_flush_sem){----}, at: [<ffffffff803102c5>] gfs2_gl_hash_clear+0x4bd/0x4fa
[60509.937117]
[60509.937121] which lock already depends on the new lock.
[60509.937140]
[60509.937159]
[60509.937161] the existing dependency chain (in reverse order) is:
[60509.937188]
[60509.937191] -> #1 (gfs2_umount_flush_sem){----}:
[60509.937227] [<ffffffff80243069>] __lock_acquire+0x932/0xa55
[60509.937288] [<ffffffff8024346f>] lock_acquire+0x4c/0x65
[60509.937351] [<ffffffff8030fc07>] gfs2_glock_put+0x1d/0x160
[60509.937403] [<ffffffff8023ed77>] down_read+0x39/0x45
[60509.937437] [<ffffffff8030fc07>] gfs2_glock_put+0x1d/0x160
[60509.937472] [<ffffffff8031ea83>] gfs2_clear_inode+0x40/0x65
[60509.937504] [<ffffffff8029138c>] clear_inode+0xe0/0x139
[60509.937542] [<ffffffff802916bd>] dispose_list+0x43/0xe9
[60509.937573] [<ffffffff80291a2b>] invalidate_inodes+0xd2/0xf0
[60509.937605] [<ffffffff802805e8>] generic_shutdown_super+0x3a/0xd8
[60509.937638] [<ffffffff802806ac>] kill_block_super+0x26/0x3b
[60509.937669] [<ffffffff8031b2b4>] gfs2_kill_sb+0x9/0xb
[60509.937700] [<ffffffff80280797>] deactivate_super+0x6f/0x84
[60509.937731] [<ffffffff80293558>] mntput_no_expire+0x56/0x87
[60509.937763] [<ffffffff80285439>] path_release_on_umount+0x1d/0x21
[60509.937795] [<ffffffff80293d8c>] sys_umount+0x21f/0x254
[60509.937826] [<ffffffff80254b8c>] audit_syscall_entry+0x148/0x17e
[60509.937858] [<ffffffff8020c96f>] syscall_trace_enter+0xb7/0xbb
[60509.937891] [<ffffffff80209825>] tracesys+0xdc/0xe1
[60509.937922] [<ffffffffffffffff>] 0xffffffffffffffff
[60509.937966]
[60509.937967] -> #0 (iprune_mutex){--..}:
[60509.937995] [<ffffffff80242f7b>] __lock_acquire+0x844/0xa55
[60509.938027] [<ffffffff8024346f>] lock_acquire+0x4c/0x65
[60509.938058] [<ffffffff80533f48>] mutex_lock+0x25/0x29
[60509.938089] [<ffffffff80533d99>] __mutex_lock_slowpath+0xef/0x279
[60509.938121] [<ffffffff802422a3>] trace_hardirqs_on+0x11b/0x13f
[60509.938153] [<ffffffff80533f48>] mutex_lock+0x25/0x29
[60509.938183] [<ffffffff80291985>] invalidate_inodes+0x2c/0xf0
[60509.938215] [<ffffffff803102cd>] gfs2_gl_hash_clear+0x4c5/0x4fa
[60509.938247] [<ffffffff8031e7fe>] gfs2_put_super+0x1db/0x1fb
[60509.938279] [<ffffffff80280611>] generic_shutdown_super+0x63/0xd8
[60509.938311] [<ffffffff802806ac>] kill_block_super+0x26/0x3b
[60509.938342] [<ffffffff8031b2b4>] gfs2_kill_sb+0x9/0xb
[60509.938373] [<ffffffff80280797>] deactivate_super+0x6f/0x84
[60509.938404] [<ffffffff80293558>] mntput_no_expire+0x56/0x87
[60509.938435] [<ffffffff80285439>] path_release_on_umount+0x1d/0x21
[60509.938467] [<ffffffff80293d8c>] sys_umount+0x21f/0x254
[60509.938498] [<ffffffff80254b8c>] audit_syscall_entry+0x148/0x17e
[60509.938530] [<ffffffff8020c96f>] syscall_trace_enter+0xb7/0xbb
[60509.938561] [<ffffffff80209825>] tracesys+0xdc/0xe1
[60509.938592] [<ffffffffffffffff>] 0xffffffffffffffff
[60509.938623]
[60509.938624] other info that might help us debug this:
[60509.938625]
[60509.938656] 3 locks held by umount/7340:
[60509.938668] #0: (&type->s_umount_key#21){----}, at: [<ffffffff8028078f>] deactivate_super+0x67/0x84
[60509.938710] #1: (&type->s_lock_key#11){--..}, at: [<ffffffff80533f48>] mutex_lock+0x25/0x29
[60509.938750] #2: (gfs2_umount_flush_sem){----}, at: [<ffffffff803102c5>] gfs2_gl_hash_clear+0x4bd/0x4fa
[60509.938788]
[60509.938790] stack backtrace:
[60509.938809]
[60509.938810] Call Trace:
[60509.938834] [<ffffffff80241764>] print_circular_bug_tail+0x70/0x7b
[60509.938853] [<ffffffff80242f7b>] __lock_acquire+0x844/0xa55
[60509.938871] [<ffffffff8024346f>] lock_acquire+0x4c/0x65
[60509.938887] [<ffffffff80533f48>] mutex_lock+0x25/0x29
[60509.938903] [<ffffffff80533d99>] __mutex_lock_slowpath+0xef/0x279
[60509.938921] [<ffffffff802422a3>] trace_hardirqs_on+0x11b/0x13f
[60509.938939] [<ffffffff80533f48>] mutex_lock+0x25/0x29
[60509.938955] [<ffffffff80291985>] invalidate_inodes+0x2c/0xf0
[60509.938974] [<ffffffff803102cd>] gfs2_gl_hash_clear+0x4c5/0x4fa
[60509.938992] [<ffffffff8031e7fe>] gfs2_put_super+0x1db/0x1fb
[60509.939009] [<ffffffff80280611>] generic_shutdown_super+0x63/0xd8
[60509.939026] [<ffffffff802806ac>] kill_block_super+0x26/0x3b
[60509.939044] [<ffffffff8031b2b4>] gfs2_kill_sb+0x9/0xb
[60509.939060] [<ffffffff80280797>] deactivate_super+0x6f/0x84
[60509.939077] [<ffffffff80293558>] mntput_no_expire+0x56/0x87
[60509.939095] [<ffffffff80285439>] path_release_on_umount+0x1d/0x21
[60509.939112] [<ffffffff80293d8c>] sys_umount+0x21f/0x254
[60509.939129] [<ffffffff80254b8c>] audit_syscall_entry+0x148/0x17e
[60509.939147] [<ffffffff8020c96f>] syscall_trace_enter+0xb7/0xbb
[60509.939164] [<ffffffff80209825>] tracesys+0xdc/0xe1
[60509.939180]
[60509.960496] kobject dm-3: unregistering
[60509.960514] kobject_uevent_env
[60509.960529] fill_kobj_path: path = '/fs/gfs2/dm-3'
[60509.960576] kobject dm-3: cleaning up
Looks like we might have to think again on this one,
Steve.
On Fri, 2007-01-26 at 09:08 +0000, Steven Whitehouse wrote:
> >From b99f08ce80721ea5ad8013ac6aed8c9a1dd517b7 Mon Sep 17 00:00:00 2001
> From: Steven Whitehouse <swhiteho@redhat.com>
> Date: Fri, 26 Jan 2007 09:02:09 -0500
> Subject: [PATCH] [GFS2] Put back semaphore to avoid umount problem
>
> Dave Teigland fixed this bug a while back, but I managed to mistakenly
> remove the semaphore during later development. It is required to avoid
> the list of inodes changing during an invalidate_inodes call. I have
> made it an rwsem since the read side will be taken frequently during
> normal filesystem operation. The write side will only be taken during
> umount of the file system.
>
> Also, the bug only triggers when using the DLM lock manager, and even
> then only under certain conditions, since it is timing related.
>
> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
> Cc: David Teigland <teigland@redhat.com>
> ---
> fs/gfs2/glock.c | 6 ++++++
> 1 files changed, 6 insertions(+), 0 deletions(-)
>
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index c070ede..679aa25 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -20,6 +20,7 @@ #include <linux/gfs2_ondisk.h>
> #include <linux/list.h>
> #include <linux/lm_interface.h>
> #include <linux/wait.h>
> +#include <linux/rwsem.h>
> #include <asm/uaccess.h>
>
> #include "gfs2.h"
> @@ -45,6 +46,7 @@ static int dump_glock(struct gfs2_glock
> static int dump_inode(struct gfs2_inode *ip);
> static void gfs2_glock_xmote_th(struct gfs2_holder *gh);
> static void gfs2_glock_drop_th(struct gfs2_glock *gl);
> +static DECLARE_RWSEM(gfs2_umount_flush_sem);
>
> #define GFS2_GL_HASH_SHIFT 15
> #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
> @@ -190,6 +192,7 @@ int gfs2_glock_put(struct gfs2_glock *gl
> int rv = 0;
> struct gfs2_sbd *sdp = gl->gl_sbd;
>
> + down_read(&gfs2_umount_flush_sem);
> write_lock(gl_lock_addr(gl->gl_hash));
> if (atomic_dec_and_test(&gl->gl_ref)) {
> hlist_del(&gl->gl_list);
> @@ -207,6 +210,7 @@ int gfs2_glock_put(struct gfs2_glock *gl
> }
> write_unlock(gl_lock_addr(gl->gl_hash));
> out:
> + up_read(&gfs2_umount_flush_sem);
> return rv;
> }
>
> @@ -1828,7 +1832,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd
> t = jiffies;
> }
>
> + down_write(&gfs2_umount_flush_sem);
> invalidate_inodes(sdp->sd_vfs);
> + up_write(&gfs2_umount_flush_sem);
> msleep(10);
> }
> }
* [Cluster-devel] [GFS2] Put back semaphore to avoid umount problem
From: David Teigland @ 2007-01-26 15:37 UTC (permalink / raw)
To: cluster-devel.redhat.com
On Fri, Jan 26, 2007 at 10:36:58AM +0000, Steven Whitehouse wrote:
> Hi,
>
> Hmm. This doesn't seem to be quite the thing:
Yep, glock_put is being called from within invalidate_inodes, so we're
recursively taking the new sem. Here's the original comment I added
describing the problem:
/* invalidate_inodes() requires that the sb inodes list
not change, but an async completion callback for an
unlock can occur which does glock_put() which
can call iput() which will change the sb inodes list.
invalidate_inodes_mutex prevents glock_put()'s during
an invalidate_inodes() */
So, we're trying to prevent async completions from mucking with the inodes
while we're in invalidate_inodes. Perhaps we could take the new read sem
inside gfs2_glock_cb, which would still block async completions when we
need to, but not in a code path that's called from elsewhere.
* [Cluster-devel] [GFS2] Put back semaphore to avoid umount problem
From: Steven Whitehouse @ 2007-01-29 11:51 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
On Fri, 2007-01-26 at 09:37 -0600, David Teigland wrote:
> On Fri, Jan 26, 2007 at 10:36:58AM +0000, Steven Whitehouse wrote:
> > Hi,
> >
> > Hmm. This doesn't seem to be quite the thing:
>
> Yep, glock_put is being called from within invalidate_inodes, so we're
> recursively taking the new sem. Here's the original comment I added
> describing the problem:
>
> /* invalidate_inodes() requires that the sb inodes list
> not change, but an async completion callback for an
> unlock can occur which does glock_put() which
> can call iput() which will change the sb inodes list.
> invalidate_inodes_mutex prevents glock_put()'s during
> an invalidate_inodes() */
>
> So, we're trying to prevent async completions from mucking with the inodes
> while we're in invalidate_inodes. Perhaps we could take the new read sem
> inside gfs2_glock_cb which still blocks async completions when we need to
> but not in a code path that's called from elsewhere.
>
Yes, that seems reasonable for now. Patch below (which I've tested and
appears to work fine). It also has the advantage of getting the read
side of this lock out of the glock_put fast path:
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c070ede..6618c11 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -20,6 +20,7 @@ #include <linux/gfs2_ondisk.h>
#include <linux/list.h>
#include <linux/lm_interface.h>
#include <linux/wait.h>
+#include <linux/rwsem.h>
#include <asm/uaccess.h>
#include "gfs2.h"
@@ -45,6 +46,7 @@ static int dump_glock(struct gfs2_glock
static int dump_inode(struct gfs2_inode *ip);
static void gfs2_glock_xmote_th(struct gfs2_holder *gh);
static void gfs2_glock_drop_th(struct gfs2_glock *gl);
+static DECLARE_RWSEM(gfs2_umount_flush_sem);
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
@@ -1578,12 +1580,15 @@ void gfs2_glock_cb(void *cb_data, unsign
struct lm_async_cb *async = data;
struct gfs2_glock *gl;
+ down_read(&gfs2_umount_flush_sem);
gl = gfs2_glock_find(sdp, &async->lc_name);
if (gfs2_assert_warn(sdp, gl))
- return;
+ goto out;
if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
gl->gl_req_bh(gl, async->lc_ret);
gfs2_glock_put(gl);
+out:
+ up_read(&gfs2_umount_flush_sem);
return;
}
@@ -1828,7 +1832,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd
t = jiffies;
}
+ down_write(&gfs2_umount_flush_sem);
invalidate_inodes(sdp->sd_vfs);
+ up_write(&gfs2_umount_flush_sem);
msleep(10);
}
}