From mboxrd@z Thu Jan  1 00:00:00 1970
From: Fabio M. Di Nitto
Date: Tue, 26 Jan 2010 09:13:18 +0100
Subject: [Cluster-devel] GFS2: Wait for unlock completion on umount
In-Reply-To: <1263461489.2611.11.camel@localhost>
References: <1263461489.2611.11.camel@localhost>
Message-ID: <4B5EA41E.9060500@redhat.com>
List-Id:
To: cluster-devel.redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

Still not good.

Setup: 8-node cluster (node1-4 x86, node5-8 x86_64).

Mounting one partition -> OK.
Unmounting is an OOPS-o-rama:

http://fabbione.fedorapeople.org/oops.tar.bz2

5 out of 8 nodes oops (node1/2/3 are ok and can continue mounting
again). This happens at the very first iteration.

Kernel is Fedora rawhide (2.6.33-rcX...) + -nmw fixes.

Fabio

On 1/14/2010 10:31 AM, Steven Whitehouse wrote:
>
> This patch adds a wait on umount between the point at which we
> dispose of all glocks and the point at which we unmount the
> lock protocol. This ensures that we've received all the replies
> to our unlock requests before we stop the locking.
>
> Signed-off-by: Steven Whitehouse
> Reported-by: Fabio M. Di Nitto
>
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index f93f9b9..b8025e5 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -543,6 +543,8 @@ struct gfs2_sbd {
>  	struct gfs2_holder sd_live_gh;
>  	struct gfs2_glock *sd_rename_gl;
>  	struct gfs2_glock *sd_trans_gl;
> +	wait_queue_head_t sd_glock_wait;
> +	atomic_t sd_glock_disposal;
>
>  	/* Inode Stuff */
>
> diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
> index 094839e..484411c 100644
> --- a/fs/gfs2/lock_dlm.c
> +++ b/fs/gfs2/lock_dlm.c
> @@ -21,6 +21,7 @@ static void gdlm_ast(void *arg)
>  {
>  	struct gfs2_glock *gl = arg;
>  	unsigned ret = gl->gl_state;
> +	struct gfs2_sbd *sdp = gl->gl_sbd;
>
>  	BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
>
> @@ -33,6 +34,8 @@ static void gdlm_ast(void *arg)
>  			kmem_cache_free(gfs2_glock_aspace_cachep, gl);
>  		else
>  			kmem_cache_free(gfs2_glock_cachep, gl);
> +		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
> +			wake_up(&sdp->sd_glock_wait);
>  		return;
>  	case -DLM_ECANCEL: /* Cancel while getting lock */
>  		ret |= LM_OUT_CANCELED;
> @@ -170,7 +173,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
>  static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
>  {
>  	struct gfs2_glock *gl = ptr;
> -	struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
> +	struct gfs2_sbd *sdp = gl->gl_sbd;
> +	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
>  	int error;
>
>  	if (gl->gl_lksb.sb_lkid == 0) {
> @@ -186,6 +190,7 @@ static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
>  		       (unsigned long long)gl->gl_name.ln_number, error);
>  		return;
>  	}
> +	atomic_inc(&sdp->sd_glock_disposal);
>  }
>
>  static void gdlm_cancel(struct gfs2_glock *gl)
> diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
> index 968a99f..9baa566 100644
> --- a/fs/gfs2/ops_fstype.c
> +++ b/fs/gfs2/ops_fstype.c
> @@ -81,6 +81,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
>
>  	gfs2_tune_init(&sdp->sd_tune);
>
> +	init_waitqueue_head(&sdp->sd_glock_wait);
> +	atomic_set(&sdp->sd_glock_disposal, 0);
>  	spin_lock_init(&sdp->sd_statfs_spin);
>
>  	spin_lock_init(&sdp->sd_rindex_spin);
> diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
> index c008b08..e2bf19f 100644
> --- a/fs/gfs2/super.c
> +++ b/fs/gfs2/super.c
> @@ -21,6 +21,7 @@
>  #include
>  #include
>  #include
> +#include <linux/wait.h>
>
>  #include "gfs2.h"
>  #include "incore.h"
> @@ -860,6 +861,8 @@ restart:
>  	/* Take apart glock structures and buffer lists */
>  	invalidate_inodes(sdp->sd_vfs);
>  	gfs2_gl_hash_clear(sdp);
> +	/* Wait for dlm to reply to all our unlock requests */
> +	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
>  	/* Unmount the locking protocol */
>  	gfs2_lm_unmount(sdp);
>
>
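For readers skimming the patch: the fix is a counted-completion barrier.
gdlm_put_lock() bumps sd_glock_disposal for every unlock request handed to
the DLM, gdlm_ast() drops the count when the reply arrives and wakes
sd_glock_wait once it hits zero, and the umount path wait_event()s on that
count before calling gfs2_lm_unmount(). Below is a minimal user-space
sketch of the same pattern, assuming pthreads; every name in it
(disposal_get(), disposal_put(), fake_unlock_completion(), ...) is made up
for illustration and is not a GFS2 or DLM interface.

/*
 * Counted-completion sketch: every asynchronous "unlock" in flight bumps a
 * counter, every completion drops it, and teardown blocks until the counter
 * reaches zero. Illustrative only; not GFS2/DLM code.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t disposal_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  disposal_cond = PTHREAD_COND_INITIALIZER;
static int disposal_count;	/* outstanding async completions */

/* Rough analogue of atomic_inc(&sdp->sd_glock_disposal) in gdlm_put_lock() */
static void disposal_get(void)
{
	pthread_mutex_lock(&disposal_lock);
	disposal_count++;
	pthread_mutex_unlock(&disposal_lock);
}

/* Rough analogue of atomic_dec_and_test() + wake_up() in gdlm_ast() */
static void disposal_put(void)
{
	pthread_mutex_lock(&disposal_lock);
	if (--disposal_count == 0)
		pthread_cond_broadcast(&disposal_cond);
	pthread_mutex_unlock(&disposal_lock);
}

/* Rough analogue of wait_event(sdp->sd_glock_wait, ...) before unmounting */
static void disposal_wait_for_zero(void)
{
	pthread_mutex_lock(&disposal_lock);
	while (disposal_count != 0)
		pthread_cond_wait(&disposal_cond, &disposal_lock);
	pthread_mutex_unlock(&disposal_lock);
}

/* Stand-in for the DLM unlock reply arriving some time later */
static void *fake_unlock_completion(void *arg)
{
	(void)arg;
	usleep(100 * 1000);	/* pretend the reply takes a while */
	disposal_put();
	return NULL;
}

int main(void)
{
	pthread_t threads[8];
	int i;

	/* "Dispose of all glocks": queue eight asynchronous unlocks */
	for (i = 0; i < 8; i++) {
		disposal_get();
		pthread_create(&threads[i], NULL, fake_unlock_completion, NULL);
	}

	/* Only tear down the locking once every reply has come back */
	disposal_wait_for_zero();
	printf("all unlock replies received, safe to stop locking\n");

	for (i = 0; i < 8; i++)
		pthread_join(threads[i], NULL);
	return 0;
}

The kernel patch expresses the same idea with an atomic_t counter and a
wait queue (wait_event()/wake_up()) rather than a mutex/condvar pair, which
is the idiomatic way to wait for a condition in kernel code.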