* [Cluster-devel] Fix for unlink deadlock
@ 2007-01-29 23:13 Russell Cattelan
2007-01-30 13:18 ` Steven Whitehouse
0 siblings, 1 reply; 5+ messages in thread
From: Russell Cattelan @ 2007-01-29 23:13 UTC (permalink / raw)
To: cluster-devel.redhat.com
Note the current glock code in the tree is completely busted
and will dead lock almost immediately.
I have reverted several changes to glock.c and have
tested this patch with the older glock code.
dbench will now run through to completion.
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=217356
--
Russell Cattelan <cattelan@redhat.com>
-------------- next part --------------
commit 83f2406e817e4964e4d0a25454da54e73005bf43
Author: Russell Cattelan <cattelan@xenon.msp.redhat.com>
Date: Mon Jan 29 17:06:06 2007 -0600
[GFS2] Fix unlink deadlocks
Move the glock acquisition to outside of the transactions.
Lock odering must be preserved in order to prevent ABBA
deadlocks. The current gfs2_change_nlink code would tries
to grab the glock after having started a transaction and thus is holding
the log lock. This is inconsistent with other code paths in
gfs that grab the resource group glock prior to staring
a tranactions.
One problem with this fix is that the resource group
lock is always grabbed now even if the inode still has
ref count and can not be marked for unlink.
Signed-off-by: Russell Cattelan <cattelan@redhat.com>
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 88fcfb4..0d6831a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -280,50 +280,6 @@ out:
return error;
}
-static int gfs2_change_nlink_i(struct gfs2_inode *ip)
-{
- struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info;
- struct gfs2_inode *rindex = GFS2_I(sdp->sd_rindex);
- struct gfs2_glock *ri_gl = rindex->i_gl;
- struct gfs2_rgrpd *rgd;
- struct gfs2_holder ri_gh, rg_gh;
- int existing, error;
-
- /* if we come from rename path, we could have the lock already */
- existing = gfs2_glock_is_locked_by_me(ri_gl);
- if (!existing) {
- error = gfs2_rindex_hold(sdp, &ri_gh);
- if (error)
- goto out;
- }
-
- /* find the matching rgd */
- error = -EIO;
- rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
- if (!rgd)
- goto out_norgrp;
-
- /*
- * Eventually we may want to move rgd(s) to a linked list
- * and piggyback the free logic into one of gfs2 daemons
- * to gain some performance.
- */
- if (!rgd->rd_gl || !gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
- if (error)
- goto out_norgrp;
-
- gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
- gfs2_glock_dq_uninit(&rg_gh);
- }
-
-out_norgrp:
- if (!existing)
- gfs2_glock_dq_uninit(&ri_gh);
-out:
- return error;
-}
-
/**
* gfs2_change_nlink - Change nlink count on inode
* @ip: The GFS2 inode
@@ -365,7 +321,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
mark_inode_dirty(&ip->i_inode);
if (ip->i_inode.i_nlink == 0)
- error = gfs2_change_nlink_i(ip);
+ gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
return error;
}
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 5591f89..f40a848 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -264,13 +264,23 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
- struct gfs2_holder ghs[2];
+ struct gfs2_holder ghs[3];
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_holder ri_gh;
int error;
+ error = gfs2_rindex_hold(sdp, &ri_gh);
+ if (error)
+ return error;
+
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
- error = gfs2_glock_nq_m(2, ghs);
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+ gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+
+
+ error = gfs2_glock_nq_m(3, ghs);
if (error)
goto out;
@@ -291,10 +301,12 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
out_end_trans:
gfs2_trans_end(sdp);
out_gunlock:
- gfs2_glock_dq_m(2, ghs);
+ gfs2_glock_dq_m(3, ghs);
out:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
+ gfs2_holder_uninit(ghs + 2);
+ gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -449,13 +461,22 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
- struct gfs2_holder ghs[2];
+ struct gfs2_holder ghs[3];
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_holder ri_gh;
int error;
+
+ error = gfs2_rindex_hold(sdp, &ri_gh);
+ if (error)
+ return error;
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
- error = gfs2_glock_nq_m(2, ghs);
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+ gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+
+ error = gfs2_glock_nq_m(3, ghs);
if (error)
goto out;
@@ -483,10 +504,12 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
gfs2_trans_end(sdp);
out_gunlock:
- gfs2_glock_dq_m(2, ghs);
+ gfs2_glock_dq_m(3, ghs);
out:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
+ gfs2_holder_uninit(ghs + 2);
+ gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -547,7 +570,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
struct gfs2_inode *nip = NULL;
struct gfs2_sbd *sdp = GFS2_SB(odir);
- struct gfs2_holder ghs[4], r_gh;
+ struct gfs2_holder ghs[5], r_gh;
+ struct gfs2_rgrpd *nrgd;
unsigned int num_gh;
int dir_rename = 0;
int alloc_required;
@@ -587,6 +611,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (nip) {
gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
num_gh++;
+ /* grab the resource lock for unlink flag twiddling
+ * this is the case of the target file already existing
+ * so we unlink before doing the rename
+ */
+ nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
+ if (nrgd)
+ gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
}
error = gfs2_glock_nq_m(num_gh, ghs);
^ permalink raw reply related [flat|nested] 5+ messages in thread* [Cluster-devel] Fix for unlink deadlock
2007-01-29 23:13 [Cluster-devel] Fix for unlink deadlock Russell Cattelan
@ 2007-01-30 13:18 ` Steven Whitehouse
2007-02-01 0:17 ` Wendy Cheng
0 siblings, 1 reply; 5+ messages in thread
From: Steven Whitehouse @ 2007-01-30 13:18 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
On Mon, 2007-01-29 at 17:13 -0600, Russell Cattelan wrote:
> Note the current glock code in the tree is completely busted
> and will dead lock almost immediately.
>
> I have reverted several changes to glock.c and have
> tested this patch with the older glock code.
> dbench will now run through to completion.
>
>
> https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=217356
Now applied to the -nmw git tree. I think the way to get around the
problem with not knowing whether the link count will hit zero is just to
remove the code which changes the link count from gfs2_change_nlink and
to turn that into a function which just syncs the current link count to
the on-disk inode.
It would also solve the problem of needing to work out whether to use
inc_nlink() or dec_nlink() to make the change to the link count. Due to
the forthcoming read-only bind mounts work, we must always use the
macros to change the link count rather than doing it directly.
Steve.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Cluster-devel] Fix for unlink deadlock
2007-01-30 13:18 ` Steven Whitehouse
@ 2007-02-01 0:17 ` Wendy Cheng
2007-02-01 0:22 ` Wendy Cheng
2007-02-01 0:46 ` Russell Cattelan
0 siblings, 2 replies; 5+ messages in thread
From: Wendy Cheng @ 2007-02-01 0:17 UTC (permalink / raw)
To: cluster-devel.redhat.com
On Tue, 2007-01-30 at 13:18 +0000, Steven Whitehouse wrote:
> Hi,
>
> On Mon, 2007-01-29 at 17:13 -0600, Russell Cattelan wrote:
> > Note the current glock code in the tree is completely busted
> > and will dead lock almost immediately.
> >
> > I have reverted several changes to glock.c and have
> > tested this patch with the older glock code.
> > dbench will now run through to completion.
> >
> >
> > https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=217356
>
> Now applied to the -nmw git tree.
hmm.. just untar Steve's git tree he sent me this morning few minutes
ago and spot this - sorry I should have read this post earlier ... Look
to me this will deadlock gfs2_rename() *again* (since it tries to grab
rindex lock multiple times). Will test this out tomorrow when I get the
connectathon cluster up.
-- Wendy
> I think the way to get around the
> problem with not knowing whether the link count will hit zero is just to
> remove the code which changes the link count from gfs2_change_nlink and
> to turn that into a function which just syncs the current link count to
> the on-disk inode.
>
> It would also solve the problem of needing to work out whether to use
> inc_nlink() or dec_nlink() to make the change to the link count. Due to
> the forthcoming read-only bind mounts work, we must always use the
> macros to change the link count rather than doing it directly.
>
> Steve.
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Cluster-devel] Fix for unlink deadlock
2007-02-01 0:17 ` Wendy Cheng
@ 2007-02-01 0:22 ` Wendy Cheng
2007-02-01 0:46 ` Russell Cattelan
1 sibling, 0 replies; 5+ messages in thread
From: Wendy Cheng @ 2007-02-01 0:22 UTC (permalink / raw)
To: cluster-devel.redhat.com
On Wed, 2007-01-31 at 19:17 -0500, Wendy Cheng wrote:
> On Tue, 2007-01-30 at 13:18 +0000, Steven Whitehouse wrote:
> > Hi,
> >
> > On Mon, 2007-01-29 at 17:13 -0600, Russell Cattelan wrote:
> > > Note the current glock code in the tree is completely busted
> > > and will dead lock almost immediately.
> > >
> > > I have reverted several changes to glock.c and have
> > > tested this patch with the older glock code.
> > > dbench will now run through to completion.
> > >
> > >
> > > https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=217356
> >
> > Now applied to the -nmw git tree.
>
> hmm.. just untar Steve's git tree he sent me this morning few minutes
> ago and spot this - sorry I should have read this post earlier ... Look
> to me this will deadlock gfs2_rename() *again* (since it tries to grab
> rindex lock multiple times). Will test this out tomorrow when I get the
> connectathon cluster up.
>
>
BTW, Is it too late to NACK this patch ? :) ... Wendy
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Cluster-devel] Fix for unlink deadlock
2007-02-01 0:17 ` Wendy Cheng
2007-02-01 0:22 ` Wendy Cheng
@ 2007-02-01 0:46 ` Russell Cattelan
1 sibling, 0 replies; 5+ messages in thread
From: Russell Cattelan @ 2007-02-01 0:46 UTC (permalink / raw)
To: cluster-devel.redhat.com
On Wed, 2007-01-31 at 19:17 -0500, Wendy Cheng wrote:
> On Tue, 2007-01-30 at 13:18 +0000, Steven Whitehouse wrote:
> > Hi,
> >
> > On Mon, 2007-01-29 at 17:13 -0600, Russell Cattelan wrote:
> > > Note the current glock code in the tree is completely busted
> > > and will dead lock almost immediately.
> > >
> > > I have reverted several changes to glock.c and have
> > > tested this patch with the older glock code.
> > > dbench will now run through to completion.
> > >
> > >
> > > https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=217356
> >
> > Now applied to the -nmw git tree.
>
> hmm.. just untar Steve's git tree he sent me this morning few minutes
> ago and spot this - sorry I should have read this post earlier ... Look
> to me this will deadlock gfs2_rename() *again* (since it tries to grab
> rindex lock multiple times). Will test this out tomorrow when I get the
> connectathon cluster up.
>
I never understood what sequence of events that you found that would
cause the recursive locking, but I tried to set it up such that rename
will only grab the rsgr lock once and outside log locks.
I tested the touch foo foo1 ; mv foo foo1 scenario and it works.
>
> -- Wendy
>
>
> > I think the way to get around the
> > problem with not knowing whether the link count will hit zero is just to
> > remove the code which changes the link count from gfs2_change_nlink and
> > to turn that into a function which just syncs the current link count to
> > the on-disk inode.
> >
> > It would also solve the problem of needing to work out whether to use
> > inc_nlink() or dec_nlink() to make the change to the link count. Due to
> > the forthcoming read-only bind mounts work, we must always use the
> > macros to change the link count rather than doing it directly.
> >
> > Steve.
> >
> >
--
Russell Cattelan <cattelan@redhat.com>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2007-02-01 0:46 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-01-29 23:13 [Cluster-devel] Fix for unlink deadlock Russell Cattelan
2007-01-30 13:18 ` Steven Whitehouse
2007-02-01 0:17 ` Wendy Cheng
2007-02-01 0:22 ` Wendy Cheng
2007-02-01 0:46 ` Russell Cattelan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).