* [Cluster-devel] [GFS2 Patch] bz458289: rm on multiple nodes causes panic
@ 2008-08-12 18:39 Bob Peterson
2008-08-13 10:18 ` Steven Whitehouse
0 siblings, 1 reply; 5+ messages in thread
From: Bob Peterson @ 2008-08-12 18:39 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
This patch is a revision of the one I sent earlier.
This patch fixes a problem whereby simultaneous unlink, rmdir,
rename and link operations (e.g. rm -fR *) from multiple nodes
on the same GFS2 file system can cause kernel panics, hangs,
and/or memory corruption. It also gets rid of all the non-rgrp
calls to gfs2_glock_nq_m.
Regards,
Bob Peterson
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
--
fs/gfs2/ops_inode.c | 56 ++++++++++++++++++++++++++++++++++----------------
1 files changed, 38 insertions(+), 18 deletions(-)
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index e2c62f7..35f6f03 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -159,9 +159,13 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
- error = gfs2_glock_nq_m(2, ghs);
+ error = gfs2_glock_nq(ghs); /* parent */
if (error)
- goto out;
+ goto out_parent;
+
+ error = gfs2_glock_nq(ghs + 1); /* child */
+ if (error)
+ goto out_child;
error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
if (error)
@@ -245,8 +249,10 @@ out_alloc:
if (alloc_required)
gfs2_alloc_put(dip);
out_gunlock:
- gfs2_glock_dq_m(2, ghs);
-out:
+ gfs2_glock_dq(ghs + 1);
+out_child:
+ gfs2_glock_dq(ghs);
+out_parent:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
if (!error) {
@@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
if (error)
- goto out_rgrp;
+ goto out_gunlock;
error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
if (error)
@@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
out_end_trans:
gfs2_trans_end(sdp);
+out_gunlock:
gfs2_glock_dq(ghs + 2);
out_rgrp:
gfs2_holder_uninit(ghs + 2);
@@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
struct gfs2_holder ri_gh;
int error;
-
error = gfs2_rindex_hold(sdp, &ri_gh);
if (error)
return error;
@@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
- error = gfs2_glock_nq_m(3, ghs);
+ error = gfs2_glock_nq(ghs); /* parent */
if (error)
- goto out;
+ goto out_parent;
+
+ error = gfs2_glock_nq(ghs + 1); /* child */
+ if (error)
+ goto out_child;
+
+ error = gfs2_glock_nq(ghs + 2); /* rgrp */
+ if (error)
+ goto out_rgrp;
error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
if (error)
@@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
gfs2_trans_end(sdp);
out_gunlock:
- gfs2_glock_dq_m(3, ghs);
-out:
- gfs2_holder_uninit(ghs);
- gfs2_holder_uninit(ghs + 1);
+ gfs2_glock_dq(ghs + 2);
+out_rgrp:
gfs2_holder_uninit(ghs + 2);
+ gfs2_glock_dq(ghs + 1);
+out_child:
+ gfs2_holder_uninit(ghs + 1);
+ gfs2_glock_dq(ghs);
+out_parent:
+ gfs2_holder_uninit(ghs);
gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -639,9 +657,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
}
- error = gfs2_glock_nq_m(num_gh, ghs);
- if (error)
- goto out_uninit;
+ for (x = 0; x < num_gh; x++) {
+ error = gfs2_glock_nq(ghs + x);
+ if (error)
+ goto out_gunlock;
+ }
/* Check out the old directory */
@@ -804,10 +824,10 @@ out_alloc:
if (alloc_required)
gfs2_alloc_put(ndip);
out_gunlock:
- gfs2_glock_dq_m(num_gh, ghs);
-out_uninit:
- for (x = 0; x < num_gh; x++)
+ while (x--) {
+ gfs2_glock_dq(ghs + x);
gfs2_holder_uninit(ghs + x);
+ }
out_gunlock_r:
if (dir_rename)
gfs2_glock_dq_uninit(&r_gh);
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [Cluster-devel] [GFS2 Patch] bz458289: rm on multiple nodes causes panic
2008-08-12 18:39 [Cluster-devel] [GFS2 Patch] bz458289: rm on multiple nodes causes panic Bob Peterson
@ 2008-08-13 10:18 ` Steven Whitehouse
0 siblings, 0 replies; 5+ messages in thread
From: Steven Whitehouse @ 2008-08-13 10:18 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
Now in the -nmw git tree. Thanks,
Steve.
On Tue, 2008-08-12 at 13:39 -0500, Bob Peterson wrote:
> Hi,
>
> This patch is a revision of the one I sent earlier.
>
> This patch fixes a problem whereby simultaneous unlink, rmdir,
> rename and link operations (e.g. rm -fR *) from multiple nodes
> on the same GFS2 file system can cause kernel panics, hangs,
> and/or memory corruption. It also gets rid of all the non-rgrp
> calls to gfs2_glock_nq_m.
>
> Regards,
>
> Bob Peterson
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> --
> fs/gfs2/ops_inode.c | 56 ++++++++++++++++++++++++++++++++++----------------
> 1 files changed, 38 insertions(+), 18 deletions(-)
>
> diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
> index e2c62f7..35f6f03 100644
> --- a/fs/gfs2/ops_inode.c
> +++ b/fs/gfs2/ops_inode.c
> @@ -159,9 +159,13 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
> gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
> gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
>
> - error = gfs2_glock_nq_m(2, ghs);
> + error = gfs2_glock_nq(ghs); /* parent */
> if (error)
> - goto out;
> + goto out_parent;
> +
> + error = gfs2_glock_nq(ghs + 1); /* child */
> + if (error)
> + goto out_child;
>
> error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
> if (error)
> @@ -245,8 +249,10 @@ out_alloc:
> if (alloc_required)
> gfs2_alloc_put(dip);
> out_gunlock:
> - gfs2_glock_dq_m(2, ghs);
> -out:
> + gfs2_glock_dq(ghs + 1);
> +out_child:
> + gfs2_glock_dq(ghs);
> +out_parent:
> gfs2_holder_uninit(ghs);
> gfs2_holder_uninit(ghs + 1);
> if (!error) {
> @@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
>
> error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
> if (error)
> - goto out_rgrp;
> + goto out_gunlock;
>
> error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
> if (error)
> @@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
>
> out_end_trans:
> gfs2_trans_end(sdp);
> +out_gunlock:
> gfs2_glock_dq(ghs + 2);
> out_rgrp:
> gfs2_holder_uninit(ghs + 2);
> @@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
> struct gfs2_holder ri_gh;
> int error;
>
> -
> error = gfs2_rindex_hold(sdp, &ri_gh);
> if (error)
> return error;
> @@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
> rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
> gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
>
> - error = gfs2_glock_nq_m(3, ghs);
> + error = gfs2_glock_nq(ghs); /* parent */
> if (error)
> - goto out;
> + goto out_parent;
> +
> + error = gfs2_glock_nq(ghs + 1); /* child */
> + if (error)
> + goto out_child;
> +
> + error = gfs2_glock_nq(ghs + 2); /* rgrp */
> + if (error)
> + goto out_rgrp;
>
> error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
> if (error)
> @@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
> gfs2_trans_end(sdp);
>
> out_gunlock:
> - gfs2_glock_dq_m(3, ghs);
> -out:
> - gfs2_holder_uninit(ghs);
> - gfs2_holder_uninit(ghs + 1);
> + gfs2_glock_dq(ghs + 2);
> +out_rgrp:
> gfs2_holder_uninit(ghs + 2);
> + gfs2_glock_dq(ghs + 1);
> +out_child:
> + gfs2_holder_uninit(ghs + 1);
> + gfs2_glock_dq(ghs);
> +out_parent:
> + gfs2_holder_uninit(ghs);
> gfs2_glock_dq_uninit(&ri_gh);
> return error;
> }
> @@ -639,9 +657,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
> gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
> }
>
> - error = gfs2_glock_nq_m(num_gh, ghs);
> - if (error)
> - goto out_uninit;
> + for (x = 0; x < num_gh; x++) {
> + error = gfs2_glock_nq(ghs + x);
> + if (error)
> + goto out_gunlock;
> + }
>
> /* Check out the old directory */
>
> @@ -804,10 +824,10 @@ out_alloc:
> if (alloc_required)
> gfs2_alloc_put(ndip);
> out_gunlock:
> - gfs2_glock_dq_m(num_gh, ghs);
> -out_uninit:
> - for (x = 0; x < num_gh; x++)
> + while (x--) {
> + gfs2_glock_dq(ghs + x);
> gfs2_holder_uninit(ghs + x);
> + }
> out_gunlock_r:
> if (dir_rename)
> gfs2_glock_dq_uninit(&r_gh);
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Cluster-devel] [GFS2 Patch] bz458289: rm on multiple nodes causes panic
@ 2008-08-12 3:41 Bob Peterson
2008-08-12 8:53 ` Steven Whitehouse
0 siblings, 1 reply; 5+ messages in thread
From: Bob Peterson @ 2008-08-12 3:41 UTC (permalink / raw)
To: cluster-devel.redhat.com
This patch fixes a problem whereby simultaneous delete operations
(e.g. rm -fR *) from multiple nodes on the same GFS2 file system
can cause kernel panics, hangs, and/or memory corruption.
Regards,
Bob Peterson
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
--
fs/gfs2/ops_inode.c | 24 ++++++------------------
1 files changed, 6 insertions(+), 18 deletions(-)
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index e2c62f7..a072c9a 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -288,25 +288,17 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
- error = gfs2_glock_nq(ghs); /* parent */
- if (error)
- goto out_parent;
-
- error = gfs2_glock_nq(ghs + 1); /* child */
- if (error)
- goto out_child;
-
- error = gfs2_glock_nq(ghs + 2); /* rgrp */
+ error = gfs2_glock_nq_m(3, ghs);
if (error)
- goto out_rgrp;
+ goto out;
error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
if (error)
- goto out_rgrp;
+ goto out;
error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
if (error)
- goto out_rgrp;
+ goto out;
error = gfs2_dir_del(dip, &dentry->d_name);
if (error)
@@ -316,14 +308,10 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
out_end_trans:
gfs2_trans_end(sdp);
- gfs2_glock_dq(ghs + 2);
-out_rgrp:
+out:
+ gfs2_glock_dq_m(3, ghs);
gfs2_holder_uninit(ghs + 2);
- gfs2_glock_dq(ghs + 1);
-out_child:
gfs2_holder_uninit(ghs + 1);
- gfs2_glock_dq(ghs);
-out_parent:
gfs2_holder_uninit(ghs);
gfs2_glock_dq_uninit(&ri_gh);
return error;
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [Cluster-devel] [GFS2 Patch] bz458289: rm on multiple nodes causes panic
2008-08-12 3:41 Bob Peterson
@ 2008-08-12 8:53 ` Steven Whitehouse
2008-08-12 9:10 ` Steven Whitehouse
0 siblings, 1 reply; 5+ messages in thread
From: Steven Whitehouse @ 2008-08-12 8:53 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
This looks like the correct lock order to me. I guess it must be the
"other" process that's violating the correct order?
Steve.
On Mon, 2008-08-11 at 22:41 -0500, Bob Peterson wrote:
> This patch fixes a problem whereby simultaneous delete operations
> (e.g. rm -fR *) from multiple nodes on the same GFS2 file system
> can cause kernel panics, hangs, and/or memory corruption.
>
> Regards,
>
> Bob Peterson
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> --
> fs/gfs2/ops_inode.c | 24 ++++++------------------
> 1 files changed, 6 insertions(+), 18 deletions(-)
>
> diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
> index e2c62f7..a072c9a 100644
> --- a/fs/gfs2/ops_inode.c
> +++ b/fs/gfs2/ops_inode.c
> @@ -288,25 +288,17 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
> gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
>
>
> - error = gfs2_glock_nq(ghs); /* parent */
> - if (error)
> - goto out_parent;
> -
> - error = gfs2_glock_nq(ghs + 1); /* child */
> - if (error)
> - goto out_child;
> -
> - error = gfs2_glock_nq(ghs + 2); /* rgrp */
> + error = gfs2_glock_nq_m(3, ghs);
> if (error)
> - goto out_rgrp;
> + goto out;
>
> error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
> if (error)
> - goto out_rgrp;
> + goto out;
>
> error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
> if (error)
> - goto out_rgrp;
> + goto out;
>
> error = gfs2_dir_del(dip, &dentry->d_name);
> if (error)
> @@ -316,14 +308,10 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
>
> out_end_trans:
> gfs2_trans_end(sdp);
> - gfs2_glock_dq(ghs + 2);
> -out_rgrp:
> +out:
> + gfs2_glock_dq_m(3, ghs);
> gfs2_holder_uninit(ghs + 2);
> - gfs2_glock_dq(ghs + 1);
> -out_child:
> gfs2_holder_uninit(ghs + 1);
> - gfs2_glock_dq(ghs);
> -out_parent:
> gfs2_holder_uninit(ghs);
> gfs2_glock_dq_uninit(&ri_gh);
> return error;
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* [Cluster-devel] [GFS2 Patch] bz458289: rm on multiple nodes causes panic
2008-08-12 8:53 ` Steven Whitehouse
@ 2008-08-12 9:10 ` Steven Whitehouse
0 siblings, 0 replies; 5+ messages in thread
From: Steven Whitehouse @ 2008-08-12 9:10 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
Actually, now I read the code again, I think it's your diff that's
backwards :-) Can you send me one the other way around?
Steve.
On Tue, 2008-08-12 at 09:53 +0100, Steven Whitehouse wrote:
> Hi,
>
> This looks like the correct lock order to me. I guess it must be the
> "other" process thats violating the correct order?
>
> Steve.
>
> On Mon, 2008-08-11 at 22:41 -0500, Bob Peterson wrote:
> > This patch fixes a problem whereby simultaneous delete operations
> > (e.g. rm -fR *) from multiple nodes on the same GFS2 file system
> > can cause kernel panics, hangs, and/or memory corruption.
> >
> > Regards,
> >
> > Bob Peterson
> >
> > Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> > --
> > fs/gfs2/ops_inode.c | 24 ++++++------------------
> > 1 files changed, 6 insertions(+), 18 deletions(-)
> >
> > diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
> > index e2c62f7..a072c9a 100644
> > --- a/fs/gfs2/ops_inode.c
> > +++ b/fs/gfs2/ops_inode.c
> > @@ -288,25 +288,17 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
> > gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
> >
> >
> > - error = gfs2_glock_nq(ghs); /* parent */
> > - if (error)
> > - goto out_parent;
> > -
> > - error = gfs2_glock_nq(ghs + 1); /* child */
> > - if (error)
> > - goto out_child;
> > -
> > - error = gfs2_glock_nq(ghs + 2); /* rgrp */
> > + error = gfs2_glock_nq_m(3, ghs);
> > if (error)
> > - goto out_rgrp;
> > + goto out;
> >
> > error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
> > if (error)
> > - goto out_rgrp;
> > + goto out;
> >
> > error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
> > if (error)
> > - goto out_rgrp;
> > + goto out;
> >
> > error = gfs2_dir_del(dip, &dentry->d_name);
> > if (error)
> > @@ -316,14 +308,10 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
> >
> > out_end_trans:
> > gfs2_trans_end(sdp);
> > - gfs2_glock_dq(ghs + 2);
> > -out_rgrp:
> > +out:
> > + gfs2_glock_dq_m(3, ghs);
> > gfs2_holder_uninit(ghs + 2);
> > - gfs2_glock_dq(ghs + 1);
> > -out_child:
> > gfs2_holder_uninit(ghs + 1);
> > - gfs2_glock_dq(ghs);
> > -out_parent:
> > gfs2_holder_uninit(ghs);
> > gfs2_glock_dq_uninit(&ri_gh);
> > return error;
> >
> >
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2008-08-13 10:18 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-12 18:39 [Cluster-devel] [GFS2 Patch] bz458289: rm on multiple nodes causes panic Bob Peterson
2008-08-13 10:18 ` Steven Whitehouse
-- strict thread matches above, loose matches on Subject: below --
2008-08-12 3:41 Bob Peterson
2008-08-12 8:53 ` Steven Whitehouse
2008-08-12 9:10 ` Steven Whitehouse
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).