From mboxrd@z Thu Jan 1 00:00:00 1970 From: Steven Whitehouse Date: Wed, 19 Sep 2007 13:22:50 +0100 Subject: [Cluster-devel] [PATCH] [GFS2] bz288581 alternate gfs2_iget to avoid looking up inodes being freed In-Reply-To: <20070918183317.GA1533@ether.msp.redhat.com> References: <20070918183317.GA1533@ether.msp.redhat.com> Message-ID: <1190204570.1068.86.camel@quoit> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi, Now in the -nmw git tree. Thanks, Steve. On Tue, 2007-09-18 at 13:33 -0500, Benjamin Marzinski wrote: > There is a possible deadlock between two processes on the same node, where one > process is deleting an inode, and another process is looking for allocated but > unused inodes to delete in order to create more space. > > process A does an iput() on inode X, and it's i_count drops to 0. This causes > iput_final() to be called, which puts an inode into state I_FREEING at > generic_delete_inode(). There no point between when iput_final() is called, and > when I_FREEING is set where GFS2 could acquire any glocks. Once I_FREEING is > set, no other process on that node can successfully look up that inode until > the delete finishes. > > process B locks the the resource group for the same inode in get_local_rgrp(), > which is called by gfs2_inplace_reserve_i() > > process A tries to lock the resource group for the inode in > gfs2_dinode_dealloc(), but it's already locked by process B > > process B waits in find_inode for the inode to have the I_FREEING state cleared. > > Deadlock. > > This patch solves the problem by adding an alternative to gfs2_iget(), > gfs2_iget_skip(), that simply skips any inodes that are in the I_FREEING > state.o The alternate test function is just like the original one, except that > it fails if the inode is being freed, and sets a skipped flag. The alternate > set function is just like the original, except that it fails if the skipped > flag is set. Only try_rgrp_unlink() calls gfs2_iget_skip() instead of > gfs2_iget(). > > Signed-off-by: Benjamin E. Marzinski > plain text document attachment (upstream_288581.patch) > diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/dir.c gfs2-2.6-nmw-bz288581/fs/gfs2/dir.c > --- gfs2-2.6-nmw/fs/gfs2/dir.c 2007-09-14 14:00:39.000000000 -0500 > +++ gfs2-2.6-nmw-bz288581/fs/gfs2/dir.c 2007-09-18 11:42:04.000000000 -0500 > @@ -1502,7 +1502,7 @@ struct inode *gfs2_dir_search(struct ino > inode = gfs2_inode_lookup(dir->i_sb, > be16_to_cpu(dent->de_type), > be64_to_cpu(dent->de_inum.no_addr), > - be64_to_cpu(dent->de_inum.no_formal_ino)); > + be64_to_cpu(dent->de_inum.no_formal_ino), 0); > brelse(bh); > return inode; > } > diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/inode.c gfs2-2.6-nmw-bz288581/fs/gfs2/inode.c > --- gfs2-2.6-nmw/fs/gfs2/inode.c 2007-09-14 14:00:39.000000000 -0500 > +++ gfs2-2.6-nmw-bz288581/fs/gfs2/inode.c 2007-09-18 11:42:04.000000000 -0500 > @@ -77,6 +77,49 @@ static struct inode *gfs2_iget(struct su > return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); > } > > +struct gfs2_skip_data { > + u64 no_addr; > + int skipped; > +}; > + > +static int iget_skip_test(struct inode *inode, void *opaque) > +{ > + struct gfs2_inode *ip = GFS2_I(inode); > + struct gfs2_skip_data *data = opaque; > + > + if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){ > + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ > + data->skipped = 1; > + return 0; > + } > + return 1; > + } > + return 0; > +} > + > +static int iget_skip_set(struct inode *inode, void *opaque) > +{ > + struct gfs2_inode *ip = GFS2_I(inode); > + struct gfs2_skip_data *data = opaque; > + > + if (data->skipped) > + return 1; > + inode->i_ino = (unsigned long)(data->no_addr); > + ip->i_no_addr = data->no_addr; > + return 0; > +} > + > +static struct inode *gfs2_iget_skip(struct super_block *sb, > + u64 no_addr) > +{ > + struct gfs2_skip_data data; > + unsigned long hash = (unsigned long)no_addr; > + > + data.no_addr = no_addr; > + data.skipped = 0; > + return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); > +} > + > /** > * GFS2 lookup code fills in vfs inode contents based on info obtained > * from directory entry inside gfs2_inode_lookup(). This has caused issues > @@ -112,6 +155,7 @@ void gfs2_set_iop(struct inode *inode) > * @sb: The super block > * @no_addr: The inode number > * @type: The type of the inode > + * @skip_freeing: set this not return an inode if it is currently being freed. > * > * Returns: A VFS inode, or an error > */ > @@ -119,13 +163,19 @@ void gfs2_set_iop(struct inode *inode) > struct inode *gfs2_inode_lookup(struct super_block *sb, > unsigned int type, > u64 no_addr, > - u64 no_formal_ino) > + u64 no_formal_ino, int skip_freeing) > { > - struct inode *inode = gfs2_iget(sb, no_addr); > - struct gfs2_inode *ip = GFS2_I(inode); > + struct inode *inode; > + struct gfs2_inode *ip; > struct gfs2_glock *io_gl; > int error; > > + if (skip_freeing) > + inode = gfs2_iget_skip(sb, no_addr); > + else > + inode = gfs2_iget(sb, no_addr); > + ip = GFS2_I(inode); > + > if (!inode) > return ERR_PTR(-ENOBUFS); > > @@ -949,7 +999,7 @@ struct inode *gfs2_createi(struct gfs2_h > > inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), > inum.no_addr, > - inum.no_formal_ino); > + inum.no_formal_ino, 0); > if (IS_ERR(inode)) > goto fail_gunlock2; > > diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/inode.h gfs2-2.6-nmw-bz288581/fs/gfs2/inode.h > --- gfs2-2.6-nmw/fs/gfs2/inode.h 2007-09-14 14:00:39.000000000 -0500 > +++ gfs2-2.6-nmw-bz288581/fs/gfs2/inode.h 2007-09-18 13:03:27.000000000 -0500 > @@ -49,7 +49,8 @@ static inline void gfs2_inum_out(const s > void gfs2_inode_attr_in(struct gfs2_inode *ip); > void gfs2_set_iop(struct inode *inode); > struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, > - u64 no_addr, u64 no_formal_ino); > + u64 no_addr, u64 no_formal_ino, > + int skip_freeing); > struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); > > int gfs2_inode_refresh(struct gfs2_inode *ip); > diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/ops_export.c gfs2-2.6-nmw-bz288581/fs/gfs2/ops_export.c > --- gfs2-2.6-nmw/fs/gfs2/ops_export.c 2007-09-14 14:00:39.000000000 -0500 > +++ gfs2-2.6-nmw-bz288581/fs/gfs2/ops_export.c 2007-09-18 11:42:04.000000000 -0500 > @@ -237,7 +237,7 @@ static struct dentry *gfs2_get_dentry(st > > inode = gfs2_inode_lookup(sb, DT_UNKNOWN, > inum->no_addr, > - 0); > + 0, 0); > if (!inode) > goto fail; > if (IS_ERR(inode)) { > diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/ops_fstype.c gfs2-2.6-nmw-bz288581/fs/gfs2/ops_fstype.c > --- gfs2-2.6-nmw/fs/gfs2/ops_fstype.c 2007-09-18 11:24:03.000000000 -0500 > +++ gfs2-2.6-nmw-bz288581/fs/gfs2/ops_fstype.c 2007-09-18 11:42:04.000000000 -0500 > @@ -230,7 +230,7 @@ fail: > static inline struct inode *gfs2_lookup_root(struct super_block *sb, > u64 no_addr) > { > - return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); > + return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); > } > > static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) > diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/rgrp.c gfs2-2.6-nmw-bz288581/fs/gfs2/rgrp.c > --- gfs2-2.6-nmw/fs/gfs2/rgrp.c 2007-09-14 14:00:39.000000000 -0500 > +++ gfs2-2.6-nmw-bz288581/fs/gfs2/rgrp.c 2007-09-18 11:42:04.000000000 -0500 > @@ -884,7 +884,7 @@ static struct inode *try_rgrp_unlink(str > continue; > *last_unlinked = no_addr; > inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, > - no_addr, -1); > + no_addr, -1, 1); > if (!IS_ERR(inode)) > return inode; > }