From: Steven Whitehouse <swhiteho@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH] [GFS2] bz288581 alternate gfs2_iget to avoid looking up inodes being freed
Date: Wed, 19 Sep 2007 13:22:50 +0100 [thread overview]
Message-ID: <1190204570.1068.86.camel@quoit> (raw)
In-Reply-To: <20070918183317.GA1533@ether.msp.redhat.com>
Hi,
Now in the -nmw git tree. Thanks,
Steve.
On Tue, 2007-09-18 at 13:33 -0500, Benjamin Marzinski wrote:
> There is a possible deadlock between two processes on the same node, where one
> process is deleting an inode, and another process is looking for allocated but
> unused inodes to delete in order to create more space.
>
> process A does an iput() on inode X, and it's i_count drops to 0. This causes
> iput_final() to be called, which puts an inode into state I_FREEING at
> generic_delete_inode(). There no point between when iput_final() is called, and
> when I_FREEING is set where GFS2 could acquire any glocks. Once I_FREEING is
> set, no other process on that node can successfully look up that inode until
> the delete finishes.
>
> process B locks the the resource group for the same inode in get_local_rgrp(),
> which is called by gfs2_inplace_reserve_i()
>
> process A tries to lock the resource group for the inode in
> gfs2_dinode_dealloc(), but it's already locked by process B
>
> process B waits in find_inode for the inode to have the I_FREEING state cleared.
>
> Deadlock.
>
> This patch solves the problem by adding an alternative to gfs2_iget(),
> gfs2_iget_skip(), that simply skips any inodes that are in the I_FREEING
> state.o The alternate test function is just like the original one, except that
> it fails if the inode is being freed, and sets a skipped flag. The alternate
> set function is just like the original, except that it fails if the skipped
> flag is set. Only try_rgrp_unlink() calls gfs2_iget_skip() instead of
> gfs2_iget().
>
> Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com>
> plain text document attachment (upstream_288581.patch)
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/dir.c gfs2-2.6-nmw-bz288581/fs/gfs2/dir.c
> --- gfs2-2.6-nmw/fs/gfs2/dir.c 2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/dir.c 2007-09-18 11:42:04.000000000 -0500
> @@ -1502,7 +1502,7 @@ struct inode *gfs2_dir_search(struct ino
> inode = gfs2_inode_lookup(dir->i_sb,
> be16_to_cpu(dent->de_type),
> be64_to_cpu(dent->de_inum.no_addr),
> - be64_to_cpu(dent->de_inum.no_formal_ino));
> + be64_to_cpu(dent->de_inum.no_formal_ino), 0);
> brelse(bh);
> return inode;
> }
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/inode.c gfs2-2.6-nmw-bz288581/fs/gfs2/inode.c
> --- gfs2-2.6-nmw/fs/gfs2/inode.c 2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/inode.c 2007-09-18 11:42:04.000000000 -0500
> @@ -77,6 +77,49 @@ static struct inode *gfs2_iget(struct su
> return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
> }
>
> +struct gfs2_skip_data {
> + u64 no_addr;
> + int skipped;
> +};
> +
> +static int iget_skip_test(struct inode *inode, void *opaque)
> +{
> + struct gfs2_inode *ip = GFS2_I(inode);
> + struct gfs2_skip_data *data = opaque;
> +
> + if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){
> + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){
> + data->skipped = 1;
> + return 0;
> + }
> + return 1;
> + }
> + return 0;
> +}
> +
> +static int iget_skip_set(struct inode *inode, void *opaque)
> +{
> + struct gfs2_inode *ip = GFS2_I(inode);
> + struct gfs2_skip_data *data = opaque;
> +
> + if (data->skipped)
> + return 1;
> + inode->i_ino = (unsigned long)(data->no_addr);
> + ip->i_no_addr = data->no_addr;
> + return 0;
> +}
> +
> +static struct inode *gfs2_iget_skip(struct super_block *sb,
> + u64 no_addr)
> +{
> + struct gfs2_skip_data data;
> + unsigned long hash = (unsigned long)no_addr;
> +
> + data.no_addr = no_addr;
> + data.skipped = 0;
> + return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
> +}
> +
> /**
> * GFS2 lookup code fills in vfs inode contents based on info obtained
> * from directory entry inside gfs2_inode_lookup(). This has caused issues
> @@ -112,6 +155,7 @@ void gfs2_set_iop(struct inode *inode)
> * @sb: The super block
> * @no_addr: The inode number
> * @type: The type of the inode
> + * @skip_freeing: set this not return an inode if it is currently being freed.
> *
> * Returns: A VFS inode, or an error
> */
> @@ -119,13 +163,19 @@ void gfs2_set_iop(struct inode *inode)
> struct inode *gfs2_inode_lookup(struct super_block *sb,
> unsigned int type,
> u64 no_addr,
> - u64 no_formal_ino)
> + u64 no_formal_ino, int skip_freeing)
> {
> - struct inode *inode = gfs2_iget(sb, no_addr);
> - struct gfs2_inode *ip = GFS2_I(inode);
> + struct inode *inode;
> + struct gfs2_inode *ip;
> struct gfs2_glock *io_gl;
> int error;
>
> + if (skip_freeing)
> + inode = gfs2_iget_skip(sb, no_addr);
> + else
> + inode = gfs2_iget(sb, no_addr);
> + ip = GFS2_I(inode);
> +
> if (!inode)
> return ERR_PTR(-ENOBUFS);
>
> @@ -949,7 +999,7 @@ struct inode *gfs2_createi(struct gfs2_h
>
> inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
> inum.no_addr,
> - inum.no_formal_ino);
> + inum.no_formal_ino, 0);
> if (IS_ERR(inode))
> goto fail_gunlock2;
>
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/inode.h gfs2-2.6-nmw-bz288581/fs/gfs2/inode.h
> --- gfs2-2.6-nmw/fs/gfs2/inode.h 2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/inode.h 2007-09-18 13:03:27.000000000 -0500
> @@ -49,7 +49,8 @@ static inline void gfs2_inum_out(const s
> void gfs2_inode_attr_in(struct gfs2_inode *ip);
> void gfs2_set_iop(struct inode *inode);
> struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
> - u64 no_addr, u64 no_formal_ino);
> + u64 no_addr, u64 no_formal_ino,
> + int skip_freeing);
> struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
>
> int gfs2_inode_refresh(struct gfs2_inode *ip);
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/ops_export.c gfs2-2.6-nmw-bz288581/fs/gfs2/ops_export.c
> --- gfs2-2.6-nmw/fs/gfs2/ops_export.c 2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/ops_export.c 2007-09-18 11:42:04.000000000 -0500
> @@ -237,7 +237,7 @@ static struct dentry *gfs2_get_dentry(st
>
> inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
> inum->no_addr,
> - 0);
> + 0, 0);
> if (!inode)
> goto fail;
> if (IS_ERR(inode)) {
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/ops_fstype.c gfs2-2.6-nmw-bz288581/fs/gfs2/ops_fstype.c
> --- gfs2-2.6-nmw/fs/gfs2/ops_fstype.c 2007-09-18 11:24:03.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/ops_fstype.c 2007-09-18 11:42:04.000000000 -0500
> @@ -230,7 +230,7 @@ fail:
> static inline struct inode *gfs2_lookup_root(struct super_block *sb,
> u64 no_addr)
> {
> - return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
> + return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
> }
>
> static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
> diff -urpN --exclude-from=gfs2-2.6-nmw/Documentation/dontdiff gfs2-2.6-nmw/fs/gfs2/rgrp.c gfs2-2.6-nmw-bz288581/fs/gfs2/rgrp.c
> --- gfs2-2.6-nmw/fs/gfs2/rgrp.c 2007-09-14 14:00:39.000000000 -0500
> +++ gfs2-2.6-nmw-bz288581/fs/gfs2/rgrp.c 2007-09-18 11:42:04.000000000 -0500
> @@ -884,7 +884,7 @@ static struct inode *try_rgrp_unlink(str
> continue;
> *last_unlinked = no_addr;
> inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
> - no_addr, -1);
> + no_addr, -1, 1);
> if (!IS_ERR(inode))
> return inode;
> }
prev parent reply other threads:[~2007-09-19 12:22 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-09-18 18:33 [Cluster-devel] [PATCH] [GFS2] bz288581 alternate gfs2_iget to avoid looking up inodes being freed Benjamin Marzinski
2007-09-19 12:22 ` Steven Whitehouse [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1190204570.1068.86.camel@quoit \
--to=swhiteho@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.