cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: swhiteho@redhat.com <swhiteho@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH 47/51] [GFS2] Alternate gfs2_iget to avoid looking up inodes being freed
Date: Thu,  4 Oct 2007 09:49:40 +0100	[thread overview]
Message-ID: <11914878753861-git-send-email-swhiteho@redhat.com> (raw)
In-Reply-To: <119148787317-git-send-email-swhiteho@redhat.com>

From: Benjamin Marzinski <bmarzins@redhat.com>

There is a possible deadlock between two processes on the same node, where one
process is deleting an inode, and another process is looking for allocated but
unused inodes to delete in order to create more space.

process A does an iput() on inode X, and it's i_count drops to 0. This causes
iput_final() to be called, which puts an inode into state I_FREEING at
generic_delete_inode(). There no point between when iput_final() is called, and
when I_FREEING is set where GFS2 could acquire any glocks. Once I_FREEING is
set, no other process on that node can successfully look up that inode until
the delete finishes.

process B locks the the resource group for the same inode in get_local_rgrp(),
which is called by gfs2_inplace_reserve_i()

process A tries to lock the resource group for the inode in
gfs2_dinode_dealloc(), but it's already locked by process B

process B waits in find_inode for the inode to have the I_FREEING state cleared.

Deadlock.

This patch solves the problem by adding an alternative to gfs2_iget(),
gfs2_iget_skip(), that simply skips any inodes that are in the I_FREEING
state.o The alternate test function is just like the original one, except that
it fails if the inode is being freed, and sets a skipped flag. The alternate
set function is just like the original, except that it fails if the skipped
flag is set. Only try_rgrp_unlink() calls gfs2_iget_skip() instead of
gfs2_iget().

Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 08c6dd0..9949bb7 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1502,7 +1502,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
 		inode = gfs2_inode_lookup(dir->i_sb, 
 				be16_to_cpu(dent->de_type),
 				be64_to_cpu(dent->de_inum.no_addr),
-				be64_to_cpu(dent->de_inum.no_formal_ino));
+				be64_to_cpu(dent->de_inum.no_formal_ino), 0);
 		brelse(bh);
 		return inode;
 	}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 013f00b..5f6dc32 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -77,6 +77,49 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
 	return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
 }
 
+struct gfs2_skip_data {
+	u64	no_addr;
+	int	skipped;
+};
+
+static int iget_skip_test(struct inode *inode, void *opaque)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_skip_data *data = opaque;
+
+	if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){
+		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){
+			data->skipped = 1;
+			return 0;
+		}
+		return 1;
+	}
+	return 0;
+}
+
+static int iget_skip_set(struct inode *inode, void *opaque)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_skip_data *data = opaque;
+
+	if (data->skipped)
+		return 1;
+	inode->i_ino = (unsigned long)(data->no_addr);
+	ip->i_no_addr = data->no_addr;
+	return 0;
+}
+
+static struct inode *gfs2_iget_skip(struct super_block *sb,
+				    u64 no_addr)
+{
+	struct gfs2_skip_data data;
+	unsigned long hash = (unsigned long)no_addr;
+
+	data.no_addr = no_addr;
+	data.skipped = 0;
+	return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
+}
+
 /**
  * GFS2 lookup code fills in vfs inode contents based on info obtained
  * from directory entry inside gfs2_inode_lookup(). This has caused issues
@@ -112,6 +155,7 @@ void gfs2_set_iop(struct inode *inode)
  * @sb: The super block
  * @no_addr: The inode number
  * @type: The type of the inode
+ * @skip_freeing: set this not return an inode if it is currently being freed.
  *
  * Returns: A VFS inode, or an error
  */
@@ -119,13 +163,19 @@ void gfs2_set_iop(struct inode *inode)
 struct inode *gfs2_inode_lookup(struct super_block *sb, 
 				unsigned int type,
 				u64 no_addr,
-				u64 no_formal_ino)
+				u64 no_formal_ino, int skip_freeing)
 {
-	struct inode *inode = gfs2_iget(sb, no_addr);
-	struct gfs2_inode *ip = GFS2_I(inode);
+	struct inode *inode;
+	struct gfs2_inode *ip;
 	struct gfs2_glock *io_gl;
 	int error;
 
+	if (skip_freeing)
+		inode = gfs2_iget_skip(sb, no_addr);
+	else
+		inode = gfs2_iget(sb, no_addr);
+	ip = GFS2_I(inode);
+
 	if (!inode)
 		return ERR_PTR(-ENOBUFS);
 
@@ -949,7 +999,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 
 	inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
 					inum.no_addr,
-					inum.no_formal_ino);
+					inum.no_formal_ino, 0);
 	if (IS_ERR(inode))
 		goto fail_gunlock2;
 
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 4517ac8..351ac87 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -49,7 +49,8 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip,
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
 void gfs2_set_iop(struct inode *inode);
 struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
-				u64 no_addr, u64 no_formal_ino);
+				u64 no_addr, u64 no_formal_ino,
+				int skip_freeing);
 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
 
 int gfs2_inode_refresh(struct gfs2_inode *ip);
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index b8312ed..e2d1347 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -237,7 +237,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 
 	inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
 					inum->no_addr,
-					0);
+					0, 0);
 	if (!inode)
 		goto fail;
 	if (IS_ERR(inode)) {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1ac9afa..17de58e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -230,7 +230,7 @@ fail:
 static inline struct inode *gfs2_lookup_root(struct super_block *sb,
 					     u64 no_addr)
 {
-	return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
+	return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
 }
 
 static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 2d7f7ea..708c287 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -884,7 +884,7 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
 			continue;
 		*last_unlinked = no_addr;
 		inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
-					  no_addr, -1);
+					  no_addr, -1, 1);
 		if (!IS_ERR(inode))
 			return inode;
 	}
-- 
1.5.1.2



  reply	other threads:[~2007-10-04  8:49 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-04  8:48 [Cluster-devel] [GFS2/DLM] Pre-pull patch posting swhiteho
2007-10-04  8:48 ` [Cluster-devel] [PATCH 01/51] [GFS2] Fix two races relating to glock callbacks swhiteho
2007-10-04  8:48   ` [Cluster-devel] [PATCH 02/51] [GFS2] Fix calculation of demote state swhiteho
2007-10-04  8:48     ` [Cluster-devel] [PATCH 03/51] [GFS2] Clean up duplicate includes in fs/gfs2/ swhiteho
2007-10-04  8:48       ` [Cluster-devel] [PATCH 04/51] [GFS2] GFS2 not checking pointer on create when running under nfsd swhiteho
2007-10-04  8:48         ` [Cluster-devel] [PATCH 05/51] [GFS2] Fix an oops in glock dumping swhiteho
2007-10-04  8:48           ` [Cluster-devel] [PATCH 06/51] [GFS2] Move some code inside the log lock swhiteho
2007-10-04  8:49             ` [Cluster-devel] [PATCH 07/51] [GFS2] Revert part of earlier log.c changes swhiteho
2007-10-04  8:49               ` [Cluster-devel] [PATCH 08/51] [GFS2] Prevent infinite loop in try_rgrp_unlink() swhiteho
2007-10-04  8:49                 ` [Cluster-devel] [PATCH 09/51] [GFS2] use an temp variable to reduce a spin_unlock swhiteho
2007-10-04  8:49                   ` [Cluster-devel] [PATCH 10/51] [GFS2] Detach buf data during in-place writeback swhiteho
2007-10-04  8:49                     ` [Cluster-devel] [PATCH 11/51] [GFS2] mark struct *_operations const swhiteho
2007-10-04  8:49                       ` [Cluster-devel] [PATCH 12/51] [GFS2] use the declaration of gfs2_dops in the header file instead swhiteho
2007-10-04  8:49                         ` [Cluster-devel] [PATCH 13/51] [GFS2] Reduce number of gfs2_scand processes to one swhiteho
2007-10-04  8:49                           ` [Cluster-devel] [PATCH 14/51] [GFS2] invalid metadata block - REVISED swhiteho
2007-10-04  8:49                             ` [Cluster-devel] [PATCH 15/51] [GFS2] Ensure journal file cache is flushed after recovery swhiteho
2007-10-04  8:49                               ` [Cluster-devel] [PATCH 16/51] [GFS2] use list_for_each_entry instead swhiteho
2007-10-04  8:49                                 ` [Cluster-devel] [PATCH 17/51] [GFS2] unneeded typecast swhiteho
2007-10-04  8:49                                   ` [Cluster-devel] [PATCH 18/51] [GFS2] better code for translating characters swhiteho
2007-10-04  8:49                                     ` [Cluster-devel] [PATCH 19/51] [GFS2] Force unstuff of hidden quota inode swhiteho
2007-10-04  8:49                                       ` [Cluster-devel] [PATCH 20/51] [GFS2] fixed a NULL pointer assignment BUG swhiteho
2007-10-04  8:49                                         ` [Cluster-devel] [PATCH 21/51] [GFS2] Fix quota do_list operation hang swhiteho
2007-10-04  8:49                                           ` [Cluster-devel] [PATCH 22/51] [GFS2] Clean up invalidatepage/releasepage swhiteho
2007-10-04  8:49                                             ` [Cluster-devel] [PATCH 23/51] [GFS2] Add a missing gfs2_trans_add_bh() swhiteho
2007-10-04  8:49                                               ` [Cluster-devel] [PATCH 24/51] [GFS2] Add NULL entry to token table swhiteho
2007-10-04  8:49                                                 ` [Cluster-devel] [PATCH 25/51] [GFS2] Reduce truncate IO traffic swhiteho
2007-10-04  8:49                                                   ` [Cluster-devel] [PATCH 26/51] [DLM] Fix lowcomms socket closing swhiteho
2007-10-04  8:49                                                     ` [Cluster-devel] [PATCH 27/51] [GFS2] Wendy's dump lockname in hex & fix glock dump swhiteho
2007-10-04  8:49                                                       ` [Cluster-devel] [PATCH 28/51] [GFS2] Patch to protect sd_log_num_jdata swhiteho
2007-10-04  8:49                                                         ` [Cluster-devel] [PATCH 29/51] [GFS2] panic after can't parse mount arguments swhiteho
2007-10-04  8:49                                                           ` [Cluster-devel] [PATCH 30/51] [GFS2] delay glock demote for a minimum hold time swhiteho
2007-10-04  8:49                                                             ` [Cluster-devel] [PATCH 31/51] [GFS2] fix inode meta data corruption swhiteho
2007-10-04  8:49                                                               ` [Cluster-devel] [PATCH 32/51] [GFS2] Correct lock ordering in unlink swhiteho
2007-10-04  8:49                                                                 ` [Cluster-devel] [PATCH 33/51] [GFS2] Introduce gfs2_remove_from_ail swhiteho
2007-10-04  8:49                                                                   ` [Cluster-devel] [PATCH 34/51] [GFS2] Don't mark jdata dirty in gfs2_unstuffer_page() swhiteho
2007-10-04  8:49                                                                     ` [Cluster-devel] [PATCH 35/51] [GFS2] Move pin/unpin into lops.c, clean up locking swhiteho
2007-10-04  8:49                                                                       ` [Cluster-devel] [PATCH 36/51] [GFS2] Clean up ordered write code swhiteho
2007-10-04  8:49                                                                         ` [Cluster-devel] [PATCH 37/51] [GFS2] Fix ordering of dirty/journal for ordered buffer unstuffing swhiteho
2007-10-04  8:49                                                                           ` [Cluster-devel] [PATCH 38/51] [GFS2] Replace revoke structure with bufdata structure swhiteho
2007-10-04  8:49                                                                             ` [Cluster-devel] [PATCH 39/51] [GFS2] Use slab operations for all gfs2_bufdata allocations swhiteho
2007-10-04  8:49                                                                               ` [Cluster-devel] [PATCH 40/51] [GFS2] Clean up gfs2_trans_add_revoke() swhiteho
2007-10-04  8:49                                                                                 ` [Cluster-devel] [PATCH 41/51] [GFS2] flocks from same process trip kernel BUG at fs/gfs2/glock.c:1118! swhiteho
2007-10-04  8:49                                                                                   ` [Cluster-devel] [PATCH 42/51] [GFS2] Move inode deletion out of blocking_cb swhiteho
2007-10-04  8:49                                                                                     ` [Cluster-devel] [PATCH 43/51] [DLM] Make dlm_sendd cond_resched more swhiteho
2007-10-04  8:49                                                                                       ` [Cluster-devel] [PATCH 44/51] [GFS2] GFS2: chmod hung - fix race in thread creation swhiteho
2007-10-04  8:49                                                                                         ` [Cluster-devel] [PATCH 45/51] [GFS2] Clean up journaled data writing swhiteho
2007-10-04  8:49                                                                                           ` [Cluster-devel] [PATCH 46/51] [GFS2] Data corruption fix swhiteho
2007-10-04  8:49                                                                                             ` swhiteho [this message]
2007-10-04  8:49                                                                                               ` [Cluster-devel] [PATCH 48/51] [GFS2] Don't try to remove buffers that don't exist swhiteho
2007-10-04  8:49                                                                                                 ` [Cluster-devel] [PATCH 49/51] [GFS2] Get superblock a different way swhiteho
2007-10-04  8:49                                                                                                   ` [Cluster-devel] [PATCH 50/51] [DLM] don't overwrite castparam if it's NULL swhiteho
2007-10-04  8:49                                                                                                     ` [Cluster-devel] [PATCH 51/51] [DLM] block dlm_recv in recovery transition swhiteho
2007-10-12  7:47 ` [Cluster-devel] [GFS2/DLM] Pull request Steven Whitehouse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=11914878753861-git-send-email-swhiteho@redhat.com \
    --to=swhiteho@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).