cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: swhiteho@redhat.com <swhiteho@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH 34/48] [GFS2] Fix a page lock / glock deadlock
Date: Thu, 17 Apr 2008 09:39:10 +0100	[thread overview]
Message-ID: <12084216391215-git-send-email-swhiteho@redhat.com> (raw)
In-Reply-To: <12084216372363-git-send-email-swhiteho@redhat.com>

From: Steven Whitehouse <swhiteho@redhat.com>

We've previously been using a "try lock" in readpage on the basis that
it would prevent deadlocks due to the inverted lock ordering (our normal
lock ordering is glock first and then page lock). Unfortunately tests
have shown that this isn't enough. If the glock has a demote request
queued such that run_queue() in the glock code tries to do a demote when
it's called under readpage, then it will try to write out all the dirty
pages, which requires locking them. This then deadlocks with the page
locked by readpage.

The solution is to always require two calls into readpage. The first
unlocks the page, gets the glock and returns AOP_TRUNCATED_PAGE; the
second does the actual readpage and unlocks the glock & page as
required.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index ace5770..cdad3e6 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -32,24 +32,23 @@
 #define GLR_TRYFAILED		13
 #define GLR_CANCELED		14
 
-static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
+static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
 {
 	struct gfs2_holder *gh;
-	int locked = 0;
 	struct pid *pid;
 
 	/* Look in glock's list of holders for one with current task as owner */
 	spin_lock(&gl->gl_spin);
 	pid = task_pid(current);
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
-		if (gh->gh_owner_pid == pid) {
-			locked = 1;
-			break;
-		}
+		if (gh->gh_owner_pid == pid)
+			goto out;
 	}
+	gh = NULL;
+out:
 	spin_unlock(&gl->gl_spin);
 
-	return locked;
+	return gh;
 }
 
 static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 5f50dd5..810ff02 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -493,7 +493,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 		return dir;
 	}
 
-	if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) {
+	if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) {
 		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
 		if (error)
 			return ERR_PTR(error);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 7523999..fbb4a6a 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -508,23 +508,26 @@ static int __gfs2_readpage(void *file, struct page *page)
 static int gfs2_readpage(struct file *file, struct page *page)
 {
 	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
-	struct gfs2_holder gh;
+	struct gfs2_holder *gh;
 	int error;
 
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
-	error = gfs2_glock_nq_atime(&gh);
-	if (unlikely(error)) {
+	gh = gfs2_glock_is_locked_by_me(ip->i_gl);
+	if (!gh) {
+		gh = kmalloc(sizeof(struct gfs2_holder), GFP_NOFS);
+		if (!gh)
+			return -ENOBUFS;
+		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, gh);
 		unlock_page(page);
-		goto out;
+		error = gfs2_glock_nq_atime(gh);
+		if (likely(error != 0))
+			goto out;
+		return AOP_TRUNCATED_PAGE;
 	}
 	error = __gfs2_readpage(file, page);
-	gfs2_glock_dq(&gh);
+	gfs2_glock_dq(gh);
 out:
-	gfs2_holder_uninit(&gh);
-	if (error == GLR_TRYFAILED) {
-		yield();
-		return AOP_TRUNCATED_PAGE;
-	}
+	gfs2_holder_uninit(gh);
+	kfree(gh);
 	return error;
 }
 
@@ -826,7 +829,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 	unsigned int to = from + len;
 	int ret;
 
-	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == 0);
+	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
 
 	ret = gfs2_meta_inode_buffer(ip, &dibh);
 	if (unlikely(ret)) {
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index 793e334..4a5e676 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -43,7 +43,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 	struct gfs2_holder d_gh;
 	struct gfs2_inode *ip = NULL;
 	int error;
-	int had_lock=0;
+	int had_lock = 0;
 
 	if (inode) {
 		if (is_bad_inode(inode))
@@ -54,7 +54,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 	if (sdp->sd_args.ar_localcaching)
 		goto valid;
 
-	had_lock = gfs2_glock_is_locked_by_me(dip->i_gl);
+	had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
 	if (!had_lock) {
 		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
 		if (error)
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 301c945..af7097a 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -898,7 +898,7 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 	int error;
 	int unlock = 0;
 
-	if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) {
+	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
 		if (error)
 			return error;
@@ -1065,7 +1065,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	int error;
 	int unlock = 0;
 
-	if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) {
+	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
 		if (error)
 			return error;
-- 
1.5.1.2



  reply	other threads:[~2008-04-17  8:39 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-17  8:37 [Cluster-devel] [GFS2] Pre-pull patch posting swhiteho
2008-04-17  8:38 ` [Cluster-devel] [PATCH 01/48] [GFS2] Speed up gfs2_write_alloc_required, deprecate gfs2_extent_map swhiteho
2008-04-17  8:38   ` [Cluster-devel] [PATCH 02/48] [GFS2] Streamline indirect pointer tree height calculation swhiteho
2008-04-17  8:38     ` [Cluster-devel] [PATCH 03/48] [GFS2] Get rid of unneeded parameter in gfs2_rlist_alloc swhiteho
2008-04-17  8:38       ` [Cluster-devel] [PATCH 04/48] [GFS2] Fix debug inode printing swhiteho
2008-04-17  8:38         ` [Cluster-devel] [PATCH 05/48] [GFS2] Only do lo_incore_commit once swhiteho
2008-04-17  8:38           ` [Cluster-devel] [PATCH 06/48] [GFS2] Misc fixups swhiteho
2008-04-17  8:38             ` [Cluster-devel] [PATCH 07/48] [GFS2] Only wake the reclaim daemon if we need to swhiteho
2008-04-17  8:38               ` [Cluster-devel] [PATCH 08/48] [GFS2] make gfs2_glock_hold() static swhiteho
2008-04-17  8:38                 ` [Cluster-devel] [PATCH 09/48] [GFS2] Plug an unlikely leak swhiteho
2008-04-17  8:38                   ` [Cluster-devel] [PATCH 10/48] [GFS2] Allocate gfs2_rgrpd from slab memory swhiteho
2008-04-17  8:38                     ` [Cluster-devel] [PATCH 11/48] [GFS2] Combine rg_flags and rd_flags swhiteho
2008-04-17  8:38                       ` [Cluster-devel] [PATCH 12/48] [GFS2] Get rid of gl_waiters2 swhiteho
2008-04-17  8:38                         ` [Cluster-devel] [PATCH 13/48] [GFS2] Move part of gfs2_block_map into a separate function swhiteho
2008-04-17  8:38                           ` [Cluster-devel] [PATCH 14/48] [GFS2] Introduce array of buffers to struct metapath swhiteho
2008-04-17  8:38                             ` [Cluster-devel] [PATCH 15/48] [GFS2] Add consts to various bits of rgrp.c swhiteho
2008-04-17  8:38                               ` [Cluster-devel] [PATCH 16/48] [GFS2] Eliminate gl_req_bh swhiteho
2008-04-17  8:38                                 ` [Cluster-devel] [PATCH 17/48] [GFS2] Remove lm.[ch] and distribute content swhiteho
2008-04-17  8:38                                   ` [Cluster-devel] [PATCH 18/48] [GFS2] Remove rgrp and glock version numbers swhiteho
2008-04-17  8:38                                     ` [Cluster-devel] [PATCH 19/48] [GFS2] Shrink & rename di_depth swhiteho
2008-04-17  8:38                                       ` [Cluster-devel] [PATCH 20/48] [GFS2] Remove unused counters swhiteho
2008-04-17  8:38                                         ` [Cluster-devel] [PATCH 21/48] [GFS2] Reduce inode size by merging fields swhiteho
2008-04-17  8:38                                           ` [Cluster-devel] [PATCH 22/48] [GFS2] Merge the rd_last_alloc_meta and rd_last_alloc_data fields swhiteho
2008-04-17  8:38                                             ` [Cluster-devel] [PATCH 23/48] [GFS2] Update gfs2_trans_add_unrevoke to accept extents swhiteho
2008-04-17  8:39                                               ` [Cluster-devel] [PATCH 24/48] [GFS2] Merge gfs2_alloc_meta and gfs2_alloc_data swhiteho
2008-04-17  8:39                                                 ` [Cluster-devel] [PATCH 25/48] [GFS2] Add extent allocation to block allocator swhiteho
2008-04-17  8:39                                                   ` [Cluster-devel] [PATCH 26/48] [GFS2] The case of the missing asterisk swhiteho
2008-04-17  8:39                                                     ` [Cluster-devel] [PATCH 27/48] [GFS2] Add a function to interate over an extent swhiteho
2008-04-17  8:39                                                       ` [Cluster-devel] [PATCH 28/48] [GFS2] Eliminate (almost) duplicate field from gfs2_inode swhiteho
2008-04-17  8:39                                                         ` [Cluster-devel] [PATCH 29/48] [GFS2] Get inode buffer only once per block map call swhiteho
2008-04-17  8:39                                                           ` [Cluster-devel] [PATCH 30/48] [GFS2] Fix bug where we called drop_bh incorrectly swhiteho
2008-04-17  8:39                                                             ` [Cluster-devel] [PATCH 31/48] [GFS2] be*_add_cpu conversion swhiteho
2008-04-17  8:39                                                               ` [Cluster-devel] [PATCH 32/48] [GFS2] gfs2/ops_file.c should #include "ops_inode.h" swhiteho
2008-04-17  8:39                                                                 ` [Cluster-devel] [PATCH 33/48] [GFS2] proper extern for gfs2/locking/dlm/mount.c:gdlm_ops swhiteho
2008-04-17  8:39                                                                   ` swhiteho [this message]
2008-04-17  8:39                                                                     ` [Cluster-devel] [PATCH 35/48] [GFS2] Allow bmap to allocate extents swhiteho
2008-04-17  8:39                                                                       ` [Cluster-devel] [PATCH 36/48] [GFS2] fix file_system_type leak on gfs2meta mount swhiteho
2008-04-17  8:39                                                                         ` [Cluster-devel] [PATCH 37/48] [GFS2] remove gfs2_dev_iops swhiteho
2008-04-17  8:39                                                                           ` [Cluster-devel] [PATCH 38/48] [GFS2] re-support special inode swhiteho
2008-04-17  8:39                                                                             ` [Cluster-devel] [PATCH 39/48] [GFS2] Need to ensure that sector_t is 64bits for GFS2 swhiteho
2008-04-17  8:39                                                                               ` [Cluster-devel] [PATCH 40/48] [GFS2] possible null pointer dereference fixup swhiteho
2008-04-17  8:39                                                                                 ` [Cluster-devel] [PATCH 41/48] [GFS2] gfs2_adjust_quota has broken unstuffing code swhiteho
2008-04-17  8:39                                                                                   ` [Cluster-devel] [PATCH 42/48] [GFS2] Remove drop of module ref where not needed swhiteho
2008-04-17  8:39                                                                                     ` [Cluster-devel] [PATCH 43/48] [GFS2] Streamline quota lock/check for no-quota case swhiteho
2008-04-17  8:39                                                                                       ` [Cluster-devel] [PATCH 44/48] [GFS2] Faster gfs2_bitfit algorithm swhiteho
2008-04-17  8:39                                                                                         ` [Cluster-devel] [PATCH 45/48] [GFS2] fs/gfs2/recovery.c: suppress warnings swhiteho
2008-04-17  8:39                                                                                           ` [Cluster-devel] [PATCH 46/48] [GFS2] Invalidate cache at correct point swhiteho
2008-04-17  8:39                                                                                             ` [Cluster-devel] [PATCH 47/48] [GFS2] test for IS_ERR rather than 0 swhiteho
2008-04-17  8:39                                                                                               ` [Cluster-devel] [PATCH 48/48] [GFS2] fix GFP_KERNEL misuses swhiteho
2008-04-17 11:58                                                                         ` [Cluster-devel] Re: [PATCH 36/48] [GFS2] fix file_system_type leak on gfs2meta mount Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=12084216391215-git-send-email-swhiteho@redhat.com \
    --to=swhiteho@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).