From: Benjamin Marzinski <bmarzins@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH] upstream fix for bz428751
Date: Fri, 14 Mar 2008 13:52:52 -0500 [thread overview]
Message-ID: <20080314185252.GC25043@ether.msp.redhat.com> (raw)
GFS2 wasn't invalidating its cache before it called into the lock manager
with a request that could potentially drop a lock. This was leaving a
window where the lock could be actually be held by another node, but the
file's page cache would still appear valid, causing coherency problems.
This patch moves the cache invalidation to before the lock manager call
when dropping a lock. It also adds the option to the lock_dlm lock
manager to not use conversion mode deadlock avoidance, which, on a
conversion from shared to exclusive, could internally drop the lock, and
then reacquire in. GFS2 now asks lock_dlm to not do this. Instead, GFS2
manually drops the lock and reacquires it.
This is essentially the same patch as I posted for RHEL. Since this
patch depends on other fixes which aren't complete, I only verified that
it compiles correctly. It needs testing once we have a fix for rhbz
#432057.
-Ben
-------------- next part --------------
diff -urpN a/fs/gfs2/glock.c b/fs/gfs2/glock.c
--- a/fs/gfs2/glock.c 2008-03-03 11:09:42.000000000 -0600
+++ b/fs/gfs2/glock.c 2008-03-14 06:54:23.000000000 -0500
@@ -764,7 +764,7 @@ static void state_change(struct gfs2_glo
static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
- const struct gfs2_glock_operations *glops = gl->gl_ops;
+ struct gfs2_holder *gh = gl->gl_req_gh;
gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
@@ -772,8 +772,14 @@ static void drop_bh(struct gfs2_glock *g
state_change(gl, LM_ST_UNLOCKED);
- if (glops->go_inval)
- glops->go_inval(gl, DIO_METADATA);
+ if (test_and_clear_bit(GLF_CONV_DEADLK, &gl->gl_flags)) {
+ spin_lock(&gl->gl_spin);
+ gh->gh_error = 0;
+ spin_unlock(&gl->gl_spin);
+ gfs2_glock_xmote_th(gl, gl->gl_req_gh);
+ gfs2_glock_put(gl);
+ return;
+ }
spin_lock(&gl->gl_spin);
gfs2_demote_wake(gl);
@@ -794,7 +800,6 @@ static void xmote_bh(struct gfs2_glock *
struct gfs2_sbd *sdp = gl->gl_sbd;
const struct gfs2_glock_operations *glops = gl->gl_ops;
struct gfs2_holder *gh = gl->gl_req_gh;
- int prev_state = gl->gl_state;
int op_done = 1;
if (!gh && (ret & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) {
@@ -808,16 +813,6 @@ static void xmote_bh(struct gfs2_glock *
state_change(gl, ret & LM_OUT_ST_MASK);
- if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
- if (glops->go_inval)
- glops->go_inval(gl, DIO_METADATA);
- } else if (gl->gl_state == LM_ST_DEFERRED) {
- /* We might not want to do this here.
- Look at moving to the inode glops. */
- if (glops->go_inval)
- glops->go_inval(gl, 0);
- }
-
/* Deal with each possible exit condition */
if (!gh) {
@@ -837,6 +832,14 @@ static void xmote_bh(struct gfs2_glock *
}
} else {
spin_lock(&gl->gl_spin);
+ if (ret & LM_OUT_CONV_DEADLK) {
+ gh->gh_error = 0;
+ set_bit(GLF_CONV_DEADLK, &gl->gl_flags);
+ spin_unlock(&gl->gl_spin);
+ gfs2_glock_drop_th(gl);
+ gfs2_glock_put(gl);
+ return;
+ }
list_del_init(&gh->gh_list);
gh->gh_error = -EIO;
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
@@ -910,6 +913,8 @@ static void gfs2_glock_xmote_th(struct g
if (glops->go_xmote_th)
glops->go_xmote_th(gl);
+ if (state == LM_ST_DEFERRED && glops->go_inval)
+ glops->go_inval(gl, DIO_METADATA);
gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
@@ -952,6 +957,8 @@ static void gfs2_glock_drop_th(struct gf
if (glops->go_xmote_th)
glops->go_xmote_th(gl);
+ if (glops->go_inval)
+ glops->go_inval(gl, DIO_METADATA);
gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
diff -urpN a/fs/gfs2/incore.h b/fs/gfs2/incore.h
--- a/fs/gfs2/incore.h 2008-03-03 11:09:42.000000000 -0600
+++ b/fs/gfs2/incore.h 2008-03-14 06:01:39.000000000 -0500
@@ -167,6 +167,7 @@ enum {
GLF_DEMOTE_IN_PROGRESS = 6,
GLF_LFLUSH = 7,
GLF_WAITERS2 = 8,
+ GLF_CONV_DEADLK = 9,
};
struct gfs2_glock {
diff -urpN a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
--- a/fs/gfs2/locking/dlm/lock.c 2008-03-03 11:09:42.000000000 -0600
+++ b/fs/gfs2/locking/dlm/lock.c 2008-03-14 05:18:11.000000000 -0500
@@ -137,7 +137,8 @@ static inline unsigned int make_flags(st
/* Conversion deadlock avoidance by DLM */
- if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
+ if (!(lp->ls->fsflags & LM_MFLAG_CONV_NODROP) &&
+ !test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
!(lkf & DLM_LKF_NOQUEUE) &&
cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
lkf |= DLM_LKF_CONVDEADLK;
diff -urpN a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
--- a/fs/gfs2/locking/dlm/thread.c 2008-03-03 11:09:42.000000000 -0600
+++ b/fs/gfs2/locking/dlm/thread.c 2008-03-14 05:18:11.000000000 -0500
@@ -135,7 +135,15 @@ static void process_complete(struct gdlm
lp->lksb.sb_status, lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number,
lp->flags);
- return;
+ if (lp->lksb.sb_status == -EDEADLOCK &&
+ lp->ls->fsflags & LM_MFLAG_CONV_NODROP) {
+ lp->req = lp->cur;
+ acb.lc_ret |= LM_OUT_CONV_DEADLK;
+ if (lp->cur == DLM_LOCK_IV)
+ lp->lksb.sb_lkid = 0;
+ goto out;
+ } else
+ return;
}
/*
diff -urpN a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
--- a/fs/gfs2/ops_fstype.c 2008-03-11 09:26:42.000000000 -0500
+++ b/fs/gfs2/ops_fstype.c 2008-03-14 06:03:28.000000000 -0500
@@ -723,7 +723,7 @@ static int gfs2_lm_mount(struct gfs2_sbd
{
char *proto = sdp->sd_proto_name;
char *table = sdp->sd_table_name;
- int flags = 0;
+ int flags = LM_MFLAG_CONV_NODROP;
int error;
if (sdp->sd_args.ar_spectator)
diff -urpN a/include/linux/lm_interface.h b/include/linux/lm_interface.h
--- a/include/linux/lm_interface.h 2008-03-03 11:09:49.000000000 -0600
+++ b/include/linux/lm_interface.h 2008-03-14 06:10:07.000000000 -0500
@@ -21,9 +21,15 @@ typedef void (*lm_callback_t) (void *ptr
* modify the filesystem. The lock module shouldn't assign a journal to the FS
* mount. It shouldn't send recovery callbacks to the FS mount. If the node
* dies or withdraws, all locks can be wiped immediately.
+ *
+ * LM_MFLAG_CONV_NODROP
+ * Do not allow the dlm to internally resolve conversion deadlocks by demoting
+ * the lock to unlocked and then reacquiring it in the requested mode. Instead,
+ * it should cancel the request and return LM_OUT_CONV_DEADLK.
*/
#define LM_MFLAG_SPECTATOR 0x00000001
+#define LM_MFLAG_CONV_NODROP 0x00000002
/*
* lm_lockstruct flags
@@ -110,6 +116,9 @@ typedef void (*lm_callback_t) (void *ptr
*
* LM_OUT_ASYNC
* The result of the request will be returned in an LM_CB_ASYNC callback.
+ *
+ * LM_OUT_CONV_DEADLK
+ * The lock request was canceled do to a conversion deadlock.
*/
#define LM_OUT_ST_MASK 0x00000003
@@ -117,6 +126,7 @@ typedef void (*lm_callback_t) (void *ptr
#define LM_OUT_CANCELED 0x00000008
#define LM_OUT_ASYNC 0x00000080
#define LM_OUT_ERROR 0x00000100
+#define LM_OUT_CONV_DEADLK 0x00000200
/*
* lm_callback_t types
reply other threads:[~2008-03-14 18:52 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080314185252.GC25043@ether.msp.redhat.com \
--to=bmarzins@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).