cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: David Teigland <teigland@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH] dlm: timeout fixes
Date: Tue, 29 May 2007 08:44:23 -0500	[thread overview]
Message-ID: <20070529134423.GA31702@redhat.com> (raw)

Various fixes related to the new timeout feature:
- add_timeout() missed setting TIMEWARN flag on lkb's when the
  TIMEOUT flag was already set
- clear_proc_locks should remove a dead process's locks from the
  timeout list
- the end-of-life calculation for user locks needs to consider that
  ETIMEDOUT is equivalent to -DLM_ECANCEL
- make initial default timewarn_cs config value visible in configfs
- change bit position of TIMEOUT_CANCEL flag so it's not copied to
  a remote master node
- set timestamp on remote lkb's so a lock dump will display the time
  they've been waiting

Signed-off-by: David Teigland <teigland@redhat.com>

Index: linux-quilt/fs/dlm/lock.c
===================================================================
--- linux-quilt.orig/fs/dlm/lock.c	2007-05-25 14:29:56.000000000 -0500
+++ linux-quilt/fs/dlm/lock.c	2007-05-25 14:40:59.000000000 -0500
@@ -1010,17 +1010,18 @@
 {
 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
 
-	if (is_master_copy(lkb))
+	if (is_master_copy(lkb)) {
+		lkb->lkb_timestamp = jiffies;
 		return;
-
-	if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
-		goto add_it;
+	}
 
 	if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
 	    !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
 		lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
 		goto add_it;
 	}
+	if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
+		goto add_it;
 	return;
 
  add_it:
@@ -3510,8 +3511,7 @@
 	case -DLM_ECANCEL:
 		receive_flags_reply(lkb, ms);
 		revert_lock_pc(r, lkb);
-		if (ms->m_result)
-			queue_cast(r, lkb, -DLM_ECANCEL);
+		queue_cast(r, lkb, -DLM_ECANCEL);
 		break;
 	case 0:
 		break;
@@ -4534,6 +4534,7 @@
 		lkb = del_proc_lock(ls, proc);
 		if (!lkb)
 			break;
+		del_timeout(lkb);
 		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
 			orphan_proc_lock(ls, lkb);
 		else
Index: linux-quilt/fs/dlm/user.c
===================================================================
--- linux-quilt.orig/fs/dlm/user.c	2007-05-25 14:29:51.000000000 -0500
+++ linux-quilt/fs/dlm/user.c	2007-05-25 14:40:59.000000000 -0500
@@ -138,6 +138,35 @@
 }
 #endif
 
+/* Figure out if this lock is at the end of its life and no longer
+   available for the application to use.  The lkb still exists until
+   the final ast is read.  A lock becomes EOL in three situations:
+     1. a noqueue request fails with EAGAIN
+     2. an unlock completes with EUNLOCK
+     3. a cancel of a waiting request completes with ECANCEL/EDEADLK
+   An EOL lock needs to be removed from the process's list of locks.
+   And we can't allow any new operation on an EOL lock.  This is
+   not related to the lifetime of the lkb struct which is managed
+   entirely by refcount. */
+
+static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
+{
+	switch (sb_status) {
+	case -DLM_EUNLOCK:
+		return 1;
+	case -DLM_ECANCEL:
+	case -ETIMEDOUT:
+		if (lkb->lkb_grmode == DLM_LOCK_IV)
+			return 1;
+		break;
+	case -EAGAIN:
+		if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
+			return 1;
+		break;
+	}
+	return 0;
+}
+
 /* we could possibly check if the cancel of an orphan has resulted in the lkb
    being removed and then remove that lkb from the orphans list and free it */
 
@@ -184,25 +213,7 @@
 		log_debug(ls, "ast overlap %x status %x %x",
 			  lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
 
-	/* Figure out if this lock is at the end of its life and no longer
-	   available for the application to use.  The lkb still exists until
-	   the final ast is read.  A lock becomes EOL in three situations:
-	     1. a noqueue request fails with EAGAIN
-	     2. an unlock completes with EUNLOCK
-	     3. a cancel of a waiting request completes with ECANCEL
-	   An EOL lock needs to be removed from the process's list of locks.
-	   And we can't allow any new operation on an EOL lock.  This is
-	   not related to the lifetime of the lkb struct which is managed
-	   entirely by refcount. */
-
-	if (type == AST_COMP &&
-	    lkb->lkb_grmode == DLM_LOCK_IV &&
-	    ua->lksb.sb_status == -EAGAIN)
-		eol = 1;
-	else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
-	    (ua->lksb.sb_status == -DLM_ECANCEL &&
-	     lkb->lkb_grmode == DLM_LOCK_IV))
-		eol = 1;
+	eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
 	if (eol) {
 		lkb->lkb_ast_type &= ~AST_BAST;
 		lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
Index: linux-quilt/fs/dlm/config.c
===================================================================
--- linux-quilt.orig/fs/dlm/config.c	2007-05-25 14:29:51.000000000 -0500
+++ linux-quilt/fs/dlm/config.c	2007-05-25 14:29:56.000000000 -0500
@@ -433,6 +433,7 @@
 	cl->cl_toss_secs = dlm_config.ci_toss_secs;
 	cl->cl_scan_secs = dlm_config.ci_scan_secs;
 	cl->cl_log_debug = dlm_config.ci_log_debug;
+	cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
 
 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
Index: linux-quilt/fs/dlm/netlink.c
===================================================================
--- linux-quilt.orig/fs/dlm/netlink.c	2007-05-25 14:29:51.000000000 -0500
+++ linux-quilt/fs/dlm/netlink.c	2007-05-25 14:29:56.000000000 -0500
@@ -133,8 +133,6 @@
 	size_t size;
 	int rv;
 
-	log_debug(lkb->lkb_resource->res_ls, "timeout_warn %x", lkb->lkb_id);
-
 	size = nla_total_size(sizeof(struct dlm_lock_data)) +
 	       nla_total_size(0); /* why this? */
 
Index: linux-quilt/fs/dlm/dlm_internal.h
===================================================================
--- linux-quilt.orig/fs/dlm/dlm_internal.h	2007-05-25 14:29:56.000000000 -0500
+++ linux-quilt/fs/dlm/dlm_internal.h	2007-05-25 14:40:59.000000000 -0500
@@ -215,9 +215,9 @@
 #define DLM_IFL_OVERLAP_CANCEL  0x00100000
 #define DLM_IFL_ENDOFLIFE	0x00200000
 #define DLM_IFL_WATCH_TIMEWARN	0x00400000
+#define DLM_IFL_TIMEOUT_CANCEL	0x00800000
 #define DLM_IFL_USER		0x00000001
 #define DLM_IFL_ORPHAN		0x00000002
-#define DLM_IFL_TIMEOUT_CANCEL	0x00000004
 
 struct dlm_lkb {
 	struct dlm_rsb		*lkb_resource;	/* the rsb */



             reply	other threads:[~2007-05-29 13:44 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-05-29 13:44 David Teigland [this message]
2007-05-29 15:34 ` [Cluster-devel] Re: [PATCH] dlm: timeout fixes Steven Whitehouse
  -- strict thread matches above, loose matches on Subject: below --
2007-07-09 16:02 [Cluster-devel] [GFS2/DLM] Pre-pull Patch Posting swhiteho
2007-07-09 16:02 ` [Cluster-devel] [PATCH] [GFS2] flush the glock completely in inode_go_sync swhiteho
2007-07-09 16:02   ` [Cluster-devel] [PATCH] [DLM] fix a couple of races swhiteho
2007-07-09 16:02     ` [Cluster-devel] [PATCH] [GFS2] kernel changes to support new gfs2_grow command swhiteho
2007-07-09 16:02       ` [Cluster-devel] [PATCH] [GFS2] Kernel changes to support new gfs2_grow command (part 2) swhiteho
2007-07-09 16:02         ` [Cluster-devel] [PATCH] [GFS2] use zero_user_page swhiteho
2007-07-09 16:02           ` [Cluster-devel] [PATCH] [GFS2] Addendum patch 2 for gfs2_grow swhiteho
2007-07-09 16:02             ` [Cluster-devel] [PATCH] [GFS2] Reduce size of struct gdlm_lock swhiteho
2007-07-09 16:02               ` [Cluster-devel] [PATCH] [GFS2] Clean up inode number handling swhiteho
2007-07-09 16:02                 ` [Cluster-devel] [PATCH] [GFS2] Quotas non-functional - fix bug swhiteho
2007-07-09 16:02                   ` [Cluster-devel] [PATCH] [DLM] keep dlm from panicing when traversing rsb list in debugfs swhiteho
2007-07-09 16:02                     ` [Cluster-devel] [PATCH] [DLM] block scand during recovery [1/6] swhiteho
2007-07-09 16:02                       ` [Cluster-devel] [PATCH] [DLM] add lock timeouts and warnings [2/6] swhiteho
2007-07-09 16:02                         ` [Cluster-devel] [PATCH] [DLM] dlm_device interface changes [3/6] swhiteho
2007-07-09 16:02                           ` [Cluster-devel] [PATCH] [DLM] cancel in conversion deadlock [4/6] swhiteho
2007-07-09 16:02                             ` [Cluster-devel] [PATCH] [DLM] fix new_lockspace error exit [5/6] swhiteho
2007-07-09 16:02                               ` [Cluster-devel] [PATCH] [DLM] wait for config check during join [6/6] swhiteho
2007-07-09 16:02                                 ` [Cluster-devel] [PATCH] [DLM] fix compile breakage swhiteho
2007-07-09 16:02                                   ` [Cluster-devel] [PATCH] [GFS2] latest gfs2-nmw headers break userland build swhiteho
2007-07-09 16:02                                     ` [Cluster-devel] [PATCH] [DLM] Compile fix swhiteho
2007-07-09 16:02                                       ` [Cluster-devel] [PATCH] [DLM] timeout fixes swhiteho

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070529134423.GA31702@redhat.com \
    --to=teigland@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).