All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steven Whitehouse <swhiteho@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] Re: [PATCH] dlm: timeout fixes
Date: Tue, 29 May 2007 16:34:54 +0100	[thread overview]
Message-ID: <1180452894.25918.56.camel@quoit> (raw)
In-Reply-To: <20070529134423.GA31702@redhat.com>

Hi,

These four are now all in the -nmw git tree. Thanks,

Steve.

On Tue, 2007-05-29 at 08:44 -0500, David Teigland wrote:
> Various fixes related to the new timeout feature:
> - add_timeout() missed setting TIMEWARN flag on lkb's when the
>   TIMEOUT flag was already set
> - clear_proc_locks should remove a dead process's locks from the
>   timeout list
> - the end-of-life calculation for user locks needs to consider that
>   -ETIMEDOUT is equivalent to -DLM_ECANCEL
> - make initial default timewarn_cs config value visible in configfs
> - change bit position of TIMEOUT_CANCEL flag so it's not copied to
>   a remote master node
> - set timestamp on remote lkb's so a lock dump will display the time
>   they've been waiting
> 
> Signed-off-by: David Teigland <teigland@redhat.com>
> 
> Index: linux-quilt/fs/dlm/lock.c
> ===================================================================
> --- linux-quilt.orig/fs/dlm/lock.c	2007-05-25 14:29:56.000000000 -0500
> +++ linux-quilt/fs/dlm/lock.c	2007-05-25 14:40:59.000000000 -0500
> @@ -1010,17 +1010,18 @@
>  {
>  	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
>  
> -	if (is_master_copy(lkb))
> +	if (is_master_copy(lkb)) {
> +		lkb->lkb_timestamp = jiffies;
>  		return;
> -
> -	if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
> -		goto add_it;
> +	}
>  
>  	if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
>  	    !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
>  		lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
>  		goto add_it;
>  	}
> +	if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
> +		goto add_it;
>  	return;
>  
>   add_it:
> @@ -3510,8 +3511,7 @@
>  	case -DLM_ECANCEL:
>  		receive_flags_reply(lkb, ms);
>  		revert_lock_pc(r, lkb);
> -		if (ms->m_result)
> -			queue_cast(r, lkb, -DLM_ECANCEL);
> +		queue_cast(r, lkb, -DLM_ECANCEL);
>  		break;
>  	case 0:
>  		break;
> @@ -4534,6 +4534,7 @@
>  		lkb = del_proc_lock(ls, proc);
>  		if (!lkb)
>  			break;
> +		del_timeout(lkb);
>  		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
>  			orphan_proc_lock(ls, lkb);
>  		else
> Index: linux-quilt/fs/dlm/user.c
> ===================================================================
> --- linux-quilt.orig/fs/dlm/user.c	2007-05-25 14:29:51.000000000 -0500
> +++ linux-quilt/fs/dlm/user.c	2007-05-25 14:40:59.000000000 -0500
> @@ -138,6 +138,35 @@
>  }
>  #endif
>  
> +/* Figure out if this lock is at the end of its life and no longer
> +   available for the application to use.  The lkb still exists until
> +   the final ast is read.  A lock becomes EOL in three situations:
> +     1. a noqueue request fails with EAGAIN
> +     2. an unlock completes with EUNLOCK
> +     3. a cancel of a waiting request completes with ECANCEL/EDEADLK
> +   An EOL lock needs to be removed from the process's list of locks.
> +   And we can't allow any new operation on an EOL lock.  This is
> +   not related to the lifetime of the lkb struct which is managed
> +   entirely by refcount. */
> +
> +static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
> +{
> +	switch (sb_status) {
> +	case -DLM_EUNLOCK:
> +		return 1;
> +	case -DLM_ECANCEL:
> +	case -ETIMEDOUT:
> +		if (lkb->lkb_grmode == DLM_LOCK_IV)
> +			return 1;
> +		break;
> +	case -EAGAIN:
> +		if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
> +			return 1;
> +		break;
> +	}
> +	return 0;
> +}
> +
>  /* we could possibly check if the cancel of an orphan has resulted in the lkb
>     being removed and then remove that lkb from the orphans list and free it */
>  
> @@ -184,25 +213,7 @@
>  		log_debug(ls, "ast overlap %x status %x %x",
>  			  lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
>  
> -	/* Figure out if this lock is at the end of its life and no longer
> -	   available for the application to use.  The lkb still exists until
> -	   the final ast is read.  A lock becomes EOL in three situations:
> -	     1. a noqueue request fails with EAGAIN
> -	     2. an unlock completes with EUNLOCK
> -	     3. a cancel of a waiting request completes with ECANCEL
> -	   An EOL lock needs to be removed from the process's list of locks.
> -	   And we can't allow any new operation on an EOL lock.  This is
> -	   not related to the lifetime of the lkb struct which is managed
> -	   entirely by refcount. */
> -
> -	if (type == AST_COMP &&
> -	    lkb->lkb_grmode == DLM_LOCK_IV &&
> -	    ua->lksb.sb_status == -EAGAIN)
> -		eol = 1;
> -	else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
> -	    (ua->lksb.sb_status == -DLM_ECANCEL &&
> -	     lkb->lkb_grmode == DLM_LOCK_IV))
> -		eol = 1;
> +	eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
>  	if (eol) {
>  		lkb->lkb_ast_type &= ~AST_BAST;
>  		lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
> Index: linux-quilt/fs/dlm/config.c
> ===================================================================
> --- linux-quilt.orig/fs/dlm/config.c	2007-05-25 14:29:51.000000000 -0500
> +++ linux-quilt/fs/dlm/config.c	2007-05-25 14:29:56.000000000 -0500
> @@ -433,6 +433,7 @@
>  	cl->cl_toss_secs = dlm_config.ci_toss_secs;
>  	cl->cl_scan_secs = dlm_config.ci_scan_secs;
>  	cl->cl_log_debug = dlm_config.ci_log_debug;
> +	cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
>  
>  	space_list = &sps->ss_group;
>  	comm_list = &cms->cs_group;
> Index: linux-quilt/fs/dlm/netlink.c
> ===================================================================
> --- linux-quilt.orig/fs/dlm/netlink.c	2007-05-25 14:29:51.000000000 -0500
> +++ linux-quilt/fs/dlm/netlink.c	2007-05-25 14:29:56.000000000 -0500
> @@ -133,8 +133,6 @@
>  	size_t size;
>  	int rv;
>  
> -	log_debug(lkb->lkb_resource->res_ls, "timeout_warn %x", lkb->lkb_id);
> -
>  	size = nla_total_size(sizeof(struct dlm_lock_data)) +
>  	       nla_total_size(0); /* why this? */
>  
> Index: linux-quilt/fs/dlm/dlm_internal.h
> ===================================================================
> --- linux-quilt.orig/fs/dlm/dlm_internal.h	2007-05-25 14:29:56.000000000 -0500
> +++ linux-quilt/fs/dlm/dlm_internal.h	2007-05-25 14:40:59.000000000 -0500
> @@ -215,9 +215,9 @@
>  #define DLM_IFL_OVERLAP_CANCEL  0x00100000
>  #define DLM_IFL_ENDOFLIFE	0x00200000
>  #define DLM_IFL_WATCH_TIMEWARN	0x00400000
> +#define DLM_IFL_TIMEOUT_CANCEL	0x00800000
>  #define DLM_IFL_USER		0x00000001
>  #define DLM_IFL_ORPHAN		0x00000002
> -#define DLM_IFL_TIMEOUT_CANCEL	0x00000004
>  
>  struct dlm_lkb {
>  	struct dlm_rsb		*lkb_resource;	/* the rsb */



      reply	other threads:[~2007-05-29 15:34 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-05-29 13:44 [Cluster-devel] [PATCH] dlm: timeout fixes David Teigland
2007-05-29 15:34 ` Steven Whitehouse [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1180452894.25918.56.camel@quoit \
    --to=swhiteho@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.