From mboxrd@z Thu Jan 1 00:00:00 1970 From: Steven Whitehouse Date: Tue, 29 May 2007 16:34:54 +0100 Subject: [Cluster-devel] Re: [PATCH] dlm: timeout fixes In-Reply-To: <20070529134423.GA31702@redhat.com> References: <20070529134423.GA31702@redhat.com> Message-ID: <1180452894.25918.56.camel@quoit> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi, These four are now all in the -nmw git tree. Thanks, Steve. On Tue, 2007-05-29 at 08:44 -0500, David Teigland wrote: > Various fixes related to the new timeout feature: > - add_timeout() missed setting TIMEWARN flag on lkb's when the > TIMEOUT flag was already set > - clear_proc_locks should remove a dead process's locks from the > timeout list > - the end-of-life calculation for user locks needs to consider that > ETIMEDOUT is equivalent to -DLM_ECANCEL > - make initial default timewarn_cs config value visible in configfs > - change bit position of TIMEOUT_CANCEL flag so it's not copied to > a remote master node > - set timestamp on remote lkb's so a lock dump will display the time > they've been waiting > > Signed-off-by: David Teigland > > Index: linux-quilt/fs/dlm/lock.c > =================================================================== > --- linux-quilt.orig/fs/dlm/lock.c 2007-05-25 14:29:56.000000000 -0500 > +++ linux-quilt/fs/dlm/lock.c 2007-05-25 14:40:59.000000000 -0500 > @@ -1010,17 +1010,18 @@ > { > struct dlm_ls *ls = lkb->lkb_resource->res_ls; > > - if (is_master_copy(lkb)) > + if (is_master_copy(lkb)) { > + lkb->lkb_timestamp = jiffies; > return; > - > - if (lkb->lkb_exflags & DLM_LKF_TIMEOUT) > - goto add_it; > + } > > if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) && > !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) { > lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN; > goto add_it; > } > + if (lkb->lkb_exflags & DLM_LKF_TIMEOUT) > + goto add_it; > return; > > add_it: > @@ -3510,8 +3511,7 @@ > case -DLM_ECANCEL: > receive_flags_reply(lkb, ms); > revert_lock_pc(r, lkb); > - if (ms->m_result) > - queue_cast(r, lkb, -DLM_ECANCEL); > + queue_cast(r, lkb, -DLM_ECANCEL); > break; > case 0: > break; > @@ -4534,6 +4534,7 @@ > lkb = del_proc_lock(ls, proc); > if (!lkb) > break; > + del_timeout(lkb); > if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) > orphan_proc_lock(ls, lkb); > else > Index: linux-quilt/fs/dlm/user.c > =================================================================== > --- linux-quilt.orig/fs/dlm/user.c 2007-05-25 14:29:51.000000000 -0500 > +++ linux-quilt/fs/dlm/user.c 2007-05-25 14:40:59.000000000 -0500 > @@ -138,6 +138,35 @@ > } > #endif > > +/* Figure out if this lock is at the end of its life and no longer > + available for the application to use. The lkb still exists until > + the final ast is read. A lock becomes EOL in three situations: > + 1. a noqueue request fails with EAGAIN > + 2. an unlock completes with EUNLOCK > + 3. a cancel of a waiting request completes with ECANCEL/EDEADLK > + An EOL lock needs to be removed from the process's list of locks. > + And we can't allow any new operation on an EOL lock. This is > + not related to the lifetime of the lkb struct which is managed > + entirely by refcount. */ > + > +static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type) > +{ > + switch (sb_status) { > + case -DLM_EUNLOCK: > + return 1; > + case -DLM_ECANCEL: > + case -ETIMEDOUT: > + if (lkb->lkb_grmode == DLM_LOCK_IV) > + return 1; > + break; > + case -EAGAIN: > + if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV) > + return 1; > + break; > + } > + return 0; > +} > + > /* we could possibly check if the cancel of an orphan has resulted in the lkb > being removed and then remove that lkb from the orphans list and free it */ > > @@ -184,25 +213,7 @@ > log_debug(ls, "ast overlap %x status %x %x", > lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags); > > - /* Figure out if this lock is at the end of its life and no longer > - available for the application to use. The lkb still exists until > - the final ast is read. A lock becomes EOL in three situations: > - 1. a noqueue request fails with EAGAIN > - 2. an unlock completes with EUNLOCK > - 3. a cancel of a waiting request completes with ECANCEL > - An EOL lock needs to be removed from the process's list of locks. > - And we can't allow any new operation on an EOL lock. This is > - not related to the lifetime of the lkb struct which is managed > - entirely by refcount. */ > - > - if (type == AST_COMP && > - lkb->lkb_grmode == DLM_LOCK_IV && > - ua->lksb.sb_status == -EAGAIN) > - eol = 1; > - else if (ua->lksb.sb_status == -DLM_EUNLOCK || > - (ua->lksb.sb_status == -DLM_ECANCEL && > - lkb->lkb_grmode == DLM_LOCK_IV)) > - eol = 1; > + eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type); > if (eol) { > lkb->lkb_ast_type &= ~AST_BAST; > lkb->lkb_flags |= DLM_IFL_ENDOFLIFE; > Index: linux-quilt/fs/dlm/config.c > =================================================================== > --- linux-quilt.orig/fs/dlm/config.c 2007-05-25 14:29:51.000000000 -0500 > +++ linux-quilt/fs/dlm/config.c 2007-05-25 14:29:56.000000000 -0500 > @@ -433,6 +433,7 @@ > cl->cl_toss_secs = dlm_config.ci_toss_secs; > cl->cl_scan_secs = dlm_config.ci_scan_secs; > cl->cl_log_debug = dlm_config.ci_log_debug; > + cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; > > space_list = &sps->ss_group; > comm_list = &cms->cs_group; > Index: linux-quilt/fs/dlm/netlink.c > =================================================================== > --- linux-quilt.orig/fs/dlm/netlink.c 2007-05-25 14:29:51.000000000 -0500 > +++ linux-quilt/fs/dlm/netlink.c 2007-05-25 14:29:56.000000000 -0500 > @@ -133,8 +133,6 @@ > size_t size; > int rv; > > - log_debug(lkb->lkb_resource->res_ls, "timeout_warn %x", lkb->lkb_id); > - > size = nla_total_size(sizeof(struct dlm_lock_data)) + > nla_total_size(0); /* why this? */ > > Index: linux-quilt/fs/dlm/dlm_internal.h > =================================================================== > --- linux-quilt.orig/fs/dlm/dlm_internal.h 2007-05-25 14:29:56.000000000 -0500 > +++ linux-quilt/fs/dlm/dlm_internal.h 2007-05-25 14:40:59.000000000 -0500 > @@ -215,9 +215,9 @@ > #define DLM_IFL_OVERLAP_CANCEL 0x00100000 > #define DLM_IFL_ENDOFLIFE 0x00200000 > #define DLM_IFL_WATCH_TIMEWARN 0x00400000 > +#define DLM_IFL_TIMEOUT_CANCEL 0x00800000 > #define DLM_IFL_USER 0x00000001 > #define DLM_IFL_ORPHAN 0x00000002 > -#define DLM_IFL_TIMEOUT_CANCEL 0x00000004 > > struct dlm_lkb { > struct dlm_rsb *lkb_resource; /* the rsb */