From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 16 Jun 2006 20:07:52 -0000 Subject: [Cluster-devel] cluster/rgmanager ChangeLog README include/lis ... Message-ID: <20060616200752.6018.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: STABLE Changes by: lhh at sourceware.org 2006-06-16 20:07:47 Modified files: rgmanager : ChangeLog README rgmanager/include: list.h resgroup.h rg_locks.h rgmanager/init.d: rgmanager rgmanager/man : clusvcadm.8 rgmanager/src/clulib: clulog.c msgsimple.c vft.c rgmanager/src/daemons: Makefile fo_domain.c groups.c main.c restree.c rg_locks.c rg_state.c rg_thread.c rgmanager/src/resources: Makefile clusterfs.sh fs.sh ip.sh nfsclient.sh nfsexport.sh ocf-shellfuncs service.sh rgmanager/src/utils: Makefile clustat.c clusvcadm.c Added files: rgmanager/src/daemons: nodeevent.c watchdog.c rgmanager/src/resources: svclib_nfslock Log message: Merge from RHEL4 branch Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.13&r2=1.5.2.13.6.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/README.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2&r2=1.2.8.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/list.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1&r2=1.2.2.1.6.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.5.6.1&r2=1.3.2.5.6.2 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/rg_locks.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1&r2=1.1.8.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/init.d/rgmanager.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.1&r2=1.3.2.1.6.1 
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/man/clusvcadm.8.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.1&r2=1.1.2.1.6.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/clulog.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1.6.1&r2=1.2.2.1.6.2 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/msgsimple.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4&r2=1.4.8.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.7.2.3.6.2&r2=1.7.2.3.6.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=NONE&r2=1.2.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/watchdog.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=NONE&r2=1.2.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/Makefile.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.6.2.2.6.1&r2=1.6.2.2.6.2 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/fo_domain.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.2&r2=1.5.2.2.6.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.8.2.10.6.3&r2=1.8.2.10.6.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.9.2.8.6.6&r2=1.9.2.8.6.7 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.10.2.2.6.5&r2=1.10.2.2.6.6 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_locks.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.2&r2=1.4.2.2.6.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.8.6.3&r2=1.4.2.8.6.4 
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.7.2.3.6.2&r2=1.7.2.3.6.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/svclib_nfslock.diff?cvsroot=cluster&only_with_tag=STABLE&r1=NONE&r2=1.2.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.3.6.2&r2=1.4.2.3.6.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/clusterfs.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.3.4.4&r2=1.1.2.3.4.5 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/fs.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.6.4.4&r2=1.4.2.6.4.5 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ip.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.4.4.9&r2=1.5.2.4.4.10 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsclient.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.2.6.4&r2=1.3.2.2.6.5 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsexport.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.1.6.3&r2=1.4.2.1.6.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ocf-shellfuncs.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1&r2=1.2.2.2 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/service.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.1.6.2&r2=1.1.2.1.6.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/Makefile.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.1.6.2&r2=1.3.2.1.6.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.3.6.7&r2=1.5.2.3.6.8 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clusvcadm.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.3.6.3&r2=1.2.2.3.6.4 
--- cluster/rgmanager/ChangeLog 2005/03/21 22:01:30 1.5.2.13 +++ cluster/rgmanager/ChangeLog 2006/06/16 20:07:45 1.5.2.13.6.1 @@ -1,3 +1,87 @@ +2006-06-16 Lon Hohberger + * src/daemons/fo_domain.c, groups.c: Get rid of compiler warnings + * src/daemons/rg_state.c: Change clu_lock_verbose to use the NULL + lock/convert mechanism offered by DLM to work around #193128 + * src/resources/fs.sh, clusterfs.sh, nfsexport.sh, nfsclient.sh, + service.sh, svclib_nfslock: Finish up initial NFS workaround. + +2006-05-23 Lon Hohberger + * src/daemons/members.c: Zap pad fields on copy-out + * src/daemons/main.c: Give notice if skipping an event because of + locked services. Call the self-watchdog init function + * src/daemons/watchdog.c: Add Stanko Kupcevic's self-watchdog from + CVS head (fixes #193247) + * src/daemons/groups.c: Add debug messages. Actually count + resgroups during node transition handling + * src/daemons/rg_state.c: allow failover of stopping services if + the owner died (#193255) + * src/utils/clustat.c: fix typo, misc. usability problems (#192999) + +2006-05-16 Lon Hohberger + * src/resources/nfsclient.sh: Fix 189218 - nfsclient not matching + wildcards correctly when checking status. Allow disabling of + recovery for services where the nfs clients are ordered (this will + cause a full service restart, but works) + * src/resources/clusterfs.sh, fs.sh, svclib_nfslock, service.sh: + Implement rudimentary atomic bomb-style NFS lock reclaim handling + Needs compatible and correctly configured version of nfs-utils + installed and running on the system. For clusterfs.sh, ensure + that we flush buffers during service tear-down - regardless of + whether or not we unmount the file system. + * src/utils/clunfslock.sh: HA-callout program (/usr/sbin/clunfslock) + for use with the rpc.statd -H parameter. Copies the client to all + cluster-managed mounted file systems so that it will get lock + reclaim notification on failover. 
+ +2006-05-09 Lon Hohberger + * include/list.h: Prevent dereferencing curr if it's null for some + reason + * include/resgroup.h: Clean up alignment, add rgmanager lock/unlock + message types + * src/daemons/Makefile: Add nodeevent.o to the build for rgmanager + * src/clulib/msgsimple.c: Misc code path cleanups + * src/clulib/vft.c: Add local reads for fast clustat operation. + * src/daemons/groups.c: Count all resource groups for all nodes + in one pass, rather than one node per pass. Split queueing of + status checks off so we never block the main thread. Mark services + which have autostart=0 in the config as "disabled" to help remove + confusion between "disabled", "stopped", and the no-longer-needed + "stopped but behave like disabled" states. bz #182454 / + #190234 / #190408 + * src/daemons/fo_domain.c: Add patch from Josef Whiter to + implement no-failback option for a given FO domain - bz #189841 + * src/daemons/main.c: Queue node events for another thread to + handle, so we never block the main thread. Also, implement + cluster-wide service lock/unlock feature from clumanager 1.2.x + - bz #175010 + * src/daemons/nodeevent.c: Split out node event queueing / handling + in to a separate thread so the main thread does not block + * src/daemons/rg_state.c: Return error codes if resource groups + are locked. + * src/daemons/rg_thread.c: Fix assertion failure causing segfault + in extremely rare cases. Quash the rg queue during shutdown. + - bz #181539 + * src/daemons/rg_state.c: Add fast local service state query to + reduce unnecessary lock contention + * src/daemons/groups.c: Handle request for expedited information + from clustat. + * src/daemons/main.c: Pass arg1 to send_rg_states() to enable fast + clustat operation. + * src/resources/fs.sh: Implement user/group quota support if + enabled in the file system options + * src/utils/clustat.c: Misc. error handling. Add single service / + member output and add -Q to the help information. #185952. 
+ Added -f flag. + * src/utils/clusvcadm.c: Implement client-side of #175010 + * src/utils/clustat.c: show transition time in clustat -x + - bz #191398 + * src/resources/fs.sh: enable user/group quotas if enabled in the + options attribute - bz #191182 + * init.d/rgmanager: fix typo - bz #191205 + + +------------- + 2005-03-21 Lon Hohberger * init.d/rgmanager, Makefile: Fix up init script and add Makefile so that the init script is properly installed #142754 --- cluster/rgmanager/README 2004/08/30 17:49:10 1.2 +++ cluster/rgmanager/README 2006/06/16 20:07:45 1.2.8.1 @@ -1,7 +1,3 @@ -WARNING - -This code is not ready for production use. - This is a clustered resource group manager layered on top of Magma, a single API which can talk to multiple cluster infrastructures via their native APIs. This resource manager requires both magma and one or more --- cluster/rgmanager/include/list.h 2005/02/28 23:13:49 1.2.2.1 +++ cluster/rgmanager/include/list.h 2006/06/16 20:07:46 1.2.2.1.6.1 @@ -50,7 +50,7 @@ if (*list && (curr = *list)) do #define list_done(list, curr) \ - (((curr = (void *)le(curr)->le_next)) && (curr == *list)) + (curr && (((curr = (void *)le(curr)->le_next)) && (curr == *list))) /* list_do(list, node) { --- cluster/rgmanager/include/resgroup.h 2005/07/28 21:19:02 1.3.2.5.6.1 +++ cluster/rgmanager/include/resgroup.h 2006/06/16 20:07:46 1.3.2.5.6.2 @@ -55,25 +55,29 @@ #define RG_ACTION_REQUEST /* Message header */ 0x138582 -#define RG_SUCCESS 0 -#define RG_FAIL 1 -#define RG_START 2 -#define RG_STOP 3 -#define RG_STATUS 4 -#define RG_DISABLE 5 -#define RG_STOP_RECOVER 6 -#define RG_START_RECOVER 7 -#define RG_RESTART 8 -#define RG_EXITING 9 -#define RG_INIT 10 -#define RG_ENABLE 11 -#define RG_STATUS_INQUIRY 12 -#define RG_RELOCATE 13 -#define RG_CONDSTOP 14 -#define RG_CONDSTART 15 -#define RG_START_REMOTE 16 /* Part of a relocate */ -#define RG_STOP_USER 17 /* User-stop request */ -#define RG_NONE 999 +#define RG_SUCCESS 0 +#define RG_FAIL 1 +#define 
RG_START 2 +#define RG_STOP 3 +#define RG_STATUS 4 +#define RG_DISABLE 5 +#define RG_STOP_RECOVER 6 +#define RG_START_RECOVER 7 +#define RG_RESTART 8 +#define RG_EXITING 9 +#define RG_INIT 10 +#define RG_ENABLE 11 +#define RG_STATUS_INQUIRY 12 +#define RG_RELOCATE 13 +#define RG_CONDSTOP 14 +#define RG_CONDSTART 15 +#define RG_START_REMOTE 16 /* Part of a relocate */ +#define RG_STOP_USER 17 /* User-stop request */ +#define RG_STOP_EXITING 18 /* Exiting. */ +#define RG_LOCK 19 +#define RG_UNLOCK 20 +#define RG_QUERY_LOCK 21 +#define RG_NONE 999 extern const char *rg_req_strings[]; @@ -181,7 +185,7 @@ */ #define FOD_ORDERED (1<<0) #define FOD_RESTRICTED (1<<1) - +#define FOD_NOFAILBACK (1<<2) //#define DEBUG #ifdef DEBUG --- cluster/rgmanager/include/rg_locks.h 2004/08/13 15:36:50 1.1 +++ cluster/rgmanager/include/rg_locks.h 2006/06/16 20:07:46 1.1.8.1 @@ -4,9 +4,8 @@ int rg_running(void); int rg_locked(void); -int rg_lockall(void); -int rg_unlockall(void); -int rg_wait_unlockall(void); +int rg_lockall(int flag); +int rg_unlockall(int flag); int rg_quorate(void); int rg_set_quorate(void); --- cluster/rgmanager/init.d/rgmanager 2005/03/21 21:02:58 1.3.2.1 +++ cluster/rgmanager/init.d/rgmanager 2006/06/16 20:07:46 1.3.2.1.6.1 @@ -16,7 +16,7 @@ # Grab the network config file . /etc/sysconfig/network -# Grap cluster start config if it exists +# Grab cluster start config if it exists [ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster PATH=/sbin:/bin:/usr/sbin:/usr/bin --- cluster/rgmanager/man/clusvcadm.8 2005/01/18 22:35:35 1.1.2.1 +++ cluster/rgmanager/man/clusvcadm.8 2006/06/16 20:07:46 1.1.2.1.6.1 @@ -46,9 +46,9 @@ .I service .IP \-l -Lock the cluster's service managers. This should only be used if the +Lock the local resource group manager. This should only be used if the administrator intends to perform a global, cluster-wide shutdown. 
This -prevents ALL service operations on ALL currently running members, thus, +prevents starting resource groups on the local node, allowing services will not fail over during the shutdown of the cluster. Generally, administrators should use the .B --- cluster/rgmanager/src/clulib/clulog.c 2005/12/06 18:43:45 1.2.2.1.6.1 +++ cluster/rgmanager/src/clulib/clulog.c 2006/06/16 20:07:46 1.2.2.1.6.2 @@ -20,7 +20,7 @@ /** @file * Library routines for communicating with the logging daemon. * - * $Id: clulog.c,v 1.2.2.1.6.1 2005/12/06 18:43:45 lhh Exp $ + * $Id: clulog.c,v 1.2.2.1.6.2 2006/06/16 20:07:46 lhh Exp $ * * Author: Jeff Moyer */ @@ -50,7 +50,7 @@ #include -static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.1.6.1 $"; +static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.1.6.2 $"; #ifdef DEBUG #include --- cluster/rgmanager/src/clulib/msgsimple.c 2004/11/11 19:46:18 1.4 +++ cluster/rgmanager/src/clulib/msgsimple.c 2006/06/16 20:07:46 1.4.8.1 @@ -83,15 +83,19 @@ /* * Peek at the header. We need the size of the inbound buffer! 
*/ + errno = EAGAIN; ret = msg_peek(fd, &peek_msg, sizeof (generic_msg_hdr)); if (ret != sizeof (generic_msg_hdr)) { if (ret == -1) { if (errno != ECONNRESET) fprintf(stderr, "fd%d peek: %s\n", fd, strerror(errno)); + //perror("msg_peek"); } else if (ret != 0) /* Blank message = probably closed socket */ fprintf(stderr, "fd%d peek: %d/%d bytes\n", fd, ret, (int)sizeof (generic_msg_hdr)); + else if (ret == 0) + errno = EAGAIN; return -1; } --- cluster/rgmanager/src/clulib/vft.c 2006/01/20 16:27:29 1.7.2.3.6.2 +++ cluster/rgmanager/src/clulib/vft.c 2006/06/16 20:07:46 1.7.2.3.6.3 @@ -1598,6 +1598,47 @@ } +int +vf_read_local(char *keyid, uint64_t *view, void **data, uint32_t *datalen) +{ + key_node_t *key_node = NULL; + + pthread_mutex_lock(&vf_mutex); + pthread_mutex_lock(&key_list_mutex); + + key_node = kn_find_key(keyid); + if (!key_node) { + pthread_mutex_unlock(&key_list_mutex); + pthread_mutex_unlock(&vf_mutex); + printf("no key for %s\n", keyid); + return VFR_NODATA; + } + + if (!key_node->kn_data || !key_node->kn_datalen) { + pthread_mutex_unlock(&key_list_mutex); + pthread_mutex_unlock(&vf_mutex); + return VFR_NODATA; + } + + *data = malloc(key_node->kn_datalen); + if (! *data) { + pthread_mutex_unlock(&key_list_mutex); + pthread_mutex_unlock(&vf_mutex); + printf("Couldn't malloc %s\n", keyid); + return VFR_ERROR; + } + + memcpy(*data, key_node->kn_data, key_node->kn_datalen); + *datalen = key_node->kn_datalen; + *view = key_node->kn_viewno; + + pthread_mutex_unlock(&key_list_mutex); + pthread_mutex_unlock(&vf_mutex); + + return VFR_OK; +} + + static int vf_send_current(int fd, char *keyid) { /cvs/cluster/cluster/rgmanager/src/daemons/nodeevent.c,v --> standard output revision 1.2.2.1 --- cluster/rgmanager/src/daemons/nodeevent.c +++ - 2006-06-16 20:07:48.351799000 +0000 @@ -0,0 +1,103 @@ +/* + Copyright Red Hat, Inc. 
2006 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + MA 02139, USA. +*/ +#include +#include +#include +#include + +typedef struct __ne_q { + list_head(); + int ne_local; + uint64_t ne_nodeid; + int ne_state; +} nevent_t; + +int node_event(int, uint64_t, int); + +/** + * Node event queue. + */ +static nevent_t *event_queue = NULL; +static pthread_mutex_t ne_queue_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_t ne_thread = 0; +int ne_queue_request(int local, uint64_t nodeid, int state); + + +void * +node_event_thread(void *arg) +{ + nevent_t *ev; + + while (1) { + pthread_mutex_lock(&ne_queue_mutex); + ev = event_queue; + if (ev) + list_remove(&event_queue, ev); + else + break; /* We're outta here */ + pthread_mutex_unlock(&ne_queue_mutex); + + node_event(ev->ne_local, ev->ne_nodeid, ev->ne_state); + + free(ev); + } + + /* Mutex held */ + ne_thread = 0; + rg_dec_threads(); + pthread_mutex_unlock(&ne_queue_mutex); + return NULL; +} + + +void +node_event_q(int local, uint64_t nodeID, int state) +{ + nevent_t *ev; + pthread_attr_t attrs; + + while (1) { + ev = malloc(sizeof(nevent_t)); + if (ev) { + break; + } + sleep(1); + } + + memset(ev,0,sizeof(*ev)); + + ev->ne_state = state; + ev->ne_local = local; + ev->ne_nodeid = nodeID; + + pthread_mutex_lock (&ne_queue_mutex); + list_insert(&event_queue, ev); + if (ne_thread == 0) { + 
pthread_attr_init(&attrs); + pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED); + pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); + pthread_attr_setstacksize(&attrs, 262144); + + pthread_create(&ne_thread, &attrs, node_event_thread, NULL); + pthread_attr_destroy(&attrs); + + rg_inc_threads(); + } + pthread_mutex_unlock (&ne_queue_mutex); +} /cvs/cluster/cluster/rgmanager/src/daemons/watchdog.c,v --> standard output revision 1.2.2.1 --- cluster/rgmanager/src/daemons/watchdog.c +++ - 2006-06-16 20:07:48.438384000 +0000 @@ -0,0 +1,97 @@ +/* + Copyright Red Hat, Inc. 2005-2006 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + MA 02139, USA. 
+*/ +#include +#include +#include +#include +#include + +#include +#include + +static pid_t child = 0; + +static void +signal_handler(int signum) +{ + kill(child, signum); +} +static void +redirect_signals(void) +{ + int i; + for (i = 0; i < _NSIG; i++) { + switch (i) { + case SIGCHLD: + case SIGILL: + case SIGFPE: + case SIGSEGV: + case SIGBUS: + setup_signal(i, SIG_DFL); + break; + default: + setup_signal(i, signal_handler); + } + } +} + +/** + return watchdog's pid, or 0 on failure +*/ +int +watchdog_init(void) +{ + int status; + pid_t parent; + + parent = getpid(); + child = fork(); + if (child < 0) + return 0; + else if (!child) + return parent; + + redirect_signals(); + + while (1) { + if (waitpid(child, &status, 0) <= 0) + continue; + + if (WIFEXITED(status)) + exit(WEXITSTATUS(status)); + + if (WIFSIGNALED(status)) { + if (WTERMSIG(status) == SIGKILL) { + clulog(LOG_CRIT, "Watchdog: Daemon killed, exiting\n"); + raise(SIGKILL); + while(1) ; + } + else { +#ifdef DEBUG + clulog(LOG_CRIT, "Watchdog: Daemon died, but not rebooting because DEBUG is set\n"); +#else + clulog(LOG_CRIT, "Watchdog: Daemon died, rebooting...\n"); + sync(); + reboot(RB_AUTOBOOT); +#endif + exit(255); + } + } + } +} --- cluster/rgmanager/src/daemons/Makefile 2005/10/17 20:30:45 1.6.2.2.6.1 +++ cluster/rgmanager/src/daemons/Makefile 2006/06/16 20:07:46 1.6.2.2.6.2 @@ -40,7 +40,8 @@ clurgmgrd: rg_thread.o rg_locks.o main.o groups.o rg_state.o \ rg_queue.o members.o rg_forward.o reslist.o \ - resrules.o restree.o fo_domain.o + resrules.o restree.o fo_domain.o nodeevent.o \ + watchdog.o $(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) $(LDFLAGS) -lccs # --- cluster/rgmanager/src/daemons/fo_domain.c 2005/01/25 20:05:44 1.5.2.2 +++ cluster/rgmanager/src/daemons/fo_domain.c 2006/06/16 20:07:46 1.5.2.2.6.1 @@ -19,6 +19,9 @@ /** @file * Fail-over Domain & Preferred Node Ordering Driver. Ripped right from * the clumanager 1.2 code base. 
+ * + * April 2006 - Nofailback option added to restrict failover behavior in ordered + * + restricted failover domains by Josef Whiter */ #include #include @@ -153,6 +156,13 @@ free(ret); } + snprintf(xpath, sizeof(xpath), "%s/failoverdomain[%d]/@nofailback", + base, idx); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + if (atoi(ret) != 0) + fod->fd_flags |= FOD_NOFAILBACK; + free(ret); + } snprintf(xpath, sizeof(xpath), "%s/failoverdomain[%d]", base, idx); @@ -226,7 +236,9 @@ if (fod->fd_flags & FOD_ORDERED) printf("Ordered "); if (fod->fd_flags & FOD_RESTRICTED) - printf("Restricted"); + printf("Restricted "); + if (fod->fd_flags & FOD_NOFAILBACK) + printf("No Failback"); printf("\n"); } @@ -316,8 +328,14 @@ char domainname[128]; int ordered = 0; int restricted = 0; + int nofailback = 0; fod_t *fod = NULL; int found = 0; + int owned_by_node = 0, started = 0, no_owner = 0; +#ifndef NO_CCS + rg_state_t svc_state; + void *lockp; +#endif ENTER(); @@ -370,6 +388,11 @@ } /* + * Determine whether this domain has failback turned on or not.. + */ + nofailback = !!(fod->fd_flags & FOD_NOFAILBACK); + + /* * Determine whether this domain is restricted or not... 
*/ restricted = !!(fod->fd_flags & FOD_RESTRICTED); @@ -379,6 +402,37 @@ */ ordered = !!(fod->fd_flags & FOD_ORDERED); +#ifndef NO_CCS + if(nofailback) { + if (rg_lock(rg_name, &lockp) != 0) { + clulog(LOG_WARNING, "Error getting a lock\n"); + RETURN(FOD_BEST); + } + + if (get_rg_state(rg_name, &svc_state) == FAIL) { + /* + * Couldn't get the service state, thats odd + */ + clulog(LOG_WARNING, "Problem getting state information for " + "%s\n", rg_name); + rg_unlock(rg_name, lockp); + RETURN(FOD_BEST); + } + rg_unlock(rg_name, lockp); + + /* + * Check to see if the service is started and if we are the owner in case of + * restricted+owner+no failback + */ + if (svc_state.rs_state == RG_STATE_STARTED) + started = 1; + if (svc_state.rs_owner == nodeid) + owned_by_node = 1; + if (!memb_online(membership, svc_state.rs_owner)) + no_owner = 1; + } +#endif + switch (node_in_domain(nodename, fod, membership)) { case 0: /* @@ -429,6 +483,17 @@ "lowest-ordered\n", nodeid); #endif if (ordered) { + /* + * If we are ordered we want to see if failback is + * turned on + */ + if (nofailback && started && owned_by_node && !no_owner) { +#ifdef DEBUG + clulog(LOG_DEBUG,"Ordered mode and no " + "failback -> BEST\n"); +#endif + RETURN(FOD_BEST); + } #ifdef DEBUG clulog(LOG_DEBUG,"Ordered mode -> BETTER\n"); #endif @@ -444,6 +509,16 @@ * Node is a member of the domain and is the lowest-ordered, * online member. 
*/ + + if(nofailback && started && !owned_by_node && !no_owner) { +#ifdef DEBUG + clulog(LOG_DEBUG, "Member #%d is the lowest-ordered " + "memeber of the domain, but is not the owner " + "-> BETTER\n", nodeid); +#endif + RETURN(FOD_BETTER); + } + /* In this case, we can ignore 'ordered' */ #ifdef DEBUG clulog(LOG_DEBUG, "Member #%d is the lowest-ordered member " --- cluster/rgmanager/src/daemons/groups.c 2006/01/20 16:27:29 1.8.2.10.6.3 +++ cluster/rgmanager/src/daemons/groups.c 2006/06/16 20:07:46 1.8.2.10.6.4 @@ -28,6 +28,10 @@ #include #include #include +#include + +#define cm_svccount cm_pad[0] /* Theses are uint8_t size */ +#define cm_svcexcl cm_pad[1] static int config_version = 0; @@ -40,6 +44,12 @@ pthread_rwlock_t resource_lock = PTHREAD_RWLOCK_INITIALIZER; +struct status_arg { + int fd; + int fast; +}; + + /** See if a given node ID should start a resource, given cluster membership @@ -60,16 +70,21 @@ int -count_resource_groups(uint64_t nodeid, int *excl) +count_resource_groups(cluster_member_list_t *ml) { resource_t *res; char *rgname, *val; - int count = 0, exclusive = 0; + int x; rg_state_t st; void *lockp; + cluster_member_t *mp; - if (excl) - *excl = 0; + for (x = 0; x < ml->cml_count; x++) { + ml->cml_members[x].cm_svccount = 0; + ml->cml_members[x].cm_svcexcl = 0; + } + + pthread_rwlock_rdlock(&resource_lock); list_do(&_resources, res) { if (res->r_rule->rr_root == 0) @@ -77,34 +92,43 @@ rgname = res->r_attrs[0].ra_value; - if (rg_lock(rgname, &lockp) < 0) + if (rg_lock(rgname, &lockp) < 0) { + clulog(LOG_ERR, "#XX: Unable to obtain cluster " + "lock @ %s:%d: %s\n", __FILE__, __LINE__, + strerror(errno)); continue; + } if (get_rg_state(rgname, &st) < 0) { + clulog(LOG_ERR, "#34: Cannot get status " + "for service %s\n", rgname); rg_unlock(rgname, lockp); continue; } + rg_unlock(rgname, lockp); - if (st.rs_owner != nodeid || - (st.rs_state == RG_STATE_STARTED && - st.rs_state == RG_STATE_STARTING)) + if (st.rs_state != RG_STATE_STARTED && + 
st.rs_state != RG_STATE_STARTING) continue; - if (excl) { - /* Count exclusive resources */ - val = res_attr_value(res, "exclusive"); - exclusive = val && ((!strcmp(val, "yes") || - (atoi(val)>0))); - } + mp = memb_id_to_p(ml, st.rs_owner); + if (!mp) + continue; + + ++mp->cm_svccount; - ++count; - if (exclusive && excl) - ++(*excl); + val = res_attr_value(res, "exclusive"); + if (val && ((!strcmp(val, "yes") || + (atoi(val)>0))) ) { + ++mp->cm_svcexcl; + } } while (!list_done(&_resources, res)); - return count; + pthread_rwlock_unlock(&resource_lock); + + return 0; } @@ -125,7 +149,13 @@ uint64_t highnode = owner, nodeid; char *val; resource_t *res; - int exclusive, count, excl; + int exclusive; + + if (lock) + pthread_rwlock_rdlock(&resource_lock); + count_resource_groups(allowed); + if (lock) + pthread_rwlock_unlock(&resource_lock); for (x=0; x < allowed->cml_count; x++) { if (allowed->cml_members[x].cm_state != STATE_UP) @@ -141,7 +171,8 @@ pthread_rwlock_rdlock(&resource_lock); score = node_should_start(nodeid, allowed, rg_name, &_domains); if (!score) { /* Illegal -- failover domain constraint */ - pthread_rwlock_unlock(&resource_lock); + if (lock) + pthread_rwlock_unlock(&resource_lock); continue; } @@ -153,19 +184,18 @@ val = res_attr_value(res, "exclusive"); exclusive = val && ((!strcmp(val, "yes") || (atoi(val)>0))); - count = count_resource_groups(nodeid, &excl); - if (lock) pthread_rwlock_unlock(&resource_lock); if (exclusive) { - if (count > 0) { + + if (allowed->cml_members[x].cm_svccount > 0) { /* Definitely not this guy */ continue; } else { score += 2; } - } else if (excl) { + } else if (allowed->cml_members[x].cm_svcexcl) { /* This guy has an exclusive resource group. Can't relocate / failover to him. 
*/ continue; @@ -192,14 +222,19 @@ cluster_member_list_t *membership) { char *val; - int autostart, exclusive, count = 0, excl = 0; + cluster_member_t *mp; + int autostart, exclusive; + void *lockp; + + mp = memb_id_to_p(membership, my_id()); + assert(mp); /* * Service must be not be running elsewhere to consider for a * local start. */ if (svcStatus->rs_state == RG_STATE_STARTED && - svcStatus->rs_state == my_id()) + svcStatus->rs_owner == mp->cm_id) return; if (svcStatus->rs_state == RG_STATE_DISABLED) @@ -218,6 +253,32 @@ "Skipping RG %s: Autostart disabled\n", svcName); */ + /* + Mark non-autostart services as disabled to avoid + confusion! + */ + if (rg_lock(svcName, &lockp) < 0) { + clulog(LOG_ERR, "#XX: Unable to obtain cluster " + "lock @ %s:%d: %s\n", __FILE__, __LINE__, + strerror(errno)); + return; + } + + if (get_rg_state(svcName, svcStatus) != 0) { + clulog(LOG_ERR, "#34: Cannot get status " + "for service %s\n", svcName); + rg_unlock(svcName, lockp); + return; + } + + if (svcStatus->rs_transition == 0 && + svcStatus->rs_state == RG_STATE_STOPPED) { + svcStatus->rs_state = RG_STATE_DISABLED; + set_rg_state(svcName, svcStatus); + } + + rg_unlock(svcName, lockp); + return; } } @@ -225,17 +286,10 @@ val = res_attr_value(node->rn_resource, "exclusive"); exclusive = val && ((!strcmp(val, "yes") || (atoi(val)>0))); - /* - Count the normal + exclusive resource groups running locally - */ - count = count_resource_groups(my_id(), &excl); - - if (exclusive && count_resource_groups(my_id(), NULL)) { - /* + if (exclusive && mp->cm_svccount) { clulog(LOG_DEBUG, "Skipping RG %s: Exclusive and I am running services\n", svcName); - */ return; } @@ -243,12 +297,10 @@ Don't start other services if I'm running an exclusive service. 
*/ - if (excl) { - /* + if (mp->cm_svcexcl) { clulog(LOG_DEBUG, "Skipping RG %s: I am running an exclusive service\n", svcName); - */ return; } @@ -256,9 +308,10 @@ * Start any stopped services, or started services * that are owned by a down node. */ - if (node_should_start(my_id(), membership, svcName, &_domains) == + if (node_should_start(mp->cm_id, membership, svcName, &_domains) == FOD_BEST) - rt_enqueue_request(svcName, RG_START, -1, 0, my_id(), 0, 0); + rt_enqueue_request(svcName, RG_START, -1, 0, mp->cm_id, + 0, 0); } @@ -267,6 +320,7 @@ cluster_member_list_t *membership) { int a, b; + /* Service must be running locally in order to consider for a relocate @@ -291,7 +345,6 @@ if (a <= b) return; - clulog(LOG_DEBUG, "Relocating group %s to better node %s\n", svcName, memb_id_to_name(membership, nodeid)); @@ -318,12 +371,18 @@ int ret; if (rg_locked()) { - clulog(LOG_NOTICE, "Services locked\n"); + clulog(LOG_NOTICE, + "Resource groups locked; not evaluating\n"); return -EAGAIN; } - membership = member_list(); + membership = member_list(); + pthread_rwlock_rdlock(&resource_lock); + + /* Requires read lock */ + count_resource_groups(membership); + list_do(&_tree, node) { if (node->rn_resource->r_rule->rr_root == 0) @@ -372,7 +431,8 @@ rg_state_str(svcStatus.rs_state), nodeName); - if (local && (nodeStatus == STATE_UP)) { + if ((local && (nodeStatus == STATE_UP)) || + svcStatus.rs_state == RG_STATE_STOPPED) { consider_start(node, svcName, &svcStatus, membership); @@ -401,6 +461,9 @@ pthread_rwlock_unlock(&resource_lock); cml_free(membership); + clulog(LOG_DEBUG, "Event (%d:%d:%d) Processed\n", local, + (int)nodeid, nodeStatus); + return 0; } @@ -513,8 +576,9 @@ @param rgname Resource group name whose state we want to send. 
@see send_rg_states */ +int get_rg_state_local(char *, rg_state_t *); void -send_rg_state(int fd, char *rgname) +send_rg_state(int fd, char *rgname, int fast) { rg_state_msg_t msg, *msgp = &msg; void *lockp; @@ -523,18 +587,23 @@ msgp->rsm_hdr.gh_length = sizeof(msg); msgp->rsm_hdr.gh_command = RG_STATUS; - if (rg_lock(rgname, &lockp) < 0) - return; - - if (get_rg_state(rgname, &msgp->rsm_state) < 0) { + /* try fast read -- only if it fails and fast is not + specified should we do the full locked read */ + if (get_rg_state_local(rgname, &msgp->rsm_state) != 0 && + !fast) { + if (rg_lock(rgname, &lockp) < 0) + return; + if (get_rg_state(rgname, &msgp->rsm_state) < 0) { + rg_unlock(rgname, lockp); + return; + } rg_unlock(rgname, lockp); - return; } - rg_unlock(rgname, lockp); swab_rg_state_msg_t(msgp); - msg_send(fd, msgp, sizeof(msg)); + if (msg_send(fd, msgp, sizeof(msg)) < 0) + perror("msg_send"); } @@ -545,8 +614,10 @@ static void * status_check_thread(void *arg) { - int fd = *(int *)arg; + int fd = ((struct status_arg *)arg)->fd; + int fast = ((struct status_arg *)arg)->fast; resource_t *res; + generic_msg_hdr hdr; free(arg); @@ -556,12 +627,17 @@ if (res->r_rule->rr_root == 0) continue; - send_rg_state(fd, res->r_attrs[0].ra_value); + send_rg_state(fd, res->r_attrs[0].ra_value, fast); } while (!list_done(&_resources, res)); pthread_rwlock_unlock(&resource_lock); msg_send_simple(fd, RG_SUCCESS, 0, 0); + + /* XXX wait for client to tell us it's done; I don't know why + this is needed when doing fast I/O, but it is. 
*/ + msg_receive_timeout(fd, &hdr, sizeof(hdr), 10); + msg_close(fd); return NULL; @@ -575,26 +651,27 @@ @return 0 */ int -send_rg_states(int fd) +send_rg_states(int fd, int fast) { - int *fdp; + struct status_arg *arg; pthread_t newthread; pthread_attr_t attrs; - fdp = malloc(sizeof(int)); - if (!fdp) { + arg = malloc(sizeof(struct status_arg)); + if (!arg) { msg_send_simple(fd, RG_FAIL, 0, 0); return -1; } - *fdp = fd; + arg->fd = fd; + arg->fast = fast; pthread_attr_init(&attrs); pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED); pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); pthread_attr_setstacksize(&attrs, 65535); - pthread_create(&newthread, &attrs, status_check_thread, fdp); + pthread_create(&newthread, &attrs, status_check_thread, arg); pthread_attr_destroy(&attrs); return 0; @@ -631,6 +708,7 @@ { resource_node_t *curr; char *name; + rg_state_t svcblk; pthread_rwlock_rdlock(&resource_lock); list_do(&_tree, curr) { @@ -644,11 +722,27 @@ if (debugfmt) clulog(LOG_DEBUG, debugfmt, name); + /* Optimization: Don't bother even queueing the request + during the exit case if we don't own it */ + if (request == RG_STOP_EXITING) { + if (get_rg_state_local(name, &svcblk) < 0) + continue; + + /* Always run stop if we're the owner, regardless + of state; otherwise, don't run stop */ + if (svcblk.rs_owner != my_id()) + continue; + } + rt_enqueue_request(name, request, -1, 0, NODE_ID_NONE, 0, 0); } while (!list_done(&_tree, curr)); pthread_rwlock_unlock(&resource_lock); + + /* XXX during shutdown, if we're doing a simultaenous shutdown, + this will cause this rgmanager to hang waiting for all the + other rgmanagers to complete. */ if (block) rg_wait_threads(); } @@ -657,13 +751,12 @@ /** Stop changed resources. 
*/ -void -do_status_checks(void) +void * +q_status_checks(void *arg) { resource_node_t *curr; char *name; rg_state_t svcblk; - void *lockp; pthread_rwlock_rdlock(&resource_lock); list_do(&_tree, curr) { @@ -674,30 +767,42 @@ /* Group name */ name = curr->rn_resource->r_attrs->ra_value; - /* If we're not running it, no need to CONDSTOP */ - if (rg_lock(name, &lockp) != 0) - continue; - if (get_rg_state(name, &svcblk) < 0) { - rg_unlock(name, lockp); + /* Local check - no one will make us take a service */ + if (get_rg_state_local(name, &svcblk) < 0) { continue; } - rg_unlock(name, lockp); if (svcblk.rs_owner != my_id() || svcblk.rs_state != RG_STATE_STARTED) continue; - /*clulog(LOG_DEBUG, "Checking status of %s\n", name);*/ - rt_enqueue_request(name, RG_STATUS, -1, 0, NODE_ID_NONE, 0, 0); } while (!list_done(&_tree, curr)); pthread_rwlock_unlock(&resource_lock); - /*rg_wait_threads();*/ + + return NULL; } + +void +do_status_checks(void) +{ + pthread_attr_t attrs; + pthread_t newthread; + + pthread_attr_init(&attrs); + pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED); + pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); + pthread_attr_setstacksize(&attrs, 65535); + + pthread_create(&newthread, &attrs, q_status_checks, NULL); + pthread_attr_destroy(&attrs); +} + + /** Stop changed resources. 
*/ @@ -708,7 +813,6 @@ char *name; rg_state_t svcblk; int need_kill; - void *lockp; clulog(LOG_INFO, "Stopping changed resources.\n"); @@ -722,13 +826,9 @@ name = curr->rn_resource->r_attrs->ra_value; /* If we're not running it, no need to CONDSTOP */ - if (rg_lock(name, &lockp) != 0) - continue; - if (get_rg_state(name, &svcblk) < 0) { - rg_unlock(name, lockp); + if (get_rg_state_local(name, &svcblk) < 0) { continue; } - rg_unlock(name, lockp); if (svcblk.rs_owner != my_id()) continue; @@ -757,9 +857,9 @@ do_condstarts(void) { resource_node_t *curr; - char *name; + char *name, *val; rg_state_t svcblk; - int need_init, new_groups = 0; + int need_init, new_groups = 0, autostart; void *lockp; clulog(LOG_INFO, "Starting changed resources.\n"); @@ -779,19 +879,13 @@ if (curr->rn_resource->r_flags & RF_NEEDSTART) need_init = 1; - if (rg_lock(name, &lockp) != 0) - continue; - - if (get_rg_state(name, &svcblk) < 0) { - rg_unlock(name, lockp); + if (get_rg_state_local(name, &svcblk) < 0) { continue; } if (!need_init && svcblk.rs_owner != my_id()) { - rg_unlock(name, lockp); continue; } - rg_unlock(name, lockp); if (need_init) { ++new_groups; @@ -842,7 +936,14 @@ } /* Set it up for an auto-start */ - svcblk.rs_state = RG_STATE_STOPPED; + val = res_attr_value(curr->rn_resource, "autostart"); + autostart = !(val && ((!strcmp(val, "no") || + (atoi(val)==0)))); + if (autostart) + svcblk.rs_state = RG_STATE_STOPPED; + else + svcblk.rs_state = RG_STATE_DISABLED; + set_rg_state(name, &svcblk); rg_unlock(name, lockp); --- cluster/rgmanager/src/daemons/main.c 2006/01/24 19:46:59 1.9.2.8.6.6 +++ cluster/rgmanager/src/daemons/main.c 2006/06/16 20:07:46 1.9.2.8.6.7 @@ -34,8 +34,12 @@ #include #include +#define L_SYS (1<<1) +#define L_USER (1<<0) + int configure_logging(int ccsfd); +void node_event_q(int, uint64_t, int); int daemon_init(char *); int init_resource_groups(int); void kill_resource_groups(void); @@ -44,9 +48,10 @@ void graceful_exit(int); void flag_shutdown(int sig); void 
hard_exit(void); -int send_rg_states(int); +int send_rg_states(int, int); int check_config_update(void); int svc_exists(char *); +int watchdog_init(void); int shutdown_pending = 0, running = 1, need_reconfigure = 0; char debug = 0; /* XXX* */ @@ -134,6 +139,9 @@ void node_event(int local, uint64_t nodeID, int nodeStatus) { + if (!running) + return; + if (local) { /* Local Node Event */ @@ -216,8 +224,15 @@ clulog(LOG_INFO, "State change: %s DOWN\n", node_delta->cml_members[x].cm_name); - node_event(0, node_delta->cml_members[x].cm_id, - STATE_DOWN); + /* Don't bother evaluating anything resource groups are + locked. This is just a performance thing */ + if (!rg_locked()) { + node_event_q(0, node_delta->cml_members[x].cm_id, + STATE_DOWN); + } else { + clulog(LOG_NOTICE, "Not taking action - services" + " locked\n"); + } } /* Free nodes */ @@ -231,7 +246,7 @@ me = memb_online(node_delta, my_id()); if (me) { clulog(LOG_INFO, "State change: Local UP\n"); - node_event(1, my_id(), STATE_UP); + node_event_q(1, my_id(), STATE_UP); } for (x=0; node_delta && x < node_delta->cml_count; x++) { @@ -245,14 +260,69 @@ clulog(LOG_INFO, "State change: %s UP\n", node_delta->cml_members[x].cm_name); - node_event(0, node_delta->cml_members[x].cm_id, - STATE_UP); + node_event_q(0, node_delta->cml_members[x].cm_id, + STATE_UP); } cml_free(node_delta); cml_free(new_ml); - rg_unlockall(); + rg_unlockall(L_SYS); + + return 0; +} + + +int +lock_commit_cb(char *key, uint64_t viewno, void *data, uint32_t datalen) +{ + char lockstate; + + if (datalen != 1) { + clulog(LOG_WARNING, "%s: invalid data length!\n", __FUNCTION__); + free(data); + return 0; + } + + lockstate = *(char *)data; + free(data); + + if (lockstate == 0) { + rg_unlockall(L_USER); /* Doing this multiple times + has no effect */ + clulog(LOG_NOTICE, "Resource Groups Unlocked\n"); + return 0; + } + + if (lockstate == 1) { + rg_lockall(L_USER); /* Doing this multiple times + has no effect */ + clulog(LOG_NOTICE, "Resource Groups 
Locked\n"); + return 0; + } + + clulog(LOG_DEBUG, "Invalid lock state in callback: %d\n", lockstate); + return 0; +} + + +int +do_lockreq(int fd, int req) +{ + int ret; + char state; + cluster_member_list_t *m = member_list(); + + state = (req==RG_LOCK)?1:0; + ret = vf_write(m, VFF_IGN_CONN_ERRORS, "rg_lockdown", &state, 1); + cml_free(m); + + if (ret == 0) { + msg_send_simple(fd, RG_SUCCESS, 0, 0); + } else { + msg_send_simple(fd, RG_FAIL, 0, 0); + } + return 0; } @@ -292,9 +362,38 @@ switch (msg_hdr.gh_command) { case RG_STATUS: clulog(LOG_DEBUG, "Sending service states to fd%d\n",fd); - send_rg_states(fd); + send_rg_states(fd, msg_hdr.gh_arg1); break; + + case RG_LOCK: + msg_receive_timeout(fd, &msg_hdr, sizeof(msg_hdr), 1); + if (rg_quorate()) { + do_lockreq(fd, RG_LOCK); + } + + msg_close(fd); + break; + + case RG_UNLOCK: + msg_receive_timeout(fd, &msg_hdr, sizeof(msg_hdr), 1); + if (rg_quorate()) { + do_lockreq(fd, RG_UNLOCK); + } + + msg_close(fd); + break; + + case RG_QUERY_LOCK: + msg_receive_timeout(fd, &msg_hdr, sizeof(msg_hdr), 1); + if (rg_quorate()) { + ret = (rg_locked() & L_USER) ? RG_LOCK : RG_UNLOCK; + msg_send_simple(fd, ret, 0, 0); + } + msg_close(fd); + break; + + case RG_ACTION_REQUEST: ret = msg_receive_timeout(fd, &msg_sm, sizeof(msg_sm), @@ -308,17 +407,6 @@ /* Decode SmMessageSt message */ swab_SmMessageSt(&msg_sm); - if (rg_locked()) { - msg_sm.sm_data.d_ret = RG_EAGAIN; - /* Encode before responding... */ - swab_SmMessageSt(&msg_sm); - - if (msg_send(fd, &msg_sm, sizeof (SmMessageSt)) != - sizeof (SmMessageSt)) - clulog(LOG_ERR, "#40: Error replying to " - "action request.\n"); - } - if (!svc_exists(msg_sm.sm_data.d_svcName)) { msg_sm.sm_data.d_ret = RG_ENOSERVICE; /* No such service! 
*/ @@ -375,18 +463,19 @@ break; case CE_SUSPEND: clulog(LOG_DEBUG, "Suspend Event\n"); - rg_lockall(); + rg_lockall(L_SYS); break; case CE_MEMB_CHANGE: clulog(LOG_DEBUG, "Membership Change Event\n"); - if (rg_quorate()) { - rg_unlockall(); + if (rg_quorate() && running) { + rg_unlockall(L_SYS); membership_update(); } break; case CE_QUORATE: rg_set_quorate(); - rg_unlockall(); + rg_unlockall(L_SYS); + rg_unlockall(L_USER); clulog(LOG_NOTICE, "Quorum Achieved\n"); membership_update(); break; @@ -394,7 +483,7 @@ clulog(LOG_EMERG, "#1: Quorum Dissolved\n"); rg_set_inquorate(); member_list_update(NULL); /* Clear member list */ - rg_lockall(); + rg_lockall(L_SYS); rg_doall(RG_INIT, 1, "Emergency stop of %s"); rg_set_uninitialized(); break; @@ -430,7 +519,7 @@ */ } - while (tv.tv_sec || tv.tv_usec) { + while (running && (tv.tv_sec || tv.tv_usec)) { FD_ZERO(&rfds); max = msg_fill_fdset(&rfds, MSG_LISTEN, RG_PURPOSE); FD_SET(clusterfd, &rfds); @@ -489,8 +578,6 @@ return 0; } - - return 0; } @@ -512,7 +599,7 @@ void hard_exit(void) { - rg_lockall(); + rg_lockall(L_SYS); rg_doall(RG_INIT, 1, "Emergency stop of %s"); vf_shutdown(); exit(1); @@ -522,8 +609,8 @@ void cleanup(int cluster_fd) { - rg_lockall(); - rg_doall(RG_STOP, 1, NULL); + rg_lockall(L_SYS); + rg_doall(RG_STOP_EXITING, 1, NULL); vf_shutdown(); kill_resource_groups(); member_list_update(NULL); @@ -648,8 +735,11 @@ if (foreground) clu_log_console(1); - if (!foreground && (geteuid() == 0)) + if (!foreground && (geteuid() == 0)) { daemon_init(argv[0]); + if (!debug && !watchdog_init()) + clulog(LOG_NOTICE, "Failed to start watchdog\n"); + } /* We need quorum before we can read the configuration data from @@ -723,6 +813,8 @@ return -1; } + vf_key_init("rg_lockdown", 10, NULL, lock_commit_cb); + if (clu_login(cluster_fd, RG_SERVICE_GROUP) == -1) { if (errno != ENOSYS) { clu_log_console(1); @@ -756,7 +848,6 @@ /*malloc_dump_table(); */ /* Only works if alloc.c us used */ /*malloc_stats();*/ - /*malloc_dump_table(1352, 
1352);*/ exit(0); } --- cluster/rgmanager/src/daemons/restree.c 2006/01/20 16:27:29 1.10.2.2.6.5 +++ cluster/rgmanager/src/daemons/restree.c 2006/06/16 20:07:46 1.10.2.2.6.6 @@ -19,8 +19,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -54,6 +54,7 @@ /* XXX from reslist.c */ void * act_dup(resource_act_t *acts); +time_t get_time(char *action, int depth, resource_node_t *node); const char *res_ops[] = { @@ -512,7 +513,7 @@ node->rn_parent = parent; node->rn_resource = curres; node->rn_state = RES_STOPPED; - node->rn_actions = (resource_act_t *)act_dup(curres->r_actions); + node->rn_actions = (resource_act_t *)act_dup(curres->r_actions); curres->r_refs++; list_insert(tree, node); @@ -862,6 +863,44 @@ void +set_time(char *action, int depth, resource_node_t *node) +{ + time_t now; + int x = 0; + + time(&now); + + for (; node->rn_actions[x].ra_name; x++) { + + if (strcmp(node->rn_actions[x].ra_name, action) || + node->rn_actions[x].ra_depth != depth) + continue; + + node->rn_actions[x].ra_last = now; + break; + } +} + + +time_t +get_time(char *action, int depth, resource_node_t *node) +{ + int x = 0; + + for (; node->rn_actions[x].ra_name; x++) { + + if (strcmp(node->rn_actions[x].ra_name, action) || + node->rn_actions[x].ra_depth != depth) + continue; + + return node->rn_actions[x].ra_last; + } + + return (time_t)0; +} + + +void clear_checks(resource_node_t *node) { time_t now; @@ -899,8 +938,8 @@ @see _res_op_by_level res_exec */ int -_res_op(resource_node_t **tree, resource_t *first, char *type, - void * __attribute__((unused))ret, int realop) +_res_op(resource_node_t **tree, resource_t *first, + char *type, void * __attribute__((unused))ret, int realop) { int rv, me; resource_node_t *node; @@ -965,13 +1004,20 @@ /* Start starts before children */ if (me && (op == RS_START)) { node->rn_flags &= ~RF_NEEDSTART; + rv = res_exec(node, op, 0); - if (rv != 0) + if (rv != 0) { + node->rn_state = RES_FAILED; return rv; + } - 
time(&node->rn_resource->r_started); + set_time("start", 0, node); clear_checks(node); - ++node->rn_resource->r_incarnations; + + if (node->rn_state != RES_STARTED) { + ++node->rn_resource->r_incarnations; + node->rn_state = RES_STARTED; + } } if (node->rn_child) { @@ -983,13 +1029,18 @@ /* Stop/status/etc stops after children have stopped */ if (me && (op == RS_STOP)) { node->rn_flags &= ~RF_NEEDSTOP; - --node->rn_resource->r_incarnations; rv = res_exec(node, op, 0); if (rv != 0) { - ++node->rn_resource->r_incarnations; + node->rn_state = RES_FAILED; return rv; } + + if (node->rn_state != RES_STOPPED) { + --node->rn_resource->r_incarnations; + node->rn_state = RES_STOPPED; + } + } else if (me && (op == RS_STATUS)) { rv = do_status(node); --- cluster/rgmanager/src/daemons/rg_locks.c 2005/03/02 07:07:01 1.4.2.2 +++ cluster/rgmanager/src/daemons/rg_locks.c 2006/06/16 20:07:46 1.4.2.2.6.1 @@ -167,11 +167,11 @@ int -rg_lockall(void) +rg_lockall(int flag) { pthread_mutex_lock(&locks_mutex); if (!__rg_lock) - __rg_lock = 1; + __rg_lock |= flag; pthread_mutex_unlock(&locks_mutex); return 0; } @@ -189,11 +189,11 @@ int -rg_unlockall(void) +rg_unlockall(int flag) { pthread_mutex_lock(&locks_mutex); if (__rg_lock) - __rg_lock = 0; + __rg_lock &= ~flag; pthread_cond_broadcast(&unlock_cond); pthread_mutex_unlock(&locks_mutex); return 0; @@ -201,21 +201,6 @@ int -rg_wait_unlockall(void) -{ - pthread_mutex_lock(&locks_mutex); - if (!__rg_lock) { - pthread_mutex_unlock(&locks_mutex); - return 0; - } - - pthread_cond_wait(&unlock_cond, &locks_mutex); - pthread_mutex_unlock(&locks_mutex); - return 0; -} - - -int rg_set_quorate(void) { pthread_mutex_lock(&locks_mutex); --- cluster/rgmanager/src/daemons/rg_state.c 2006/02/02 19:00:02 1.4.2.8.6.3 +++ cluster/rgmanager/src/daemons/rg_state.c 2006/06/16 20:07:46 1.4.2.8.6.4 @@ -117,6 +117,7 @@ struct timeval start, now; uint64_t nodeid, *p; int flags; + int conv = 0, err; int block = !(dflt_flags & CLK_NOWAIT); /* Holder not supported 
for this call */ @@ -128,6 +129,37 @@ gettimeofday(&start, NULL); start.tv_sec += 30; } + + /* Ripped from global.c in magma */ + if (!(dflt_flags & CLK_CONVERT) && + (block || ((dflt_flags & CLK_EX) == 0))) { + /* Acquire NULL lock */ + ret = clu_lock(resource, CLK_NULL, lockpp); + err = errno; + if (ret == 0) { + if ((flags & CLK_EX) == 0) { + /* User only wanted a NULL lock... */ + return 0; + } + /* + Ok, NULL lock was taken, rest of blocking + call should be done using lock conversions. + */ + flags |= CLK_CONVERT; + conv = 1; + } else { + switch(err) { + case EINVAL: + /* Oops, null locks don't work on this + plugin; use normal spam mode */ + break; + default: + errno = err; + return -1; + } + } + } + while (1) { if (block) { gettimeofday(&now, NULL); @@ -144,9 +176,15 @@ } } - ret = clu_lock(resource, flags | CLK_NOWAIT, lockpp); + *lockpp = NULL; - if ((ret != 0) && (errno == EAGAIN) && block) { + /* Take the lock (convert if possible). */ + ret = clu_lock(resource, flags | CLK_NOWAIT | + ((conv && !timed_out) ? CLK_CONVERT : 0), + lockpp); + err = errno; + + if ((ret != 0) && (err == EAGAIN) && block) { if (timed_out) { p = (uint64_t *)*lockpp; if (p) { @@ -175,6 +213,16 @@ break; } + /* Fatal error. If we took an automatic NL lock with the hopes of + converting it, release the lock before returning */ + if (conv == 1 && ret < 0) { + clu_unlock(resource, *lockpp); + *lockpp = NULL; + } + + if (ret < 0) + errno = err; + return ret; } @@ -369,6 +417,46 @@ } +int vf_read_local(char *, uint64_t *, void *, uint32_t *); +int +get_rg_state_local(char *name, rg_state_t *svcblk) +{ + char res[256]; + int ret; + void *data = NULL; + uint32_t datalen = 0; + uint64_t viewno; + + /* ... 
*/ + if (name) + strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name)); + + snprintf(res, sizeof(res),"usrm::rg=\"%s\"", svcblk->rs_name); + ret = vf_read_local(res, &viewno, &data, &datalen); + + if (ret != VFR_OK || datalen == 0 || + datalen != sizeof(*svcblk)) { + if (data) + free(data); + + svcblk->rs_owner = NODE_ID_NONE; + svcblk->rs_last_owner = NODE_ID_NONE; + svcblk->rs_state = RG_STATE_UNINITIALIZED; + svcblk->rs_restarts = 0; + svcblk->rs_transition = 0; + strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name)); + + return FAIL; + } + + /* Copy out the data. */ + memcpy(svcblk, data, sizeof(*svcblk)); + free(data); + + return 0; +} + + /** * Advise service manager as to whether or not to stop a service, given * that we already know it's legal to run the service. @@ -380,6 +468,7 @@ * 1 = STOP service - return whatever it returns. * 2 = DO NOT stop service, return 0 (success) * 3 = DO NOT stop service, return RG_EFORWARD + * 4 = DO NOT stop service, return RG_EAGAIN */ int svc_advise_stop(rg_state_t *svcStatus, char *svcName, int req) @@ -504,6 +593,7 @@ * @return 0 = DO NOT start service, return FAIL * 1 = START service - return whatever it returns. * 2 = DO NOT start service, return 0 + * 3 = DO NOT start service, return RG_EAGAIN */ int svc_advise_start(rg_state_t *svcStatus, char *svcName, int req) @@ -519,10 +609,6 @@ break; case RG_STATE_STOPPING: - clulog(LOG_DEBUG, "RG %s is stopping\n", svcName); - ret = 2; - break; - case RG_STATE_STARTED: case RG_STATE_CHECK: case RG_STATE_STARTING: @@ -548,7 +634,14 @@ break; } + /* We are allowed to do something with the service. 
Make + sure we're not locked */ if (svcStatus->rs_owner == NODE_ID_NONE) { + if (rg_locked()) { + ret = 3; + break; + } + clulog(LOG_NOTICE, "Starting stopped service%s\n", svcName); @@ -556,6 +649,13 @@ break; } + if (rg_locked()) { + clulog(LOG_WARNING, "Not initiating failover of %s: " + "Resource groups locked!\n", svcName); + ret = 3; + break; + } + /* * Service is running but owner is down -> FAILOVER */ @@ -588,6 +688,12 @@ break; case RG_STATE_STOPPED: + /* Don't actually enable if the RG is locked! */ + if (rg_locked()) { + ret = 3; + break; + } + clulog(LOG_NOTICE, "Starting stopped service %s\n", svcName); ret = 1; @@ -596,6 +702,12 @@ case RG_STATE_DISABLED: case RG_STATE_UNINITIALIZED: if (req == RG_ENABLE) { + /* Don't actually enable if the RG is locked! */ + if (rg_locked()) { + ret = 3; + break; + } + clulog(LOG_NOTICE, "Starting disabled service %s\n", svcName); @@ -656,6 +768,9 @@ case 2: /* Don't start service, return 0 */ rg_unlock(svcName, lockp); return 0; + case 3: + rg_unlock(svcName, lockp); + return RG_EAGAIN; default: break; } @@ -738,14 +853,12 @@ } rg_unlock(svcName, lockp); - if (svcStatus.rs_state == RG_STATE_STARTED && - svcStatus.rs_owner != my_id()) - /* Don't check status for other resource groups */ + if (svcStatus.rs_owner != my_id()) + /* Don't check status for anything not owned */ return SUCCESS; - if (svcStatus.rs_state != RG_STATE_STARTED && - svcStatus.rs_owner == my_id()) - /* Not-running RGs should not be checked yet. */ + if (svcStatus.rs_state != RG_STATE_STARTED) + /* Not-running RGs should not be checked either. 
*/ return SUCCESS; return group_op(svcName, RG_STATUS); @@ -798,6 +911,9 @@ case 3: rg_unlock(svcName, lockp); return RG_EFORWARD; + case 4: + rg_unlock(svcName, lockp); + return RG_EAGAIN; default: break; } @@ -1077,7 +1193,7 @@ handle_relocate_req(char *svcName, int request, uint64_t preferred_target, uint64_t *new_owner) { - cluster_member_list_t *allowed_nodes; + cluster_member_list_t *allowed_nodes, *backup = NULL; uint64_t target = preferred_target, me = my_id(); int ret, x; @@ -1102,19 +1218,23 @@ If we can't start it on the preferred target, then we'll try other nodes. */ + //count_resource_groups(allowed_nodes); + backup = cml_dup(allowed_nodes); + for (x = 0; x < allowed_nodes->cml_count; x++) { if (allowed_nodes->cml_members[x].cm_id == me || - allowed_nodes->cml_members[x].cm_id == preferred_target) + allowed_nodes->cml_members[x].cm_id == + preferred_target) continue; allowed_nodes->cml_members[x].cm_state = STATE_DOWN; } /* - * First, see if it's legal to relocate to the target node. Legal - * means: the node is online and is in the [restricted] failover - * domain of the service, or the service has no failover domain. + * First, see if it's legal to relocate to the target node. + * Legal means: the node is online and is in the + * [restricted] failover domain of the service, or the + * service has no failover domain. */ - target = best_target_node(allowed_nodes, me, svcName, 1); cml_free(allowed_nodes); @@ -1155,7 +1275,12 @@ * Ok, so, we failed to send it to the preferred target node. * Try to start it on all other nodes. */ - allowed_nodes = member_list(); + if (backup) { + allowed_nodes = backup; + } else { + allowed_nodes = member_list(); + //count_resource_groups(allowed_nodes); + } if (preferred_target != NODE_ID_NONE) memb_mark_down(allowed_nodes, preferred_target); @@ -1208,12 +1333,14 @@ * We're done. 
*/ exhausted: - clulog(LOG_WARNING, - "#70: Attempting to restart service %s locally.\n", - svcName); - if (svc_start(svcName, RG_START_RECOVER) == 0) { - *new_owner = me; - return FAIL; + if (!rg_locked()) { + clulog(LOG_WARNING, + "#70: Attempting to restart service %s locally.\n", + svcName); + if (svc_start(svcName, RG_START_RECOVER) == 0) { + *new_owner = me; + return FAIL; + } } if (svc_stop(svcName, RG_STOP) != 0) { @@ -1263,6 +1390,12 @@ */ ret = svc_start(svcName, req); + /* + If services are locked, return the error + */ + if (ret == RG_EAGAIN) + return RG_EAGAIN; + /* * If we succeeded, then we're done. */ --- cluster/rgmanager/src/daemons/rg_thread.c 2006/01/20 16:27:29 1.7.2.3.6.2 +++ cluster/rgmanager/src/daemons/rg_thread.c 2006/06/16 20:07:46 1.7.2.3.6.3 @@ -71,7 +71,7 @@ } -static void +static int wait_initialize(const char *name) { resthread_t *t; @@ -80,15 +80,21 @@ pthread_mutex_lock(&reslist_mutex); t = find_resthread_byname(name); - assert(t); + if (!t) { + pthread_mutex_unlock(&reslist_mutex); + return -1; + } + if (t->rt_status != RG_STATE_UNINITIALIZED) { pthread_mutex_unlock(&reslist_mutex); - return; + return 0; } pthread_mutex_unlock(&reslist_mutex); usleep(50000); } + + assert(0); } @@ -191,7 +197,6 @@ pthread_cond_wait(&my_queue_cond, &my_queue_mutex); pthread_mutex_unlock(&my_queue_mutex); - while(1) { pthread_mutex_lock(&reslist_mutex); pthread_mutex_lock(&my_queue_mutex); @@ -201,7 +206,6 @@ loop with the lock held. */ break; } - pthread_mutex_unlock(&my_queue_mutex); pthread_mutex_unlock(&reslist_mutex); @@ -216,6 +220,8 @@ myself = find_resthread_byname(myname); assert(myself); myself->rt_request = req->rr_request; + if (req->rr_request == RG_STOP_EXITING) + myself->rt_status = RG_STATE_STOPPING; pthread_mutex_unlock(&reslist_mutex); switch(req->rr_request) { @@ -289,6 +295,30 @@ break; + case RG_STOP_EXITING: + /* We're out of here. 
Don't allow starts anymore */ + error = svc_stop(myname, RG_STOP); + + if (error == 0) { + ret = RG_SUCCESS; + + } else if (error == RG_EFORWARD) { + ret = RG_NONE; + break; + } else { + /* + * Bad news. + */ + ret = RG_FAIL; + } + + pthread_mutex_lock(&my_queue_mutex); + purge_all(&my_queue); + pthread_mutex_unlock(&my_queue_mutex); + + break; + + case RG_DISABLE: /* Disable and user stop requests need to be forwarded; they're user requests */ @@ -454,6 +484,7 @@ int ret; resthread_t *resgroup = NULL; +retry: pthread_mutex_lock(&reslist_mutex); while (resgroup == NULL) { resgroup = find_resthread_byname(resgroupname); @@ -468,10 +499,14 @@ return ret; } + ret = (resgroup->rt_status == RG_STATE_STOPPING); + pthread_mutex_unlock(&reslist_mutex); - wait_initialize(resgroupname); + if (wait_initialize(resgroupname) < 0) { + goto retry; + } - return 0; + return ret; } @@ -521,6 +556,9 @@ resthread_t *resgroup; if (spawn_if_needed(resgroupname) != 0) { + /* Usually, we get here if the thread is killing + stuff. This prevents us from queueing START requests + while we're exiting */ return -1; } /cvs/cluster/cluster/rgmanager/src/resources/svclib_nfslock,v --> standard output revision 1.2.2.1 --- cluster/rgmanager/src/resources/svclib_nfslock +++ - 2006-06-16 20:07:49.459256000 +0000 @@ -0,0 +1,251 @@ +#!/bin/bash +# +# Do reclaim-broadcasts when we kill lockd during shutdown/startup +# of a cluster service. +# +# Exported functions: +# +# notify_list_store +# notify_list_merge +# notify_list_broadcast +# + +# +# Usage: +# statd_notify +# +# Copy out a list from , merge them with the system nfs lock +# list, and send them out as after generating a random +# state (needed so clients will reclaim their locks) +# +nfslock_statd_notify() +{ + declare tmpdir=/tmp/statd-$2.$$ + declare nl_dir=$1 + declare nl_ip=$2 + declare command # Work around bugs in rpc.statd + declare pid_xxx # Work around bugs in rpc.statd + declare owner + + [ -z "$lockd_pid" ] && return 0 + if ! 
[ -d $nl_dir ]; then + return 0 + fi + + if [ -z "`ls $nl_dir/sm/* 2> /dev/null`" ]; then + ocf_log debug "No hosts to notify" + return 0 + fi + + # Ok, copy the HA directory to something we can use. + rm -rf $tmpdir + mkdir -p $tmpdir/sm + + # Copy in our specified entries + cp -f $nl_dir/sm/* $tmpdir/sm + + # Copy in our global entries + # XXX This might be what we just copied. + + if [ -d "/var/lib/nfs/statd/sm" ]; then + owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}') + cp -f /var/lib/nfs/statd/sm/* $tmpdir/sm + elif [ -d "/var/lib/nfs/sm" ]; then + owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}') + cp -f /var/lib/nfs/sm/* $tmpdir/sm + fi + + # + # Generate a random state file. If this ends up being what a client + # already has in its list, that's bad, but the chances of this + # are small - and relocations should be rare. + # + dd if=/dev/urandom of=$tmpdir/state bs=1 count=4 &> /dev/null + + # + # Make sure we set permissions, or statd will not like it. + # + chown -R $owner $tmpdir + + # + # Tell rpc.statd to notify clients. Don't go into background, + # because statd is buggy and won't exit like it's supposed to after + # sending the notifications out. + # + ocf_log info "Sending reclaim notifications via $nl_ip" + command="rpc.statd -NFP $tmpdir -n $nl_ip" + eval $command 2>&1 & + sleep 3 # XXX - the instance of rpc.statd we just spawned is supposed + # to exit after it finishes notifying clients. + # rpc.statd spawned which is still running handles the actual + # new SM_MON requests... we hope 3 seconds is enough time + # to get all the SM_NOTIFY messages out. 
rpc.statd = bugged + # + # clean up + # + pid_xxx=`ps auwwx | grep "$command" | grep -v grep | awk '{print $2}'` + kill $pid_xxx + rm -rf $tmpdir + + return 0 +} + + +# +# Copy of isSlave from svclib_ip and/or ip.sh +# +nfslock_isSlave() +{ + declare intf=$1 + declare line + + if [ -z "$intf" ]; then + ocf_log err "usage: isSlave " + return 1 + fi + + line=$(/sbin/ip link list dev $intf) + if [ $? -ne 0 ]; then + ocf_log err "$intf not found" + return 1 + fi + + if [ "$line" = "${line/<*SLAVE*>/}" ]; then + return 2 + fi + + # Yes, it is a slave device. Ignore. + return 0 +} + + +# +# Get all the IPs on the system except loopback IPs +# +nfslock_ip_address_list() +{ + declare idx dev family ifaddr + + while read idx dev family ifaddr; do + + if [ "$family" != "inet" ] && [ "$family" != "inet6" ]; then + continue + fi + + if [ "$dev" = "lo" ]; then + # Ignore loopback + continue + fi + + nfslock_isSlave $dev + if [ $? -ne 2 ]; then + continue + fi + + idx=${idx/:/} + + echo $dev $family ${ifaddr/\/*/} ${ifaddr/*\//} + + done < <(/sbin/ip -o addr list | awk '{print $1,$2,$3,$4}') + + return 0 +} + + +# +# Usage: broadcast_notify +# +# Send the contents of out via all IPs on the system. +# +notify_list_broadcast() +{ + declare dev family addr maskbits ip_name + declare lockd_pid=$(pidof lockd) + declare nl_dir=$1 + + while read dev family addr maskbits; do + if [ "$family" != "inet" ]; then + continue + fi + + ip_name=$(clufindhostname -i $addr) + if [ -z "$ip_name" ]; then + nfslock_statd_notify $nl_dir $addr + else + nfslock_statd_notify $nl_dir $ip_name + fi + + done < <(nfslock_ip_address_list) +} + + +# +# Store the lock monitor list from rpc.statd - do this during a teardown +# after the IP addresses of a service have been taken offline. Note that +# this should be done by HA-callout programs, but this feature is not in +# RHEL3. 
+# +notify_list_store() +{ + declare nl_dir=$1 + declare owner + + mkdir -p $nl_dir/sm + + if [ -d "/var/lib/nfs/statd/sm" ]; then + if [ -z "`ls /var/lib/nfs/statd/sm/* 2> /dev/null`" ]; then + return 1 + # nothing to do! + fi + + owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}') + cp -af /var/lib/nfs/statd/sm/* $nl_dir/sm + chown -R $owner $nl_dir + return 0 + elif [ -d "/var/lib/nfs/sm" ]; then + if [ -z "`ls /var/lib/nfs/sm/* 2> /dev/null`" ]; then + return 1 + # nothing to do! + fi + + owner=$(ls -dl /var/lib/nfs/sm | awk '{print $3"."$4}') + cp -af /var/lib/nfs/sm/* $nl_dir/sm + chown -R $owner $nl_dir + return 0 + fi + + return 1 +} + + +# +# Merge the contents of /sm with the system-wide list +# Make sure ownership is right, or statd will hiccup. This should not +# actually ever be needed because statd will, upon getting a SM_MON +# request, create all the entries in this list. It's mostly for +# housekeeping for next time we relocate the service. +# +notify_list_merge() +{ + declare nl_dir=$1 + declare owner + + if [ -z "`ls $nl_dir/* 2> /dev/null`" ]; then + return 1 + fi + + if [ -d "/var/lib/nfs/statd/sm" ]; then + owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}') + cp -af $nl_dir/sm/* /var/lib/nfs/statd/sm + chown -R $owner $nl_dir + return 0 + elif [ -d "/var/lib/nfs/sm" ]; then + owner=$(ls -dl /var/lib/nfs/sm | awk '{print $3"."$4}') + cp -af $nl_dir/sm/* /var/lib/nfs/sm + chown -R $owner $nl_dir + return 0 + fi + + return 1 +} + --- cluster/rgmanager/src/resources/Makefile 2005/12/06 18:37:04 1.4.2.3.6.2 +++ cluster/rgmanager/src/resources/Makefile 2006/06/16 20:07:46 1.4.2.3.6.3 @@ -20,7 +20,7 @@ RESOURCES=fs.sh service.sh ip.sh nfsclient.sh nfsexport.sh \ script.sh netfs.sh clusterfs.sh smb.sh -TARGETS=${RESOURCES} ocf-shellfuncs +TARGETS=${RESOURCES} ocf-shellfuncs svclib_nfslock all: --- cluster/rgmanager/src/resources/clusterfs.sh 2005/12/07 20:14:29 1.1.2.3.4.4 +++ cluster/rgmanager/src/resources/clusterfs.sh 
2006/06/16 20:07:46 1.1.2.3.4.5 @@ -37,7 +37,16 @@ YES=0 NO=1 YES_STR="yes" -INVALIDATEBUFFERS="/bin/true" + +# Grab nfs lock tricks if available +export NFS_TRICKS=1 +if [ -f "$(dirname $0)/svclib_nfslock" ]; then + . $(dirname $0)/svclib_nfslock + NFS_TRICKS=0 +else + unset OCF_RESKEY_nfslock +fi + . $(dirname $0)/ocf-shellfuncs @@ -135,6 +144,18 @@ + + + If set, the node will try to kill lockd and issue + reclaims across all remaining network interface cards. + This happens always, regardless of unmounting failed. + + + Enable NFS lock workarounds + + + + @@ -774,6 +795,23 @@ esac fi + # + # Always do this hackery on clustered file systems. + # + if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ + [ "$OCF_RESKEY_nfslock" = "1" ]; then + ocf_log warning "Dropping node-wide NFS locks" + mkdir -p $mp/.clumanager/statd + # Copy out the notify list; our + # IPs are already torn down + if notify_list_store $mp/.clumanager/statd; then + notify_list_broadcast $mp/.clumanager/statd + fi + fi + + # Always invalidate buffers on clusterfs resources + clubufflush -f $dev + if [ -z "$force_umount" ]; then ocf_log debug "Not umounting $dev (clustered file system)" return $SUCCESS @@ -782,7 +820,6 @@ # # Unmount the device. # - while [ ! "$done" ]; do isMounted $dev $mp case $? in --- cluster/rgmanager/src/resources/fs.sh 2005/12/07 20:14:29 1.4.2.6.4.4 +++ cluster/rgmanager/src/resources/fs.sh 2006/06/16 20:07:46 1.4.2.6.4.5 @@ -39,6 +39,13 @@ YES_STR="yes" INVALIDATEBUFFERS="/bin/true" +# Grab nfs lock tricks if available +export NFS_TRICKS=1 +if [ -f "$(dirname $0)/svclib_nfslock" ]; then + . $(dirname $0)/svclib_nfslock + NFS_TRICKS=0 +fi + . $(dirname $0)/ocf-shellfuncs meta_data() @@ -126,7 +133,6 @@ --> - If set and unmounting the file system fails, the node will @@ -139,6 +145,18 @@ + + + If set and unmounting the file system fails, the node will + try to kill lockd and issue reclaims across all remaining + network interface cards. 
+ + + Enable NFS lock workarounds + + + + File system ID for NFS exports. This can be overridden @@ -316,6 +334,7 @@ verify_options() { declare -i ret=$OCF_SUCCESS + declare o # # From mount(8) @@ -762,6 +781,63 @@ } +# +# Enable quotas on the mount point if the user requested them +# +enable_fs_quotas() +{ + declare -i need_check=0 + declare quotaopts="" + declare mopt + declare opts=$1 + declare mp=$2 + + if [ -z "`which quotaon`" ]; then + ocf_log err "quotaon not found in $PATH" + return 1 + fi + + for mopt in `echo $opts | sed -e s/,/\ /g`; do + case $mopt in + usrquota) + quotaopts="u$quotaopts" + continue + ;; + grpquota) + quotaopts="g$quotaopts" + continue + ;; + noquota) + quotaopts="" + return 0 + ;; + esac + done + + [ -z "$quotaopts" ] && return 0 + + # Ok, create quota files if they don't exist + for f in quota.user aquota.user quota.group aquota.group; do + if ! [ -f "$mp/$f" ]; then + ocf_log info "$mp/$f was missing - creating" + touch "$mp/$f" + chmod 600 "$mp/$f" + need_check=1 + fi + done + + if [ $need_check -eq 1 ]; then + ocf_log info "Checking quota info in $mp" + quotacheck -$quotaopts $mp + fi + + ocf_log info "Enabling Quotas on $mp" + ocf_log debug "quotaon -$quotaopts $mp" + quotaon -$quotaopts $mp + + return $? +} + # # startFilesystem @@ -958,6 +1034,18 @@ return $FAIL fi + # + # Create this for the NFS NLM broadcast bit + # + if [ $NFS_TRICKS -eq 0 ]; then + if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ + [ "$OCF_RESKEY_nfslock" = "1" ]; then + mkdir -p $mp/.clumanager/statd + notify_list_merge $mp/.clumanager/statd + fi + fi + + enable_fs_quotas $opts $mp activeMonitor start || return $OCF_ERR_GENERIC return $SUCCESS @@ -1048,6 +1136,7 @@ activeMonitor stop || return $OCF_ERR_GENERIC + quotaoff -gu $mp &> /dev/null umount $mp if [ $? 
-eq 0 ]; then umount_failed= @@ -1059,6 +1148,22 @@ if [ "$force_umount" ]; then killMountProcesses $mp + if [ $try -eq 1 ]; then + if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ + [ "$OCF_RESKEY_nfslock" = "1" ]; then + ocf_log warning \ + "Dropping node-wide NFS locks" + pkill -KILL -x lockd + mkdir -p $mp/.clumanager/statd + # Copy out the notify list; our + # IPs are already torn down + if notify_list_store $mp/.clumanager/statd + then + notify_list_broadcast \ + $mp/.clumanager/statd + fi + fi + fi fi if [ $try -ge $max_tries ]; then --- cluster/rgmanager/src/resources/ip.sh 2005/12/07 20:14:29 1.5.2.4.4.9 +++ cluster/rgmanager/src/resources/ip.sh 2006/06/16 20:07:46 1.5.2.4.4.10 @@ -30,6 +30,13 @@ PATH=/bin:/sbin:/usr/bin:/usr/sbin export LC_ALL LANG PATH +# Grab nfs lock tricks if available +export NFS_TRICKS=1 +if [ -f "$(dirname $0)/svclib_nfslock" ]; then + . $(dirname $0)/svclib_nfslock + NFS_TRICKS=0 +fi + . $(dirname $0)/ocf-shellfuncs @@ -90,6 +97,19 @@ + + + + If set and unmounting the file system fails, the node will + try to kill lockd and issue reclaims across all remaining + network interface cards. + + + Enable NFS lock workarounds + + + + @@ -865,6 +885,13 @@ fi ip_op ${OCF_RESKEY_family} add ${OCF_RESKEY_address} + if [ $NFS_TRICKS -eq 0 ]; then + if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ + [ "$OCF_RESKEY_nfslock" = "1" ]; then + notify_list_broadcast /var/lib/nfs/statd + fi + fi + exit $? ;; stop) --- cluster/rgmanager/src/resources/nfsclient.sh 2006/01/27 21:06:57 1.3.2.2.6.4 +++ cluster/rgmanager/src/resources/nfsclient.sh 2006/06/16 20:07:46 1.3.2.2.6.5 @@ -95,6 +95,18 @@ + + + This tells us whether the service in question has the + NFS lock workarounds enabled. If so, we always unexport + * rather than the specified client. + + + NFS Lock workaround flag + + + + Defines a list of options for this particular client. 
See 'man 5 exports' for a list @@ -106,6 +118,19 @@ + + + Allows recovery of this NFS client (default = 1) if it + disappears from the export list. If set to 0, the service + will be restarted. This is useful to help preserve export + ordering. + + + Allow recovery + + + + @@ -282,6 +307,14 @@ stop) verify_all || exit $OCF_ERR_ARGS + if [ "$OCF_RESKEY_nfslock" = "1" ]; then + # + # If the NFS lock workarounds were enabled, unexport from + # the world + # + export OCF_RESKEY_target="*" + fi + ocf_log info "Removing export: ${OCF_RESKEY_target}:${OCF_RESKEY_path}" exportfs -u "${OCF_RESKEY_target}:${OCF_RESKEY_path}" rv=$? @@ -299,9 +332,26 @@ # * Exports longer than 14 chars have line breaks inserted, which # broke the way the status check worked. # - exportfs -v | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -q \ - "^${OCF_RESKEY_path}[\t ]*.*${OCF_RESKEY_target}" - rv=$? + # Status check fix from Craig Lewis: + # * Exports with RegExp metacharacters need to be escaped. + # These metacharacters are: * ? . + # + export OCF_RESKEY_target_regexp=$(echo $OCF_RESKEY_target | \ + sed -e 's/*/[*]/g' -e 's/?/[?]/g' -e 's/\./\\./g') + exportfs -v | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -q \ + "^${OCF_RESKEY_path}[\t ]*.*${OCF_RESKEY_target_regexp}" + rv=$? + ;; + +recover) + if [ "$OCF_RESKEY_allow_recover" = "0" ] || \ + [ "$OCF_RESKEY_allow_recover" = "no" ] || \ + [ "$OCF_RESKEY_allow_recover" = "false" ]; then + exit 1 + fi + + $0 stop || exit 1 + $0 start || exit 1 ;; restart) --- cluster/rgmanager/src/resources/nfsexport.sh 2005/12/07 22:53:28 1.4.2.1.6.3 +++ cluster/rgmanager/src/resources/nfsexport.sh 2006/06/16 20:07:46 1.4.2.1.6.4 @@ -97,6 +97,19 @@ + + + + If you can see this, your GUI is broken. + This inherits an unspecified nfslock parameter so that + it works with fs or clusterfs resources. + + + If you can see this, your GUI is broken. 
+ + + + --- cluster/rgmanager/src/resources/ocf-shellfuncs 2005/10/17 20:53:12 1.2.2.1 +++ cluster/rgmanager/src/resources/ocf-shellfuncs 2006/06/16 20:07:46 1.2.2.2 @@ -1,5 +1,5 @@ # -# $Id: ocf-shellfuncs,v 1.2.2.1 2005/10/17 20:53:12 lhh Exp $ +# $Id: ocf-shellfuncs,v 1.2.2.2 2006/06/16 20:07:46 lhh Exp $ # # Common helper functions for the OCF Resource Agents supplied by # heartbeat. --- cluster/rgmanager/src/resources/service.sh 2005/12/06 18:37:04 1.1.2.1.6.2 +++ cluster/rgmanager/src/resources/service.sh 2006/06/16 20:07:46 1.1.2.1.6.3 @@ -5,7 +5,12 @@ # resources. ;( # - +# Grab nfs lock tricks if available +export NFS_TRICKS=1 +if [ -f "$(dirname $0)/svclib_nfslock" ]; then + . $(dirname $0)/svclib_nfslock + NFS_TRICKS=0 +fi meta_data() { @@ -89,6 +94,22 @@ + + + Enable NFS lock workarounds. When used with a compatible + HA-callout program like clunfslock, this could be used + to provide NFS lock failover, but at significant cost to + other services on the machine. This requires a compatible + version of nfs-utils and manual configuration of rpc.statd; + see 'man rpc.statd' to see if your version supports + the -H parameter. + + + Enable NFS lock workarounds + + + + This currently has three possible options: "restart" tries @@ -144,6 +165,17 @@ # case $1 in start) + # + # XXX If this is set, we kill lockd. If there is no + # child IP address, then clients will NOT get the reclaim + # notification. 
+ # + if [ $NFS_TRICKS -eq 0 ]; then + if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ + [ "$OCF_RESKEY_nfslock" = "1" ]; then + pkill -KILL -x lockd + fi + fi exit 0 ;; stop) --- cluster/rgmanager/src/utils/Makefile 2005/10/17 20:30:45 1.3.2.1.6.2 +++ cluster/rgmanager/src/utils/Makefile 2006/06/16 20:07:47 1.3.2.1.6.3 @@ -23,7 +23,7 @@ CFLAGS+= -L${libdir} -DPACKAGE_VERSION=\"${RELEASE}\" LDFLAGS+= -lmagmamsg -lmagma -lpthread -ldl -lncurses -L../clulib -lclulib -lccs -TARGETS=clubufflush clufindhostname clustat clusvcadm clulog +TARGETS=clubufflush clufindhostname clustat clusvcadm clulog clunfslock all: ${TARGETS} @@ -52,6 +52,10 @@ clusvcadm: clusvcadm.o $(CC) -o $@ $^ $(INLUDE) $(CFLAGS) $(LDFLAGS) +clunfslock: clunfslock.sh + cp clunfslock.sh clunfslock + chmod 755 clunfslock + clean: rm -f *.o $(TARGETS) --- cluster/rgmanager/src/utils/clustat.c 2006/01/20 16:27:30 1.5.2.3.6.7 +++ cluster/rgmanager/src/utils/clustat.c 2006/06/16 20:07:47 1.5.2.3.6.8 @@ -18,6 +18,12 @@ #define FLAG_RGMGR 0x4 #define FLAG_NOCFG 0x8 /* Shouldn't happen */ +#define RG_VERBOSE 0x1 + +#define QSTAT_ONLY 1 +#define VERSION_ONLY 2 +#define NODEID_ONLY 3 + int running = 1; @@ -35,7 +41,7 @@ rg_state_list_t * -rg_state_list(uint64_t local_node_id) +rg_state_list(uint64_t local_node_id, int fast) { int fd, n, x; rg_state_list_t *rsl = NULL; @@ -49,7 +55,7 @@ return NULL; } - msg_send_simple(fd, RG_STATUS, 0, 0); + msg_send_simple(fd, RG_STATUS, fast, 0); rsl = malloc(sizeof(rg_state_list_t)); if (!rsl) { @@ -70,8 +76,10 @@ "from Resource Group Manager\n"); break; } + if (n < 0) { - if (errno == EINTR) + if (errno == EAGAIN || + errno == EINTR) continue; fprintf(stderr, "Failed to receive " "service data: select: %s\n", @@ -80,8 +88,16 @@ } n = msg_receive_simple(fd, &msgp, tv.tv_sec); - if (n < sizeof(generic_msg_hdr)) + if (n < 0) { + if (errno == EAGAIN) + continue; + perror("msg_receive_simple"); + break; + } + if (n < sizeof(generic_msg_hdr)) { + printf("Error: Malformed message\n"); 
break; + } if (!msgp) { printf("Error: no message?!\n"); @@ -99,6 +115,7 @@ return NULL; } + rsmp = (rg_state_msg_t *)msgp; swab_rg_state_t(&rsmp->rsm_state); @@ -119,6 +136,7 @@ msgp = NULL; } + msg_send_simple(fd, RG_SUCCESS, 0, 0); msg_close(fd); if (!rsl->rgl_count) { @@ -260,8 +278,9 @@ return "unknown"; } + void -txt_rg_state(rg_state_t *rs, cluster_member_list_t *members) +_txt_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags) { char owner[31]; @@ -286,39 +305,90 @@ void -xml_rg_state(rg_state_t *rs, cluster_member_list_t *members) +_txt_rg_state_v(rg_state_t *rs, cluster_member_list_t *members, int flags) +{ + printf("Service Name : %s\n", rs->rs_name); + printf(" Current State : %s (%d)\n", + rg_state_str(rs->rs_state), rs->rs_state); + printf(" Owner : %s\n", + my_memb_id_to_name(members, rs->rs_owner)); + printf(" Last Owner : %s\n", + my_memb_id_to_name(members, rs->rs_last_owner)); + printf(" Last Transition : %s\n", + ctime((time_t *)(&rs->rs_transition))); +} + + +void +txt_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags) { + if (flags & RG_VERBOSE) + _txt_rg_state_v(rs, members, flags); + else + _txt_rg_state(rs, members, flags); +} + + +void +xml_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags) +{ + char time_str[32]; + int x; + + /* Chop off newlines */ + ctime_r((time_t *)&rs->rs_transition, time_str); + for (x = 0; time_str[x]; x++) { + if (time_str[x] < 32) { + time_str[x] = 0; + break; + } + } + printf(" \n", + " owner=\"%s\" last_owner=\"%s\" restarts=\"%d\"" + " last_transition=\"%llu\" last_transition_str=\"%s\"/>\n", rs->rs_name, rs->rs_state, rg_state_str(rs->rs_state), my_memb_id_to_name(members, rs->rs_owner), my_memb_id_to_name(members, rs->rs_last_owner), - rs->rs_restarts); + rs->rs_restarts, + (long long unsigned)rs->rs_transition, + time_str); } void -txt_rg_states(rg_state_list_t *rgl, cluster_member_list_t *members) +txt_rg_states(rg_state_list_t *rgl, cluster_member_list_t 
*members, + char *svcname, int flags) { int x; if (!rgl || !members) return; - printf(" %-20.20s %-30.30s %-14.14s\n", - "Service Name", "Owner (Last)", "State"); - printf(" %-20.20s %-30.30s %-14.14s\n", - "------- ----", "----- ------", "-----"); + if (!(flags & RG_VERBOSE)) { + printf(" %-20.20s %-30.30s %-14.14s\n", + "Service Name", "Owner (Last)", "State"); + printf(" %-20.20s %-30.30s %-14.14s\n", + "------- ----", "----- ------", "-----"); + } else { + printf("Service Information\n" + "------- -----------\n\n"); + } - for (x = 0; x < rgl->rgl_count; x++) - txt_rg_state(&rgl->rgl_states[x], members); + for (x = 0; x < rgl->rgl_count; x++) { + if (svcname && + strcmp(rgl->rgl_states[x].rs_name, svcname)) + continue; + txt_rg_state(&rgl->rgl_states[x], members, flags); + } } void -xml_rg_states(rg_state_list_t *rgl, cluster_member_list_t *members) +xml_rg_states(rg_state_list_t *rgl, cluster_member_list_t *members, + char *svcname) { int x; @@ -327,8 +397,12 @@ printf(" \n"); - for (x = 0; x < rgl->rgl_count; x++) - xml_rg_state(&rgl->rgl_states[x], members); + for (x = 0; x < rgl->rgl_count; x++) { + if (svcname && + strcmp(rgl->rgl_states[x].rs_name, svcname)) + continue; + xml_rg_state(&rgl->rgl_states[x], members, 0); + } printf(" \n"); } @@ -408,22 +482,25 @@ void -txt_member_states(cluster_member_list_t *membership) +txt_member_states(cluster_member_list_t *membership, char *name) { int x; printf(" %-40.40s %s\n", "Member Name", "Status"); printf(" %-40.40s %s\n", "------ ----", "------"); - for (x = 0; x < membership->cml_count; x++) + for (x = 0; x < membership->cml_count; x++) { + if (name && strcmp(membership->cml_members[x].cm_name, name)) + continue; txt_member_state(&membership->cml_members[x]); + } printf("\n"); } void -xml_member_states(cluster_member_list_t *membership) +xml_member_states(cluster_member_list_t *membership, char *name) { int x; @@ -431,38 +508,50 @@ return; printf(" \n"); - for (x = 0; x < membership->cml_count; x++) + for (x = 
0; x < membership->cml_count; x++) { + if (name && strcmp(membership->cml_members[x].cm_name, name)) + continue; xml_member_state(&membership->cml_members[x]); + } printf(" \n"); } void txt_cluster_status(int qs, cluster_member_list_t *membership, - rg_state_list_t *rgs) + rg_state_list_t *rgs, char *name, char *svcname, + int flags) { - txt_quorum_state(qs); - - if (!membership || !(qs & QF_GROUPMEMBER)) { - printf("Resource Group Manager not running; no service " - "information available.\n\n"); + if (!svcname && !name) { + txt_quorum_state(qs); + if (!membership || !(qs & QF_GROUPMEMBER)) { + printf("Resource Group Manager not running; " + "no service information available.\n\n"); + } } - txt_member_states(membership); - txt_rg_states(rgs, membership); + if (!svcname || (name && svcname)) + txt_member_states(membership, name); + if (!name || (name && svcname)) + txt_rg_states(rgs, membership, svcname, flags); } void xml_cluster_status(int qs, cluster_member_list_t *membership, - rg_state_list_t *rgs) + rg_state_list_t *rgs, char *name, char *svcname, + int flags) { printf("\n"); - printf("\n"); - xml_quorum_state(qs); - xml_member_states(membership); - if (rgs) - xml_rg_states(rgs, membership); + printf("\n"); + + if (!svcname && !name) + xml_quorum_state(qs); + if (!svcname || (name && svcname)) + xml_member_states(membership, name); + if (rgs && + (!name || (name && svcname))) + xml_rg_states(rgs, membership, svcname); printf("\n"); } @@ -545,9 +634,12 @@ " with -x.\n" " -I Display local node ID and exit\n" " -m Display status of and exit\n" -" -s Display statis of and exit\n" +" -s Display status of and exit\n" " -v Display version & cluster plugin and exit\n" " -x Dump information as XML\n" +" -Q Return 0 if quorate, 1 if not (no output)\n" +" -f Enable fast clustat reports\n" +" -l Use long format for services\n" "\n", basename(arg0)); } @@ -559,37 +651,32 @@ cluster_member_list_t *membership; rg_state_list_t *rgs = NULL; uint64_t local_node_id; + int fast 
= 0; + int runtype = 0; int refresh_sec = 0, errors = 0; - int opt, xml = 0; - char *member_name; - char *rg_name; + int opt, xml = 0, flags = 0; + char *member_name = NULL; + char *rg_name = NULL; - /* Connect & grab all our info */ - fd = clu_connect(RG_SERVICE_GROUP, 0); - if (fd < 0) { - printf("Could not connect to cluster service\n"); - return 1; - } - - while ((opt = getopt(argc, argv, "Is:m:i:xvQh?")) != EOF) { + while ((opt = getopt(argc, argv, "fIls:m:i:xvQh?")) != EOF) { switch(opt) { case 'v': - printf("%s version %s\n", basename(argv[0]), - PACKAGE_VERSION); - printf("Connected via: %s\n", clu_plugin_version()); - goto cleanup; + runtype = VERSION_ONLY; + break; case 'I': - printf("0x%08x%08x\n",(uint32_t)(local_node_id>>32), - (uint32_t)(local_node_id&0xffffffff)); - goto cleanup; + runtype = NODEID_ONLY; + break; case 'i': refresh_sec = atoi(optarg); if (refresh_sec <= 0) refresh_sec = 1; break; + case 'l': + flags |= RG_VERBOSE; + break; case 'm': member_name = optarg; @@ -597,9 +684,8 @@ case 'Q': /* Return to shell: 0 true, 1 false... 
*/ - ret = !(clu_quorum_status(RG_SERVICE_GROUP) & - QF_QUORATE); - goto cleanup; + runtype = QSTAT_ONLY; + break; case 's': rg_name = optarg; @@ -615,6 +701,9 @@ xml = 1; break; + case 'f': + ++fast; + break; case '?': case 'h': usage(argv[0]); @@ -631,6 +720,37 @@ return 1; } + /* Connect & grab all our info */ + fd = clu_connect(RG_SERVICE_GROUP, 0); + + switch(runtype) { + case QSTAT_ONLY: + if (fd < 0) + break; + ret = !(clu_quorum_status(RG_SERVICE_GROUP) & + QF_QUORATE); + goto cleanup; + case VERSION_ONLY: + printf("%s version %s\n", basename(argv[0]), + PACKAGE_VERSION); + if (fd < 0) + break; + printf("Connected via: %s\n", clu_plugin_version()); + goto cleanup; + case NODEID_ONLY: + if (fd < 0) + break; + clu_local_nodeid(NULL, &local_node_id); + printf("0x%08x%08x\n",(uint32_t)(local_node_id>>32), + (uint32_t)(local_node_id&0xffffffff)); + goto cleanup; + } + + if (fd < 0) { + printf("Could not connect to cluster service\n"); + return 1; + } + /* XXX add member/rg single-shot state */ signal(SIGINT, term_handler); signal(SIGTERM, term_handler); @@ -639,7 +759,7 @@ qs = clu_quorum_status(RG_SERVICE_GROUP); membership = build_member_list(&local_node_id); - rgs = rg_state_list(local_node_id); + rgs = rg_state_list(local_node_id, fast); if (refresh_sec) { setupterm((char *) 0, STDOUT_FILENO, (int *) 0); @@ -647,9 +767,11 @@ } if (xml) - xml_cluster_status(qs, membership, rgs); + xml_cluster_status(qs, membership, rgs, member_name, + rg_name,flags); else - txt_cluster_status(qs, membership, rgs); + txt_cluster_status(qs, membership, rgs, member_name, + rg_name,flags); if (membership) cml_free(membership); --- cluster/rgmanager/src/utils/clusvcadm.c 2005/07/28 21:19:51 1.2.2.3.6.3 +++ cluster/rgmanager/src/utils/clusvcadm.c 2006/06/16 20:07:47 1.2.2.3.6.4 @@ -52,11 +52,107 @@ } +int +do_lock_req(int req) +{ + int cfd = -1; + int fd = -1; + int ret = RG_FAIL; + cluster_member_list_t *membership = NULL; + uint64_t me; + generic_msg_hdr hdr; + + fd = 
clu_connect(RG_SERVICE_GROUP, 0); + if (fd < 0) { + printf("Could not connect to cluster service\n"); + goto out; + } + + membership = clu_member_list(RG_SERVICE_GROUP); + msg_update(membership); + clu_local_nodeid(RG_SERVICE_GROUP, &me); + + fd = msg_open(me, RG_PORT, 0, 5); + if (fd < 0) { + printf("Could not connect to resource group manager\n"); + goto out; + } + + if (msg_send_simple(fd, req, 0, 0) < 0) { + printf("Communication failed\n"); + goto out; + } + + if (msg_receive_timeout(fd, &hdr, sizeof(hdr), 5) < sizeof(hdr)) { + printf("Receive failed\n"); + goto out; + } + + swab_generic_msg_hdr(&hdr); + ret = hdr.gh_command; + +out: + if (membership) + cml_free(membership); + + if (fd >= 0) + msg_close(fd); + + if (cfd >= 0) + clu_disconnect(cfd); + + return ret; +} + + +int +do_lock(void) +{ + if (do_lock_req(RG_LOCK) != RG_SUCCESS) { + printf("Lock operation failed\n"); + return 1; + } + printf("Resource groups locked\n"); + return 0; +} + + +int +do_unlock(void) +{ + if (do_lock_req(RG_UNLOCK) != RG_SUCCESS) { + printf("Unlock operation failed\n"); + return 1; + } + printf("Resource groups unlocked\n"); + return 0; +} + + +int +do_query_lock(void) +{ + switch(do_lock_req(RG_QUERY_LOCK)) { + case RG_LOCK: + printf("Resource groups locked\n"); + break; + case RG_UNLOCK: + printf("Resource groups unlocked\n"); + break; + default: + printf("Query operation failed\n"); + return 1; + } + return 0; +} + void usage(char *name) { -printf("usage: %s -d Disable \n", name); +printf("Resource Group Control Commands:\n"); +printf(" %s -v Display version and exit\n",name); +printf(" %s -d Disable \n", name); printf(" %s -e Enable \n", name); printf(" %s -e -m Enable " @@ -67,7 +163,16 @@ printf(" %s -R Restart a group in place.\n", name); printf(" %s -s Stop \n", name); -printf(" %s -v Display version and exit\n",name); +printf("\n"); +printf("Resource Group Locking (for cluster Shutdown / Debugging):\n"); +printf(" %s -l Lock local resource group manager.\n" + " This 
prevents resource groups from\n" + " starting on the local node.\n", + name); +printf(" %s -S Show lock state\n", name); +printf(" %s -u Unlock local resource group manager.\n" + " This allows resource groups to start\n" + " on the local node.\n", name); } @@ -90,8 +195,17 @@ return 1; } - while ((opt = getopt(argc, argv, "e:d:r:n:m:vR:s:S:qh?")) != EOF) { + while ((opt = getopt(argc, argv, "lSue:d:r:n:m:vR:s:qh?")) != EOF) { switch (opt) { + case 'l': + return do_lock(); + + case 'S': + return do_query_lock(); + + case 'u': + return do_unlock(); + case 'e': /* ENABLE */ actionstr = "trying to enable";