From mboxrd@z Thu Jan  1 00:00:00 1970
From: lhh@sourceware.org <lhh@sourceware.org>
Date: 16 Jun 2006 20:07:52 -0000
Subject: [Cluster-devel] cluster/rgmanager ChangeLog README include/lis ...
Message-ID: <20060616200752.6018.qmail@sourceware.org>
List-Id: <cluster-devel.redhat.com>
To: cluster-devel.redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	STABLE
Changes by:	lhh at sourceware.org	2006-06-16 20:07:47

Modified files:
	rgmanager      : ChangeLog README 
	rgmanager/include: list.h resgroup.h rg_locks.h 
	rgmanager/init.d: rgmanager 
	rgmanager/man  : clusvcadm.8 
	rgmanager/src/clulib: clulog.c msgsimple.c vft.c 
	rgmanager/src/daemons: Makefile fo_domain.c groups.c main.c 
	                       restree.c rg_locks.c rg_state.c 
	                       rg_thread.c 
	rgmanager/src/resources: Makefile clusterfs.sh fs.sh ip.sh 
	                         nfsclient.sh nfsexport.sh 
	                         ocf-shellfuncs service.sh 
	rgmanager/src/utils: Makefile clustat.c clusvcadm.c 
Added files:
	rgmanager/src/daemons: nodeevent.c watchdog.c 
	rgmanager/src/resources: svclib_nfslock 

Log message:
	Merge from RHEL4 branch

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.13&r2=1.5.2.13.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/README.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2&r2=1.2.8.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/list.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1&r2=1.2.2.1.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.5.6.1&r2=1.3.2.5.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/rg_locks.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1&r2=1.1.8.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/init.d/rgmanager.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.1&r2=1.3.2.1.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/man/clusvcadm.8.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.1&r2=1.1.2.1.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/clulog.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1.6.1&r2=1.2.2.1.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/msgsimple.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4&r2=1.4.8.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.7.2.3.6.2&r2=1.7.2.3.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=NONE&r2=1.2.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/watchdog.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=NONE&r2=1.2.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/Makefile.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.6.2.2.6.1&r2=1.6.2.2.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/fo_domain.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.2&r2=1.5.2.2.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.8.2.10.6.3&r2=1.8.2.10.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.9.2.8.6.6&r2=1.9.2.8.6.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.10.2.2.6.5&r2=1.10.2.2.6.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_locks.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.2&r2=1.4.2.2.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.8.6.3&r2=1.4.2.8.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.7.2.3.6.2&r2=1.7.2.3.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/svclib_nfslock.diff?cvsroot=cluster&only_with_tag=STABLE&r1=NONE&r2=1.2.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.3.6.2&r2=1.4.2.3.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/clusterfs.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.3.4.4&r2=1.1.2.3.4.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/fs.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.6.4.4&r2=1.4.2.6.4.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ip.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.4.4.9&r2=1.5.2.4.4.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsclient.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.2.6.4&r2=1.3.2.2.6.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsexport.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.1.6.3&r2=1.4.2.1.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ocf-shellfuncs.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1&r2=1.2.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/service.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.1.6.2&r2=1.1.2.1.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/Makefile.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.1.6.2&r2=1.3.2.1.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.3.6.7&r2=1.5.2.3.6.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clusvcadm.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.3.6.3&r2=1.2.2.3.6.4

--- cluster/rgmanager/ChangeLog	2005/03/21 22:01:30	1.5.2.13
+++ cluster/rgmanager/ChangeLog	2006/06/16 20:07:45	1.5.2.13.6.1
@@ -1,3 +1,87 @@
+2006-06-16 Lon Hohberger <lhh@redhat.com>
+	* src/daemons/fo_domain.c, groups.c: Get rid of compiler warnings
+	* src/daemons/rg_state.c: Change clu_lock_verbose to use the NULL
+	lock/convert mechanism offered by DLM to work around #193128
+	* src/resources/fs.sh, clusterfs.sh, nfsexport.sh, nfsclient.sh,
+	service.sh, svclib_nfslock: Finish up initial NFS workaround.
+
+2006-05-23 Lon Hohberger <lhh@redhat.com>
+	* src/daemons/members.c: Zap pad fields on copy-out
+	* src/daemons/main.c: Give notice if skipping an event because of
+	locked services.  Call the self-watchdog init function
+	* src/daemons/watchdog.c: Add Stanko Kupcevic's self-watchdog from
+	CVS head (fixes #193247)
+	* src/daemons/groups.c: Add debug messages.  Actually count
+	resgroups during node transition handling
+	* src/daemons/rg_state.c: allow failover of stopping services if
+	the owner died (#193255)
+	* src/utils/clustat.c: fix typo, misc. usability problems (#192999)
+
+2006-05-16 Lon Hohberger <lhh@redhat.com>
+	* src/resources/nfsclient.sh: Fix 189218 - nfsclient not matching
+	wildcards correctly when checking status.  Allow disabling of
+	recovery for services where the nfs clients are ordered (this will
+	cause a full service restart, but works)
+	* src/resources/clusterfs.sh, fs.sh, svclib_nfslock, service.sh:
+	Implement rudimentary atomic bomb-style NFS lock reclaim handling.
+	Needs compatible and correctly configured version of nfs-utils 
+	installed and running on the system.  For clusterfs.sh, ensure
+	that we flush buffers during service tear-down - regardless of
+	whether or not we unmount the file system.
+	* src/utils/clunfslock.sh: HA-callout program (/usr/sbin/clunfslock)
+	for use with the rpc.statd -H parameter.  Copies the client to all
+	cluster-managed mounted file systems so that it will get lock
+	reclaim notification on failover.
+
+2006-05-09 Lon Hohberger <lhh@redhat.com>
+	* include/list.h: Prevent dereferencing curr if it's null for some
+	reason
+	* include/resgroup.h: Clean up alignment, add rgmanager lock/unlock
+	message types
+	* src/daemons/Makefile: Add nodeevent.o to the build for rgmanager
+	* src/clulib/msgsimple.c: Misc code path cleanups
+	* src/clulib/vft.c: Add local reads for fast clustat operation.
+	* src/daemons/groups.c: Count all resource groups for all nodes 
+	in one pass, rather than one node per pass.  Split queueing of
+	status checks off so we never block the main thread.  Mark services
+	which have autostart=0 in the config as "disabled" to help remove
+	confusion between "disabled", "stopped", and the no-longer-needed
+	"stopped but behave like disabled" states. bz #182454 /
+	#190234 / #190408	
+	* src/daemons/fo_domain.c: Add patch from Josef Whiter to 
+	implement no-failback option for a given FO domain - bz #189841
+	* src/daemons/main.c: Queue node events for another thread to
+	handle, so we never block the main thread.  Also, implement 
+	cluster-wide service lock/unlock feature from clumanager 1.2.x
+	- bz #175010
+	* src/daemons/nodeevent.c: Split out node event queueing / handling
+	into a separate thread so the main thread does not block
+	* src/daemons/rg_state.c: Return error codes if resource groups
+	are locked.
+	* src/daemons/rg_thread.c: Fix assertion failure causing segfault
+	in extremely rare cases.  Quash the rg queue during shutdown.
+	- bz #181539
+	* src/daemons/rg_state.c: Add fast local service state query to
+	reduce unnecessary lock contention
+	* src/daemons/groups.c: Handle request for expedited information
+	from clustat.
+	* src/daemons/main.c: Pass arg1 to send_rg_states() to enable fast
+	clustat operation.
+	* src/resources/fs.sh: Implement user/group quota support if 
+	enabled in the file system options
+	* src/utils/clustat.c: Misc. error handling.  Add single service / 
+	member output and add -Q to the help information.  #185952.
+	Added -f flag.
+	* src/utils/clusvcadm.c: Implement client-side of #175010
+	* src/utils/clustat.c: show transition time in clustat -x
+	- bz #191398
+	* src/resources/fs.sh: enable user/group quotas if enabled in the
+	options attribute - bz #191182
+	* init.d/rgmanager: fix typo - bz #191205
+
+
+-------------
+
 2005-03-21 Lon Hohberger <lhh@redhat.com>
 	* init.d/rgmanager, Makefile: Fix up init script and add Makefile
 	so that the init script is properly installed #142754
--- cluster/rgmanager/README	2004/08/30 17:49:10	1.2
+++ cluster/rgmanager/README	2006/06/16 20:07:45	1.2.8.1
@@ -1,7 +1,3 @@
-WARNING
-
-This code is not ready for production use.
-
 This is a clustered resource group manager layered on top of Magma, a
 single API which can talk to multiple cluster infrastructures via their
 native APIs.  This resource manager requires both magma and one or more
--- cluster/rgmanager/include/list.h	2005/02/28 23:13:49	1.2.2.1
+++ cluster/rgmanager/include/list.h	2006/06/16 20:07:46	1.2.2.1.6.1
@@ -50,7 +50,7 @@
 	if (*list && (curr = *list)) do
 
 #define list_done(list, curr) \
-	(((curr = (void *)le(curr)->le_next)) && (curr == *list))
+	(curr && (((curr = (void *)le(curr)->le_next)) && (curr == *list)))
 
 /*
    list_do(list, node) {
--- cluster/rgmanager/include/resgroup.h	2005/07/28 21:19:02	1.3.2.5.6.1
+++ cluster/rgmanager/include/resgroup.h	2006/06/16 20:07:46	1.3.2.5.6.2
@@ -55,25 +55,29 @@
 
 #define RG_ACTION_REQUEST	/* Message header */ 0x138582
 
-#define RG_SUCCESS 0
-#define RG_FAIL    1
-#define RG_START    2
-#define RG_STOP     3
-#define RG_STATUS   4
-#define RG_DISABLE  5
-#define RG_STOP_RECOVER 6
-#define RG_START_RECOVER 7
-#define RG_RESTART  8
-#define RG_EXITING  9 
-#define RG_INIT    10
-#define RG_ENABLE  11
-#define RG_STATUS_INQUIRY  12
-#define RG_RELOCATE 13
-#define RG_CONDSTOP 14
-#define RG_CONDSTART 15
-#define RG_START_REMOTE 16	/* Part of a relocate */
-#define RG_STOP_USER 17		/* User-stop request */
-#define RG_NONE     999
+#define RG_SUCCESS	  0
+#define RG_FAIL		  1
+#define RG_START	  2
+#define RG_STOP		  3
+#define RG_STATUS	  4
+#define RG_DISABLE	  5
+#define RG_STOP_RECOVER	  6
+#define RG_START_RECOVER  7
+#define RG_RESTART	  8
+#define RG_EXITING	  9 
+#define RG_INIT		  10
+#define RG_ENABLE	  11
+#define RG_STATUS_INQUIRY 12
+#define RG_RELOCATE	  13
+#define RG_CONDSTOP	  14
+#define RG_CONDSTART	  15
+#define RG_START_REMOTE   16	/* Part of a relocate */
+#define RG_STOP_USER	  17	/* User-stop request */
+#define RG_STOP_EXITING	  18	/* Exiting. */
+#define RG_LOCK		  19
+#define RG_UNLOCK	  20
+#define RG_QUERY_LOCK	  21
+#define RG_NONE		  999
 
 extern const char *rg_req_strings[];
 
@@ -181,7 +185,7 @@
  */
 #define FOD_ORDERED		(1<<0)
 #define FOD_RESTRICTED		(1<<1)
-
+#define FOD_NOFAILBACK		(1<<2)
 
 //#define DEBUG
 #ifdef DEBUG
--- cluster/rgmanager/include/rg_locks.h	2004/08/13 15:36:50	1.1
+++ cluster/rgmanager/include/rg_locks.h	2006/06/16 20:07:46	1.1.8.1
@@ -4,9 +4,8 @@
 int rg_running(void);
 
 int rg_locked(void);
-int rg_lockall(void);
-int rg_unlockall(void);
-int rg_wait_unlockall(void);
+int rg_lockall(int flag);
+int rg_unlockall(int flag);
 
 int rg_quorate(void);
 int rg_set_quorate(void);
--- cluster/rgmanager/init.d/rgmanager	2005/03/21 21:02:58	1.3.2.1
+++ cluster/rgmanager/init.d/rgmanager	2006/06/16 20:07:46	1.3.2.1.6.1
@@ -16,7 +16,7 @@
 # Grab the network config file
 . /etc/sysconfig/network
 
-# Grap cluster start config if it exists
+# Grab cluster start config if it exists
 [ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster
 
 PATH=/sbin:/bin:/usr/sbin:/usr/bin
--- cluster/rgmanager/man/clusvcadm.8	2005/01/18 22:35:35	1.1.2.1
+++ cluster/rgmanager/man/clusvcadm.8	2006/06/16 20:07:46	1.1.2.1.6.1
@@ -46,9 +46,9 @@
 .I
 service
 .IP \-l
-Lock the cluster's service managers.  This should only be used if the 
+Lock the local resource group manager.  This should only be used if the 
 administrator intends to perform a global, cluster-wide shutdown.  This
-prevents ALL service operations on ALL currently running members, thus,
+prevents starting resource groups on the local node, so that
 services will not fail over during the shutdown of the cluster.  Generally,
 administrators should use the
 .B
--- cluster/rgmanager/src/clulib/clulog.c	2005/12/06 18:43:45	1.2.2.1.6.1
+++ cluster/rgmanager/src/clulib/clulog.c	2006/06/16 20:07:46	1.2.2.1.6.2
@@ -20,7 +20,7 @@
 /** @file
  * Library routines for communicating with the logging daemon.
  *
- *  $Id: clulog.c,v 1.2.2.1.6.1 2005/12/06 18:43:45 lhh Exp $
+ *  $Id: clulog.c,v 1.2.2.1.6.2 2006/06/16 20:07:46 lhh Exp $
  *
  *  Author: Jeff Moyer <moyer@missioncriticallinux.com>
  */
@@ -50,7 +50,7 @@
 #include <string.h>
 
 
-static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.1.6.1 $";
+static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.1.6.2 $";
 
 #ifdef DEBUG
 #include <assert.h>
--- cluster/rgmanager/src/clulib/msgsimple.c	2004/11/11 19:46:18	1.4
+++ cluster/rgmanager/src/clulib/msgsimple.c	2006/06/16 20:07:46	1.4.8.1
@@ -83,15 +83,19 @@
 	/*
 	 * Peek at the header.  We need the size of the inbound buffer!
 	 */
+	errno = EAGAIN;
 	ret = msg_peek(fd, &peek_msg, sizeof (generic_msg_hdr));
 	if (ret != sizeof (generic_msg_hdr)) {
 		if (ret == -1) {
 			if (errno != ECONNRESET)
 				fprintf(stderr, "fd%d peek: %s\n", fd,
 					strerror(errno));
+			//perror("msg_peek");
 		} else if (ret != 0)	/* Blank message = probably closed socket */
 			fprintf(stderr, "fd%d peek: %d/%d bytes\n", fd,
 			       ret, (int)sizeof (generic_msg_hdr));
+		else if (ret == 0)
+			errno = EAGAIN;
 		return -1;
 	}
 
--- cluster/rgmanager/src/clulib/vft.c	2006/01/20 16:27:29	1.7.2.3.6.2
+++ cluster/rgmanager/src/clulib/vft.c	2006/06/16 20:07:46	1.7.2.3.6.3
@@ -1598,6 +1598,47 @@
 }
 
 
+int
+vf_read_local(char *keyid, uint64_t *view, void **data, uint32_t *datalen)
+{
+	key_node_t *key_node = NULL;
+
+	pthread_mutex_lock(&vf_mutex);
+	pthread_mutex_lock(&key_list_mutex);
+
+	key_node = kn_find_key(keyid);
+	if (!key_node) {
+		pthread_mutex_unlock(&key_list_mutex);
+		pthread_mutex_unlock(&vf_mutex);
+		printf("no key for %s\n", keyid);
+		return VFR_NODATA;
+	}
+
+	if (!key_node->kn_data || !key_node->kn_datalen) {
+		pthread_mutex_unlock(&key_list_mutex);
+		pthread_mutex_unlock(&vf_mutex);
+		return VFR_NODATA;
+	}
+
+	*data = malloc(key_node->kn_datalen);
+	if (! *data) {
+		pthread_mutex_unlock(&key_list_mutex);
+		pthread_mutex_unlock(&vf_mutex);
+		printf("Couldn't malloc %s\n", keyid);
+		return VFR_ERROR;
+	}
+
+	memcpy(*data, key_node->kn_data, key_node->kn_datalen);
+	*datalen = key_node->kn_datalen;
+	*view = key_node->kn_viewno;
+
+	pthread_mutex_unlock(&key_list_mutex);
+	pthread_mutex_unlock(&vf_mutex);
+
+	return VFR_OK;
+}
+
+
 static int
 vf_send_current(int fd, char *keyid)
 {
/cvs/cluster/cluster/rgmanager/src/daemons/nodeevent.c,v  -->  standard output
revision 1.2.2.1
--- cluster/rgmanager/src/daemons/nodeevent.c
+++ -	2006-06-16 20:07:48.351799000 +0000
@@ -0,0 +1,103 @@
+/*
+  Copyright Red Hat, Inc. 2006
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the
+  Free Software Foundation; either version 2, or (at your option) any
+  later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+#include <resgroup.h>
+#include <rg_locks.h>
+#include <gettid.h>
+#include <assert.h>
+
+typedef struct __ne_q {
+	list_head();
+	int ne_local;
+	uint64_t ne_nodeid;
+	int ne_state;
+} nevent_t;
+
+int node_event(int, uint64_t, int);
+
+/**
+ * Node event queue.
+ */
+static nevent_t *event_queue = NULL;
+static pthread_mutex_t ne_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_t ne_thread = 0;
+int ne_queue_request(int local, uint64_t nodeid, int state);
+
+
+void *
+node_event_thread(void *arg)
+{
+	nevent_t *ev;
+
+	while (1) {
+		pthread_mutex_lock(&ne_queue_mutex);
+		ev = event_queue;
+		if (ev)
+			list_remove(&event_queue, ev);
+		else
+			break; /* We're outta here */
+		pthread_mutex_unlock(&ne_queue_mutex);
+
+		node_event(ev->ne_local, ev->ne_nodeid, ev->ne_state);
+
+		free(ev);
+	}
+
+	/* Mutex held */
+	ne_thread = 0;
+	rg_dec_threads();
+	pthread_mutex_unlock(&ne_queue_mutex);
+	return NULL;
+}
+
+
+void
+node_event_q(int local, uint64_t nodeID, int state)
+{
+	nevent_t *ev;
+	pthread_attr_t attrs;
+
+	while (1) {
+		ev = malloc(sizeof(nevent_t));
+		if (ev) {
+			break;
+		}
+		sleep(1);
+	}
+
+	memset(ev,0,sizeof(*ev));
+
+	ev->ne_state = state;
+	ev->ne_local = local;
+	ev->ne_nodeid = nodeID;
+
+	pthread_mutex_lock (&ne_queue_mutex);
+	list_insert(&event_queue, ev);
+	if (ne_thread == 0) {
+        	pthread_attr_init(&attrs);
+        	pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
+        	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+		pthread_attr_setstacksize(&attrs, 262144);
+
+		pthread_create(&ne_thread, &attrs, node_event_thread, NULL);
+        	pthread_attr_destroy(&attrs);
+
+		rg_inc_threads();
+	}
+	pthread_mutex_unlock (&ne_queue_mutex);
+}
/cvs/cluster/cluster/rgmanager/src/daemons/watchdog.c,v  -->  standard output
revision 1.2.2.1
--- cluster/rgmanager/src/daemons/watchdog.c
+++ -	2006-06-16 20:07:48.438384000 +0000
@@ -0,0 +1,97 @@
+/*
+  Copyright Red Hat, Inc. 2005-2006
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the
+  Free Software Foundation; either version 2, or (at your option) any
+  later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/reboot.h>
+#include <stdlib.h>
+
+#include <signals.h>
+#include <clulog.h>
+
+static pid_t child = 0;
+
+static void 
+signal_handler(int signum)
+{
+        kill(child, signum);
+}
+static void 
+redirect_signals(void)
+{
+        int i;
+        for (i = 0; i < _NSIG; i++) {
+	        switch (i) {
+		case SIGCHLD:
+		case SIGILL:
+		case SIGFPE:
+		case SIGSEGV:
+		case SIGBUS:
+		        setup_signal(i, SIG_DFL);
+			break;
+		default:
+		        setup_signal(i, signal_handler);
+		}
+	}
+}
+
+/**
+ return watchdog's pid, or 0 on failure
+*/
+int 
+watchdog_init(void)
+{
+	int status;
+	pid_t parent;
+	
+	parent = getpid();
+	child = fork();
+	if (child < 0)
+	        return 0;
+	else if (!child)
+		return parent;
+	
+	redirect_signals();
+	
+	while (1) {
+	        if (waitpid(child, &status, 0) <= 0)
+		        continue;
+		
+		if (WIFEXITED(status))
+		        exit(WEXITSTATUS(status));
+		
+		if (WIFSIGNALED(status)) {
+		        if (WTERMSIG(status) == SIGKILL) {
+				clulog(LOG_CRIT, "Watchdog: Daemon killed, exiting\n");
+				raise(SIGKILL);
+				while(1) ;
+			}
+			else {
+#ifdef DEBUG
+			        clulog(LOG_CRIT, "Watchdog: Daemon died, but not rebooting because DEBUG is set\n");
+#else
+				clulog(LOG_CRIT, "Watchdog: Daemon died, rebooting...\n");
+				sync();
+			        reboot(RB_AUTOBOOT);
+#endif
+				exit(255);
+			}
+		}
+	}
+}
--- cluster/rgmanager/src/daemons/Makefile	2005/10/17 20:30:45	1.6.2.2.6.1
+++ cluster/rgmanager/src/daemons/Makefile	2006/06/16 20:07:46	1.6.2.2.6.2
@@ -40,7 +40,8 @@
 
 clurgmgrd: rg_thread.o rg_locks.o main.o groups.o rg_state.o \
 		rg_queue.o members.o rg_forward.o reslist.o \
-		resrules.o restree.o fo_domain.o
+		resrules.o restree.o fo_domain.o nodeevent.o \
+		watchdog.o
 	$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) $(LDFLAGS) -lccs
 
 #
--- cluster/rgmanager/src/daemons/fo_domain.c	2005/01/25 20:05:44	1.5.2.2
+++ cluster/rgmanager/src/daemons/fo_domain.c	2006/06/16 20:07:46	1.5.2.2.6.1
@@ -19,6 +19,9 @@
 /** @file
  * Fail-over Domain & Preferred Node Ordering Driver.  Ripped right from
  * the clumanager 1.2 code base.
+ *
+ * April 2006 - Nofailback option added to restrict failover behavior in ordered
+ *		+ restricted failover domains by Josef Whiter
  */
 #include <string.h>
 #include <list.h>
@@ -153,6 +156,13 @@
 		free(ret);
 	}
 
+	snprintf(xpath, sizeof(xpath), "%s/failoverdomain[%d]/@nofailback",
+		 base, idx);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		if (atoi(ret) != 0)
+			fod->fd_flags |= FOD_NOFAILBACK;
+		free(ret);
+	}
 
 	snprintf(xpath, sizeof(xpath), "%s/failoverdomain[%d]",
 		 base, idx);
@@ -226,7 +236,9 @@
 			if (fod->fd_flags & FOD_ORDERED)
 				printf("Ordered ");
 			if (fod->fd_flags & FOD_RESTRICTED)
-				printf("Restricted");
+				printf("Restricted ");
+			if (fod->fd_flags & FOD_NOFAILBACK)
+				printf("No Failback");
 			printf("\n");
 		}
 
@@ -316,8 +328,14 @@
 	char domainname[128];
 	int ordered = 0;
 	int restricted = 0;
+	int nofailback = 0;
 	fod_t *fod = NULL;
 	int found = 0;
+	int owned_by_node = 0, started = 0, no_owner = 0;
+#ifndef NO_CCS
+	rg_state_t svc_state;
+	void *lockp;
+#endif
 
 	ENTER();
 
@@ -370,6 +388,11 @@
 	}
 
 	/*
+	 * Determine whether this domain has failback turned on or not...
+	 */
+	nofailback = !!(fod->fd_flags & FOD_NOFAILBACK);
+
+	/*
 	 * Determine whether this domain is restricted or not...
 	 */
 	restricted = !!(fod->fd_flags & FOD_RESTRICTED);
@@ -379,6 +402,37 @@
 	 */
 	ordered = !!(fod->fd_flags & FOD_ORDERED);
 
+#ifndef NO_CCS
+	if(nofailback) {
+		if (rg_lock(rg_name, &lockp) != 0) {
+			clulog(LOG_WARNING, "Error getting a lock\n");
+			RETURN(FOD_BEST);
+		}
+                
+		if (get_rg_state(rg_name, &svc_state) == FAIL) {
+                	/*
+			 * Couldn't get the service state, that's odd
+			 */
+			clulog(LOG_WARNING, "Problem getting state information for "
+			       "%s\n", rg_name);
+			rg_unlock(rg_name, lockp);
+			RETURN(FOD_BEST);
+		}
+		rg_unlock(rg_name, lockp);
+
+		/*
+		 * Check to see if the service is started and if we are the owner in case of
+		 * restricted+owner+no failback
+		 */
+		if (svc_state.rs_state == RG_STATE_STARTED)
+			started = 1;
+		if (svc_state.rs_owner == nodeid)
+			owned_by_node = 1;
+		if (!memb_online(membership, svc_state.rs_owner))
+			no_owner = 1;
+	}
+#endif
+
 	switch (node_in_domain(nodename, fod, membership)) {
 	case 0:
 		/*
@@ -429,6 +483,17 @@
 		       "lowest-ordered\n", nodeid);
 #endif
 		if (ordered) {
+			/*
+			 * If we are ordered we want to see if failback is
+			 * turned on
+			 */
+			if (nofailback && started && owned_by_node && !no_owner) {
+#ifdef DEBUG
+				clulog(LOG_DEBUG,"Ordered mode and no "
+				       "failback -> BEST\n");
+#endif
+				RETURN(FOD_BEST);
+			}
 #ifdef DEBUG
 			clulog(LOG_DEBUG,"Ordered mode -> BETTER\n");
 #endif
@@ -444,6 +509,16 @@
 		 * Node is a member of the domain and is the lowest-ordered,
 		 * online member.
 		 */
+
+		if(nofailback && started && !owned_by_node && !no_owner) {
+#ifdef DEBUG
+			clulog(LOG_DEBUG, "Member #%d is the lowest-ordered "
+			       "memeber of the domain, but is not the owner "
+			       "-> BETTER\n", nodeid);
+#endif
+			RETURN(FOD_BETTER);
+		}
+ 
 		/* In this case, we can ignore 'ordered' */
 #ifdef DEBUG
 		clulog(LOG_DEBUG, "Member #%d is the lowest-ordered member "
--- cluster/rgmanager/src/daemons/groups.c	2006/01/20 16:27:29	1.8.2.10.6.3
+++ cluster/rgmanager/src/daemons/groups.c	2006/06/16 20:07:46	1.8.2.10.6.4
@@ -28,6 +28,10 @@
 #include <clulog.h>
 #include <list.h>
 #include <reslist.h>
+#include <assert.h>
+
+#define cm_svccount cm_pad[0] /* These are uint8_t size */
+#define cm_svcexcl  cm_pad[1]
 
 
 static int config_version = 0;
@@ -40,6 +44,12 @@
 pthread_rwlock_t resource_lock = PTHREAD_RWLOCK_INITIALIZER;
 
 
+struct status_arg {
+	int fd;
+	int fast;
+};
+
+
 /**
    See if a given node ID should start a resource, given cluster membership
 
@@ -60,16 +70,21 @@
 
 
 int
-count_resource_groups(uint64_t nodeid, int *excl)
+count_resource_groups(cluster_member_list_t *ml)
 {
 	resource_t *res;
 	char *rgname, *val;
-	int count = 0, exclusive = 0;
+	int x;
 	rg_state_t st;
 	void *lockp;
+	cluster_member_t *mp;
 
-	if (excl)
-		*excl = 0;
+	for (x = 0; x < ml->cml_count; x++) {
+		ml->cml_members[x].cm_svccount = 0;
+		ml->cml_members[x].cm_svcexcl = 0;
+	}
+
+	pthread_rwlock_rdlock(&resource_lock);
 
 	list_do(&_resources, res) {
 		if (res->r_rule->rr_root == 0)
@@ -77,34 +92,43 @@
 
 		rgname = res->r_attrs[0].ra_value;
 
-		if (rg_lock(rgname, &lockp) < 0)
+		if (rg_lock(rgname, &lockp) < 0) {
+			clulog(LOG_ERR, "#XX: Unable to obtain cluster "
+			       "lock @ %s:%d: %s\n", __FILE__, __LINE__,
+			       strerror(errno));
 			continue;
+		}
 
 		if (get_rg_state(rgname, &st) < 0) {
+			clulog(LOG_ERR, "#34: Cannot get status "
+			       "for service %s\n", rgname);
 			rg_unlock(rgname, lockp);
 			continue;
 		}
+
 		rg_unlock(rgname, lockp);
 
-		if (st.rs_owner != nodeid ||
-		    (st.rs_state == RG_STATE_STARTED &&
-		     st.rs_state == RG_STATE_STARTING))
+		if (st.rs_state != RG_STATE_STARTED &&
+		     st.rs_state != RG_STATE_STARTING)
 			continue;
 
-		if (excl) {
-			/* Count exclusive resources */
-			val = res_attr_value(res, "exclusive");
-			exclusive = val && ((!strcmp(val, "yes") ||
-					     (atoi(val)>0)));
-		}
+		mp = memb_id_to_p(ml, st.rs_owner);
+		if (!mp)
+			continue;
+
+		++mp->cm_svccount;
 
-		++count;
-		if (exclusive && excl)
-			++(*excl);
+		val = res_attr_value(res, "exclusive");
+		if (val && ((!strcmp(val, "yes") ||
+				     (atoi(val)>0))) ) {
+			++mp->cm_svcexcl;
+		}
 
 	} while (!list_done(&_resources, res));
 
-	return count;
+	pthread_rwlock_unlock(&resource_lock);
+
+	return 0;
 }
 
 
@@ -125,7 +149,13 @@
 	uint64_t highnode = owner, nodeid;
 	char *val;
 	resource_t *res;
-	int exclusive, count, excl;
+	int exclusive;
+
+	if (lock)
+		pthread_rwlock_rdlock(&resource_lock);
+	count_resource_groups(allowed);
+	if (lock)
+		pthread_rwlock_unlock(&resource_lock);
 
 	for (x=0; x < allowed->cml_count; x++) {
 		if (allowed->cml_members[x].cm_state != STATE_UP)
@@ -141,7 +171,8 @@
 			pthread_rwlock_rdlock(&resource_lock);
 		score = node_should_start(nodeid, allowed, rg_name, &_domains);
 		if (!score) { /* Illegal -- failover domain constraint */
-			pthread_rwlock_unlock(&resource_lock);
+			if (lock)
+				pthread_rwlock_unlock(&resource_lock);
 			continue;
 		}
 
@@ -153,19 +184,18 @@
 		val = res_attr_value(res, "exclusive");
 		exclusive = val && ((!strcmp(val, "yes") || (atoi(val)>0)));
 
-		count = count_resource_groups(nodeid, &excl);
-
 		if (lock)
 			pthread_rwlock_unlock(&resource_lock);
 
 		if (exclusive) {
-		       	if (count > 0) {
+
+			if (allowed->cml_members[x].cm_svccount > 0) {
 				/* Definitely not this guy */
 				continue;
 			} else {
 				score += 2;
 			}
-		} else if (excl) {
+		} else if (allowed->cml_members[x].cm_svcexcl) {
 			/* This guy has an exclusive resource group.
 			   Can't relocate / failover to him. */
 			continue;
@@ -192,14 +222,19 @@
 	       cluster_member_list_t *membership)
 {
 	char *val;
-	int autostart, exclusive, count = 0, excl = 0;
+	cluster_member_t *mp;
+	int autostart, exclusive;
+	void *lockp;
+
+	mp = memb_id_to_p(membership, my_id());
+	assert(mp);
 
 	/*
 	 * Service must be not be running elsewhere to consider for a
 	 * local start.
 	 */
 	if (svcStatus->rs_state == RG_STATE_STARTED &&
-	    svcStatus->rs_state == my_id())
+	    svcStatus->rs_owner == mp->cm_id)
 		return;
 
 	if (svcStatus->rs_state == RG_STATE_DISABLED)
@@ -218,6 +253,32 @@
 			       "Skipping RG %s: Autostart disabled\n",
 			       svcName);
 			 */
+			/*
+			   Mark non-autostart services as disabled to avoid
+			   confusion!
+			 */
+			if (rg_lock(svcName, &lockp) < 0) {
+				clulog(LOG_ERR, "#XX: Unable to obtain cluster "
+				       "lock @ %s:%d: %s\n", __FILE__, __LINE__,
+				       strerror(errno));
+				return;
+			}
+
+			if (get_rg_state(svcName, svcStatus) != 0) {
+				clulog(LOG_ERR, "#34: Cannot get status "
+				       "for service %s\n", svcName);
+				rg_unlock(svcName, lockp);
+				return;
+			}
+
+			if (svcStatus->rs_transition == 0 &&
+			    svcStatus->rs_state == RG_STATE_STOPPED) {
+				svcStatus->rs_state = RG_STATE_DISABLED;
+				set_rg_state(svcName, svcStatus);
+			}
+
+			rg_unlock(svcName, lockp);
+
 			return;
 		}
 	}
@@ -225,17 +286,10 @@
 	val = res_attr_value(node->rn_resource, "exclusive");
 	exclusive = val && ((!strcmp(val, "yes") || (atoi(val)>0)));
 
-	/*
-	   Count the normal + exclusive resource groups running locally
-	 */
-	count = count_resource_groups(my_id(), &excl);
-
-	if (exclusive && count_resource_groups(my_id(), NULL)) {
-		/*
+	if (exclusive && mp->cm_svccount) {
 		clulog(LOG_DEBUG,
 		       "Skipping RG %s: Exclusive and I am running services\n",
 		       svcName);
-		 */
 		return;
 	}
 
@@ -243,12 +297,10 @@
 	   Don't start other services if I'm running an exclusive
 	   service.
 	 */
-	if (excl) {
-		/*
+	if (mp->cm_svcexcl) {
 		clulog(LOG_DEBUG,
 		       "Skipping RG %s: I am running an exclusive service\n",
 		       svcName);
-		 */
 		return;
 	}
 
@@ -256,9 +308,10 @@
 	 * Start any stopped services, or started services
 	 * that are owned by a down node.
 	 */
-	if (node_should_start(my_id(), membership, svcName, &_domains) ==
+	if (node_should_start(mp->cm_id, membership, svcName, &_domains) ==
 	    FOD_BEST)
-		rt_enqueue_request(svcName, RG_START, -1, 0, my_id(), 0, 0);
+		rt_enqueue_request(svcName, RG_START, -1, 0, mp->cm_id,
+				   0, 0);
 }
 
 
@@ -267,6 +320,7 @@
 		  cluster_member_list_t *membership)
 {
 	int a, b;
+
 	/*
 	   Service must be running locally in order to consider for
 	   a relocate
@@ -291,7 +345,6 @@
 	if (a <= b)
 		return;
 
-
 	clulog(LOG_DEBUG, "Relocating group %s to better node %s\n",
 	       svcName,
 	       memb_id_to_name(membership, nodeid));
@@ -318,12 +371,18 @@
 	int ret;
 
 	if (rg_locked()) {
-		clulog(LOG_NOTICE, "Services locked\n");
+		clulog(LOG_NOTICE,
+			"Resource groups locked; not evaluating\n");
 		return -EAGAIN;
 	}
 
- 	membership = member_list();
+	membership = member_list();
+
 	pthread_rwlock_rdlock(&resource_lock);
+
+	/* Requires read lock */
+	count_resource_groups(membership);
+
 	list_do(&_tree, node) {
 
 		if (node->rn_resource->r_rule->rr_root == 0)
@@ -372,7 +431,8 @@
 		       rg_state_str(svcStatus.rs_state),
 		       nodeName);
 
-		if (local && (nodeStatus == STATE_UP)) {
+		if ((local && (nodeStatus == STATE_UP)) ||
+		    svcStatus.rs_state == RG_STATE_STOPPED) {
 
 			consider_start(node, svcName, &svcStatus, membership);
 
@@ -401,6 +461,9 @@
 	pthread_rwlock_unlock(&resource_lock);
 	cml_free(membership);
 
+	clulog(LOG_DEBUG, "Event (%d:%d:%d) Processed\n", local,
+	       (int)nodeid, nodeStatus);
+
 	return 0;
 }
 
@@ -513,8 +576,9 @@
   @param rgname		Resource group name whose state we want to send.
   @see send_rg_states
  */
+int get_rg_state_local(char *, rg_state_t *);
 void
-send_rg_state(int fd, char *rgname)
+send_rg_state(int fd, char *rgname, int fast)
 {
 	rg_state_msg_t msg, *msgp = &msg;
 	void *lockp;
@@ -523,18 +587,23 @@
 	msgp->rsm_hdr.gh_length = sizeof(msg);
 	msgp->rsm_hdr.gh_command = RG_STATUS;
 
-	if (rg_lock(rgname, &lockp) < 0)
-		return;
-
-	if (get_rg_state(rgname, &msgp->rsm_state) < 0) {
+	/* try fast read -- only if it fails and fast is not 
+	   specified should we do the full locked read */
+	if (get_rg_state_local(rgname, &msgp->rsm_state) != 0 &&
+	    !fast) {
+		if (rg_lock(rgname, &lockp) < 0)
+			return;
+		if (get_rg_state(rgname, &msgp->rsm_state) < 0) {
+			rg_unlock(rgname, lockp);
+			return;
+		}
 		rg_unlock(rgname, lockp);
-		return;
 	}
 
-	rg_unlock(rgname, lockp);
 	swab_rg_state_msg_t(msgp);
 
-	msg_send(fd, msgp, sizeof(msg));
+	if (msg_send(fd, msgp, sizeof(msg)) < 0)
+		perror("msg_send");
 }
 
 
@@ -545,8 +614,10 @@
 static void *
 status_check_thread(void *arg)
 {
-	int fd = *(int *)arg;
+	int fd = ((struct status_arg *)arg)->fd;
+	int fast = ((struct status_arg *)arg)->fast;
 	resource_t *res;
+	generic_msg_hdr hdr;
 
 	free(arg);
 
@@ -556,12 +627,17 @@
 		if (res->r_rule->rr_root == 0)
 			continue;
 
-		send_rg_state(fd, res->r_attrs[0].ra_value);
+		send_rg_state(fd, res->r_attrs[0].ra_value, fast);
 	} while (!list_done(&_resources, res));
 
 	pthread_rwlock_unlock(&resource_lock);
 
 	msg_send_simple(fd, RG_SUCCESS, 0, 0);
+
+	/* XXX wait for client to tell us it's done; I don't know why
+	   this is needed when doing fast I/O, but it is. */
+	msg_receive_timeout(fd, &hdr, sizeof(hdr), 10);
+
 	msg_close(fd);
 
 	return NULL;
@@ -575,26 +651,27 @@
   @return		0
  */
 int
-send_rg_states(int fd)
+send_rg_states(int fd, int fast)
 {
-	int *fdp;
+	struct status_arg *arg;
 	pthread_t newthread;
 	pthread_attr_t attrs;
 
-	fdp = malloc(sizeof(int));
-	if (!fdp) {
+	arg = malloc(sizeof(struct status_arg));
+	if (!arg) {
 		msg_send_simple(fd, RG_FAIL, 0, 0);
 		return -1;
 	}
 
-	*fdp = fd;
+	arg->fd = fd;
+	arg->fast = fast;
 
         pthread_attr_init(&attrs);
         pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
         pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
 	pthread_attr_setstacksize(&attrs, 65535);
 
-	pthread_create(&newthread, &attrs, status_check_thread, fdp);
+	pthread_create(&newthread, &attrs, status_check_thread, arg);
         pthread_attr_destroy(&attrs);
 
 	return 0;
@@ -631,6 +708,7 @@
 {
 	resource_node_t *curr;
 	char *name;
+	rg_state_t svcblk;
 
 	pthread_rwlock_rdlock(&resource_lock);
 	list_do(&_tree, curr) {
@@ -644,11 +722,27 @@
 		if (debugfmt)
 			clulog(LOG_DEBUG, debugfmt, name);
 
+		/* Optimization: Don't bother even queueing the request
+		   during the exit case if we don't own it */
+		if (request == RG_STOP_EXITING) {
+			if (get_rg_state_local(name, &svcblk) < 0)
+				continue;
+
+			/* Always run stop if we're the owner, regardless
+			   of state; otherwise, don't run stop */
+			if (svcblk.rs_owner != my_id())
+				continue;
+		}
+
 		rt_enqueue_request(name, request, -1, 0,
 				   NODE_ID_NONE, 0, 0);
 	} while (!list_done(&_tree, curr));
 
 	pthread_rwlock_unlock(&resource_lock);
+
+	/* XXX during shutdown, if we're doing a simultaneous shutdown,
+	   this will cause this rgmanager to hang waiting for all the
+	   other rgmanagers to complete. */
 	if (block) 
 		rg_wait_threads();
 }
@@ -657,13 +751,12 @@
 /**
   Stop changed resources.
  */
-void
-do_status_checks(void)
+void *
+q_status_checks(void *arg)
 {
 	resource_node_t *curr;
 	char *name;
 	rg_state_t svcblk;
-	void *lockp;
 
 	pthread_rwlock_rdlock(&resource_lock);
 	list_do(&_tree, curr) {
@@ -674,30 +767,42 @@
 		/* Group name */
 		name = curr->rn_resource->r_attrs->ra_value;
 
-		/* If we're not running it, no need to CONDSTOP */
-		if (rg_lock(name, &lockp) != 0)
-			continue;
-		if (get_rg_state(name, &svcblk) < 0) {
-			rg_unlock(name, lockp);
+		/* Local check - no one will make us take a service */
+		if (get_rg_state_local(name, &svcblk) < 0) {
 			continue;
 		}
-		rg_unlock(name, lockp);
 
 		if (svcblk.rs_owner != my_id() ||
 		    svcblk.rs_state != RG_STATE_STARTED)
 			continue;
 
-		/*clulog(LOG_DEBUG, "Checking status of %s\n", name);*/
-
 		rt_enqueue_request(name, RG_STATUS,
 				   -1, 0, NODE_ID_NONE, 0, 0);
 
 	} while (!list_done(&_tree, curr));
 
 	pthread_rwlock_unlock(&resource_lock);
-	/*rg_wait_threads();*/
+
+	return NULL;
 }
 
+/* Start a detached thread that runs q_status_checks(). */
+void
+do_status_checks(void)
+{
+	pthread_attr_t attrs;
+	pthread_t newthread;
+
+        pthread_attr_init(&attrs);
+        pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
+        pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+	pthread_attr_setstacksize(&attrs, 65535);
+	/* NOTE(review): pthread_create()'s return value is not checked */
+	pthread_create(&newthread, &attrs, q_status_checks, NULL);
+        pthread_attr_destroy(&attrs);
+}
+
+
 /**
   Stop changed resources.
  */
@@ -708,7 +813,6 @@
 	char *name;
 	rg_state_t svcblk;
 	int need_kill;
-	void *lockp;
 
 	clulog(LOG_INFO, "Stopping changed resources.\n");
 
@@ -722,13 +826,9 @@
 		name = curr->rn_resource->r_attrs->ra_value;
 
 		/* If we're not running it, no need to CONDSTOP */
-		if (rg_lock(name, &lockp) != 0)
-			continue;
-		if (get_rg_state(name, &svcblk) < 0) {
-			rg_unlock(name, lockp);
+		if (get_rg_state_local(name, &svcblk) < 0) {
 			continue;
 		}
-		rg_unlock(name, lockp);
 
 		if (svcblk.rs_owner != my_id())
 			continue;
@@ -757,9 +857,9 @@
 do_condstarts(void)
 {
 	resource_node_t *curr;
-	char *name;
+	char *name, *val;
 	rg_state_t svcblk;
-	int need_init, new_groups = 0;
+	int need_init, new_groups = 0, autostart;
 	void *lockp;
 
 	clulog(LOG_INFO, "Starting changed resources.\n");
@@ -779,19 +879,13 @@
 		if (curr->rn_resource->r_flags & RF_NEEDSTART)
 			need_init = 1;
 
-		if (rg_lock(name, &lockp) != 0)
-			continue;
-
-		if (get_rg_state(name, &svcblk) < 0) {
-			rg_unlock(name, lockp);
+		if (get_rg_state_local(name, &svcblk) < 0) {
 			continue;
 		}
 
 		if (!need_init && svcblk.rs_owner != my_id()) {
-			rg_unlock(name, lockp);
 			continue;
 		}
-		rg_unlock(name, lockp);
 
 		if (need_init) {
 			++new_groups;
@@ -842,7 +936,14 @@
 		}
 
 		/* Set it up for an auto-start */
-		svcblk.rs_state = RG_STATE_STOPPED;
+		val = res_attr_value(curr->rn_resource, "autostart");
+		autostart = !(val && ((!strcmp(val, "no") ||
+				     (atoi(val)==0))));
+		if (autostart)
+			svcblk.rs_state = RG_STATE_STOPPED;
+		else
+			svcblk.rs_state = RG_STATE_DISABLED;
+
 		set_rg_state(name, &svcblk);
 
 		rg_unlock(name, lockp);
--- cluster/rgmanager/src/daemons/main.c	2006/01/24 19:46:59	1.9.2.8.6.6
+++ cluster/rgmanager/src/daemons/main.c	2006/06/16 20:07:46	1.9.2.8.6.7
@@ -34,8 +34,12 @@
 #include <rg_queue.h>
 #include <malloc.h>
 
+#define L_SYS (1<<1)
+#define L_USER (1<<0)
+
 int configure_logging(int ccsfd);
 
+void node_event_q(int, uint64_t, int);
 int daemon_init(char *);
 int init_resource_groups(int);
 void kill_resource_groups(void);
@@ -44,9 +48,10 @@
 void graceful_exit(int);
 void flag_shutdown(int sig);
 void hard_exit(void);
-int send_rg_states(int);
+int send_rg_states(int, int);
 int check_config_update(void);
 int svc_exists(char *);
+int watchdog_init(void);
 
 int shutdown_pending = 0, running = 1, need_reconfigure = 0;
 char debug = 0; /* XXX* */
@@ -134,6 +139,9 @@
 void
 node_event(int local, uint64_t nodeID, int nodeStatus)
 {
+	if (!running)
+		return;
+
 	if (local) {
 
 		/* Local Node Event */
@@ -216,8 +224,15 @@
 
 		clulog(LOG_INFO, "State change: %s DOWN\n",
 		       node_delta->cml_members[x].cm_name);
-		node_event(0, node_delta->cml_members[x].cm_id,
-			   STATE_DOWN);
+		/* Don't bother evaluating anything if resource groups are
+		   locked.  This is just a performance optimization. */
+		if (!rg_locked()) {
+			node_event_q(0, node_delta->cml_members[x].cm_id,
+			     		STATE_DOWN);
+		} else {
+			clulog(LOG_NOTICE, "Not taking action - services"
+			       " locked\n");
+		}
 	}
 
 	/* Free nodes */
@@ -231,7 +246,7 @@
 	me = memb_online(node_delta, my_id());
 	if (me) {
 		clulog(LOG_INFO, "State change: Local UP\n");
-		node_event(1, my_id(), STATE_UP);
+		node_event_q(1, my_id(), STATE_UP);
 	}
 
 	for (x=0; node_delta && x < node_delta->cml_count; x++) {
@@ -245,14 +260,69 @@
 
 		clulog(LOG_INFO, "State change: %s UP\n",
 		       node_delta->cml_members[x].cm_name);
-		node_event(0, node_delta->cml_members[x].cm_id,
-			   STATE_UP);
+		node_event_q(0, node_delta->cml_members[x].cm_id,
+			     STATE_UP);
 	}
 
 	cml_free(node_delta);
 	cml_free(new_ml);
 
-	rg_unlockall();
+	rg_unlockall(L_SYS);
+
+	return 0;
+}
+
+/* Commit callback for "rg_lockdown": data is one byte, 1 = lock, 0 = unlock */
+int
+lock_commit_cb(char *key, uint64_t viewno, void *data, uint32_t datalen)
+{
+	char lockstate;
+	/* Payload must be exactly one byte; anything else is dropped */
+	if (datalen != 1) {
+		clulog(LOG_WARNING, "%s: invalid data length!\n", __FUNCTION__);
+		free(data);
+		return 0;
+	}
+
+       	lockstate = *(char *)data;
+	free(data);	/* data is freed here on every path */
+
+	if (lockstate == 0) {
+		rg_unlockall(L_USER); /* Doing this multiple times
+					 has no effect */
+		clulog(LOG_NOTICE, "Resource Groups Unlocked\n");
+		return 0;
+	}
+
+	if (lockstate == 1) {
+		rg_lockall(L_USER); /* Doing this multiple times
+				       has no effect */
+		clulog(LOG_NOTICE, "Resource Groups Locked\n");
+		return 0;
+	}
+	/* Any other value is only logged; the lock state is unchanged */
+	clulog(LOG_DEBUG, "Invalid lock state in callback: %d\n", lockstate);
+	return 0;
+}
+
+/* Write a lock/unlock byte to "rg_lockdown" via vf_write(); reply on fd */
+int
+do_lockreq(int fd, int req)
+{
+	int ret;
+	char state;
+	cluster_member_list_t *m = member_list();
+	/* 1-byte payload: 1 for RG_LOCK, 0 for any other request */
+	state = (req==RG_LOCK)?1:0;
+	ret = vf_write(m, VFF_IGN_CONN_ERRORS, "rg_lockdown", &state, 1);
+	cml_free(m);
+
+	if (ret == 0) {
+		msg_send_simple(fd, RG_SUCCESS, 0, 0);
+	} else {
+		msg_send_simple(fd, RG_FAIL, 0, 0);
+	}
+	/* NOTE(review): returns 0 even when vf_write() failed */
 	return 0;
 }
 
@@ -292,9 +362,38 @@
 	switch (msg_hdr.gh_command) {
 	case RG_STATUS:
 		clulog(LOG_DEBUG, "Sending service states to fd%d\n",fd);
-		send_rg_states(fd);
+		send_rg_states(fd, msg_hdr.gh_arg1);
 		break;
 
+
+	case RG_LOCK:
+		msg_receive_timeout(fd, &msg_hdr, sizeof(msg_hdr), 1);
+		if (rg_quorate()) {
+			do_lockreq(fd, RG_LOCK);
+		}
+
+		msg_close(fd);
+		break;
+
+	case RG_UNLOCK:
+		msg_receive_timeout(fd, &msg_hdr, sizeof(msg_hdr), 1);
+		if (rg_quorate()) {
+			do_lockreq(fd, RG_UNLOCK);
+		}
+
+		msg_close(fd);
+		break;
+
+	case RG_QUERY_LOCK:
+		msg_receive_timeout(fd, &msg_hdr, sizeof(msg_hdr), 1);
+		if (rg_quorate()) {
+			ret = (rg_locked() & L_USER) ? RG_LOCK : RG_UNLOCK;
+			msg_send_simple(fd, ret, 0, 0);
+		}
+		msg_close(fd);
+		break;
+
+
 	case RG_ACTION_REQUEST:
 
 		ret = msg_receive_timeout(fd, &msg_sm, sizeof(msg_sm), 
@@ -308,17 +407,6 @@
 		/* Decode SmMessageSt message */
 		swab_SmMessageSt(&msg_sm);
 
-		if (rg_locked()) {
-			msg_sm.sm_data.d_ret = RG_EAGAIN;
-			/* Encode before responding... */
-			swab_SmMessageSt(&msg_sm);
-
-			if (msg_send(fd, &msg_sm, sizeof (SmMessageSt)) !=
-		    	    sizeof (SmMessageSt))
-				clulog(LOG_ERR, "#40: Error replying to "
-				       "action request.\n");
-		}
-
 		if (!svc_exists(msg_sm.sm_data.d_svcName)) {
 			msg_sm.sm_data.d_ret = RG_ENOSERVICE;
 			/* No such service! */
@@ -375,18 +463,19 @@
 		break;
 	case CE_SUSPEND:
 		clulog(LOG_DEBUG, "Suspend Event\n");
-		rg_lockall();
+		rg_lockall(L_SYS);
 		break;
 	case CE_MEMB_CHANGE:
 		clulog(LOG_DEBUG, "Membership Change Event\n");
-		if (rg_quorate()) {
-			rg_unlockall();
+		if (rg_quorate() && running) {
+			rg_unlockall(L_SYS);
 			membership_update();
 		}
 		break;
 	case CE_QUORATE:
 		rg_set_quorate();
-		rg_unlockall();
+		rg_unlockall(L_SYS);
+		rg_unlockall(L_USER);
 		clulog(LOG_NOTICE, "Quorum Achieved\n");
 		membership_update();
 		break;
@@ -394,7 +483,7 @@
 		clulog(LOG_EMERG, "#1: Quorum Dissolved\n");
 		rg_set_inquorate();
 		member_list_update(NULL);		/* Clear member list */
-		rg_lockall();
+		rg_lockall(L_SYS);
 		rg_doall(RG_INIT, 1, "Emergency stop of %s");
 		rg_set_uninitialized();
 		break;
@@ -430,7 +519,7 @@
 		 */
 	}
 
-	while (tv.tv_sec || tv.tv_usec) {
+	while (running && (tv.tv_sec || tv.tv_usec)) {
 		FD_ZERO(&rfds);
 		max = msg_fill_fdset(&rfds, MSG_LISTEN, RG_PURPOSE);
 		FD_SET(clusterfd, &rfds);
@@ -489,8 +578,6 @@
 		return 0;
 	}
 
-
-
 	return 0;
 }
 
@@ -512,7 +599,7 @@
 void
 hard_exit(void)
 {
-	rg_lockall();
+	rg_lockall(L_SYS);
 	rg_doall(RG_INIT, 1, "Emergency stop of %s");
 	vf_shutdown();
 	exit(1);
@@ -522,8 +609,8 @@
 void
 cleanup(int cluster_fd)
 {
-	rg_lockall();
-	rg_doall(RG_STOP, 1, NULL);
+	rg_lockall(L_SYS);
+	rg_doall(RG_STOP_EXITING, 1, NULL);
 	vf_shutdown();
 	kill_resource_groups();
 	member_list_update(NULL);
@@ -648,8 +735,11 @@
 	if (foreground)
 		clu_log_console(1);
 
-	if (!foreground && (geteuid() == 0)) 
+	if (!foreground && (geteuid() == 0)) {
 		daemon_init(argv[0]);
+		if (!debug && !watchdog_init())
+			clulog(LOG_NOTICE, "Failed to start watchdog\n");
+	}
 
 	/*
 	   We need quorum before we can read the configuration data from
@@ -723,6 +813,8 @@
 		return -1;
 	}
 
+	vf_key_init("rg_lockdown", 10, NULL, lock_commit_cb);
+
 	if (clu_login(cluster_fd, RG_SERVICE_GROUP) == -1) {
 		if (errno != ENOSYS) {
 			clu_log_console(1);
@@ -756,7 +848,6 @@
 	
 	/*malloc_dump_table(); */ /* Only works if alloc.c us used */
 	/*malloc_stats();*/
-	/*malloc_dump_table(1352, 1352);*/
 
 	exit(0);
 }
--- cluster/rgmanager/src/daemons/restree.c	2006/01/20 16:27:29	1.10.2.2.6.5
+++ cluster/rgmanager/src/daemons/restree.c	2006/06/16 20:07:46	1.10.2.2.6.6
@@ -19,8 +19,8 @@
 #include <libxml/parser.h>
 #include <libxml/xmlmemory.h>
 #include <libxml/xpath.h>
-#include <magma.h>
 #include <ccs.h>
+#include <rg_locks.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <resgroup.h>
@@ -54,6 +54,7 @@
 
 /* XXX from reslist.c */
 void * act_dup(resource_act_t *acts);
+time_t get_time(char *action, int depth, resource_node_t *node);
 
 
 const char *res_ops[] = {
@@ -512,7 +513,7 @@
 		node->rn_parent = parent;
 		node->rn_resource = curres;
 		node->rn_state = RES_STOPPED;
-               node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
+		node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
 		curres->r_refs++;
 
 		list_insert(tree, node);
@@ -862,6 +863,44 @@
 
 
 void
+set_time(char *action, int depth, resource_node_t *node)
+{
+	time_t now;
+	int x = 0;
+	/* Stamp ra_last of the first action matching name and depth */
+	time(&now);
+
+	for (; node->rn_actions[x].ra_name; x++) {
+		/* Skip entries whose name or depth differs */
+		if (strcmp(node->rn_actions[x].ra_name, action) ||
+	    	    node->rn_actions[x].ra_depth != depth)
+			continue;
+
+		node->rn_actions[x].ra_last = now;
+		break;
+	}
+}
+
+/* Return ra_last of the first action matching name and depth, else 0 */
+time_t
+get_time(char *action, int depth, resource_node_t *node)
+{
+	int x = 0;
+
+	for (; node->rn_actions[x].ra_name; x++) {
+		/* Skip entries whose name or depth differs */
+		if (strcmp(node->rn_actions[x].ra_name, action) ||
+	    	    node->rn_actions[x].ra_depth != depth)
+			continue;
+
+		return node->rn_actions[x].ra_last;
+	}
+	/* No matching action was found */
+	return (time_t)0;
+}
+
+
+void
 clear_checks(resource_node_t *node)
 {
 	time_t now;
@@ -899,8 +938,8 @@
    @see			_res_op_by_level res_exec
  */
 int
-_res_op(resource_node_t **tree, resource_t *first, char *type,
-	   void * __attribute__((unused))ret, int realop)
+_res_op(resource_node_t **tree, resource_t *first,
+	char *type, void * __attribute__((unused))ret, int realop)
 {
 	int rv, me;
 	resource_node_t *node;
@@ -965,13 +1004,20 @@
 		/* Start starts before children */
 		if (me && (op == RS_START)) {
 			node->rn_flags &= ~RF_NEEDSTART;
+
 			rv = res_exec(node, op, 0);
-			if (rv != 0)
+			if (rv != 0) {
+				node->rn_state = RES_FAILED;
 				return rv;
+			}
 
-			time(&node->rn_resource->r_started);
+			set_time("start", 0, node);
 			clear_checks(node);
-			++node->rn_resource->r_incarnations;
+
+			if (node->rn_state != RES_STARTED) {
+				++node->rn_resource->r_incarnations;
+				node->rn_state = RES_STARTED;
+			}
 		}
 
 		if (node->rn_child) {
@@ -983,13 +1029,18 @@
 		/* Stop/status/etc stops after children have stopped */
 		if (me && (op == RS_STOP)) {
 			node->rn_flags &= ~RF_NEEDSTOP;
-			--node->rn_resource->r_incarnations;
 			rv = res_exec(node, op, 0);
 
 			if (rv != 0) {
-				++node->rn_resource->r_incarnations;
+				node->rn_state = RES_FAILED;
 				return rv;
 			}
+
+			if (node->rn_state != RES_STOPPED) {
+				--node->rn_resource->r_incarnations;
+				node->rn_state = RES_STOPPED;
+			}
+
 		} else if (me && (op == RS_STATUS)) {
 
 			rv = do_status(node);
--- cluster/rgmanager/src/daemons/rg_locks.c	2005/03/02 07:07:01	1.4.2.2
+++ cluster/rgmanager/src/daemons/rg_locks.c	2006/06/16 20:07:46	1.4.2.2.6.1
@@ -167,11 +167,11 @@
 
 
 int
-rg_lockall(void)
+rg_lockall(int flag)
 {
 	pthread_mutex_lock(&locks_mutex);
 	if (!__rg_lock)
-		__rg_lock = 1;
+		__rg_lock |= flag;
 	pthread_mutex_unlock(&locks_mutex);
 	return 0;
 }
@@ -189,11 +189,11 @@
 
 
 int
-rg_unlockall(void)
+rg_unlockall(int flag)
 {
 	pthread_mutex_lock(&locks_mutex);
 	if (__rg_lock)
-		__rg_lock = 0;
+		__rg_lock &= ~flag;
 	pthread_cond_broadcast(&unlock_cond);
 	pthread_mutex_unlock(&locks_mutex);
 	return 0;
@@ -201,21 +201,6 @@
 
 
 int
-rg_wait_unlockall(void)
-{
-	pthread_mutex_lock(&locks_mutex);
-	if (!__rg_lock) {
-		pthread_mutex_unlock(&locks_mutex);
-		return 0;
-	}
-
-	pthread_cond_wait(&unlock_cond, &locks_mutex);
-	pthread_mutex_unlock(&locks_mutex);
-	return 0;
-}
-
-
-int
 rg_set_quorate(void)
 {
 	pthread_mutex_lock(&locks_mutex);
--- cluster/rgmanager/src/daemons/rg_state.c	2006/02/02 19:00:02	1.4.2.8.6.3
+++ cluster/rgmanager/src/daemons/rg_state.c	2006/06/16 20:07:46	1.4.2.8.6.4
@@ -117,6 +117,7 @@
 	struct timeval start, now;
 	uint64_t nodeid, *p;
 	int flags;
+	int conv = 0, err;
 	int block = !(dflt_flags & CLK_NOWAIT);
 
 	/* Holder not supported for this call */
@@ -128,6 +129,37 @@
 		gettimeofday(&start, NULL);
 		start.tv_sec += 30;
 	}
+
+	/* Ripped from global.c in magma */
+	if (!(dflt_flags & CLK_CONVERT) &&
+	    (block || ((dflt_flags & CLK_EX) == 0))) {
+		/* Acquire NULL lock */
+		ret = clu_lock(resource, CLK_NULL, lockpp);
+		err = errno;
+		if (ret == 0) {
+			if ((flags & CLK_EX) == 0) {
+				/* User only wanted a NULL lock... */
+				return 0;
+			}
+			/*
+			   Ok, NULL lock was taken, rest of blocking
+			   call should be done using lock conversions.
+			 */
+			flags |= CLK_CONVERT;
+			conv = 1;
+		} else {
+			switch(err) {
+			case EINVAL:
+				/* Oops, null locks don't work on this
+				   plugin; use normal spam mode */
+				break;
+			default:
+				errno = err;
+				return -1;
+			}
+		}
+	}
+
 	while (1) {
 		if (block) {
 			gettimeofday(&now, NULL);
@@ -144,9 +176,15 @@
 			}
 		}
 
-		ret = clu_lock(resource, flags | CLK_NOWAIT, lockpp);
+		*lockpp = NULL;
 
-		if ((ret != 0) && (errno == EAGAIN) && block) {
+		/* Take the lock (convert if possible). */
+		ret = clu_lock(resource, flags | CLK_NOWAIT |
+			       ((conv && !timed_out) ? CLK_CONVERT : 0),
+			       lockpp);
+		err = errno;
+
+		if ((ret != 0) && (err == EAGAIN) && block) {
 			if (timed_out) {
 				p = (uint64_t *)*lockpp;
 				if (p) {
@@ -175,6 +213,16 @@
 		break;
 	}
 
+	/* Fatal error.  If we took an automatic NL lock with the hopes of
+	   converting it, release the lock before returning */
+	if (conv == 1 && ret < 0) {
+		clu_unlock(resource, *lockpp);
+		*lockpp = NULL;
+	}
+
+	if (ret < 0)
+		errno = err;
+
 	return ret;
 }
 
@@ -369,6 +417,46 @@
 }
 
 
+int vf_read_local(char *, uint64_t *, void *, uint32_t *);
+/* Read a resource group's state via vf_read_local(); no rg_lock is taken.
+   Returns 0 on success; on failure resets *svcblk and returns FAIL. */
+int
+get_rg_state_local(char *name, rg_state_t *svcblk)
+{
+	char res[256];
+	int ret;
+	void *data = NULL;
+	uint32_t datalen = 0;
+	uint64_t viewno;
+
+	/* name may be NULL; then svcblk->rs_name is used as passed in */
+	if (name) {
+		strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name) - 1);
+		svcblk->rs_name[sizeof(svcblk->rs_name) - 1] = '\0';
+	}
+
+	snprintf(res, sizeof(res),"usrm::rg=\"%s\"", svcblk->rs_name);
+	ret = vf_read_local(res, &viewno, &data, &datalen);
+
+	if (ret != VFR_OK || datalen != sizeof(*svcblk)) {
+		free(data);
+		/* rs_name is already set above; name may be NULL here */
+		svcblk->rs_owner = NODE_ID_NONE;
+		svcblk->rs_last_owner = NODE_ID_NONE;
+		svcblk->rs_state = RG_STATE_UNINITIALIZED;
+		svcblk->rs_restarts = 0;
+		svcblk->rs_transition = 0;
+		return FAIL;
+	}
+
+	/* Copy out the data. */
+	memcpy(svcblk, data, sizeof(*svcblk));
+	free(data);
+
+	return 0;
+}
+
+
 /**
  * Advise service manager as to whether or not to stop a service, given
  * that we already know it's legal to run the service.
@@ -380,6 +468,7 @@
  *			1 = STOP service - return whatever it returns.
  *			2 = DO NOT stop service, return 0 (success)
  *                      3 = DO NOT stop service, return RG_EFORWARD
+ *			4 = DO NOT stop service, return RG_EAGAIN
  */
 int
 svc_advise_stop(rg_state_t *svcStatus, char *svcName, int req)
@@ -504,6 +593,7 @@
  * @return		0 = DO NOT start service, return FAIL
  *			1 = START service - return whatever it returns.
  *			2 = DO NOT start service, return 0
+ *			3 = DO NOT start service, return RG_EAGAIN
  */
 int
 svc_advise_start(rg_state_t *svcStatus, char *svcName, int req)
@@ -519,10 +609,6 @@
 		break;
 		
 	case RG_STATE_STOPPING:
-		clulog(LOG_DEBUG, "RG %s is stopping\n", svcName);
-		ret = 2;
-		break;
-
 	case RG_STATE_STARTED:
 	case RG_STATE_CHECK:
 	case RG_STATE_STARTING:
@@ -548,7 +634,14 @@
 			break;
 		}
 
+		/* We are allowed to do something with the service.  Make
+		   sure we're not locked */
 		if (svcStatus->rs_owner == NODE_ID_NONE) {
+			if (rg_locked()) {
+				ret = 3;
+				break;
+			}
+
 			clulog(LOG_NOTICE,
 			       "Starting stopped service%s\n",
 			       svcName);
@@ -556,6 +649,13 @@
 			break;
 		}
 
+		if (rg_locked()) {
+			clulog(LOG_WARNING, "Not initiating failover of %s: "
+			       "Resource groups locked!\n", svcName);
+			ret = 3;
+			break;
+		}
+
 		/*
 		 * Service is running but owner is down -> FAILOVER
 		 */
@@ -588,6 +688,12 @@
 		break;
 
 	case RG_STATE_STOPPED:
+		/* Don't actually enable if the RG is locked! */
+		if (rg_locked()) {
+			ret = 3;
+			break;
+		}
+
 		clulog(LOG_NOTICE, "Starting stopped service %s\n",
 		       svcName);
 		ret = 1;
@@ -596,6 +702,12 @@
 	case RG_STATE_DISABLED:
 	case RG_STATE_UNINITIALIZED:
 		if (req == RG_ENABLE) {
+			/* Don't actually enable if the RG is locked! */
+			if (rg_locked()) {
+				ret = 3;
+				break;
+			}
+
 			clulog(LOG_NOTICE,
 			       "Starting disabled service %s\n",
 			       svcName);
@@ -656,6 +768,9 @@
 	case 2: /* Don't start service, return 0 */
 		rg_unlock(svcName, lockp);
 		return 0;
+	case 3:
+		rg_unlock(svcName, lockp);
+		return RG_EAGAIN;
 	default:
 		break;
 	}
@@ -738,14 +853,12 @@
 	}
 	rg_unlock(svcName, lockp);
 
-	if (svcStatus.rs_state == RG_STATE_STARTED &&
-	    svcStatus.rs_owner != my_id())
-		/* Don't check status for other resource groups */
+	if (svcStatus.rs_owner != my_id())
+		/* Don't check status for anything not owned */
 		return SUCCESS;
 
-	if (svcStatus.rs_state != RG_STATE_STARTED &&
-	    svcStatus.rs_owner == my_id())
-		/* Not-running RGs should not be checked yet. */
+	if (svcStatus.rs_state != RG_STATE_STARTED)
+		/* Not-running RGs should not be checked either. */
 		return SUCCESS;
 
 	return group_op(svcName, RG_STATUS);
@@ -798,6 +911,9 @@
 	case 3:
 		rg_unlock(svcName, lockp);
 		return RG_EFORWARD;
+	case 4:
+		rg_unlock(svcName, lockp);
+		return RG_EAGAIN;
 	default:
 		break;
 	}
@@ -1077,7 +1193,7 @@
 handle_relocate_req(char *svcName, int request, uint64_t preferred_target,
 		    uint64_t *new_owner)
 {
-	cluster_member_list_t *allowed_nodes;
+	cluster_member_list_t *allowed_nodes, *backup = NULL;
 	uint64_t target = preferred_target, me = my_id();
 	int ret, x;
 	
@@ -1102,19 +1218,23 @@
 		   If we can't start it on the preferred target, then we'll try
 	 	   other nodes.
 		 */
+		//count_resource_groups(allowed_nodes);
+		backup = cml_dup(allowed_nodes);
+
 		for (x = 0; x < allowed_nodes->cml_count; x++) {
 			if (allowed_nodes->cml_members[x].cm_id == me ||
-		    	    allowed_nodes->cml_members[x].cm_id == preferred_target)
+		    	    allowed_nodes->cml_members[x].cm_id ==
+			    		preferred_target)
 				continue;
 			allowed_nodes->cml_members[x].cm_state = STATE_DOWN;
 		}
 
 		/*
-		 * First, see if it's legal to relocate to the target node.  Legal
-		 * means: the node is online and is in the [restricted] failover
-		 * domain of the service, or the service has no failover domain.
+		 * First, see if it's legal to relocate to the target node.
+		 * Legal means: the node is online and is in the
+		 * [restricted] failover domain of the service, or the
+		 * service has no failover domain.
 		 */
-
 		target = best_target_node(allowed_nodes, me, svcName, 1);
 
 		cml_free(allowed_nodes);
@@ -1155,7 +1275,12 @@
 	 * Ok, so, we failed to send it to the preferred target node.
 	 * Try to start it on all other nodes.
 	 */
-	allowed_nodes = member_list();
+	if (backup) {
+		allowed_nodes = backup;
+	} else {
+		allowed_nodes = member_list();
+		//count_resource_groups(allowed_nodes);
+	}
 
 	if (preferred_target != NODE_ID_NONE)
 		memb_mark_down(allowed_nodes, preferred_target);
@@ -1208,12 +1333,14 @@
 	 * We're done.
 	 */
 exhausted:
-	clulog(LOG_WARNING,
-	       "#70: Attempting to restart service %s locally.\n",
-	       svcName);
-	if (svc_start(svcName, RG_START_RECOVER) == 0) {
-		*new_owner = me;
-		return FAIL;
+	if (!rg_locked()) {
+		clulog(LOG_WARNING,
+		       "#70: Attempting to restart service %s locally.\n",
+		       svcName);
+		if (svc_start(svcName, RG_START_RECOVER) == 0) {
+			*new_owner = me;
+			return FAIL;
+		}
 	}
 
 	if (svc_stop(svcName, RG_STOP) != 0) {
@@ -1263,6 +1390,12 @@
 	 */
 	ret = svc_start(svcName, req);
 
+	/* 
+	   If services are locked, return the error 
+	  */
+	if (ret == RG_EAGAIN)
+		return RG_EAGAIN;
+
 	/*
 	 * If we succeeded, then we're done.
 	 */
--- cluster/rgmanager/src/daemons/rg_thread.c	2006/01/20 16:27:29	1.7.2.3.6.2
+++ cluster/rgmanager/src/daemons/rg_thread.c	2006/06/16 20:07:46	1.7.2.3.6.3
@@ -71,7 +71,7 @@
 }
 
 
-static void
+static int 
 wait_initialize(const char *name)
 {
 	resthread_t *t;
@@ -80,15 +80,21 @@
 		pthread_mutex_lock(&reslist_mutex);
 		t = find_resthread_byname(name);
 
-		assert(t);
+		if (!t) {
+			pthread_mutex_unlock(&reslist_mutex);
+			return -1;
+		}
+
 		if (t->rt_status != RG_STATE_UNINITIALIZED)  {
 			pthread_mutex_unlock(&reslist_mutex);
-			return;
+			return 0;
 		}
 
 		pthread_mutex_unlock(&reslist_mutex);
 		usleep(50000);
 	}
+
+	assert(0);
 }
 
 
@@ -191,7 +197,6 @@
 	pthread_cond_wait(&my_queue_cond, &my_queue_mutex);
 	pthread_mutex_unlock(&my_queue_mutex);
 
-
 	while(1) {
 		pthread_mutex_lock(&reslist_mutex);
  		pthread_mutex_lock(&my_queue_mutex);
@@ -201,7 +206,6 @@
 			   loop with the lock held. */
 			break;
 		}
-		
 		pthread_mutex_unlock(&my_queue_mutex);
 		pthread_mutex_unlock(&reslist_mutex);
 
@@ -216,6 +220,8 @@
 		myself = find_resthread_byname(myname);
 		assert(myself);
 		myself->rt_request = req->rr_request;
+		if (req->rr_request == RG_STOP_EXITING)
+			myself->rt_status = RG_STATE_STOPPING;
 		pthread_mutex_unlock(&reslist_mutex);
 
 		switch(req->rr_request) {
@@ -289,6 +295,30 @@
 
 			break;
 
+		case RG_STOP_EXITING:
+			/* We're out of here. Don't allow starts anymore */
+			error = svc_stop(myname, RG_STOP);
+
+			if (error == 0) {
+				ret = RG_SUCCESS;
+
+			} else if (error == RG_EFORWARD) {
+				ret = RG_NONE;
+				break;
+			} else {
+				/*
+				 * Bad news. 
+				 */
+				ret = RG_FAIL;
+			}
+
+			pthread_mutex_lock(&my_queue_mutex);
+			purge_all(&my_queue);
+			pthread_mutex_unlock(&my_queue_mutex);
+
+			break;
+
+
 		case RG_DISABLE:
 			/* Disable and user stop requests need to be
 			   forwarded; they're user requests */
@@ -454,6 +484,7 @@
 	int ret;
 	resthread_t *resgroup = NULL;
 
+retry:
 	pthread_mutex_lock(&reslist_mutex);
 	while (resgroup == NULL) {
 		resgroup = find_resthread_byname(resgroupname);
@@ -468,10 +499,14 @@
 		return ret;
 	}
 
+	ret = (resgroup->rt_status == RG_STATE_STOPPING);
+
 	pthread_mutex_unlock(&reslist_mutex);
-	wait_initialize(resgroupname);
+	if (wait_initialize(resgroupname) < 0) {
+		goto retry;
+	}
 
-	return 0;
+	return ret;
 }
 
 
@@ -521,6 +556,9 @@
 	resthread_t *resgroup;
 
 	if (spawn_if_needed(resgroupname) != 0) {
+		/* Usually, we get here if the thread is killing
+		   stuff.  This prevents us from queueing START requests
+		   while we're exiting */
 		return -1;
 	}
 
/cvs/cluster/cluster/rgmanager/src/resources/svclib_nfslock,v  -->  standard output
revision 1.2.2.1
--- cluster/rgmanager/src/resources/svclib_nfslock
+++ -	2006-06-16 20:07:49.459256000 +0000
@@ -0,0 +1,251 @@
+#!/bin/bash
+#
+# Do reclaim-broadcasts when we kill lockd during shutdown/startup
+# of a cluster service.
+#
+# Exported functions:
+#
+#  notify_list_store
+#  notify_list_merge
+#  notify_list_broadcast
+#
+
+#
+# Usage:
+# nfslock_statd_notify <directory> <hostname|ip>
+#
+# Copy out a list from <directory>, merge them with the system nfs lock
+# list, and send them out as <hostname|ip> after generating a random
+# state (needed so clients will reclaim their locks)
+#
+nfslock_statd_notify()
+{
+	declare tmpdir=/tmp/statd-$2.$$
+	declare nl_dir=$1
+	declare nl_ip=$2
+	declare command		# Work around bugs in rpc.statd
+	declare pid_xxx		# Work around bugs in rpc.statd
+	declare owner
+
+	[ -z "$lockd_pid" ] && return 0	# lockd_pid comes from the caller
+	if ! [ -d $nl_dir ]; then
+		return 0
+	fi
+
+	if [ -z "`ls $nl_dir/sm/* 2> /dev/null`" ]; then
+		ocf_log debug "No hosts to notify"
+		return 0
+	fi
+
+	# Ok, copy the HA directory to something we can use.
+	rm -rf $tmpdir
+	mkdir -p $tmpdir/sm
+
+	# Copy in our specified entries
+	cp -f $nl_dir/sm/* $tmpdir/sm
+
+	# Copy in our global entries
+	# XXX This might be what we just copied.
+	if [ -d "/var/lib/nfs/statd/sm" ]; then
+		owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}')
+		cp -f /var/lib/nfs/statd/sm/* $tmpdir/sm
+	elif [ -d "/var/lib/nfs/sm" ]; then
+		# Take the owner from the directory that actually exists
+		owner=$(ls -dl /var/lib/nfs/sm | awk '{print $3"."$4}')
+		cp -f /var/lib/nfs/sm/* $tmpdir/sm
+	fi
+
+	#
+	# Generate a random state file.  If this ends up being what a client
+	# already has in its list, that's bad, but the chances of this
+	# are small - and relocations should be rare.
+	#
+	dd if=/dev/urandom of=$tmpdir/state bs=1 count=4 &> /dev/null
+
+	#
+	# Make sure we set permissions, or statd will not like it.
+	#
+	chown -R $owner $tmpdir
+
+	#
+	# Tell rpc.statd to notify clients.  Don't go into background, 
+	# because statd is buggy and won't exit like it's supposed to after
+	# sending the notifications out.
+	#
+	ocf_log info "Sending reclaim notifications via $nl_ip"
+	command="rpc.statd -NFP $tmpdir -n $nl_ip"
+	eval $command 2>&1 &
+	sleep 3 # XXX - the instance of rpc.statd we just spawned is supposed
+	        # to exit after it finishes notifying clients.
+	        # rpc.statd spawned which is still running handles the actual
+	        # new SM_MON requests... we hope 3 seconds is enough time
+	        # to get all the SM_NOTIFY messages out.  rpc.statd = bugged
+	#
+	# clean up
+	#
+	pid_xxx=`ps auwwx | grep "$command" | grep -v grep | awk '{print $2}'`
+	[ -n "$pid_xxx" ] && kill $pid_xxx	# nothing to kill if it exited
+	rm -rf $tmpdir
+
+	return 0
+}
+
+
+#
+# Copy of isSlave from svclib_ip and/or ip.sh
+# Returns 0 if <I/F> is a slave device, 2 if it is not, 1 on error
+nfslock_isSlave()
+{
+        declare intf=$1
+        declare line
+
+        if [ -z "$intf" ]; then
+                ocf_log err "usage: isSlave <I/F>"
+                return 1
+        fi
+
+        line=$(/sbin/ip link list dev $intf)
+        if [ $? -ne 0 ]; then
+                ocf_log err "$intf not found"
+                return 1
+        fi
+
+        if [ "$line" = "${line/<*SLAVE*>/}" ]; then
+                return 2	# no <...SLAVE...> flag group was removed
+        fi
+
+        # Yes, it is a slave device.  Ignore.
+        return 0
+}
+
+
+# Print "dev family address maskbits" for each inet/inet6 address,
+# skipping loopback and any device nfslock_isSlave does not report
+# as a non-slave (return 2).  Always returns 0.
+nfslock_ip_address_list()
+{
+        declare idx dev family ifaddr
+
+        while read idx dev family ifaddr; do
+
+		if [ "$family" != "inet" ] && [ "$family" != "inet6" ]; then
+			continue
+		fi
+
+		if [ "$dev" = "lo" ]; then
+			# Ignore loopback
+			continue
+		fi
+
+                nfslock_isSlave $dev
+                if [ $? -ne 2 ]; then
+                        continue	# slave device, or lookup error
+                fi
+
+                idx=${idx/:/}	# drop the ':'; idx is not used below
+
+                echo $dev $family ${ifaddr/\/*/} ${ifaddr/*\//}	# addr, then bits
+
+        done < <(/sbin/ip -o addr list | awk '{print $1,$2,$3,$4}')
+
+        return 0
+}
+	
+
+#
+# Usage: notify_list_broadcast <state_directory>
+#
+# Send the contents of <state_directory> out via all inet IPs on the system.
+#
+notify_list_broadcast()
+{
+        declare dev family addr maskbits ip_name
+	declare lockd_pid=$(pidof lockd)	# read by nfslock_statd_notify
+	declare nl_dir=$1
+
+        while read dev family addr maskbits; do
+		if [ "$family" != "inet" ]; then
+			continue	# inet6 entries are not notified
+		fi
+
+		ip_name=$(clufindhostname -i $addr)
+		if [ -z "$ip_name" ]; then
+			nfslock_statd_notify $nl_dir $addr	# no name: use the IP
+		else
+			nfslock_statd_notify $nl_dir $ip_name
+		fi
+
+	done < <(nfslock_ip_address_list)
+}
+
+
+#
+# Store the lock monitor list from rpc.statd - do this during a teardown
+# after the IP addresses of a service have been taken offline.  Note that
+# this should be done by HA-callout programs, but this feature is not in
+# RHEL3.  Returns 0 if entries were copied to <nl_dir>/sm, 1 otherwise.
+#
+notify_list_store()
+{
+	declare nl_dir=$1
+	declare owner
+
+	mkdir -p $nl_dir/sm
+
+	if [ -d "/var/lib/nfs/statd/sm" ]; then
+	        if [ -z "`ls /var/lib/nfs/statd/sm/* 2> /dev/null`" ]; then
+			return 1
+			# nothing to do!
+		fi
+
+		owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}')
+		cp -af /var/lib/nfs/statd/sm/* $nl_dir/sm
+		chown -R $owner $nl_dir	# owner.group of the system sm directory
+		return 0
+	elif [ -d "/var/lib/nfs/sm" ]; then
+	        if [ -z "`ls /var/lib/nfs/sm/* 2> /dev/null`" ]; then
+			return 1
+			# nothing to do!
+		fi
+
+		owner=$(ls -dl /var/lib/nfs/sm | awk '{print $3"."$4}')
+		cp -af /var/lib/nfs/sm/* $nl_dir/sm
+		chown -R $owner $nl_dir	# owner.group of the system sm directory
+		return 0
+	fi
+
+	return 1	# neither system sm directory exists
+}
+
+
+#
+# Merge the contents of <nl_dir>/sm with the system-wide list
+# Make sure ownership is right, or statd will hiccup.  This should not
+# actually ever be needed because statd will, upon getting a SM_MON
+# request, create all the entries in this list.  It's mostly for
+# housekeeping for next time we relocate the service.
+#
+notify_list_merge()
+{
+	declare nl_dir=$1
+	declare owner
+
+	if [ -z "`ls $nl_dir/sm/* 2> /dev/null`" ]; then
+		return 1	# nothing stored under <nl_dir>/sm to merge
+	fi
+
+	if [ -d "/var/lib/nfs/statd/sm" ]; then
+		owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}')
+		chown -R $owner $nl_dir	# before cp -a, which keeps ownership
+		cp -af $nl_dir/sm/* /var/lib/nfs/statd/sm
+		return 0
+	elif [ -d "/var/lib/nfs/sm" ]; then
+		owner=$(ls -dl /var/lib/nfs/sm | awk '{print $3"."$4}')
+		chown -R $owner $nl_dir	# before cp -a, which keeps ownership
+		cp -af $nl_dir/sm/* /var/lib/nfs/sm
+		return 0
+	fi
+
+	return 1
+}
+
--- cluster/rgmanager/src/resources/Makefile	2005/12/06 18:37:04	1.4.2.3.6.2
+++ cluster/rgmanager/src/resources/Makefile	2006/06/16 20:07:46	1.4.2.3.6.3
@@ -20,7 +20,7 @@
 RESOURCES=fs.sh service.sh ip.sh nfsclient.sh nfsexport.sh \
 	script.sh netfs.sh clusterfs.sh smb.sh
 
-TARGETS=${RESOURCES} ocf-shellfuncs
+TARGETS=${RESOURCES} ocf-shellfuncs svclib_nfslock
 
 all:
 
--- cluster/rgmanager/src/resources/clusterfs.sh	2005/12/07 20:14:29	1.1.2.3.4.4
+++ cluster/rgmanager/src/resources/clusterfs.sh	2006/06/16 20:07:46	1.1.2.3.4.5
@@ -37,7 +37,16 @@
 YES=0
 NO=1
 YES_STR="yes"
-INVALIDATEBUFFERS="/bin/true"
+
+# Grab nfs lock tricks if available
+export NFS_TRICKS=1
+if [ -f "$(dirname $0)/svclib_nfslock" ]; then
+	. $(dirname $0)/svclib_nfslock
+	NFS_TRICKS=0
+else
+	unset OCF_RESKEY_nfslock
+fi
+
 
 . $(dirname $0)/ocf-shellfuncs
 
@@ -135,6 +144,18 @@
 	    <content type="string"/>
 	</parameter>
 
+	<parameter name="nfslock" inherit="service%nfslock">
+	    <longdesc lang="en">
+	        If set, the node will try to kill lockd and issue 
+		reclaims across all remaining network interface cards.
+		This happens always, regardless of unmounting failed.
+	    </longdesc>
+	    <shortdesc lang="en">
+	        Enable NFS lock workarounds
+	    </shortdesc>
+	    <content type="boolean"/>
+	</parameter>
+
     </parameters>
 
     <actions>
@@ -774,6 +795,23 @@
 		esac
 	fi
 
+	#
+	# Always do this hackery on clustered file systems.
+	#
+	if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
+	   [ "$OCF_RESKEY_nfslock" = "1" ]; then
+		ocf_log warning "Dropping node-wide NFS locks"
+		mkdir -p $mp/.clumanager/statd
+		# Copy out the notify list; our 
+			# IPs are already torn down
+			if notify_list_store $mp/.clumanager/statd; then
+				notify_list_broadcast $mp/.clumanager/statd
+			fi
+	fi
+
+	# Always invalidate buffers on clusterfs resources
+	clubufflush -f $dev
+
 	if [ -z "$force_umount" ]; then
 		ocf_log debug "Not umounting $dev (clustered file system)"
 		return $SUCCESS
@@ -782,7 +820,6 @@
 	#
 	# Unmount the device.  
 	#
-
 	while [ ! "$done" ]; do
 		isMounted $dev $mp
 		case $? in
--- cluster/rgmanager/src/resources/fs.sh	2005/12/07 20:14:29	1.4.2.6.4.4
+++ cluster/rgmanager/src/resources/fs.sh	2006/06/16 20:07:46	1.4.2.6.4.5
@@ -39,6 +39,13 @@
 YES_STR="yes"
 INVALIDATEBUFFERS="/bin/true"
 
+# Grab nfs lock tricks if available
+export NFS_TRICKS=1
+if [ -f "$(dirname $0)/svclib_nfslock" ]; then
+	. $(dirname $0)/svclib_nfslock
+	NFS_TRICKS=0
+fi
+
 . $(dirname $0)/ocf-shellfuncs
 
 meta_data()
@@ -126,7 +133,6 @@
         </parameter>
 	-->
 
-
 	<parameter name="self_fence">
 	    <longdesc lang="en">
 	        If set and unmounting the file system fails, the node will
@@ -139,6 +145,18 @@
 	    <content type="boolean"/>
 	</parameter>
 
+	<parameter name="nfslock" inherit="service%nfslock">
+	    <longdesc lang="en">
+	        If set and unmounting the file system fails, the node will
+		try to kill lockd and issue reclaims across all remaining
+		network interface cards.
+	    </longdesc>
+	    <shortdesc lang="en">
+	        Enable NFS lock workarounds
+	    </shortdesc>
+	    <content type="boolean"/>
+	</parameter>
+
 	<parameter name="fsid">
 	    <longdesc lang="en">
 	    	File system ID for NFS exports.  This can be overridden
@@ -316,6 +334,7 @@
 verify_options()
 {
 	declare -i ret=$OCF_SUCCESS
+	declare o
 
 	#
 	# From mount(8)
@@ -762,6 +781,63 @@
 }
 
 
+#
+# Usage: enable_fs_quotas <mount options> <mount point>
+#
+# Enable quotas on the mount point if the user requested them.  Returns 1
+# if quotaon is missing, 0 if no quotas were requested, else quotaon's status.
+#
+enable_fs_quotas()
+{
+	declare -i need_check=0
+	declare quotaopts=""
+	declare mopt f
+	declare opts=$1
+	declare mp=$2
+
+	if [ -z "`which quotaon`" ]; then
+		ocf_log err "quotaon not found in $PATH"
+		return 1
+	fi
+
+	for mopt in `echo $opts | sed -e s/,/\ /g`; do
+		case $mopt in
+		usrquota)
+			quotaopts="u$quotaopts"
+			;;
+		grpquota)
+			quotaopts="g$quotaopts"
+			;;
+		noquota)
+			return 0
+			;;
+		esac
+	done
+
+	[ -z "$quotaopts" ] && return 0
+
+	# Ok, create quota files if they don't exist
+	for f in quota.user aquota.user quota.group aquota.group; do
+		if ! [ -f "$mp/$f" ]; then
+			ocf_log info "$mp/$f was missing - creating"
+			touch "$mp/$f"
+			chmod 600 "$mp/$f"
+			need_check=1
+		fi
+	done
+
+	if [ $need_check -eq 1 ]; then
+		ocf_log info "Checking quota info in $mp"
+		quotacheck -$quotaopts $mp
+	fi
+
+	ocf_log info "Enabling Quotas on $mp"
+	ocf_log debug "quotaon -$quotaopts $mp"
+	quotaon -$quotaopts $mp
+
+	return $?
+}
+
 
 #
 # startFilesystem
@@ -958,6 +1034,18 @@
 		return $FAIL
 	fi
 
+	#
+	# Create this for the NFS NLM broadcast bit
+	#
+	if [ $NFS_TRICKS -eq 0 ]; then
+		if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
+	   	   [ "$OCF_RESKEY_nfslock" = "1" ]; then
+			mkdir -p $mp/.clumanager/statd
+			notify_list_merge $mp/.clumanager/statd
+		fi
+	fi
+
+	enable_fs_quotas $opts $mp
 	activeMonitor start || return $OCF_ERR_GENERIC
 	
 	return $SUCCESS
@@ -1048,6 +1136,7 @@
 
 			activeMonitor stop || return $OCF_ERR_GENERIC
 
+			quotaoff -gu $mp &> /dev/null
 			umount $mp
 			if  [ $? -eq 0 ]; then
 				umount_failed=
@@ -1059,6 +1148,22 @@
 
 			if [ "$force_umount" ]; then
 				killMountProcesses $mp
+				if [ $try -eq 1 ]; then
+	        		  if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
+				     [ "$OCF_RESKEY_nfslock" = "1" ]; then
+				    ocf_log warning \
+					"Dropping node-wide NFS locks"
+				    pkill -KILL -x lockd
+	          		    mkdir -p $mp/.clumanager/statd
+				    # Copy out the notify list; our 
+				    # IPs are already torn down
+				    if notify_list_store $mp/.clumanager/statd
+				    then
+				      notify_list_broadcast \
+				        $mp/.clumanager/statd
+				    fi
+				  fi
+				fi
 			fi
 
 			if [ $try -ge $max_tries ]; then
--- cluster/rgmanager/src/resources/ip.sh	2005/12/07 20:14:29	1.5.2.4.4.9
+++ cluster/rgmanager/src/resources/ip.sh	2006/06/16 20:07:46	1.5.2.4.4.10
@@ -30,6 +30,13 @@
 PATH=/bin:/sbin:/usr/bin:/usr/sbin
 export LC_ALL LANG PATH
 
+# Grab nfs lock tricks if available
+export NFS_TRICKS=1
+if [ -f "$(dirname $0)/svclib_nfslock" ]; then
+	. $(dirname $0)/svclib_nfslock
+	NFS_TRICKS=0
+fi
+
 . $(dirname $0)/ocf-shellfuncs
 
 
@@ -90,6 +97,19 @@
             </shortdesc>
             <content type="boolean" default="1"/>
         </parameter>
+
+	<parameter name="nfslock" inherit="service%nfslock">
+	    <longdesc lang="en">
+	        If set and unmounting the file system fails, the node will
+		try to kill lockd and issue reclaims across all remaining
+		network interface cards.
+	    </longdesc>
+	    <shortdesc lang="en">
+	        Enable NFS lock workarounds
+	    </shortdesc>
+	    <content type="boolean"/>
+	</parameter>
+
     </parameters>
 
     <actions>
@@ -865,6 +885,13 @@
 	fi
 	ip_op ${OCF_RESKEY_family} add ${OCF_RESKEY_address}
 
+	# Bail out before the NFS bits if the address add failed; otherwise
+	# the final "exit $?" reports the status of the "if" below instead.
+	ip_rv=$?; [ $ip_rv -eq 0 ] || exit $ip_rv
+	if [ $NFS_TRICKS -eq 0 ] && { [ "$OCF_RESKEY_nfslock" = "yes" ] || \
+	     [ "$OCF_RESKEY_nfslock" = "1" ]; }; then
+		notify_list_broadcast /var/lib/nfs/statd
+	fi
 	exit $?
 	;;
 stop)
--- cluster/rgmanager/src/resources/nfsclient.sh	2006/01/27 21:06:57	1.3.2.2.6.4
+++ cluster/rgmanager/src/resources/nfsclient.sh	2006/06/16 20:07:46	1.3.2.2.6.5
@@ -95,6 +95,18 @@
             <content type="string"/>
         </parameter>
 
+        <parameter name="nfslock" inherit="nfsexport%nfslock">
+            <longdesc lang="en">
+	    	This tells us whether the service in question has the
+		NFS lock workarounds enabled.  If so, we always unexport
+		* rather than the specified client.
+            </longdesc>
+            <shortdesc lang="en">
+	    	NFS Lock workaround flag
+            </shortdesc>
+            <content type="boolean"/>
+        </parameter>
+
         <parameter name="options">
             <longdesc lang="en">Defines a list of options for this
                 particular client.  See 'man 5 exports' for a list
@@ -106,6 +118,19 @@
             <content type="string"/>
         </parameter>
 
+        <parameter name="allow_recover">
+            <longdesc lang="en">
+		Allows recovery of this NFS client (default = 1) if it
+		disappears from the export list.  If set to 0, the service
+		will be restarted.  This is useful to help preserve export
+		ordering.
+            </longdesc>
+            <shortdesc lang="en">
+		Allow recovery
+            </shortdesc>
+            <content type="boolean"/>
+        </parameter>
+
     </parameters>
 
     <actions>
@@ -282,6 +307,14 @@
 stop)
 	verify_all || exit $OCF_ERR_ARGS
 
+	if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
+	   [ "$OCF_RESKEY_nfslock" = "1" ]; then
+		#
+		# NFS lock workarounds enabled: unexport from the world
+		#
+		export OCF_RESKEY_target="*"
+	fi
+
 	ocf_log info "Removing export: ${OCF_RESKEY_target}:${OCF_RESKEY_path}"
 	exportfs -u "${OCF_RESKEY_target}:${OCF_RESKEY_path}"
 	rv=$?
@@ -299,9 +332,26 @@
 	# * Exports longer than 14 chars have line breaks inserted, which
 	#   broke the way the status check worked.
 	#
-	exportfs -v | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -q \
-		"^${OCF_RESKEY_path}[\t ]*.*${OCF_RESKEY_target}"
-	rv=$?
+        # Status check fix from Craig Lewis:
+        # * Exports with RegExp metacharacters need to be escaped.
+        #   These metacharacters are: * ? .  (target is quoted: no globbing)
+        #
+	export OCF_RESKEY_target_regexp=$(echo "$OCF_RESKEY_target" | \
+		sed -e 's/*/[*]/g' -e 's/?/[?]/g' -e 's/\./\\./g')
+        exportfs -v | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -q \
+		"^${OCF_RESKEY_path}[\t ]*.*${OCF_RESKEY_target_regexp}"
+	rv=$?
+	;;
+
+recover)
+	if [ "$OCF_RESKEY_allow_recover" = "0" ] || \
+	   [ "$OCF_RESKEY_allow_recover" = "no" ] || \
+	   [ "$OCF_RESKEY_allow_recover" = "false" ]; then
+		exit 1
+	fi
+
+	$0 stop || exit 1
+	$0 start || exit 1
 	;;
 
 restart)
--- cluster/rgmanager/src/resources/nfsexport.sh	2005/12/07 22:53:28	1.4.2.1.6.3
+++ cluster/rgmanager/src/resources/nfsexport.sh	2006/06/16 20:07:46	1.4.2.1.6.4
@@ -97,6 +97,19 @@
             </shortdesc>
 	    <content type="string"/>
         </parameter>
+
+        <parameter name="nfslock" inherit="nfslock">
+            <longdesc lang="en">
+                If you can see this, your GUI is broken.
+		This inherits an unspecified nfslock parameter so that
+		it works with fs or clusterfs resources.
+            </longdesc>
+            <shortdesc lang="en">
+                If you can see this, your GUI is broken.
+            </shortdesc>
+	    <content type="boolean"/>
+        </parameter>
+
     </parameters>
 
     <actions>
--- cluster/rgmanager/src/resources/ocf-shellfuncs	2005/10/17 20:53:12	1.2.2.1
+++ cluster/rgmanager/src/resources/ocf-shellfuncs	2006/06/16 20:07:46	1.2.2.2
@@ -1,5 +1,5 @@
 #
-# 	$Id: ocf-shellfuncs,v 1.2.2.1 2005/10/17 20:53:12 lhh Exp $
+# 	$Id: ocf-shellfuncs,v 1.2.2.2 2006/06/16 20:07:46 lhh Exp $
 #
 # 	Common helper functions for the OCF Resource Agents supplied by
 # 	heartbeat.
--- cluster/rgmanager/src/resources/service.sh	2005/12/06 18:37:04	1.1.2.1.6.2
+++ cluster/rgmanager/src/resources/service.sh	2006/06/16 20:07:46	1.1.2.1.6.3
@@ -5,7 +5,12 @@
 # resources. ;(
 #
 
-
+# Grab nfs lock tricks if available
+export NFS_TRICKS=1
+if [ -f "$(dirname $0)/svclib_nfslock" ]; then
+	. $(dirname $0)/svclib_nfslock
+	NFS_TRICKS=0
+fi
 
 meta_data()
 {
@@ -89,6 +94,22 @@
             <content type="boolean"/>
         </parameter>
 
+	<parameter name="nfslock">
+	    <longdesc lang="en">
+	    	Enable NFS lock workarounds.  When used with a compatible
+		HA-callout program like clunfslock, this could be used
+		to provide NFS lock failover, but at significant cost to
+		other services on the machine.  This requires a compatible
+		version of nfs-utils and manual configuration of rpc.statd;
+		see 'man rpc.statd' to see if your version supports
+		the -H parameter.
+	    </longdesc>
+	    <shortdesc lang="en">
+	        Enable NFS lock workarounds
+	    </shortdesc>
+	    <content type="boolean"/>
+	</parameter>
+                
         <parameter name="recovery">
             <longdesc lang="en">
 	        This currently has three possible options: "restart" tries
@@ -144,6 +165,17 @@
 #
 case $1 in
 	start)
+		#
+		# XXX If this is set, we kill lockd.  If there is no
+		# child IP address, then clients will NOT get the reclaim
+		# notification.
+		#
+		if [ $NFS_TRICKS -eq 0 ]; then
+			if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
+	   		   [ "$OCF_RESKEY_nfslock" = "1" ]; then
+				pkill -KILL -x lockd
+			fi
+		fi
 		exit 0
 		;;
 	stop)
--- cluster/rgmanager/src/utils/Makefile	2005/10/17 20:30:45	1.3.2.1.6.2
+++ cluster/rgmanager/src/utils/Makefile	2006/06/16 20:07:47	1.3.2.1.6.3
@@ -23,7 +23,7 @@
 CFLAGS+= -L${libdir} -DPACKAGE_VERSION=\"${RELEASE}\"
 
 LDFLAGS+= -lmagmamsg -lmagma -lpthread -ldl -lncurses -L../clulib -lclulib -lccs
-TARGETS=clubufflush clufindhostname clustat clusvcadm clulog
+TARGETS=clubufflush clufindhostname clustat clusvcadm clulog clunfslock
 
 all: ${TARGETS}
 
@@ -52,6 +52,10 @@
 clusvcadm: clusvcadm.o
 	$(CC) -o $@ $^ $(INLUDE) $(CFLAGS) $(LDFLAGS)
 
+clunfslock: clunfslock.sh
+	cp clunfslock.sh clunfslock
+	chmod 755 clunfslock
+
 clean:
 	rm -f *.o $(TARGETS)
 
--- cluster/rgmanager/src/utils/clustat.c	2006/01/20 16:27:30	1.5.2.3.6.7
+++ cluster/rgmanager/src/utils/clustat.c	2006/06/16 20:07:47	1.5.2.3.6.8
@@ -18,6 +18,12 @@
 #define FLAG_RGMGR 0x4
 #define FLAG_NOCFG 0x8	/* Shouldn't happen */
 
+#define RG_VERBOSE 0x1
+
+#define QSTAT_ONLY 1
+#define VERSION_ONLY 2
+#define NODEID_ONLY 3
+
 
 int running = 1;
 
@@ -35,7 +41,7 @@
 
 
 rg_state_list_t *
-rg_state_list(uint64_t local_node_id)
+rg_state_list(uint64_t local_node_id, int fast)
 {
 	int fd, n, x;
 	rg_state_list_t *rsl = NULL;
@@ -49,7 +55,7 @@
 		return NULL;
 	}
 
-	msg_send_simple(fd, RG_STATUS, 0, 0);
+	msg_send_simple(fd, RG_STATUS, fast, 0);
 
 	rsl = malloc(sizeof(rg_state_list_t));
 	if (!rsl) {
@@ -70,8 +76,10 @@
 				"from Resource Group Manager\n");
 			break;
 		}
+
 		if (n < 0) {
-			if (errno == EINTR)
+			if (errno == EAGAIN ||
+			    errno == EINTR)
 				continue;
 			fprintf(stderr, "Failed to receive "
 				"service data: select: %s\n",
@@ -80,8 +88,16 @@
 		}
 
 		n = msg_receive_simple(fd, &msgp, tv.tv_sec);
-	        if (n < sizeof(generic_msg_hdr))
+		if (n < 0) {
+			if (errno == EAGAIN)
+				continue;
+			perror("msg_receive_simple");
+			break;
+		}
+	        if (n < sizeof(generic_msg_hdr)) {
+			printf("Error: Malformed message\n");
 			break;
+		}
 
 		if (!msgp) {
 			printf("Error: no message?!\n");
@@ -99,6 +115,7 @@
 			return NULL;
 		}
 
+
 		rsmp = (rg_state_msg_t *)msgp;
 
 		swab_rg_state_t(&rsmp->rsm_state);
@@ -119,6 +136,7 @@
 		msgp = NULL;
 	}
 
+	msg_send_simple(fd, RG_SUCCESS, 0, 0);
 	msg_close(fd);
 
 	if (!rsl->rgl_count) {
@@ -260,8 +278,9 @@
 	return "unknown";
 }
 
+
 void
-txt_rg_state(rg_state_t *rs, cluster_member_list_t *members)
+_txt_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags)
 {
 	char owner[31];
 
@@ -286,39 +305,90 @@
 
 
 void
-xml_rg_state(rg_state_t *rs, cluster_member_list_t *members)
+_txt_rg_state_v(rg_state_t *rs, cluster_member_list_t *members, int flags)
+{
+	printf("Service Name      : %s\n", rs->rs_name);
+	printf("  Current State   : %s (%d)\n",
+	       rg_state_str(rs->rs_state), rs->rs_state);
+	printf("  Owner           : %s\n",
+	       my_memb_id_to_name(members, rs->rs_owner));
+	printf("  Last Owner      : %s\n",
+	       my_memb_id_to_name(members, rs->rs_last_owner));
+	printf("  Last Transition : %s\n",
+	       ctime((time_t *)(&rs->rs_transition)));
+}
+
+
+void
+txt_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags)
 {
+	if (flags & RG_VERBOSE) 
+		_txt_rg_state_v(rs, members, flags);
+	else
+		_txt_rg_state(rs, members, flags);
+}
+
+
+void
+xml_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags)
+{
+	char time_str[32];
+	int x;
+
+	/* Chop off newlines */
+	ctime_r((time_t *)&rs->rs_transition, time_str);
+	for (x = 0; time_str[x]; x++) {
+		if (time_str[x] < 32) {
+			time_str[x] = 0;
+			break;
+		}
+	}
+
 	printf("    <group name=\"%s\" state=\"%d\" state_str=\"%s\" "
-	       " owner=\"%s\" last_owner=\"%s\" restarts=\"%d\"/>\n",
+	       " owner=\"%s\" last_owner=\"%s\" restarts=\"%d\""
+	       " last_transition=\"%llu\" last_transition_str=\"%s\"/>\n",
 	       rs->rs_name,
 	       rs->rs_state,
 	       rg_state_str(rs->rs_state),
 	       my_memb_id_to_name(members, rs->rs_owner),
 	       my_memb_id_to_name(members, rs->rs_last_owner),
-	       rs->rs_restarts);
+	       rs->rs_restarts,
+	       (long long unsigned)rs->rs_transition,
+	       time_str);
 }
 
 
 void
-txt_rg_states(rg_state_list_t *rgl, cluster_member_list_t *members)
+txt_rg_states(rg_state_list_t *rgl, cluster_member_list_t *members, 
+	      char *svcname, int flags)
 {
 	int x;
 
 	if (!rgl || !members)
 		return;
 
-	printf("  %-20.20s %-30.30s %-14.14s\n",
-	       "Service Name", "Owner (Last)", "State");
-	printf("  %-20.20s %-30.30s %-14.14s\n",
-	       "------- ----", "----- ------", "-----");
+	if (!(flags & RG_VERBOSE)) {
+		printf("  %-20.20s %-30.30s %-14.14s\n",
+		       "Service Name", "Owner (Last)", "State");
+		printf("  %-20.20s %-30.30s %-14.14s\n",
+		       "------- ----", "----- ------", "-----");
+	} else {
+		printf("Service Information\n"
+		       "------- -----------\n\n");
+	}
 
-	for (x = 0; x < rgl->rgl_count; x++)
-		txt_rg_state(&rgl->rgl_states[x], members);
+	for (x = 0; x < rgl->rgl_count; x++) {
+		if (svcname &&
+		    strcmp(rgl->rgl_states[x].rs_name, svcname))
+			continue;
+		txt_rg_state(&rgl->rgl_states[x], members, flags);
+	}
 }
 
 
 void
-xml_rg_states(rg_state_list_t *rgl, cluster_member_list_t *members)
+xml_rg_states(rg_state_list_t *rgl, cluster_member_list_t *members,
+	      char *svcname)
 {
 	int x;
 
@@ -327,8 +397,12 @@
 
 	printf("  <groups>\n");
 
-	for (x = 0; x < rgl->rgl_count; x++)
-		xml_rg_state(&rgl->rgl_states[x], members);
+	for (x = 0; x < rgl->rgl_count; x++) {
+		if (svcname &&
+		    strcmp(rgl->rgl_states[x].rs_name, svcname))
+			continue;
+		xml_rg_state(&rgl->rgl_states[x], members, 0);
+	}
 
 	printf("  </groups>\n");
 }
@@ -408,22 +482,25 @@
 
 
 void
-txt_member_states(cluster_member_list_t *membership)
+txt_member_states(cluster_member_list_t *membership, char *name)
 {
 	int x;
 
 	printf("  %-40.40s %s\n", "Member Name", "Status");
 	printf("  %-40.40s %s\n", "------ ----", "------");
 
-	for (x = 0; x < membership->cml_count; x++)
+	for (x = 0; x < membership->cml_count; x++) {
+		if (name && strcmp(membership->cml_members[x].cm_name, name))
+			continue;
 		txt_member_state(&membership->cml_members[x]);
+	}
 
 	printf("\n");
 }
 
 
 void
-xml_member_states(cluster_member_list_t *membership)
+xml_member_states(cluster_member_list_t *membership, char *name)
 {
 	int x;
 
@@ -431,38 +508,50 @@
 		return;
 
 	printf("  <nodes>\n");
-	for (x = 0; x < membership->cml_count; x++)
+	for (x = 0; x < membership->cml_count; x++) {
+		if (name && strcmp(membership->cml_members[x].cm_name, name))
+			continue;
 		xml_member_state(&membership->cml_members[x]);
+	}
 	printf("  </nodes>\n");
 }
 
 
 void
 txt_cluster_status(int qs, cluster_member_list_t *membership,
-		   rg_state_list_t *rgs)
+		   rg_state_list_t *rgs, char *name, char *svcname, 
+		   int flags)
 {
-	txt_quorum_state(qs);
-
-	if (!membership || !(qs & QF_GROUPMEMBER)) {
-		printf("Resource Group Manager not running; no service "
-		       "information available.\n\n");
+	if (!svcname && !name) {
+		txt_quorum_state(qs);
+		if (!membership || !(qs & QF_GROUPMEMBER)) {
+			printf("Resource Group Manager not running; "
+			       "no service information available.\n\n");
+		}
 	}
 
-	txt_member_states(membership);
-	txt_rg_states(rgs, membership);
+	if (!svcname || (name && svcname))
+		txt_member_states(membership, name);
+	if (!name || (name && svcname))
+		txt_rg_states(rgs, membership, svcname, flags);
 }
 
 
 void
 xml_cluster_status(int qs, cluster_member_list_t *membership,
-		   rg_state_list_t *rgs)
+		   rg_state_list_t *rgs, char *name, char *svcname,
+		   int flags)
 {
 	printf("<?xml version=\"1.0\"?>\n");
-	printf("<clustat version=\"4.1\">\n");
-	xml_quorum_state(qs);
-	xml_member_states(membership);
-	if (rgs)
-		xml_rg_states(rgs, membership);
+	printf("<clustat version=\"4.1.1\">\n");
+
+	if (!svcname && !name)
+		xml_quorum_state(qs);
+	if (!svcname || (name && svcname)) 
+		xml_member_states(membership, name);
+	if (rgs &&
+	    (!name || (name && svcname)))
+		xml_rg_states(rgs, membership, svcname);
 	printf("</clustat>\n");
 }
 
@@ -545,9 +634,12 @@
 "                       with -x.\n"
 "    -I                 Display local node ID and exit\n"
 "    -m <member>        Display status of <member> and exit\n"
-"    -s <service>       Display statis of <service> and exit\n"
+"    -s <service>       Display status of <service> and exit\n"
 "    -v                 Display version & cluster plugin and exit\n"
 "    -x                 Dump information as XML\n"
+"    -Q			Return 0 if quorate, 1 if not (no output)\n"
+"    -f			Enable fast clustat reports\n"
+"    -l			Use long format for services\n"
 "\n", basename(arg0));
 }
 
@@ -559,37 +651,32 @@
 	cluster_member_list_t *membership;
 	rg_state_list_t *rgs = NULL;
 	uint64_t local_node_id;
+	int fast = 0;
+	int runtype = 0;
 
 	int refresh_sec = 0, errors = 0;
-	int opt, xml = 0;
-	char *member_name;
-	char *rg_name;
+	int opt, xml = 0, flags = 0;
+	char *member_name = NULL;
+	char *rg_name = NULL;
 
-	/* Connect & grab all our info */
-	fd = clu_connect(RG_SERVICE_GROUP, 0);
-	if (fd < 0) {
-		printf("Could not connect to cluster service\n");
-		return 1;
-	}
-	
-	while ((opt = getopt(argc, argv, "Is:m:i:xvQh?")) != EOF) {
+	while ((opt = getopt(argc, argv, "fIls:m:i:xvQh?")) != EOF) {
 		switch(opt) {
 		case 'v':
-			printf("%s version %s\n", basename(argv[0]),
-			       PACKAGE_VERSION);
-			printf("Connected via: %s\n", clu_plugin_version());
-			goto cleanup;
+			runtype = VERSION_ONLY;
+			break;
 
 		case 'I':
-			printf("0x%08x%08x\n",(uint32_t)(local_node_id>>32),
-			       (uint32_t)(local_node_id&0xffffffff)); 
-			goto cleanup;
+			runtype = NODEID_ONLY;
+			break;
 
 		case 'i':
 			refresh_sec = atoi(optarg);
 			if (refresh_sec <= 0)
 				refresh_sec = 1;
 			break;
+		case 'l':
+			flags |= RG_VERBOSE;
+			break;
 
 		case 'm':
 			member_name = optarg;
@@ -597,9 +684,8 @@
 
 		case 'Q':
 			/* Return to shell: 0 true, 1 false... */
-			ret = !(clu_quorum_status(RG_SERVICE_GROUP) &
-				QF_QUORATE);
-			goto cleanup;
+			runtype = QSTAT_ONLY;
+			break;
 
 		case 's':
 			rg_name = optarg;
@@ -615,6 +701,9 @@
 
 			xml = 1;
 			break;
+		case 'f':
+			++fast;
+			break;
 		case '?':
 		case 'h':
 			usage(argv[0]);
@@ -631,6 +720,37 @@
 		return 1;
 	}
 
+	/* Connect & grab all our info */
+	fd = clu_connect(RG_SERVICE_GROUP, 0);
+
+	switch(runtype) {
+	case QSTAT_ONLY:
+		if (fd < 0)
+		       break;
+		ret = !(clu_quorum_status(RG_SERVICE_GROUP) &
+			QF_QUORATE);
+		goto cleanup;
+	case VERSION_ONLY:
+		printf("%s version %s\n", basename(argv[0]),
+		       PACKAGE_VERSION);
+		if (fd < 0)
+		       break;
+		printf("Connected via: %s\n", clu_plugin_version());
+		goto cleanup;
+	case NODEID_ONLY:
+		if (fd < 0)
+		       break;
+		clu_local_nodeid(NULL, &local_node_id);
+		printf("0x%08x%08x\n",(uint32_t)(local_node_id>>32),
+		       (uint32_t)(local_node_id&0xffffffff)); 
+		goto cleanup;
+	}
+
+	if (fd < 0) {
+		printf("Could not connect to cluster service\n");
+		return 1;
+	}
+
 	/* XXX add member/rg single-shot state */
 	signal(SIGINT, term_handler);
 	signal(SIGTERM, term_handler);
@@ -639,7 +759,7 @@
 		qs = clu_quorum_status(RG_SERVICE_GROUP);
 		membership = build_member_list(&local_node_id);
 		
-		rgs = rg_state_list(local_node_id);
+		rgs = rg_state_list(local_node_id, fast);
 
 		if (refresh_sec) {
 			setupterm((char *) 0, STDOUT_FILENO, (int *) 0);
@@ -647,9 +767,11 @@
 		}
 
 		if (xml)
-			xml_cluster_status(qs, membership, rgs);
+			xml_cluster_status(qs, membership, rgs, member_name,
+					   rg_name,flags);
 		else
-			txt_cluster_status(qs, membership, rgs);
+			txt_cluster_status(qs, membership, rgs, member_name,
+					   rg_name,flags);
 
 		if (membership)
 			cml_free(membership);
--- cluster/rgmanager/src/utils/clusvcadm.c	2005/07/28 21:19:51	1.2.2.3.6.3
+++ cluster/rgmanager/src/utils/clusvcadm.c	2006/06/16 20:07:47	1.2.2.3.6.4
@@ -52,11 +52,107 @@
 }
 
 
+/* Send <req> to the local resource group manager.  Returns the command
+   word of its reply, or RG_FAIL if connecting, sending or receiving
+   failed.  cfd is the cluster handle, fd the rgmanager connection. */
+int
+do_lock_req(int req)
+{
+	int cfd = -1, fd = -1;
+	int ret = RG_FAIL;
+	cluster_member_list_t *membership = NULL;
+	uint64_t me;
+	generic_msg_hdr hdr;
+
+	cfd = clu_connect(RG_SERVICE_GROUP, 0);
+	if (cfd < 0) {
+		printf("Could not connect to cluster service\n");
+		goto out;
+	}
+
+	membership = clu_member_list(RG_SERVICE_GROUP);
+	msg_update(membership);
+	clu_local_nodeid(RG_SERVICE_GROUP, &me);
+
+	fd = msg_open(me, RG_PORT, 0, 5);
+	if (fd < 0) {
+		printf("Could not connect to resource group manager\n");
+		goto out;
+	}
+
+	if (msg_send_simple(fd, req, 0, 0) < 0) {
+		printf("Communication failed\n");
+		goto out;
+	}
+
+	/* Compare as int so a negative (error) return is not promoted */
+	if (msg_receive_timeout(fd, &hdr, sizeof(hdr), 5) < (int)sizeof(hdr)) {
+		printf("Receive failed\n");
+		goto out;
+	}
+
+	swab_generic_msg_hdr(&hdr);
+	ret = hdr.gh_command;
+
+out:
+	if (membership)
+		cml_free(membership);
+	if (fd >= 0)
+		msg_close(fd);
+	if (cfd >= 0)
+		clu_disconnect(cfd);
+	return ret;
+}
+
+
+/* Lock the local resource group manager.  Returns 0 on success, 1 on
+   failure. */
+int
+do_lock(void)
+{
+	int failed = (do_lock_req(RG_LOCK) != RG_SUCCESS);
+	fputs(failed ? "Lock operation failed\n"
+		     : "Resource groups locked\n", stdout);
+	return failed;
+}
+
+
+/* Unlock the local resource group manager.  Returns 0 on success, 1 on
+   failure. */
+int
+do_unlock(void)
+{
+	int failed = (do_lock_req(RG_UNLOCK) != RG_SUCCESS);
+	fputs(failed ? "Unlock operation failed\n"
+		     : "Resource groups unlocked\n", stdout);
+	return failed;
+}
+
+
+/* Ask the local resource group manager for its lock state and print it.
+   Returns 0 if a state was reported, 1 if the query failed. */
+int
+do_query_lock(void)
+{
+	int state = do_lock_req(RG_QUERY_LOCK);
+	if (state == RG_LOCK) {
+		printf("Resource groups locked\n");
+	} else if (state == RG_UNLOCK) {
+		printf("Resource groups unlocked\n");
+	} else {
+		printf("Query operation failed\n");
+		return 1;
+	}
+	return 0;
+}
+
 
 void
 usage(char *name)
 {
-printf("usage: %s -d <group>             Disable <group>\n", name);
+printf("Resource Group Control Commands:\n");
+printf("       %s -v                     Display version and exit\n",name);
+printf("       %s -d <group>             Disable <group>\n", name);
 printf("       %s -e <group>             Enable <group>\n",
        name);
 printf("       %s -e <group> -m <member> Enable <group>"
@@ -67,7 +163,16 @@
 printf("       %s -R <group>             Restart a group in place.\n",
        name);
 printf("       %s -s <group>             Stop <group>\n", name);
-printf("       %s -v                     Display version and exit\n",name);
+printf("\n");
+printf("Resource Group Locking (for cluster Shutdown / Debugging):\n");
+printf("       %s -l                     Lock local resource group manager.\n"
+       "                                 This prevents resource groups from\n"
+       "                                 starting on the local node.\n",
+       name);
+printf("       %s -S                     Show lock state\n", name);
+printf("       %s -u                     Unlock local resource group manager.\n"
+       "                                 This allows resource groups to start\n"
+       "                                 on the local node.\n", name);
 }
 
 
@@ -90,8 +195,17 @@
 		return 1;
 	}
 
-	while ((opt = getopt(argc, argv, "e:d:r:n:m:vR:s:S:qh?")) != EOF) {
+	while ((opt = getopt(argc, argv, "lSue:d:r:n:m:vR:s:qh?")) != EOF) {
 		switch (opt) {
+		case 'l':
+			return do_lock();
+
+		case 'S':
+			return do_query_lock();
+
+		case 'u':
+			return do_unlock();
+
 		case 'e':
 			/* ENABLE */
 			actionstr = "trying to enable";