cluster-devel.redhat.com archive mirror
* [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h  ...
@ 2007-05-31 19:08 lhh
From: lhh @ 2007-05-31 19:08 UTC (permalink / raw)
  To: cluster-devel.redhat.com

CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	lhh at sourceware.org	2007-05-31 19:08:14

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: reslist.h 
	rgmanager/src/daemons: groups.c resrules.c restree.c 
	rgmanager/src/resources: script.sh 

Log message:
	Fix 234249, 229650

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.43&r2=1.44
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.19&r2=1.20
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.32&r2=1.33
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&r1=1.21&r2=1.22
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&r1=1.30&r2=1.31
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&r1=1.9&r2=1.10

--- cluster/rgmanager/ChangeLog	2007/05/22 17:01:08	1.43
+++ cluster/rgmanager/ChangeLog	2007/05/31 19:08:13	1.44
@@ -1,3 +1,9 @@
+2007-05-31 Lon Hohberger <lhh@redhat.com>
+	* src/daemons/resrules.c: Fix #234249 - ignore obvious backup files
+	in /usr/share/cluster when processing resource rules
+	* src/daemons/restree.c, src/daemons/groups.c, include/reslist.h: 
+	Implement independent subtrees, per bug #229650
+
 2007-05-22 Lon Hohberger <lhh@redhat.com>
 	* src/resources/SAPInstance, SAPDatabase: Add primary attrs
 
--- cluster/rgmanager/include/reslist.h	2007/03/22 23:46:58	1.19
+++ cluster/rgmanager/include/reslist.h	2007/05/31 19:08:14	1.20
@@ -35,6 +35,8 @@
 #define RF_NEEDSTART	(1<<2)	/** Used when adding/changing resources */
 #define RF_NEEDSTOP	(1<<3)  /** Used when deleting/changing resources */
 #define RF_COMMON	(1<<4)	/** " */
+#define RF_INDEPENDENT	(1<<5)  /** Define this for a resource if it is
+				  otherwise an independent subtree */
 
 #define RES_STOPPED	(0)
 #define RES_STARTED	(1)
--- cluster/rgmanager/src/daemons/groups.c	2007/04/27 18:10:10	1.32
+++ cluster/rgmanager/src/daemons/groups.c	2007/05/31 19:08:14	1.33
@@ -816,6 +816,7 @@
 	}
 	pthread_rwlock_unlock(&resource_lock);
 
+#if 0
 	/*
 	   Do NOT return error codes if we failed to stop for one of these
 	   reasons.  It didn't start, either, so it's safe to assume that
@@ -833,6 +834,7 @@
 			break;
 		}
 	}
+#endif
 
 	return ret;
 }
--- cluster/rgmanager/src/daemons/resrules.c	2007/04/04 19:22:29	1.21
+++ cluster/rgmanager/src/daemons/resrules.c	2007/05/31 19:08:14	1.22
@@ -1025,7 +1025,7 @@
 {
 	DIR *dir;
 	struct dirent *de;
-	char *fn;//, *dot;
+	char *fn, *dot;
 	char path[2048];
 	struct stat st_buf;
 
@@ -1040,10 +1040,23 @@
 		if (!fn)
 			continue;
 		
+		/* Ignore files with common backup extension */
 		if ((fn != NULL) && (strlen(fn) > 0) && 
 			(fn[strlen(fn)-1] == '~')) 
 			continue;
 
+ 		dot = strrchr(fn, '.');
+ 		if (dot) {
+ 			/* Ignore RPM installed save files, patches,
+ 			   diffs, etc. */
+ 			if (!strncasecmp(dot, ".rpm", 4)) {
+ 				fprintf(stderr, "Warning: "
+ 					"Ignoring %s/%s: Bad extension %s\n",
+ 					rpath, de->d_name, dot);
+ 				continue;
+ 			}
+ 		}
+
 		snprintf(path, sizeof(path), "%s/%s",
 			 rpath, de->d_name);
 		
@@ -1053,8 +1066,10 @@
 		if (S_ISDIR(st_buf.st_mode))
 			continue;
 		
-		if (st_buf.st_mode & (S_IXUSR|S_IXOTH|S_IXGRP))
-			load_resource_rulefile(path, rules);
+  		if (st_buf.st_mode & (S_IXUSR|S_IXOTH|S_IXGRP)) {
+  			printf("Loading resource rule from %s\n", path);
+   			load_resource_rulefile(path, rules);
+  		}
 	}
 	xmlCleanupParser();
 
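For reference, the filter added above can be read as one small standalone
predicate. The sketch below is illustrative only (it is not part of the
commit): it skips editor backups such as "script.sh~" and RPM-installed
leftovers such as "script.sh.rpmsave" or "script.sh.rpmnew".

#include <stdio.h>
#include <string.h>
#include <strings.h>

static int is_backup_file(const char *fn)
{
        const char *dot;
        size_t len = strlen(fn);

        if (len > 0 && fn[len - 1] == '~')
                return 1;               /* editor backup */

        dot = strrchr(fn, '.');
        if (dot && !strncasecmp(dot, ".rpm", 4))
                return 1;               /* .rpmsave, .rpmnew, .rpmorig */

        return 0;
}

int main(void)
{
        /* prints: 1 1 0 */
        printf("%d %d %d\n", is_backup_file("script.sh~"),
               is_backup_file("script.sh.rpmsave"),
               is_backup_file("script.sh"));
        return 0;
}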
--- cluster/rgmanager/src/daemons/restree.c	2007/05/03 15:15:17	1.30
+++ cluster/rgmanager/src/daemons/restree.c	2007/05/31 19:08:14	1.31
@@ -39,6 +39,9 @@
 void malloc_zap_mutex(void);
 #endif
 
+#define FL_FAILURE	0x1
+#define FL_RECOVERABLE	0x2
+
 
 /* XXX from resrules.c */
 int store_childtype(resource_child_t **childp, char *name, int start,
@@ -507,6 +510,19 @@
 	node->rn_resource = curres;
 	node->rn_state = RES_STOPPED;
 	node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
+
+	snprintf(tok, sizeof(tok), "%s/@__independent_subtree", base);
+#ifndef NO_CCS
+	if (ccs_get(ccsfd, tok, &ref) == 0) {
+#else
+	if (conf_get(tok, &ref) == 0) {
+#endif
+		if (atoi(ref) > 0 || strcasecmp(ref, "yes") == 0)
+			node->rn_flags |= RF_INDEPENDENT;
+		free(ref);
+	}
+
+
 	curres->r_refs++;
 
 	*newnode = node;
@@ -777,6 +793,8 @@
 				printf("NEEDSTART ");
 			if (node->rn_flags & RF_COMMON)
 				printf("COMMON ");
+			if (node->rn_flags & RF_INDEPENDENT)
+				printf("INDEPENDENT ");
 			printf("]");
 		}
 		printf(" {\n");
@@ -841,10 +859,11 @@
 #endif
 
 			/* Do op on all children at our level */
-			rv += _res_op(&node->rn_child, first,
+			rv |= _res_op(&node->rn_child, first,
 			     	     rule->rr_childtypes[x].rc_name, 
 		     		     ret, op);
-			if (rv != 0 && op != RS_STOP)
+
+			if (rv & FL_FAILURE && op != RS_STOP)
 				return rv;
 		}
 
@@ -856,46 +875,6 @@
 }
 
 
-#if 0
-static inline int
-_do_child_default_level(resource_node_t **tree, resource_t *first,
-			void *ret, int op)
-{
-	resource_node_t *node = *tree;
-	resource_t *res = node->rn_resource;
-	resource_rule_t *rule = res->r_rule;
-	int x, rv = 0, lev;
-
-	for (x = 0; rule->rr_childtypes &&
-	     rule->rr_childtypes[x].rc_name; x++) {
-
-		if(op == RS_STOP)
-			lev = rule->rr_childtypes[x].rc_stoplevel;
-		else
-			lev = rule->rr_childtypes[x].rc_startlevel;
-
-		if (lev)
-			continue;
-
-		/*
-		printf("%s children of %s type %s (default level)\n",
-		       agent_op_str(op),
-		       node->rn_resource->r_rule->rr_type,
-		       rule->rr_childtypes[x].rc_name);
-		 */
-
-		rv = _res_op(&node->rn_child, first,
-			     rule->rr_childtypes[x].rc_name, 
-			     ret, op);
-		if (rv != 0)
-			return rv;
-	}
-
-	return 0;
-}
-#endif
-
-
 static inline int
 _xx_child_internal(resource_node_t *node, resource_t *first,
 		   resource_node_t *child, void *ret, int op)
@@ -929,13 +908,14 @@
 
 	if (op == RS_START || op == RS_STATUS) {
 		list_for(&node->rn_child, child, y) {
-			rv = _xx_child_internal(node, first, child, ret, op);
-			if (rv)
+			rv |= _xx_child_internal(node, first, child, ret, op);
+
+			if (rv & FL_FAILURE)
 				return rv;
 		}
 	} else {
 		list_for_rev(&node->rn_child, child, y) {
-			rv += _xx_child_internal(node, first, child, ret, op);
+			rv |= _xx_child_internal(node, first, child, ret, op);
 		}
 	}
 
@@ -976,7 +956,7 @@
 
 	if (op == RS_START || op == RS_STATUS) {
 		rv =  _do_child_levels(tree, first, ret, op);
-	       	if (rv != 0)
+	       	if (rv & FL_FAILURE)
 			return rv;
 
 		/* Start default level after specified ones */
@@ -995,6 +975,22 @@
 }
 
 
+void
+mark_nodes(resource_node_t *node, int state, int flags)
+{
+	int x;
+	resource_node_t *child;
+
+	list_for(&node->rn_child, child, x) {
+		if (child->rn_child)
+			mark_nodes(child->rn_child, state, flags);
+	}
+
+	node->rn_state = state;
+	node->rn_flags |= (RF_NEEDSTART | RF_NEEDSTOP);
+}
+
+
 /**
    Do a status on a resource node.  This takes into account the last time the
    status operation was run and selects the highest possible resource depth
@@ -1223,7 +1219,7 @@
 		rv = res_exec(node, agent_op_str(op), NULL, 0);
 		if (rv != 0) {
 			node->rn_state = RES_FAILED;
-			return rv;
+			return FL_FAILURE;
 		}
 
 		set_time("start", 0, node);
@@ -1236,14 +1232,43 @@
 	} else if (me && (op == RS_STATUS)) {
 		/* Check status before children*/
 		rv = do_status(node);
-		if (rv != 0)
-			return rv;
+		if (rv != 0) {
+			/*
+			   If this node's status has failed, all of its
+			   dependent children are failed, whether or not this
+			   node is independent.
+			 */
+			mark_nodes(node, RES_FAILED,
+				   RF_NEEDSTART | RF_NEEDSTOP);
+
+			/* If we're an independent subtree, return a flag
+			   stating that this section is recoverable apart
+			   from siblings in the resource tree.  All child
+			   resources of this node must be restarted,
+			   but siblings of this node are not affected. */
+			if (node->rn_flags & RF_INDEPENDENT)
+				return FL_RECOVERABLE;
+
+			return FL_FAILURE;
+		}
+
 	}
 
 	if (node->rn_child) {
 		rv = _res_op_by_level(&node, me?NULL:first, ret, op);
-		if (rv != 0)
-			return rv;
+		if (rv != 0) {
+			mark_nodes(node, RES_FAILED,
+				   RF_NEEDSTART | RF_NEEDSTOP);
+
+			/* If this node is independent of its siblings,
+		   the failure of one of its dependent children
+		   does not affect them: its dependent children must
+			   also be independent of this node's siblings. */
+			if (node->rn_flags & RF_INDEPENDENT)
+				return FL_RECOVERABLE;
+
+			return FL_FAILURE;
+		}
 	}
 
 	/* Stop should occur after children have stopped */
@@ -1253,7 +1278,7 @@
 
 		if (rv != 0) {
 			node->rn_state = RES_FAILED;
-			return rv;
+			return FL_FAILURE;
 		}
 
 		if (node->rn_state != RES_STOPPED) {
@@ -1292,24 +1317,31 @@
 	char *type, void * __attribute__((unused))ret, int realop)
 {
   	resource_node_t *node;
- 	int count = 0, rv;
+ 	int count = 0, rv = 0;
  	
  	if (realop == RS_STOP) {
  		list_for_rev(tree, node, count) {
- 			rv = _res_op_internal(tree, first, type, ret, realop,
- 					      node);
- 			if (rv != 0) 
- 				return rv;
+ 			rv |= _res_op_internal(tree, first, type, ret, realop,
+ 					       node);
  		}
  	} else {
  		list_for(tree, node, count) {
- 			rv = _res_op_internal(tree, first, type, ret, realop,
- 					      node);
- 			if (rv != 0) 
+ 			rv |= _res_op_internal(tree, first, type, ret, realop,
+ 					       node);
+
+			/* If we hit a problem during a 'status' op in an
+			   independent subtree, rv will have the
+			   FL_RECOVERABLE bit set, but not FL_FAILURE.
+			   If we ever hit FL_FAILURE during a status
+			   operation, we're *DONE* - even if the subtree
+			   is flagged w/ indy-subtree */
+			  
+ 			if (rv & FL_FAILURE) 
  				return rv;
  		}
  	}
-	return 0;
+
+	return rv;
 }
 
 /**
@@ -1378,7 +1410,30 @@
 int
 res_status(resource_node_t **tree, resource_t *res, void *ret)
 {
-	return _res_op(tree, res, NULL, ret, RS_STATUS);
+	int rv;
+	rv = _res_op(tree, res, NULL, ret, RS_STATUS);
+
+	if (rv & FL_FAILURE)
+		return rv;
+
+	clulog(LOG_WARNING, "Some independent resources in %s:%s failed; "
+	       "Attempting inline recovery\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+
+	rv = res_condstop(tree, res, ret);
+	if (rv & FL_FAILURE)
+		goto out_fail;
+	rv = res_condstart(tree, res, ret);
+	if (rv & FL_FAILURE)
+		goto out_fail;
+
+	clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+	return 0;
+out_fail:
+	clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+	return 1;
 }
 
 
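The restree.c hunks above read the per-node __independent_subtree
attribute from CCS (or from the local config in NO_CCS builds) and set
RF_INDEPENDENT when the value is a positive integer or "yes". A minimal
standalone sketch of that value test (mine, not from the commit):

#include <stdio.h>
#include <stdlib.h>
#include <strings.h>

static int is_independent(const char *ref)
{
        /* same test as the do_load_resource() hunk above */
        return atoi(ref) > 0 || strcasecmp(ref, "yes") == 0;
}

int main(void)
{
        /* prints: 1 1 0 0 */
        printf("%d %d %d %d\n", is_independent("1"), is_independent("YES"),
               is_independent("0"), is_independent("no"));
        return 0;
}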
--- cluster/rgmanager/src/resources/script.sh	2007/04/05 15:08:20	1.9
+++ cluster/rgmanager/src/resources/script.sh	2007/05/31 19:08:14	1.10
@@ -118,5 +118,5 @@
 declare -i rv=$?
 if [ $rv -ne 0 ]; then
 	ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)"
-	return $OCF_ERR_GENERIC
+	exit $OCF_ERR_GENERIC
 fi



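Taken together, the restree.c changes above replace the old summed child
return codes with two OR-able flag bits. A minimal sketch of the
convention (illustrative only; the two flags are the ones defined in
restree.c): a status failure inside an independent subtree surfaces as
FL_RECOVERABLE, letting res_status() attempt a conditional stop/start of
just the marked nodes, while FL_FAILURE still aborts the walk.

#include <stdio.h>

#define FL_FAILURE	0x1
#define FL_RECOVERABLE	0x2

/* hypothetical status result for one child node */
static int child_status(int failed, int independent)
{
        if (!failed)
                return 0;
        return independent ? FL_RECOVERABLE : FL_FAILURE;
}

int main(void)
{
        int rv = 0;

        rv |= child_status(0, 0);       /* healthy child            */
        rv |= child_status(1, 1);       /* failed independent child */

        if (rv & FL_FAILURE)
                printf("hard failure: recover the whole service\n");
        else if (rv & FL_RECOVERABLE)
                printf("inline recovery: condstop + condstart subtree\n");
        else
                printf("all healthy\n");

        return 0;
}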
* [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h  ...
@ 2007-11-26 21:46 lhh
From: lhh @ 2007-11-26 21:46 UTC (permalink / raw)
  To: cluster-devel.redhat.com

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL5
Changes by:	lhh at sourceware.org	2007-11-26 21:46:27

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: reslist.h 
	rgmanager/src/daemons: Makefile fo_domain.c groups.c main.c 
	                       reslist.c resrules.c restree.c rg_state.c 
	                       test.c 
	rgmanager/src/resources: service.sh vm.sh 
Added files:
	rgmanager/include: restart_counter.h 
	rgmanager/src/daemons: restart_counter.c 

Log message:
	Implement restart counters per #247139

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.31.2.28&r2=1.31.2.29
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/restart_counter.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.6&r2=1.15.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restart_counter.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.14.2.3&r2=1.14.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/fo_domain.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.11&r2=1.11.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.12&r2=1.25.2.13
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.34.2.9&r2=1.34.2.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/reslist.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.14.2.4&r2=1.14.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.16.2.7&r2=1.16.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.23.2.12&r2=1.23.2.13
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.24.2.13&r2=1.24.2.14
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/test.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.6.2.5&r2=1.6.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/service.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.7.2.6&r2=1.7.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.8&r2=1.1.2.9

--- cluster/rgmanager/ChangeLog	2007/11/26 21:37:17	1.31.2.28
+++ cluster/rgmanager/ChangeLog	2007/11/26 21:46:26	1.31.2.29
@@ -1,3 +1,21 @@
+2007-11-26 Lon Hohberger <lhh@redhat.com>
+	* include/reslist.h: Add restart counters to resource node structure
+	(intended for top-level resources, i.e. services, vms...)
+	* include/restart_counter.h: Add header file for restart counter
+	* src/daemons/Makefile: Fix build to include restart counters
+	* src/daemons/restart_counter.c: Implement restart counters #247139
+	* src/daemons/fo_domain.c, groups.c, restart_counter.c, resrules.c,
+	restree.c, test.c: Glue for restart counters.
+	* src/daemons/reslist.c: Glue for restart counters.  Make expand_time
+	parser more robust to allow things like '1h30m' as a time value.
+	* src/daemons/main.c: Mark quorum disk offline in the correct
+	place to avoid extraneous log messages
+	* src/daemons/rg_state.c: Allow marking service as stopped if
+	stuck in recover state.  Make service which failed to start
+	go to stopped state.  Glue for restart counters.
+	* src/resources/service.sh, vm.sh: Add parameters for restart
+	counters #247139
+
 2007-11-14 Lon Hohberger <lhh@redhat.com>
 	* src/utils/clulog.c: Make clulog honor rgmanager log levels
 	(#289501)
--- cluster/rgmanager/include/reslist.h	2007/08/02 14:46:51	1.15.2.6
+++ cluster/rgmanager/include/reslist.h	2007/11/26 21:46:26	1.15.2.7
@@ -126,6 +126,7 @@
 	struct _rg_node	*rn_child, *rn_parent;
 	resource_t	*rn_resource;
 	resource_act_t	*rn_actions;
+	restart_counter_t rn_restart_counter;
 	int	rn_state; /* State of this instance of rn_resource */
 	int	rn_flags;
 	int	rn_last_status;
--- cluster/rgmanager/src/daemons/Makefile	2007/07/24 13:53:08	1.14.2.3
+++ cluster/rgmanager/src/daemons/Makefile	2007/11/26 21:46:27	1.14.2.4
@@ -38,7 +38,8 @@
 clurgmgrd: rg_thread.o rg_locks.o main.o groups.o  \
 		rg_queue.o rg_forward.o reslist.o \
 		resrules.o restree.o fo_domain.o nodeevent.o \
-		rg_event.o watchdog.o rg_state.o ../clulib/libclulib.a
+		rg_event.o watchdog.o rg_state.o \
+		restart_counter.o ../clulib/libclulib.a
 	$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) $(LDFLAGS) -lccs -lcman -lpthread -ldlm
 
 #
@@ -56,7 +57,8 @@
 # packages should run 'make check' as part of the build process.
 #
 rg_test: rg_locks-noccs.o test-noccs.o reslist-noccs.o \
-		resrules-noccs.o restree-noccs.o fo_domain-noccs.o
+		resrules-noccs.o restree-noccs.o fo_domain-noccs.o \
+		restart_counter.o 
 	$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) -llalloc $(LDFLAGS) -lccs -lcman
 
 clurmtabd: clurmtabd.o clurmtabd_lib.o
--- cluster/rgmanager/src/daemons/fo_domain.c	2006/09/27 16:28:41	1.11
+++ cluster/rgmanager/src/daemons/fo_domain.c	2007/11/26 21:46:27	1.11.2.1
@@ -27,6 +27,7 @@
 #include <list.h>
 #include <clulog.h>
 #include <resgroup.h>
+#include <restart_counter.h>
 #include <reslist.h>
 #include <ccs.h>
 #include <pthread.h>
--- cluster/rgmanager/src/daemons/groups.c	2007/08/02 14:46:51	1.25.2.12
+++ cluster/rgmanager/src/daemons/groups.c	2007/11/26 21:46:27	1.25.2.13
@@ -20,6 +20,7 @@
 //#define DEBUG
 #include <platform.h>
 #include <resgroup.h>
+#include <restart_counter.h>
 #include <reslist.h>
 #include <vf.h>
 #include <message.h>
@@ -178,6 +179,29 @@
 }
 
 
+resource_node_t *
+node_by_ref(resource_node_t **tree, char *name)
+{
+	resource_t *res;
+	resource_node_t *node, *ret = NULL;
+	char rgname[64];
+	int x;
+
+	list_for(&_tree, node, x) {
+
+		res = node->rn_resource;
+		res_build_name(rgname, sizeof(rgname), res);
+
+		if (!strcasecmp(name, rgname)) {
+			ret = node;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+
 int
 count_resource_groups_local(cman_node_t *mp)
 {
@@ -1583,6 +1607,28 @@
 }
 
 
+int
+check_restart(char *rg_name)
+{
+	resource_node_t *node;
+	int ret = 1;
+
+	pthread_rwlock_rdlock(&resource_lock);
+	node = node_by_ref(&_tree, rg_name);
+	if (node) {
+		ret = restart_add(node->rn_restart_counter);
+		if (ret) {
+			/* Clear it out - caller is about 
+			   to relocate the service anyway */
+			restart_clear(node->rn_restart_counter);
+		}
+	}
+	pthread_rwlock_unlock(&resource_lock);
+
+	return ret;
+}
+
+
 void
 kill_resource_groups(void)
 {
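restart_counter.c itself is not quoted in this mail, so the following is
only a sketch of the idea behind check_restart() above, under the
assumption that the counter is a sliding window: each restart is
recorded, records are forgotten after restart_expire_time, and the add
operation reports when more than max_restarts remain in the window (the
caller then clears the history and relocates). This is not the committed
implementation.

#include <stdio.h>
#include <time.h>

#define MAX_EVENTS 16

struct restart_counter {
        time_t expire;          /* restart_expire_time, in seconds */
        int    max;             /* max_restarts                    */
        int    n;
        time_t when[MAX_EVENTS];
};

/* nonzero return: threshold exceeded, caller should relocate */
static int restart_add_sketch(struct restart_counter *c)
{
        time_t now = time(NULL);
        int i, live = 0;

        /* forget restarts older than the expiration window */
        for (i = 0; i < c->n; i++)
                if (c->expire == 0 || now - c->when[i] < c->expire)
                        c->when[live++] = c->when[i];
        c->n = live;

        if (c->n < MAX_EVENTS)
                c->when[c->n++] = now;

        return c->n > c->max;
}

int main(void)
{
        struct restart_counter c = { 3600, 2, 0, { 0 } };
        int i;

        /* restarts 1 and 2 stay local; 3 and 4 trip the threshold */
        for (i = 1; i <= 4; i++)
                printf("restart %d -> %s\n", i,
                       restart_add_sketch(&c) ? "relocate" : "restart in place");
        return 0;
}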
--- cluster/rgmanager/src/daemons/main.c	2007/08/21 16:39:02	1.34.2.9
+++ cluster/rgmanager/src/daemons/main.c	2007/11/26 21:46:27	1.34.2.10
@@ -165,6 +165,7 @@
 
 	old_membership = member_list();
 	new_ml = get_member_list(h);
+	memb_mark_down(new_ml, 0);
 
 	for (x = 0; x < new_ml->cml_count; x++) {
 
@@ -181,19 +182,25 @@
 			quorate = cman_is_listening(h,
 					new_ml->cml_members[x].cn_nodeid,
 					port);
+
 			if (quorate == 0) {
 				clulog(LOG_DEBUG, "Node %d is not listening\n",
 					new_ml->cml_members[x].cn_nodeid);
 				new_ml->cml_members[x].cn_member = 0;
 			} else if (quorate < 0) {
+				if (errno == ENOTCONN) {
+					new_ml->cml_members[x].cn_member = 0;
+					break;
+				}
 				perror("cman_is_listening");
 				usleep(50000);
 				continue;
 			}
-
 #ifdef DEBUG
-			printf("Node %d IS listening\n",
-			       new_ml->cml_members[x].cn_nodeid);
+		       	else {
+				printf("Node %d IS listening\n",
+				       new_ml->cml_members[x].cn_nodeid);
+			}
 #endif
 			break;
 		} while(1);
@@ -201,7 +208,6 @@
 
 	cman_finish(h);
 	member_list_update(new_ml);
-	member_set_state(0, 0);		/* Mark qdisk as dead */
 
 	/*
 	 * Handle nodes lost.  Do our local node event first.
--- cluster/rgmanager/src/daemons/reslist.c	2007/07/31 17:54:54	1.14.2.4
+++ cluster/rgmanager/src/daemons/reslist.c	2007/11/26 21:46:27	1.14.2.5
@@ -26,6 +26,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <list.h>
+#include <restart_counter.h>
 #include <reslist.h>
 #include <pthread.h>
 #ifndef NO_CCS
--- cluster/rgmanager/src/daemons/resrules.c	2007/07/31 17:54:54	1.16.2.7
+++ cluster/rgmanager/src/daemons/resrules.c	2007/11/26 21:46:27	1.16.2.8
@@ -27,6 +27,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <list.h>
+#include <restart_counter.h>
 #include <reslist.h>
 #include <pthread.h>
 #include <dirent.h>
@@ -218,43 +219,70 @@
 
 
 int
-expand_time(char *val)
+expand_time (char *val)
 {
-	int l = strlen(val);
-	char c = val[l - 1];
-	int ret = atoi(val);
+	int curval, len;
+	int ret = 0;
+	char *start = val, ival[16];
 
-	if (ret <= 0)
-		return 0;
+	if (!val)
+		return (time_t)0;
+
+	while (start[0]) {
+
+		len = 0;
+		curval = 0;
+		memset(ival, 0, sizeof(ival));
+
+		while (isdigit(start[len])) {
+			ival[len] = start[len];
+			len++;
+		}
+
+		if (len) {
+			curval = atoi(ival);
+		} else {
+			len = 1;
+		}
 
-	if ((c >= '0') && (c <= '9'))
-		return ret;
+		switch(start[len]) {
+		case 0:
+		case 'S':
+		case 's':
+			break;
+		case 'M':
+        	case 'm':
+			curval *= 60;
+			break;
+		case 'h':
+		case 'H':
+			curval *= 3600;
+			break;
+		case 'd':
+		case 'D':
+			curval *= 86400;
+			break;
+		case 'w':
+		case 'W':
+			curval *= 604800;
+			break;
+		case 'y':
+		case 'Y':
+			curval *= 31536000;
+			break;
+		default:
+			curval = 0;
+		}
 
-	switch(c) {
-	case 'S':
-	case 's':
-		return (ret);
-	case 'M':
-	case 'm':
-		return (ret * 60);
-	case 'h':
-	case 'H':
-		return (ret * 3600);
-	case 'd':
-	case 'D':
-		return (ret * 86400);
-	case 'w':
-	case 'W':
-		return (ret * 604800);
-	case 'y':
-	case 'Y':
-		return (ret * 31536000);
+		ret += (time_t)curval;
+		start += len;
 	}
 
 	return ret;
 }
 
 
+
 /**
  * Store a resource action
  * @param actsp		Action array; may be modified and returned!
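The rewritten expand_time() walks the whole string instead of inspecting
only the last character, so unit-suffixed terms can be chained: "1h30m"
is 1*3600 + 30*60 = 5400 seconds, and a bare number is still seconds. A
simplified behavioral sketch (mine, not the committed code):

#include <ctype.h>
#include <stdio.h>

static int expand_time_sketch(const char *val)
{
        int total = 0;

        while (*val) {
                int cur = 0;

                while (isdigit((unsigned char)*val))
                        cur = cur * 10 + (*val++ - '0');

                switch (*val) {
                case 'm': case 'M': cur *= 60;       break;
                case 'h': case 'H': cur *= 3600;     break;
                case 'd': case 'D': cur *= 86400;    break;
                case 'w': case 'W': cur *= 604800;   break;
                case 'y': case 'Y': cur *= 31536000; break;
                case 's': case 'S': case '\0':       break;
                default:            cur = 0;         break;
                }

                total += cur;
                if (*val)
                        val++;          /* skip the unit character */
        }
        return total;
}

int main(void)
{
        /* prints: 5400 90 172800 */
        printf("%d %d %d\n", expand_time_sketch("1h30m"),
               expand_time_sketch("90"), expand_time_sketch("2d"));
        return 0;
}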
--- cluster/rgmanager/src/daemons/restree.c	2007/09/25 21:09:23	1.23.2.12
+++ cluster/rgmanager/src/daemons/restree.c	2007/11/26 21:46:27	1.23.2.13
@@ -30,6 +30,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <list.h>
+#include <restart_counter.h>
 #include <reslist.h>
 #include <pthread.h>
 #include <clulog.h>
@@ -432,6 +433,39 @@
 }
 
 
+static inline void
+assign_restart_policy(resource_t *curres, resource_node_t *parent,
+		      resource_node_t *node)
+{
+	char *val;
+	int max_restarts = 0;
+	time_t restart_expire_time = 0;
+
+	node->rn_restart_counter = NULL;
+
+	if (!curres || !node)
+		return;
+	if (parent) /* Non-parents don't get one for now */
+		return;
+
+	val = res_attr_value(curres, "max_restarts");
+	if (!val)
+		return;
+	max_restarts = atoi(val);
+	if (max_restarts <= 0)
+		return;
+	val = res_attr_value(curres, "restart_expire_time");
+	if (val) {
+		restart_expire_time = (time_t)expand_time(val);
+		if (!restart_expire_time)
+			return;
+	}
+
+	node->rn_restart_counter = restart_init(restart_expire_time,
+						max_restarts);
+}
+
+
 static inline int
 do_load_resource(int ccsfd, char *base,
 	         resource_rule_t *rule,
@@ -514,6 +548,7 @@
 	node->rn_state = RES_STOPPED;
 	node->rn_flags = 0;
 	node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
+	assign_restart_policy(curres, parent, node);
 
 	snprintf(tok, sizeof(tok), "%s/@__independent_subtree", base);
 #ifndef NO_CCS
@@ -768,6 +803,11 @@
 			destroy_resource_tree(&(*tree)->rn_child);
 
 		list_remove(tree, node);
+
+		if (node->rn_restart_counter) {
+			restart_cleanup(node->rn_restart_counter);
+		}
+
 		if(node->rn_actions){
 			free(node->rn_actions);
 		}
--- cluster/rgmanager/src/daemons/rg_state.c	2007/08/30 16:03:03	1.24.2.13
+++ cluster/rgmanager/src/daemons/rg_state.c	2007/11/26 21:46:27	1.24.2.14
@@ -1315,7 +1315,8 @@
 	}
 
 	if ((svcStatus.rs_state != RG_STATE_STOPPING) &&
-	     (svcStatus.rs_state != RG_STATE_ERROR)) {
+	    (svcStatus.rs_state != RG_STATE_ERROR) &&
+	    (svcStatus.rs_state != RG_STATE_RECOVER)) {
 		rg_unlock(&lockp);
 		return 0;
 	}
@@ -1721,8 +1722,10 @@
 	 * We got sent here from handle_start_req.
 	 * We're DONE.
 	 */
-	if (request == RG_START_RECOVER)
+	if (request == RG_START_RECOVER) {
+		_svc_stop_finish(svcName, 0, RG_STATE_STOPPED);
 		return RG_EFAIL;
+	}
 
 	/*
 	 * All potential places for the service to start have been exhausted.
@@ -1731,7 +1734,7 @@
 exhausted:
 	if (!rg_locked()) {
 		clulog(LOG_WARNING,
-		       "#70: Attempting to restart service %s locally.\n",
+		       "#70: Failed to relocate %s; restarting locally\n",
 		       svcName);
 		if (svc_start(svcName, RG_START_RECOVER) == 0) {
 			*new_owner = me;
@@ -1969,6 +1972,14 @@
 					   new_owner);
 	}
 
+	/* Check restart counter/timer for this resource */
+	if (check_restart(svcName) > 0) {
+		clulog(LOG_NOTICE, "Restart threshold for %s exceeded; "
+		       "attempting to relocate\n", svcName);
+		return handle_relocate_req(svcName, RG_START_RECOVER, -1,
+					   new_owner);
+	}
+
 	return handle_start_req(svcName, RG_START_RECOVER, new_owner);
 }
 
--- cluster/rgmanager/src/daemons/test.c	2007/07/31 17:54:54	1.6.2.5
+++ cluster/rgmanager/src/daemons/test.c	2007/11/26 21:46:27	1.6.2.6
@@ -25,6 +25,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <list.h>
+#include <restart_counter.h>
 #include <reslist.h>
 #include <pthread.h>
 
--- cluster/rgmanager/src/resources/service.sh	2007/11/13 17:38:43	1.7.2.6
+++ cluster/rgmanager/src/resources/service.sh	2007/11/26 21:46:27	1.7.2.7
@@ -154,6 +154,32 @@
             </shortdesc>
             <content type="string"/>
         </parameter>
+
+        <parameter name="max_restarts">
+            <longdesc lang="en">
+	    	Maximum restarts for this service.
+            </longdesc>
+            <shortdesc lang="en">
+	    	Maximum restarts for this service.
+            </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+        <parameter name="restart_expire_time">
+            <longdesc lang="en">
+	    	Restart expiration time
+            </longdesc>
+            <shortdesc lang="en">
+	    	Restart expiration time.  A restart is forgotten
+		after this time.  When combined with the max_restarts
+		option, this lets administrators specify a threshold
+		for when to fail over services.  If max_restarts
+		is exceeded within this expiration time, the service
+		is relocated instead of restarted again.
+            </shortdesc>
+            <content type="string"/>
+        </parameter>
+
     </parameters>
 
     <actions>
--- cluster/rgmanager/src/resources/vm.sh	2007/11/14 18:58:26	1.1.2.8
+++ cluster/rgmanager/src/resources/vm.sh	2007/11/26 21:46:27	1.1.2.9
@@ -184,6 +184,31 @@
             <content type="string" default="live"/>
         </parameter>
 
+        <parameter name="max_restarts">
+            <longdesc lang="en">
+	    	Maximum restarts for this service.
+            </longdesc>
+            <shortdesc lang="en">
+	    	Maximum restarts for this service.
+            </shortdesc>
+            <content type="string"/>
+        </parameter>
+
+        <parameter name="restart_expire_time">
+            <longdesc lang="en">
+	    	Restart expiration time
+            </longdesc>
+            <shortdesc lang="en">
+	    	Restart expiration time.  A restart is forgotten
+		after this time.  When combined with the max_restarts
+		option, this lets administrators specify a threshold
+		for when to fail over services.  If max_restarts
+		is exceeded within this expiration time, the service
+		is relocated instead of restarted again.
+            </shortdesc>
+            <content type="string"/>
+        </parameter>
+
     </parameters>
 
     <actions>



* [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h  ...
@ 2007-08-02 14:53 lhh
From: lhh @ 2007-08-02 14:53 UTC (permalink / raw)
  To: cluster-devel.redhat.com

CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	lhh at sourceware.org	2007-08-02 14:53:38

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: reslist.h 
	rgmanager/src/daemons: groups.c main.c nodeevent.c restree.c 
	                       rg_forward.c rg_state.c 
	rgmanager/src/resources: vm.sh 

Log message:
	Fix #248727, round 2

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.54&r2=1.55
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.22&r2=1.23
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.38&r2=1.39
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.41&r2=1.42
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&r1=1.8&r2=1.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&r1=1.35&r2=1.36
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_forward.c.diff?cvsroot=cluster&r1=1.10&r2=1.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.38&r2=1.39
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&r1=1.6&r2=1.7

--- cluster/rgmanager/ChangeLog	2007/07/31 18:00:25	1.54
+++ cluster/rgmanager/ChangeLog	2007/08/02 14:53:37	1.55
@@ -1,3 +1,16 @@
+2007-08-02 Lon Hohberger <lhh@redhat.com>
+	* general: More fixes around #248727
+	* include/reslist.h, src/daemons/restree.c: Make last-value be
+	returned for resources which have been checked recently
+	* src/daemons/groups.c: Make VMs use migrate semantics instead of
+	relocate semantics when employing failover domain rules
+	* src/daemons/nodeevent.c: Fix VMs ending up on wrong nodes when
+	simultaneous boot occurs
+	* src/daemons/rg_forward.c: Fix erroneous timeout
+	* src/daemons/rg_state.c: Handle RG_STATE_MIGRATE in svc_advise_*
+	Handle certain migration failures.
+	* src/resources/vm.sh: Handle certain migration failures
+
 2007-07-31 Lon Hohberger <lhh@redhat.com>
 	* general: Make VMs not change state when added/removed from the
 	cluster config or bounce services/VMs when minor config changes
--- cluster/rgmanager/include/reslist.h	2007/07/31 18:00:25	1.22
+++ cluster/rgmanager/include/reslist.h	2007/08/02 14:53:37	1.23
@@ -128,6 +128,10 @@
 	resource_act_t	*rn_actions;
 	int	rn_state; /* State of this instance of rn_resource */
 	int	rn_flags;
+	int	rn_last_status;
+	int 	rn_last_depth;
+	int	rn_checked;
+	int	rn_pad;
 } resource_node_t;
 
 typedef struct _fod_node {
--- cluster/rgmanager/src/daemons/groups.c	2007/07/31 18:00:25	1.38
+++ cluster/rgmanager/src/daemons/groups.c	2007/08/02 14:53:38	1.39
@@ -54,6 +54,7 @@
 
 void res_build_name(char *, size_t, resource_t *);
 int get_rg_state_local(char *, rg_state_t *);
+int group_migratory(char *, int);
 
 
 struct status_arg {
@@ -503,13 +504,14 @@
 consider_relocate(char *svcName, rg_state_t *svcStatus, uint32_t nodeid,
 		  cluster_member_list_t *membership)
 {
-	int a, b;
+	int a, b, req = RG_RELOCATE;
 
 	/*
 	   Service must be running locally in order to consider for
 	   a relocate
 	 */
-	if (svcStatus->rs_state != RG_STATE_STARTED ||
+	if ((svcStatus->rs_state != RG_STATE_STARTING &&
+	    svcStatus->rs_state != RG_STATE_STARTED) ||
 	    svcStatus->rs_owner != my_id())
 		return;
 
@@ -529,11 +531,16 @@
 	if (a <= b)
 		return;
 
-	clulog(LOG_DEBUG, "Relocating group %s to better node %s\n",
+	if (group_migratory(svcName, 1)) {
+		req = RG_MIGRATE;
+	}
+
+	clulog(LOG_NOTICE, "%s %s to better node %s\n",
+	       req==RG_MIGRATE ? "Migrating":"Relocating",
 	       svcName,
 	       memb_id_to_name(membership, nodeid));
 
-	rt_enqueue_request(svcName, RG_RELOCATE, NULL, 0, nodeid, 0, 0);
+	rt_enqueue_request(svcName, req, NULL, 0, nodeid, 0, 0);
 }
 
 
--- cluster/rgmanager/src/daemons/main.c	2007/07/23 20:49:13	1.41
+++ cluster/rgmanager/src/daemons/main.c	2007/08/02 14:53:38	1.42
@@ -43,7 +43,8 @@
 #ifdef WRAP_THREADS
 void dump_thread_states(FILE *);
 #endif
-int configure_logging(int ccsfd, int debug);
+int configure_rgmanager(int ccsfd, int debug);
+void set_transition_throttling(int);
 
 void node_event(int, int, int, int);
 void node_event_q(int, int, int, int);
@@ -730,7 +731,7 @@
 
 	if (need_reconfigure || check_config_update()) {
 		need_reconfigure = 0;
-		configure_logging(-1, 0);
+		configure_rgmanager(-1, 0);
 		init_resource_groups(1);
 		return 0;
 	}
@@ -789,7 +790,7 @@
  * Configure logging based on data in cluster.conf
  */
 int
-configure_logging(int ccsfd, int dbg)
+configure_rgmanager(int ccsfd, int dbg)
 {
 	char *v;
 	char internal = 0;
@@ -812,6 +813,12 @@
 		free(v);
 	}
 
+	if (ccs_get(ccsfd, "/cluster/rm/@transition_throttling", &v) == 0) {
+		if (!dbg)
+			set_transition_throttling(atoi(v));
+		free(v);
+	}
+
 	if (internal)
 		ccs_disconnect(ccsfd);
 
@@ -956,7 +963,7 @@
 	   We know we're quorate.  At this point, we need to
 	   read the resource group trees from ccsd.
 	 */
-	configure_logging(-1, debug);
+	configure_rgmanager(-1, debug);
 	clulog(LOG_NOTICE, "Resource Group Manager Starting\n");
 
 	if (init_resource_groups(0) != 0) {
--- cluster/rgmanager/src/daemons/nodeevent.c	2007/07/23 20:49:13	1.8
+++ cluster/rgmanager/src/daemons/nodeevent.c	2007/08/02 14:53:38	1.9
@@ -42,6 +42,7 @@
 #endif
 static nevent_t *event_queue = NULL;
 static pthread_t ne_thread = 0;
+static int transition_throttling = 5;
 int ne_queue_request(int local, int nodeid, int state);
 
 void hard_exit(void);
@@ -53,6 +54,15 @@
 extern int shutdown_pending;
 
 
+void
+set_transition_throttling(int nsecs)
+{
+	if (nsecs < 0)
+		nsecs = 0;
+	transition_throttling = nsecs;
+}
+
+
 /**
   Called to handle the transition of a cluster member from up->down or
   down->up.  This handles initializing services (in the local node-up case),
@@ -88,11 +98,16 @@
 		if (shutdown_pending) {
 			clulog(LOG_NOTICE, "Processing delayed exit signal\n");
 			running = 0;
+			return;
 		}
 		setup_signal(SIGINT, flag_shutdown);
 		setup_signal(SIGTERM, flag_shutdown);
 		setup_signal(SIGHUP, flag_reconfigure);
 
+		/* Let things settle if we're booting multiple */
+		if (transition_throttling)
+			sleep(transition_throttling);
+
 		eval_groups(1, nodeID, 1);
 		return;
 	}
--- cluster/rgmanager/src/daemons/restree.c	2007/07/31 18:00:25	1.35
+++ cluster/rgmanager/src/daemons/restree.c	2007/08/02 14:53:38	1.36
@@ -666,8 +666,10 @@
 			}
 		}
 		/* No resource rule matching the child?  Press on... */
-		if (!flags)
+		if (!flags) {
+			free(ref);
 			continue;
+		}
 
 		flags = 0;
 		/* Don't descend on anything we should have already picked
@@ -687,11 +689,9 @@
 			break;
 		}
 
-		if (flags == 2) {
-			free(ref);
-			continue;
-		}
 		free(ref);
+		if (flags == 2)
+			continue;
 
 		x = 1;
 		switch(do_load_resource(ccsfd, tok, childrule, tree,
@@ -1040,8 +1040,11 @@
 	}
 
 	/* No check levels ready at the moment. */
-	if (idx == -1)
-		return 0;
+	if (idx == -1) {
+		if (node->rn_checked)
+			return node->rn_last_status;
+  		return 0;
+	}
 
  	/* Clear all check levels lower than us */
  	for (x = 0; node->rn_actions[x].ra_name; x++) {
@@ -1064,11 +1067,14 @@
  		node->rn_actions[idx].ra_depth,
  		(int)node->rn_actions[idx].ra_interval);*/
  
-	node->rn_actions[idx].ra_last = now;
-	if ((x = res_exec(node, RS_STATUS, NULL,
-                         node->rn_actions[idx].ra_depth)) == 0)
-		return 0;
-
+ 	node->rn_actions[idx].ra_last = now;
+ 	x = res_exec(node, RS_STATUS, NULL, node->rn_actions[idx].ra_depth);
+ 
+ 	node->rn_last_status = x;
+ 	node->rn_last_depth = node->rn_actions[idx].ra_depth;
+ 	node->rn_checked = 1;
+ 	if (x == 0)
+  		return 0;
 	if (!has_recover)
 		return x;
 
@@ -1127,14 +1133,18 @@
 
 	now = res->r_started;
 
-       for (; node->rn_actions[x].ra_name; x++) {
+	for (; node->rn_actions[x].ra_name; x++) {
 
-               if (strcmp(node->rn_actions[x].ra_name, "monitor") &&
-                   strcmp(node->rn_actions[x].ra_name, "status"))
+		if (strcmp(node->rn_actions[x].ra_name, "monitor") &&
+		    strcmp(node->rn_actions[x].ra_name, "status"))
 			continue;
 
-               node->rn_actions[x].ra_last = now;
+		node->rn_actions[x].ra_last = now;
 	}
+
+	node->rn_checked = 0;
+	node->rn_last_status = 0;
+	node->rn_last_depth = 0;
 }
 
 
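The reslist.h and restree.c hunks above make a node remember the outcome
of its most recent real status check, so a caller that arrives between
check levels sees the cached result instead of an unconditional success.
A minimal before/after sketch (illustrative, not the committed code):

#include <stdio.h>

struct node {
        int checked;            /* rn_checked     */
        int last_status;        /* rn_last_status */
};

/* stand-in for res_exec(node, RS_STATUS, ...) */
static int run_status_agent(void)
{
        return 1;               /* pretend the check fails */
}

static int do_status_sketch(struct node *n, int level_due)
{
        if (!level_due) {
                /* old behavior: return 0 and assume healthy;
                   new behavior: replay the last real result */
                return n->checked ? n->last_status : 0;
        }

        n->last_status = run_status_agent();
        n->checked = 1;
        return n->last_status;
}

int main(void)
{
        struct node n = { 0, 0 };

        printf("%d\n", do_status_sketch(&n, 1));  /* real check: 1 */
        printf("%d\n", do_status_sketch(&n, 0));  /* cached: now 1,
                                                     previously 0  */
        return 0;
}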
--- cluster/rgmanager/src/daemons/rg_forward.c	2007/07/23 20:49:13	1.10
+++ cluster/rgmanager/src/daemons/rg_forward.c	2007/08/02 14:53:38	1.11
@@ -122,10 +122,12 @@
 				m = NULL;
 				continue;
 			}
-			goto out_fail;
+
+			if (ret == 0)
+				continue;
 		}
 		break;
-	} while(++retries < 60); /* old 60 second rule */
+	} while(++retries < 60); /* old 600 second rule */
 
 	swab_SmMessageSt(&msg);
 
--- cluster/rgmanager/src/daemons/rg_state.c	2007/07/23 20:49:13	1.38
+++ cluster/rgmanager/src/daemons/rg_state.c	2007/08/02 14:53:38	1.39
@@ -35,6 +35,7 @@
 #include <ccs.h>
 #include <rg_queue.h>
 #include <msgsimple.h>
+#include <res-ocf.h>
 
 /* XXX - copied :( */
 #define cn_svccount cn_address.cna_address[0] /* These are uint8_t size */
@@ -475,6 +476,7 @@
 	case RG_STATE_CHECK:
 	case RG_STATE_STARTING:
 	case RG_STATE_RECOVER:
+	case RG_STATE_MIGRATE:
 		if ((svcStatus->rs_owner != my_id()) &&
 		    memb_online(membership, svcStatus->rs_owner)) {
 			/*
@@ -597,6 +599,10 @@
 		       "#43: Service %s has failed; can not start.\n",
 		       svcName);
 		break;
+
+	case RG_STATE_MIGRATE:
+		ret = 4;
+		break;
 		
 	case RG_STATE_STOPPING:
 	case RG_STATE_STARTED:
@@ -909,16 +915,60 @@
        
 	ret = group_migrate(svcName, target);
 
-	if (ret == -1 || ret > 0) {
+	switch(ret) {
+	default:
+	case -1:
+	case OCF_RA_ERROR:
+		svc_fail(svcName);
 		/* XXX run svc_status again here to see if it's still
 		   healthy; if it is, don't FAIL it; it could be that
 		   the target node simply died; in this case, set status
 		   back to started */
-		/* if ret > 0 { svc_status... */
-		svc_fail(svcName);
+		return RG_EFAIL;
+		break;
+	case OCF_RA_NOT_RUNNING:
+		/* For these two, the VM was either not running or 
+		   migration is simply impossible. */
+		/* Don't mark the service as failed; since it's either
+		   recoverable or still running. */
 		ret = RG_EFAIL;
+		break;
+	case OCF_RA_NOT_CONFIGURED:
+		ret = RG_EINVAL;
+		break;
+	case 0:
+		return 0;
 	}
 
+	/* Ok, we've hit a recoverable condition.  Since VMs and migratory
+	   services are ... well, migratable, we can just flip the state
+	   back to 'started' and error checking will fix it later. */
+	if (rg_lock(svcName, &lockp) < 0) {
+		clulog(LOG_ERR, "#45: Unable to obtain cluster lock: %s\n",
+		       strerror(errno));
+		return ret;
+	}
+
+	if (get_rg_state(svcName, &svcStatus) != 0) {
+		rg_unlock(&lockp);
+		clulog(LOG_ERR, "#46: Failed getting status for RG %s\n",
+		       svcName);
+		return ret;
+	}
+
+	if (svcStatus.rs_last_owner != my_id() ||
+	    svcStatus.rs_owner != target ||
+	    svcStatus.rs_state != RG_STATE_MIGRATE) {
+		rg_unlock(&lockp);
+		return ret;
+	}
+
+	svcStatus.rs_owner = my_id();
+	svcStatus.rs_state = RG_STATE_STARTED;
+
+	set_rg_state(svcName, &svcStatus);
+	rg_unlock(&lockp);
+
 	return ret;
 }
 
@@ -971,7 +1021,8 @@
 		}
 
 		msg_send(&ctx, &msgp, sizeof(msgp));
-		msg_receive(&ctx, &response, sizeof (response), 5);
+		if (msg_receive(&ctx, &response, sizeof (response), 5) != sizeof(response))
+			goto cont;
 
 		swab_SmMessageSt(&response);
 		if (response.sm_data.d_ret == RG_SUCCESS)
@@ -979,6 +1030,7 @@
 		else
 			ret = -1;
 
+cont:
 		msg_close(&ctx);
 	}
 
@@ -2046,7 +2098,7 @@
 	allowed_nodes = member_list();
 
 	while (memb_count(allowed_nodes)) {
-		target = best_target_node(allowed_nodes, -1,
+		target = best_target_node(allowed_nodes, 0,
 	 				  svcName, 1);
 	  	if (target == me) {
 	   		ret = handle_start_remote_req(svcName, request);
@@ -2055,7 +2107,8 @@
 	    	} else if (target < 0) {
 			goto out;
 	       	} else {
-			ret = relocate_service(svcName, request, target);
+			ret = relocate_service(svcName, RG_START_REMOTE,
+					       target);
 		}
 
 		switch(ret) {
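The migration error handling above keys off the agent's OCF-style exit
code. A rough standalone mirror of that policy (the OCF_RA_* and RG_*
values below are placeholders; the real constants live in res-ocf.h and
the resgroup headers, which are not quoted in this mail):

#include <stdio.h>

/* placeholder values, not the real definitions */
enum { OCF_RA_ERROR = 1, OCF_RA_NOT_CONFIGURED = 6, OCF_RA_NOT_RUNNING = 7 };
enum { RG_SUCCESS = 0, RG_EFAIL = -1, RG_EINVAL = -2 };

/* rough mirror of the switch added to the migration path above */
static int migrate_policy(int agent_rv, int *mark_failed)
{
        *mark_failed = 0;

        switch (agent_rv) {
        case 0:
                return RG_SUCCESS;      /* migration under way */
        case OCF_RA_NOT_RUNNING:
                return RG_EFAIL;        /* recoverable: VM not running,
                                           do not mark the service failed */
        case OCF_RA_NOT_CONFIGURED:
                return RG_EINVAL;       /* migration impossible as asked */
        case -1:
        case OCF_RA_ERROR:
        default:
                *mark_failed = 1;       /* svc_fail() in the real code */
                return RG_EFAIL;
        }
}

int main(void)
{
        int failed;
        int rv = migrate_policy(OCF_RA_NOT_RUNNING, &failed);

        printf("rv=%d failed=%d\n", rv, failed);   /* rv=-1 failed=0 */
        return 0;
}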
--- cluster/rgmanager/src/resources/vm.sh	2007/07/31 18:00:25	1.6
+++ cluster/rgmanager/src/resources/vm.sh	2007/08/02 14:53:38	1.7
@@ -24,6 +24,8 @@
 
 . $(dirname $0)/ocf-shellfuncs || exit 1
 
+. $(dirname $0)/ocf-shellfuncs
+
 #
 # Virtual Machine start/stop script (requires the xm command)
 #
@@ -410,8 +412,22 @@
 migrate()
 {
 	declare target=$1
+	declare errstr rv
+	
+	err=$(xm migrate $OCF_RESKEY_name $target 2>&1 | head -1)
+	rv=$?
+
+	if [ $rv -ne 0 ]; then
+		if [ "$err" != "${err/does not exist/}" ]; then
+			ocf_log warn "Trying to migrate '$OCF_RESKEY_name' - domain does not exist"
+			return $OCF_NOT_RUNNING
+		fi
+		if [ "$err" != "${err/Connection refused/}" ]; then
+			ocf_log warn "Trying to migrate '$OCF_RESKEY_name' - connect refused"
+			return $OCF_ERR_CONFIGURED
+		fi
+	fi
 
-	xm migrate $OCF_RESKEY_name $target
 	return $?
 }
 



* [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h  ...
@ 2007-08-02 14:47 lhh
From: lhh @ 2007-08-02 14:47 UTC (permalink / raw)
  To: cluster-devel.redhat.com

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL51
Changes by:	lhh at sourceware.org	2007-08-02 14:47:45

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: reslist.h 
	rgmanager/src/daemons: groups.c main.c nodeevent.c restree.c 
	                       rg_forward.c rg_state.c 
	rgmanager/src/resources: vm.sh 

Log message:
	Fix #248727, round 2

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL51&r1=1.31.2.19.2.2&r2=1.31.2.19.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL51&r1=1.15.2.4.2.1&r2=1.15.2.4.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL51&r1=1.25.2.9.2.2&r2=1.25.2.9.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=RHEL51&r1=1.34.2.6.2.1&r2=1.34.2.6.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&only_with_tag=RHEL51&r1=1.4.2.3.2.1&r2=1.4.2.3.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=RHEL51&r1=1.23.2.8.2.1&r2=1.23.2.8.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_forward.c.diff?cvsroot=cluster&only_with_tag=RHEL51&r1=1.8.2.1.2.1&r2=1.8.2.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=RHEL51&r1=1.24.2.10.2.1&r2=1.24.2.10.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&only_with_tag=RHEL51&r1=1.1.2.4.2.2&r2=1.1.2.4.2.3

--- cluster/rgmanager/ChangeLog	2007/07/31 17:56:10	1.31.2.19.2.2
+++ cluster/rgmanager/ChangeLog	2007/08/02 14:47:45	1.31.2.19.2.3
@@ -1,3 +1,16 @@
+2007-08-02 Lon Hohberger <lhh@redhat.com>
+	* general: More fixes around #248727
+	* include/reslist.h, src/daemons/restree.c: Make last-value be
+	returned for resources which have been checked recently
+	* src/daemons/groups.c: Make VMs use migrate semantics instead of
+	relocate semantics when employing failover domain rules
+	* src/daemons/nodeevent.c: Fix VMs ending up on wrong nodes when
+	simultaneous boot occurs
+	* src/daemons/rg_forward.c: Fix erroneous timeout
+	* src/daemons/rg_state.c: Handle RG_STATE_MIGRATE in svc_advise_*
+	Handle certain migration failures.
+	* src/resources/vm.sh: Handle certain migration failures
+
 2007-07-31 Lon Hohberger <lhh@redhat.com>
 	* general: Make VMs not change state when added/removed from the
 	cluster config or bounce services/VMs when minor config changes
--- cluster/rgmanager/include/reslist.h	2007/07/31 17:56:10	1.15.2.4.2.1
+++ cluster/rgmanager/include/reslist.h	2007/08/02 14:47:45	1.15.2.4.2.2
@@ -128,6 +128,10 @@
 	resource_act_t	*rn_actions;
 	int	rn_state; /* State of this instance of rn_resource */
 	int	rn_flags;
+	int	rn_last_status;
+	int 	rn_last_depth;
+	int	rn_checked;
+	int	rn_pad;
 } resource_node_t;
 
 typedef struct _fod_node {
--- cluster/rgmanager/src/daemons/groups.c	2007/07/31 17:56:10	1.25.2.9.2.2
+++ cluster/rgmanager/src/daemons/groups.c	2007/08/02 14:47:45	1.25.2.9.2.3
@@ -500,13 +500,14 @@
 consider_relocate(char *svcName, rg_state_t *svcStatus, uint32_t nodeid,
 		  cluster_member_list_t *membership)
 {
-	int a, b;
+	int a, b, req = RG_RELOCATE;
 
 	/*
 	   Service must be running locally in order to consider for
 	   a relocate
 	 */
-	if (svcStatus->rs_state != RG_STATE_STARTED ||
+	if ((svcStatus->rs_state != RG_STATE_STARTING &&
+	    svcStatus->rs_state != RG_STATE_STARTED) ||
 	    svcStatus->rs_owner != my_id())
 		return;
 
@@ -526,11 +527,16 @@
 	if (a <= b)
 		return;
 
-	clulog(LOG_DEBUG, "Relocating group %s to better node %s\n",
+	if (group_migratory(svcName, 1)) {
+		req = RG_MIGRATE;
+	}
+
+	clulog(LOG_NOTICE, "%s %s to better node %s\n",
+	       req==RG_MIGRATE ? "Migrating":"Relocating",
 	       svcName,
 	       memb_id_to_name(membership, nodeid));
 
-	rt_enqueue_request(svcName, RG_RELOCATE, NULL, 0, nodeid, 0, 0);
+	rt_enqueue_request(svcName, req, NULL, 0, nodeid, 0, 0);
 }
 
 
--- cluster/rgmanager/src/daemons/main.c	2007/07/24 18:49:18	1.34.2.6.2.1
+++ cluster/rgmanager/src/daemons/main.c	2007/08/02 14:47:45	1.34.2.6.2.2
@@ -43,7 +43,7 @@
 #ifdef WRAP_THREADS
 void dump_thread_states(FILE *);
 #endif
-int configure_logging(int ccsfd, int debug);
+int configure_rgmanager(int ccsfd, int debug);
 
 void node_event(int, int, int, int);
 void node_event_q(int, int, int, int);
@@ -730,7 +730,7 @@
 
 	if (need_reconfigure || check_config_update()) {
 		need_reconfigure = 0;
-		configure_logging(-1, 0);
+		configure_rgmanager(-1, 0);
 		init_resource_groups(1);
 		return 0;
 	}
@@ -789,7 +789,7 @@
  * Configure logging based on data in cluster.conf
  */
 int
-configure_logging(int ccsfd, int dbg)
+configure_rgmanager(int ccsfd, int dbg)
 {
 	char *v;
 	char internal = 0;
@@ -812,6 +812,12 @@
 		free(v);
 	}
 
+	if (ccs_get(ccsfd, "/cluster/rm/@transition_throttling", &v) == 0) {
+		if (!dbg)
+			set_transition_throttling(atoi(v));
+		free(v);
+	}
+
 	if (internal)
 		ccs_disconnect(ccsfd);
 
@@ -956,7 +962,7 @@
 	   We know we're quorate.  At this point, we need to
 	   read the resource group trees from ccsd.
 	 */
-	configure_logging(-1, debug);
+	configure_rgmanager(-1, debug);
 	clulog(LOG_NOTICE, "Resource Group Manager Starting\n");
 
 	if (init_resource_groups(0) != 0) {
--- cluster/rgmanager/src/daemons/nodeevent.c	2007/07/24 18:49:18	1.4.2.3.2.1
+++ cluster/rgmanager/src/daemons/nodeevent.c	2007/08/02 14:47:45	1.4.2.3.2.2
@@ -42,6 +42,7 @@
 #endif
 static nevent_t *event_queue = NULL;
 static pthread_t ne_thread = 0;
+static int transition_throttling = 5;
 int ne_queue_request(int local, int nodeid, int state);
 
 void hard_exit(void);
@@ -53,6 +54,15 @@
 extern int shutdown_pending;
 
 
+void
+set_transition_throttling(int nsecs)
+{
+	if (nsecs < 0)
+		nsecs = 0;
+	transition_throttling = nsecs;
+}
+
+
 /**
   Called to handle the transition of a cluster member from up->down or
   down->up.  This handles initializing services (in the local node-up case),
@@ -88,11 +98,16 @@
 		if (shutdown_pending) {
 			clulog(LOG_NOTICE, "Processing delayed exit signal\n");
 			running = 0;
+			return;
 		}
 		setup_signal(SIGINT, flag_shutdown);
 		setup_signal(SIGTERM, flag_shutdown);
 		setup_signal(SIGHUP, flag_reconfigure);
 
+		/* Let things settle if we're booting multiple */
+		if (transition_throttling)
+			sleep(transition_throttling);
+
 		eval_groups(1, nodeID, 1);
 		return;
 	}
--- cluster/rgmanager/src/daemons/restree.c	2007/07/31 17:56:10	1.23.2.8.2.1
+++ cluster/rgmanager/src/daemons/restree.c	2007/08/02 14:47:45	1.23.2.8.2.2
@@ -665,8 +665,10 @@
 			}
 		}
 		/* No resource rule matching the child?  Press on... */
-		if (!flags)
+		if (!flags) {
+			free(ref);
 			continue;
+		}
 
 		flags = 0;
 		/* Don't descend on anything we should have already picked
@@ -686,11 +688,9 @@
 			break;
 		}
 
-		if (flags == 2) {
-			free(ref);
-			continue;
-		}
 		free(ref);
+		if (flags == 2)
+			continue;
 
 		x = 1;
 		switch(do_load_resource(ccsfd, tok, childrule, tree,
@@ -1035,12 +1035,21 @@
 	}
 
 	/* No check levels ready at the moment. */
-	if (idx == -1)
+	if (idx == -1) {
+		if (node->rn_checked)
+			return node->rn_last_status;
 		return 0;
+	}
 
-       node->rn_actions[idx].ra_last = now;
-	if ((x = res_exec(node, RS_STATUS, NULL,
-                         node->rn_actions[idx].ra_depth)) == 0)
+
+	node->rn_actions[idx].ra_last = now;
+	x = res_exec(node, RS_STATUS, NULL, node->rn_actions[idx].ra_depth);
+
+	node->rn_last_status = x;
+	node->rn_last_depth = node->rn_actions[idx].ra_depth;
+	node->rn_checked = 1;
+
+	if (x == 0)
 		return 0;
 
 	if (!has_recover)
@@ -1101,14 +1110,18 @@
 
 	now = res->r_started;
 
-       for (; node->rn_actions[x].ra_name; x++) {
+	for (; node->rn_actions[x].ra_name; x++) {
 
-               if (strcmp(node->rn_actions[x].ra_name, "monitor") &&
-                   strcmp(node->rn_actions[x].ra_name, "status"))
+		if (strcmp(node->rn_actions[x].ra_name, "monitor") &&
+		    strcmp(node->rn_actions[x].ra_name, "status"))
 			continue;
 
-               node->rn_actions[x].ra_last = now;
+		node->rn_actions[x].ra_last = now;
 	}
+
+	node->rn_checked = 0;
+	node->rn_last_status = 0;
+	node->rn_last_depth = 0;
 }
 
 
--- cluster/rgmanager/src/daemons/rg_forward.c	2007/07/24 18:49:18	1.8.2.1.2.1
+++ cluster/rgmanager/src/daemons/rg_forward.c	2007/08/02 14:47:45	1.8.2.1.2.2
@@ -122,10 +122,12 @@
 				m = NULL;
 				continue;
 			}
-			goto out_fail;
+
+			if (ret == 0)
+				continue;
 		}
 		break;
-	} while(++retries < 60); /* old 60 second rule */
+	} while(++retries < 60); /* old 600 second rule */
 
 	swab_SmMessageSt(&msg);
 
--- cluster/rgmanager/src/daemons/rg_state.c	2007/07/24 18:49:18	1.24.2.10.2.1
+++ cluster/rgmanager/src/daemons/rg_state.c	2007/08/02 14:47:45	1.24.2.10.2.2
@@ -35,6 +35,7 @@
 #include <ccs.h>
 #include <rg_queue.h>
 #include <msgsimple.h>
+#include <res-ocf.h>
 
 /* XXX - copied :( */
 #define cn_svccount cn_address.cna_address[0] /* These are uint8_t size */
@@ -467,6 +468,7 @@
 	case RG_STATE_CHECK:
 	case RG_STATE_STARTING:
 	case RG_STATE_RECOVER:
+	case RG_STATE_MIGRATE:
 		if ((svcStatus->rs_owner != my_id()) &&
 		    memb_online(membership, svcStatus->rs_owner)) {
 			/*
@@ -583,6 +585,10 @@
 		       "#43: Service %s has failed; can not start.\n",
 		       svcName);
 		break;
+
+	case RG_STATE_MIGRATE:
+		ret = 4;
+		break;
 		
 	case RG_STATE_STOPPING:
 	case RG_STATE_STARTED:
@@ -892,16 +898,60 @@
        
 	ret = group_migrate(svcName, target);
 
-	if (ret == -1 || ret > 0) {
+	switch(ret) {
+	default:
+	case -1:
+	case OCF_RA_ERROR:
+		svc_fail(svcName);
 		/* XXX run svc_status again here to see if it's still
 		   healthy; if it is, don't FAIL it; it could be that
 		   the target node simply died; in this case, set status
 		   back to started */
-		/* if ret > 0 { svc_status... */
-		svc_fail(svcName);
+		return RG_EFAIL;
+		break;
+	case OCF_RA_NOT_RUNNING:
+		/* For these two, the VM was either not running or 
+		   migration is simply impossible. */
+		/* Don't mark the service as failed; since it's either
+		   recoverable or still running. */
 		ret = RG_EFAIL;
+		break;
+	case OCF_RA_NOT_CONFIGURED:
+		ret = RG_EINVAL;
+		break;
+	case 0:
+		return 0;
 	}
 
+	/* Ok, we've hit a recoverable condition.  Since VMs and migratory
+	   services are ... well, migratable, we can just flip the state
+	   back to 'started' and error checking will fix it later. */
+	if (rg_lock(svcName, &lockp) < 0) {
+		clulog(LOG_ERR, "#45: Unable to obtain cluster lock: %s\n",
+		       strerror(errno));
+		return ret;
+	}
+
+	if (get_rg_state(svcName, &svcStatus) != 0) {
+		rg_unlock(&lockp);
+		clulog(LOG_ERR, "#46: Failed getting status for RG %s\n",
+		       svcName);
+		return ret;
+	}
+
+	if (svcStatus.rs_last_owner != my_id() ||
+	    svcStatus.rs_owner != target ||
+	    svcStatus.rs_state != RG_STATE_MIGRATE) {
+		rg_unlock(&lockp);
+		return ret;
+	}
+
+	svcStatus.rs_owner = my_id();
+	svcStatus.rs_state = RG_STATE_STARTED;
+
+	set_rg_state(svcName, &svcStatus);
+	rg_unlock(&lockp);
+
 	return ret;
 }
 
@@ -954,7 +1004,8 @@
 		}
 
 		msg_send(&ctx, &msgp, sizeof(msgp));
-		msg_receive(&ctx, &response, sizeof (response), 5);
+		if (msg_receive(&ctx, &response, sizeof (response), 5) != sizeof(response))
+			goto cont;
 
 		swab_SmMessageSt(&response);
 		if (response.sm_data.d_ret == RG_SUCCESS)
@@ -962,6 +1013,7 @@
 		else
 			ret = -1;
 
+cont:
 		msg_close(&ctx);
 	}
 
@@ -1937,7 +1989,7 @@
 	allowed_nodes = member_list();
 
 	while (memb_count(allowed_nodes)) {
-		target = best_target_node(allowed_nodes, -1,
+		target = best_target_node(allowed_nodes, 0,
 		    			  svcName, 1);
 		if (target == me) {
 		      	ret = handle_start_remote_req(svcName, request);
@@ -1947,7 +1999,7 @@
 			ret = RG_EFAIL;
 			goto out;
 		} else {
-			ret = relocate_service(svcName, request, target);
+			ret = relocate_service(svcName, RG_START_REMOTE, target);
 		}
 
 		switch(ret) {
--- cluster/rgmanager/src/resources/vm.sh	2007/07/31 17:56:10	1.1.2.4.2.2
+++ cluster/rgmanager/src/resources/vm.sh	2007/08/02 14:47:45	1.1.2.4.2.3
@@ -22,6 +22,8 @@
 
 export PATH
 
+. $(dirname $0)/ocf-shellfuncs
+
 #
 # Virtual Machine start/stop script (requires the xm command)
 #
@@ -216,7 +218,7 @@
 	# controlled externally; the external monitoring app
 	# should.
 	#
-	declare cmdline="on_shutdown=\"destroy\" on_reboot=\"destroy\" on_crash=\"destroy\""
+	declare cmdline="restart=\"never\""
 	declare varp val temp
 
 	#
@@ -375,8 +377,22 @@
 migrate()
 {
 	declare target=$1
+	declare errstr rv
+	
+	err=$(xm migrate $OCF_RESKEY_name $target 2>&1 | head -1)
+	rv=$?
+
+	if [ $rv -ne 0 ]; then
+		if [ "$err" != "${err/does not exist/}" ]; then
+			ocf_log warn "Trying to migrate '$OCF_RESKEY_name' - domain does not exist"
+			return $OCF_NOT_RUNNING
+		fi
+		if [ "$err" != "${err/Connection refused/}" ]; then
+			ocf_log warn "Trying to migrate '$OCF_RESKEY_name' - connect refused"
+			return $OCF_ERR_CONFIGURED
+		fi
+	fi
 
-	xm migrate $OCF_RESKEY_name $target
 	return $?
 }
 



* [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h  ...
@ 2007-08-02 14:46 lhh
From: lhh @ 2007-08-02 14:46 UTC (permalink / raw)
  To: cluster-devel.redhat.com

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL5
Changes by:	lhh at sourceware.org	2007-08-02 14:46:52

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: reslist.h 
	rgmanager/src/daemons: groups.c main.c nodeevent.c restree.c 
	                       rg_forward.c rg_state.c 
	rgmanager/src/resources: vm.sh 

Log message:
	Fix #248727, round 2

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.31.2.23&r2=1.31.2.24
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.5&r2=1.15.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.11&r2=1.25.2.12
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.34.2.7&r2=1.34.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.2.4&r2=1.4.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.23.2.9&r2=1.23.2.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_forward.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8.2.2&r2=1.8.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.24.2.11&r2=1.24.2.12
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.6&r2=1.1.2.7

--- cluster/rgmanager/ChangeLog	2007/07/31 17:54:54	1.31.2.23
+++ cluster/rgmanager/ChangeLog	2007/08/02 14:46:51	1.31.2.24
@@ -1,3 +1,16 @@
+2007-08-02 Lon Hohberger <lhh@redhat.com>
+	* general: More fixes around #248727
+	* include/reslist.h, src/daemons/restree.c: Make last-value be
+	returned for resources which have been checked recently
+	* src/daemons/groups.c: Make VMs use migrate semantics instead of
+	relocate semantics when employing failover domain rules
+	* src/daemons/nodeevent.c: Fix VMs ending up on wrong nodes when
+	simultaneous boot occurs
+	* src/daemons/rg_forward.c: Fix erroneous timeout
+	* src/daemons/rg_state.c: Handle RG_STATE_MIGRATE in svc_advise_*
+	Handle certain migration failures.
+	* src/resources/vm.sh: Handle certain migration failures
+
 2007-07-31 Lon Hohberger <lhh@redhat.com>
 	* general: Make VMs not change state when added/removed from the
 	cluster config or bounce services/VMs when minor config changes
--- cluster/rgmanager/include/reslist.h	2007/07/31 17:54:54	1.15.2.5
+++ cluster/rgmanager/include/reslist.h	2007/08/02 14:46:51	1.15.2.6
@@ -128,6 +128,10 @@
 	resource_act_t	*rn_actions;
 	int	rn_state; /* State of this instance of rn_resource */
 	int	rn_flags;
+	int	rn_last_status;
+	int 	rn_last_depth;
+	int	rn_checked;
+	int	rn_pad;
 } resource_node_t;
 
 typedef struct _fod_node {
--- cluster/rgmanager/src/daemons/groups.c	2007/07/31 17:54:54	1.25.2.11
+++ cluster/rgmanager/src/daemons/groups.c	2007/08/02 14:46:51	1.25.2.12
@@ -500,13 +500,14 @@
 consider_relocate(char *svcName, rg_state_t *svcStatus, uint32_t nodeid,
 		  cluster_member_list_t *membership)
 {
-	int a, b;
+	int a, b, req = RG_RELOCATE;
 
 	/*
 	   Service must be running locally in order to consider for
 	   a relocate
 	 */
-	if (svcStatus->rs_state != RG_STATE_STARTED ||
+	if ((svcStatus->rs_state != RG_STATE_STARTING &&
+	    svcStatus->rs_state != RG_STATE_STARTED) ||
 	    svcStatus->rs_owner != my_id())
 		return;
 
@@ -526,11 +527,16 @@
 	if (a <= b)
 		return;
 
-	clulog(LOG_DEBUG, "Relocating group %s to better node %s\n",
+	if (group_migratory(svcName, 1)) {
+		req = RG_MIGRATE;
+	}
+
+	clulog(LOG_NOTICE, "%s %s to better node %s\n",
+	       req==RG_MIGRATE ? "Migrating":"Relocating",
 	       svcName,
 	       memb_id_to_name(membership, nodeid));
 
-	rt_enqueue_request(svcName, RG_RELOCATE, NULL, 0, nodeid, 0, 0);
+	rt_enqueue_request(svcName, req, NULL, 0, nodeid, 0, 0);
 }
 
 
--- cluster/rgmanager/src/daemons/main.c	2007/07/24 13:53:08	1.34.2.7
+++ cluster/rgmanager/src/daemons/main.c	2007/08/02 14:46:51	1.34.2.8
@@ -43,7 +43,7 @@
 #ifdef WRAP_THREADS
 void dump_thread_states(FILE *);
 #endif
-int configure_logging(int ccsfd, int debug);
+int configure_rgmanager(int ccsfd, int debug);
 
 void node_event(int, int, int, int);
 void node_event_q(int, int, int, int);
@@ -730,7 +730,7 @@
 
 	if (need_reconfigure || check_config_update()) {
 		need_reconfigure = 0;
-		configure_logging(-1, 0);
+		configure_rgmanager(-1, 0);
 		init_resource_groups(1);
 		return 0;
 	}
@@ -789,7 +789,7 @@
  * Configure logging based on data in cluster.conf
  */
 int
-configure_logging(int ccsfd, int dbg)
+configure_rgmanager(int ccsfd, int dbg)
 {
 	char *v;
 	char internal = 0;
@@ -812,6 +812,12 @@
 		free(v);
 	}
 
+	if (ccs_get(ccsfd, "/cluster/rm/@transition_throttling", &v) == 0) {
+		if (!dbg)
+			set_transition_throttling(atoi(v));
+		free(v);
+	}
+
 	if (internal)
 		ccs_disconnect(ccsfd);
 
@@ -956,7 +962,7 @@
 	   We know we're quorate.  At this point, we need to
 	   read the resource group trees from ccsd.
 	 */
-	configure_logging(-1, debug);
+	configure_rgmanager(-1, debug);
 	clulog(LOG_NOTICE, "Resource Group Manager Starting\n");
 
 	if (init_resource_groups(0) != 0) {
--- cluster/rgmanager/src/daemons/nodeevent.c	2007/07/24 13:53:08	1.4.2.4
+++ cluster/rgmanager/src/daemons/nodeevent.c	2007/08/02 14:46:51	1.4.2.5
@@ -42,6 +42,7 @@
 #endif
 static nevent_t *event_queue = NULL;
 static pthread_t ne_thread = 0;
+static int transition_throttling = 5;
 int ne_queue_request(int local, int nodeid, int state);
 
 void hard_exit(void);
@@ -53,6 +54,15 @@
 extern int shutdown_pending;
 
 
+void
+set_transition_throttling(int nsecs)
+{
+	if (nsecs < 0)
+		nsecs = 0;
+	transition_throttling = nsecs;
+}
+
+
 /**
   Called to handle the transition of a cluster member from up->down or
   down->up.  This handles initializing services (in the local node-up case),
@@ -88,11 +98,16 @@
 		if (shutdown_pending) {
 			clulog(LOG_NOTICE, "Processing delayed exit signal\n");
 			running = 0;
+			return;
 		}
 		setup_signal(SIGINT, flag_shutdown);
 		setup_signal(SIGTERM, flag_shutdown);
 		setup_signal(SIGHUP, flag_reconfigure);
 
+		/* Let things settle if we're booting multiple */
+		if (transition_throttling)
+			sleep(transition_throttling);
+
 		eval_groups(1, nodeID, 1);
 		return;
 	}
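Taken together, the main.c and nodeevent.c changes above expose the settle
delay as a cluster.conf attribute read from the XPath
/cluster/rm/@transition_throttling (default 5 seconds; negative values are
clamped to 0 by set_transition_throttling). A fragment along these lines
would raise it to 10 seconds (the surrounding elements are illustrative,
not taken from this patch):

    <cluster name="example" config_version="1">
      <rm transition_throttling="10">
        <!-- failover domains, resources, services ... -->
      </rm>
    </cluster>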
--- cluster/rgmanager/src/daemons/restree.c	2007/07/31 17:54:54	1.23.2.9
+++ cluster/rgmanager/src/daemons/restree.c	2007/08/02 14:46:51	1.23.2.10
@@ -665,8 +665,10 @@
 			}
 		}
 		/* No resource rule matching the child?  Press on... */
-		if (!flags)
+		if (!flags) {
+			free(ref);
 			continue;
+		}
 
 		flags = 0;
 		/* Don't descend on anything we should have already picked
@@ -686,11 +688,9 @@
 			break;
 		}
 
-		if (flags == 2) {
-			free(ref);
-			continue;
-		}
 		free(ref);
+		if (flags == 2)
+			continue;
 
 		x = 1;
 		switch(do_load_resource(ccsfd, tok, childrule, tree,
@@ -1035,12 +1035,21 @@
 	}
 
 	/* No check levels ready at the moment. */
-	if (idx == -1)
+	if (idx == -1) {
+		if (node->rn_checked)
+			return node->rn_last_status;
 		return 0;
+	}
 
-       node->rn_actions[idx].ra_last = now;
-	if ((x = res_exec(node, RS_STATUS, NULL,
-                         node->rn_actions[idx].ra_depth)) == 0)
+
+	node->rn_actions[idx].ra_last = now;
+	x = res_exec(node, RS_STATUS, NULL, node->rn_actions[idx].ra_depth);
+
+	node->rn_last_status = x;
+	node->rn_last_depth = node->rn_actions[idx].ra_depth;
+	node->rn_checked = 1;
+
+	if (x == 0)
 		return 0;
 
 	if (!has_recover)
@@ -1101,14 +1110,18 @@
 
 	now = res->r_started;
 
-       for (; node->rn_actions[x].ra_name; x++) {
+	for (; node->rn_actions[x].ra_name; x++) {
 
-               if (strcmp(node->rn_actions[x].ra_name, "monitor") &&
-                   strcmp(node->rn_actions[x].ra_name, "status"))
+		if (strcmp(node->rn_actions[x].ra_name, "monitor") &&
+		    strcmp(node->rn_actions[x].ra_name, "status"))
 			continue;
 
-               node->rn_actions[x].ra_last = now;
+		node->rn_actions[x].ra_last = now;
 	}
+
+	node->rn_checked = 0;
+	node->rn_last_status = 0;
+	node->rn_last_depth = 0;
 }
 
 
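The rn_last_status/rn_checked fields added above let a status request that
arrives before any check level is due answer with the result of the last
real probe instead of unconditionally reporting success. A self-contained
reduction of the idea (names simplified; probe() stands in for res_exec):

    #include <time.h>

    struct node {
            time_t last_run;     /* when the last real probe ran */
            time_t interval;     /* how often a probe is due */
            int    checked;      /* has a real probe ever run? */
            int    last_status;  /* result of that probe */
    };

    /* Stand-in for the agent execution (res_exec). */
    static int probe(struct node *n) { (void)n; return 0; }

    int cached_status(struct node *n)
    {
            time_t now = time(NULL);

            /* Not due yet: reuse the cached result. */
            if (n->checked && (now - n->last_run) < n->interval)
                    return n->last_status;

            n->last_run = now;
            n->last_status = probe(n);
            n->checked = 1;
            return n->last_status;
    }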
--- cluster/rgmanager/src/daemons/rg_forward.c	2007/07/24 13:53:08	1.8.2.2
+++ cluster/rgmanager/src/daemons/rg_forward.c	2007/08/02 14:46:51	1.8.2.3
@@ -122,10 +122,12 @@
 				m = NULL;
 				continue;
 			}
-			goto out_fail;
+
+			if (ret == 0)
+				continue;
 		}
 		break;
-	} while(++retries < 60); /* old 60 second rule */
+	} while(++retries < 60); /* old 600 second rule */
 
 	swab_SmMessageSt(&msg);
 
--- cluster/rgmanager/src/daemons/rg_state.c	2007/07/24 13:53:08	1.24.2.11
+++ cluster/rgmanager/src/daemons/rg_state.c	2007/08/02 14:46:51	1.24.2.12
@@ -35,6 +35,7 @@
 #include <ccs.h>
 #include <rg_queue.h>
 #include <msgsimple.h>
+#include <res-ocf.h>
 
 /* XXX - copied :( */
 #define cn_svccount cn_address.cna_address[0] /* These are uint8_t size */
@@ -467,6 +468,7 @@
 	case RG_STATE_CHECK:
 	case RG_STATE_STARTING:
 	case RG_STATE_RECOVER:
+	case RG_STATE_MIGRATE:
 		if ((svcStatus->rs_owner != my_id()) &&
 		    memb_online(membership, svcStatus->rs_owner)) {
 			/*
@@ -583,6 +585,10 @@
 		       "#43: Service %s has failed; can not start.\n",
 		       svcName);
 		break;
+
+	case RG_STATE_MIGRATE:
+		ret = 4;
+		break;
 		
 	case RG_STATE_STOPPING:
 	case RG_STATE_STARTED:
@@ -892,16 +898,60 @@
        
 	ret = group_migrate(svcName, target);
 
-	if (ret == -1 || ret > 0) {
+	switch(ret) {
+	default:
+	case -1:
+	case OCF_RA_ERROR:
+		svc_fail(svcName);
 		/* XXX run svc_status again here to see if it's still
 		   healthy; if it is, don't FAIL it; it could be that
 		   the target node simply died; in this case, set status
 		   back to started */
-		/* if ret > 0 { svc_status... */
-		svc_fail(svcName);
+		return RG_EFAIL;
+		break;
+	case OCF_RA_NOT_RUNNING:
+		/* For these two, the VM was either not running or 
+		   migration is simply impossible. */
+		/* Don't mark the service as failed; since it's either
+		   recoverable or still running. */
 		ret = RG_EFAIL;
+		break;
+	case OCF_RA_NOT_CONFIGURED:
+		ret = RG_EINVAL;
+		break;
+	case 0:
+		return 0;
 	}
 
+	/* Ok, we've hit a recoverable condition.  Since VMs and migratory
+	   services are ... well, migratable, we can just flip the state
+	   back to 'started' and error checking will fix it later. */
+	if (rg_lock(svcName, &lockp) < 0) {
+		clulog(LOG_ERR, "#45: Unable to obtain cluster lock: %s\n",
+		       strerror(errno));
+		return ret;
+	}
+
+	if (get_rg_state(svcName, &svcStatus) != 0) {
+		rg_unlock(&lockp);
+		clulog(LOG_ERR, "#46: Failed getting status for RG %s\n",
+		       svcName);
+		return ret;
+	}
+
+	if (svcStatus.rs_last_owner != my_id() ||
+	    svcStatus.rs_owner != target ||
+	    svcStatus.rs_state != RG_STATE_MIGRATE) {
+		rg_unlock(&lockp);
+		return ret;
+	}
+
+	svcStatus.rs_owner = my_id();
+	svcStatus.rs_state = RG_STATE_STARTED;
+
+	set_rg_state(svcName, &svcStatus);
+	rg_unlock(&lockp);
+
 	return ret;
 }
 
@@ -954,7 +1004,8 @@
 		}
 
 		msg_send(&ctx, &msgp, sizeof(msgp));
-		msg_receive(&ctx, &response, sizeof (response), 5);
+		if (msg_receive(&ctx, &response, sizeof (response), 5) != sizeof(response))
+			goto cont;
 
 		swab_SmMessageSt(&response);
 		if (response.sm_data.d_ret == RG_SUCCESS)
@@ -962,6 +1013,7 @@
 		else
 			ret = -1;
 
+cont:
 		msg_close(&ctx);
 	}
 
@@ -1937,7 +1989,7 @@
 	allowed_nodes = member_list();
 
 	while (memb_count(allowed_nodes)) {
-		target = best_target_node(allowed_nodes, -1,
+		target = best_target_node(allowed_nodes, 0,
 		    			  svcName, 1);
 		if (target == me) {
 		      	ret = handle_start_remote_req(svcName, request);
@@ -1947,7 +1999,7 @@
 			ret = RG_EFAIL;
 			goto out;
 		} else {
-			ret = relocate_service(svcName, request, target);
+			ret = relocate_service(svcName, RG_START_REMOTE, target);
 		}
 
 		switch(ret) {
--- cluster/rgmanager/src/resources/vm.sh	2007/07/31 17:54:55	1.1.2.6
+++ cluster/rgmanager/src/resources/vm.sh	2007/08/02 14:46:52	1.1.2.7
@@ -22,6 +22,8 @@
 
 export PATH
 
+. $(dirname $0)/ocf-shellfuncs
+
 #
 # Virtual Machine start/stop script (requires the xm command)
 #
@@ -375,8 +377,22 @@
 migrate()
 {
 	declare target=$1
+	declare err rv
+	
+	err=$(xm migrate $OCF_RESKEY_name $target 2>&1 | head -1)
+	rv=$?
+
+	if [ $rv -ne 0 ]; then
+		if [ "$err" != "${err/does not exist/}" ]; then
+			ocf_log warn "Trying to migrate '$OCF_RESKEY_name' - domain does not exist"
+			return $OCF_NOT_RUNNING
+		fi
+		if [ "$err" != "${err/Connection refused/}" ]; then
+			ocf_log warn "Trying to migrate '$OCF_RESKEY_name' - connection refused"
+			return $OCF_ERR_CONFIGURED
+		fi
+	fi
 
-	xm migrate $OCF_RESKEY_name $target
 	return $rv
 }
 



* [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h  ...
@ 2007-05-31 18:58 lhh
  0 siblings, 0 replies; 7+ messages in thread
From: lhh @ 2007-05-31 18:58 UTC (permalink / raw)
  To: cluster-devel.redhat.com

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL5
Changes by:	lhh at sourceware.org	2007-05-31 18:58:46

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: reslist.h 
	rgmanager/src/daemons: groups.c resrules.c restree.c 
	rgmanager/src/resources: script.sh 

Log message:
	Fix bugzilla #229650; implement __independent_subtree feature

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.31.2.9&r2=1.31.2.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.2&r2=1.15.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.5&r2=1.25.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.16.2.4&r2=1.16.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.23.2.3&r2=1.23.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8&r2=1.8.2.1

--- cluster/rgmanager/ChangeLog	2007/05/31 18:38:44	1.31.2.9
+++ cluster/rgmanager/ChangeLog	2007/05/31 18:58:46	1.31.2.10
@@ -1,6 +1,8 @@
 2007-05-31 Lon Hohberger <lhh@redhat.com>
 	* src/daemons/resrules.c: Fix #234249 - ignore obvious backup files
 	in /usr/share/cluster when processing resource rules
+	* src/daemons/restree.c, src/daemons/groups.c, include/reslist.h: 
+	Implement independent subtrees, per bug #229650
 
 2007-05-22 Lon Hohberger <lhh@redhat.com>
 	* src/resources/SAPInstance, SAPDatabase: Add primary attrs
--- cluster/rgmanager/include/reslist.h	2007/03/23 00:06:34	1.15.2.2
+++ cluster/rgmanager/include/reslist.h	2007/05/31 18:58:46	1.15.2.3
@@ -35,6 +35,8 @@
 #define RF_NEEDSTART	(1<<2)	/** Used when adding/changing resources */
 #define RF_NEEDSTOP	(1<<3)  /** Used when deleting/changing resources */
 #define RF_COMMON	(1<<4)	/** " */
+#define RF_INDEPENDENT	(1<<5)  /** Define this for a resource if it is
+				  otherwise an independent subtree */
 
 #define RES_STOPPED	(0)
 #define RES_STARTED	(1)
@@ -56,10 +58,10 @@
 
 
 typedef struct _resource_attribute {
-	int	ra_flags;
-	/* XXX possible alignment problem on ia64 */
 	char	*ra_name;
 	char	*ra_value;
+	int	ra_flags;
+	int	_pad_;
 } resource_attr_t;
 
 
@@ -78,6 +80,7 @@
 	time_t	ra_last;
 	time_t	ra_interval;
 	int	ra_depth;
+	int 	_pad_;
 } resource_act_t;
 
 
--- cluster/rgmanager/src/daemons/groups.c	2007/05/10 16:23:43	1.25.2.5
+++ cluster/rgmanager/src/daemons/groups.c	2007/05/31 18:58:46	1.25.2.6
@@ -813,6 +813,7 @@
 	}
 	pthread_rwlock_unlock(&resource_lock);
 
+#if 0
 	/*
 	   Do NOT return error codes if we failed to stop for one of these
 	   reasons.  It didn't start, either, so it's safe to assume that
@@ -830,6 +831,7 @@
 			break;
 		}
 	}
+#endif
 
 	return ret;
 }
--- cluster/rgmanager/src/daemons/resrules.c	2007/05/31 18:37:50	1.16.2.4
+++ cluster/rgmanager/src/daemons/resrules.c	2007/05/31 18:58:46	1.16.2.5
@@ -262,6 +262,7 @@
 		acts[0].ra_depth = depth;
 		acts[0].ra_timeout = timeout;
 		acts[0].ra_interval = interval;
+		acts[0].ra_last = 0;
 		acts[1].ra_name = NULL;
 
 		*actsp = acts;
@@ -271,7 +272,7 @@
 	for (x = 0; acts[x].ra_name; x++) {
 		if (!strcmp(acts[x].ra_name, name) &&
 		    (depth == acts[x].ra_depth || depth == -1)) {
-			printf("Replacing action '%s' depth %d: ",
+			fprintf(stderr, "Replacing action '%s' depth %d: ",
 			       name, acts[x].ra_depth);
 			if (timeout >= 0) {
 				printf("timeout: %d->%d ",
@@ -306,6 +307,7 @@
 	acts[x].ra_depth = depth;
 	acts[x].ra_timeout = timeout;
 	acts[x].ra_interval = interval;
+	acts[x].ra_last = 0;
 
 	acts[x+1].ra_name = NULL;
 
--- cluster/rgmanager/src/daemons/restree.c	2007/05/03 15:14:16	1.23.2.3
+++ cluster/rgmanager/src/daemons/restree.c	2007/05/31 18:58:46	1.23.2.4
@@ -39,6 +39,9 @@
 void malloc_zap_mutex(void);
 #endif
 
+#define FL_FAILURE	0x1
+#define FL_RECOVERABLE	0x2
+
 
 /* XXX from resrules.c */
 int store_childtype(resource_child_t **childp, char *name, int start,
@@ -507,6 +510,19 @@
 	node->rn_resource = curres;
 	node->rn_state = RES_STOPPED;
 	node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
+
+	snprintf(tok, sizeof(tok), "%s/@__independent_subtree", base);
+#ifndef NO_CCS
+	if (ccs_get(ccsfd, tok, &ref) == 0) {
+#else
+	if (conf_get(tok, &ref) == 0) {
+#endif
+		if (atoi(ref) > 0 || strcasecmp(ref, "yes") == 0)
+			node->rn_flags |= RF_INDEPENDENT;
+		free(ref);
+	}
+
+
 	curres->r_refs++;
 
 	*newnode = node;
@@ -718,7 +734,6 @@
 		    resource_rule_t **rulelist,
 		    resource_t **reslist)
 {
-	resource_rule_t *curr;
 	resource_node_t *root = NULL;
 	char tok[512];
 
@@ -777,6 +792,8 @@
 				printf("NEEDSTART ");
 			if (node->rn_flags & RF_COMMON)
 				printf("COMMON ");
+			if (node->rn_flags & RF_INDEPENDENT)
+				printf("INDEPENDENT ");
 			printf("]");
 		}
 		printf(" {\n");
@@ -838,10 +855,11 @@
 #endif
 
 			/* Do op on all children at our level */
-			rv += _res_op(&node->rn_child, first,
+			rv |= _res_op(&node->rn_child, first,
 			     	     rule->rr_childtypes[x].rc_name, 
 		     		     ret, op);
-			if (rv != 0 && op != RS_STOP)
+
+			if (rv & FL_FAILURE && op != RS_STOP)
 				return rv;
 		}
 
@@ -853,46 +871,6 @@
 }
 
 
-#if 0
-static inline int
-_do_child_default_level(resource_node_t **tree, resource_t *first,
-			void *ret, int op)
-{
-	resource_node_t *node = *tree;
-	resource_t *res = node->rn_resource;
-	resource_rule_t *rule = res->r_rule;
-	int x, rv = 0, lev;
-
-	for (x = 0; rule->rr_childtypes &&
-	     rule->rr_childtypes[x].rc_name; x++) {
-
-		if(op == RS_STOP)
-			lev = rule->rr_childtypes[x].rc_stoplevel;
-		else
-			lev = rule->rr_childtypes[x].rc_startlevel;
-
-		if (lev)
-			continue;
-
-		/*
-		printf("%s children of %s type %s (default level)\n",
-		       agent_op_str(op),
-		       node->rn_resource->r_rule->rr_type,
-		       rule->rr_childtypes[x].rc_name);
-		 */
-
-		rv = _res_op(&node->rn_child, first,
-			     rule->rr_childtypes[x].rc_name, 
-			     ret, op);
-		if (rv != 0)
-			return rv;
-	}
-
-	return 0;
-}
-#endif
-
-
 static inline int
 _xx_child_internal(resource_node_t *node, resource_t *first,
 		   resource_node_t *child, void *ret, int op)
@@ -926,13 +904,14 @@
 
 	if (op == RS_START || op == RS_STATUS) {
 		list_for(&node->rn_child, child, y) {
-			rv = _xx_child_internal(node, first, child, ret, op);
-			if (rv)
+			rv |= _xx_child_internal(node, first, child, ret, op);
+
+			if (rv & FL_FAILURE)
 				return rv;
 		}
 	} else {
 		list_for_rev(&node->rn_child, child, y) {
-			rv += _xx_child_internal(node, first, child, ret, op);
+			rv |= _xx_child_internal(node, first, child, ret, op);
 		}
 	}
 
@@ -973,7 +952,7 @@
 
 	if (op == RS_START || op == RS_STATUS) {
 		rv =  _do_child_levels(tree, first, ret, op);
-	       	if (rv != 0)
+	       	if (rv & FL_FAILURE)
 			return rv;
 
 		/* Start default level after specified ones */
@@ -992,6 +971,22 @@
 }
 
 
+void
+mark_nodes(resource_node_t *node, int state, int flags)
+{
+	int x;
+	resource_node_t *child;
+
+	/* Mark the whole subtree, then this node itself. */
+	list_for(&node->rn_child, child, x) {
+		mark_nodes(child, state, flags);
+	}
+
+	node->rn_state = state;
+	node->rn_flags |= flags;
+}
+
+
 /**
    Do a status on a resource node.  This takes into account the last time the
    status operation was run and selects the highest possible resource depth
@@ -1123,130 +1118,6 @@
 			in the subtree).
    @see			_res_op_by_level res_exec
  */
-#if 0
-int
-_res_op(resource_node_t **tree, resource_t *first,
-	char *type, void * __attribute__((unused))ret, int realop)
-{
-	int rv, me;
-	resource_node_t *node;
-	int op;
-
-	list_do(tree, node) {
-
-		/* Restore default operation. */
-		op = realop;
-
-		/* If we're starting by type, do that funky thing. */
-		if (type && strlen(type) &&
-		    strcmp(node->rn_resource->r_rule->rr_type, type))
-			continue;
-
-		/* If the resource is found, all nodes in the subtree must
-		   have the operation performed as well. */
-		me = !first || (node->rn_resource == first);
-
-		/*
-		printf("begin %s: %s %s [0x%x]\n", agent_op_str(op),
-		       node->rn_resource->r_rule->rr_type,
-		       primary_attr_value(node->rn_resource),
-		       node->rn_flags);
-		 */
-
-		if (me) {
-			/*
-			   If we've been marked as a node which
-			   needs to be started or stopped, clear
-			   that flag and start/stop this resource
-			   and all resource babies.
-
-			   Otherwise, don't do anything; look for
-			   children with RF_NEEDSTART and
-			   RF_NEEDSTOP flags.
-
-			   CONDSTART and CONDSTOP are no-ops if
-			   the appropriate flag is not set.
-			 */
-		       	if ((op == RS_CONDSTART) &&
-			    (node->rn_flags & RF_NEEDSTART)) {
-				/*
-				printf("Node %s:%s - CONDSTART\n",
-				       node->rn_resource->r_rule->rr_type,
-				       primary_attr_value(node->rn_resource));
-				 */
-				op = RS_START;
-			}
-
-			if ((op == RS_CONDSTOP) &&
-			    (node->rn_flags & RF_NEEDSTOP)) {
-				/*
-				printf("Node %s:%s - CONDSTOP\n",
-				       node->rn_resource->r_rule->rr_type,
-				       primary_attr_value(node->rn_resource));
-				 */
-				op = RS_STOP;
-			}
-		}
-
-		/* Start starts before children */
-		if (me && (op == RS_START)) {
-			node->rn_flags &= ~RF_NEEDSTART;
-
-			rv = res_exec(node, agent_op_str(op), NULL, 0);
-			if (rv != 0) {
-				node->rn_state = RES_FAILED;
-				return rv;
-			}
-
-			set_time("start", 0, node);
-			clear_checks(node);
-
-			if (node->rn_state != RES_STARTED) {
-				++node->rn_resource->r_incarnations;
-				node->rn_state = RES_STARTED;
-			}
-		}
-
-		if (node->rn_child) {
-			rv = _res_op_by_level(&node, me?NULL:first, ret, op);
-			if (rv != 0)
-				return rv;
-		}
-
-		/* Stop/status/etc stops after children have stopped */
-		if (me && (op == RS_STOP)) {
-			node->rn_flags &= ~RF_NEEDSTOP;
-			rv = res_exec(node, agent_op_str(op), NULL, 0);
-
-			if (rv != 0) {
-				node->rn_state = RES_FAILED;
-				return rv;
-			}
-
-			if (node->rn_state != RES_STOPPED) {
-				--node->rn_resource->r_incarnations;
-				node->rn_state = RES_STOPPED;
-			}
-
-		} else if (me && (op == RS_STATUS)) {
-
-			rv = do_status(node);
-			if (rv != 0)
-				return rv;
-		}
-
-		/*
-		printf("end %s: %s %s\n", agent_op_str(op),
-		       node->rn_resource->r_rule->rr_type,
-		       primary_attr_value(node->rn_resource));
-		 */
-	} while (!list_done(tree, node));
-
-	return 0;
-}
-#endif
-
-
 static inline int
 _res_op_internal(resource_node_t **tree, resource_t *first,
 		 char *type, void *__attribute__((unused))ret, int realop,
@@ -1309,7 +1180,7 @@
 		rv = res_exec(node, agent_op_str(op), NULL, 0);
 		if (rv != 0) {
 			node->rn_state = RES_FAILED;
-			return rv;
+			return FL_FAILURE;
 		}
 
 		set_time("start", 0, node);
@@ -1322,14 +1193,43 @@
 	} else if (me && (op == RS_STATUS)) {
 		/* Check status before children*/
 		rv = do_status(node);
-		if (rv != 0)
-			return rv;
+		if (rv != 0) {
+			/*
+			   If this node's status has failed, all of its
+			   dependent children are failed, whether or not
+			   this node is independent.
+			 */
+			mark_nodes(node, RES_FAILED,
+				   RF_NEEDSTART | RF_NEEDSTOP);
+
+			/* If we're an independent subtree, return a flag
+			   stating that this section is recoverable apart
+			   from siblings in the resource tree.  All child
+			   resources of this node must be restarted,
+			   but siblings of this node are not affected. */
+			if (node->rn_flags & RF_INDEPENDENT)
+				return FL_RECOVERABLE;
+
+			return FL_FAILURE;
+		}
+
 	}
 
 	if (node->rn_child) {
 		rv = _res_op_by_level(&node, me?NULL:first, ret, op);
-		if (rv != 0)
-			return rv;
+		if (rv != 0) {
+			mark_nodes(node, RES_FAILED,
+				   RF_NEEDSTART | RF_NEEDSTOP);
+
+			/* If this node is independent of its siblings,
+			   the fact that one of its dependent children
+			   failed does not matter: its dependent children must
+			   also be independent of this node's siblings. */
+			if (node->rn_flags & RF_INDEPENDENT)
+				return FL_RECOVERABLE;
+
+			return FL_FAILURE;
+		}
 	}
 
 	/* Stop should occur after children have stopped */
@@ -1339,7 +1239,7 @@
 
 		if (rv != 0) {
 			node->rn_state = RES_FAILED;
-			return rv;
+			return FL_FAILURE;
 		}
 
 		if (node->rn_state != RES_STOPPED) {
@@ -1378,24 +1278,31 @@
 	char *type, void * __attribute__((unused))ret, int realop)
 {
   	resource_node_t *node;
- 	int count = 0, rv;
+ 	int count = 0, rv = 0;
  	
  	if (realop == RS_STOP) {
  		list_for_rev(tree, node, count) {
- 			rv = _res_op_internal(tree, first, type, ret, realop,
- 					      node);
- 			if (rv != 0) 
- 				return rv;
+ 			rv |= _res_op_internal(tree, first, type, ret, realop,
+ 					       node);
  		}
  	} else {
  		list_for(tree, node, count) {
- 			rv = _res_op_internal(tree, first, type, ret, realop,
- 					      node);
- 			if (rv != 0) 
+ 			rv |= _res_op_internal(tree, first, type, ret, realop,
+ 					       node);
+
+			/* If we hit a problem during a 'status' op in an
+			   independent subtree, rv will have the
+			   FL_RECOVERABLE bit set, but not FL_FAILURE.
+			   If we ever hit FL_FAILURE during a status
+			   operation, we're *DONE* - even if the subtree
+			   is flagged w/ indy-subtree */
+			  
+ 			if (rv & FL_FAILURE) 
  				return rv;
  		}
  	}
-	return 0;
+
+	return rv;
 }
 
 /**
@@ -1464,7 +1371,30 @@
 int
 res_status(resource_node_t **tree, resource_t *res, void *ret)
 {
-	return _res_op(tree, res, NULL, ret, RS_STATUS);
+	int rv;
+	rv = _res_op(tree, res, NULL, ret, RS_STATUS);
+
+	if (rv == 0)
+		return 0;
+
+	if (rv & FL_FAILURE)
+		return rv;
+
+	clulog(LOG_WARNING, "Some independent resources in %s:%s failed; "
+	       "Attempting inline recovery\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+
+	rv = res_condstop(tree, res, ret);
+	if (rv & FL_FAILURE)
+		goto out_fail;
+	rv = res_condstart(tree, res, ret);
+	if (rv & FL_FAILURE)
+		goto out_fail;
+
+	clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+	return 0;
+out_fail:
+	clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+	return 1;
 }
 
 
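The switch from "rv += ..." to "rv |= ..." above matters because the return
value is now a bitmask: arithmetic accumulation lets two FL_RECOVERABLE
children (0x2 + 0x2 = 0x4) overflow into an unrelated bit, while OR-ing
preserves each flag no matter how many children report it. A minimal
demonstration:

    #include <stdio.h>

    #define FL_FAILURE     0x1
    #define FL_RECOVERABLE 0x2

    int main(void)
    {
            int children[] = { FL_RECOVERABLE, FL_RECOVERABLE };
            int sum = 0, ored = 0, i;

            for (i = 0; i < 2; i++) {
                    sum += children[i];
                    ored |= children[i];
            }

            /* sum is 0x4: the RECOVERABLE bit is lost */
            printf("sum=0x%x recoverable=%d\n", sum, !!(sum & FL_RECOVERABLE));
            /* ored is 0x2: the flag survives */
            printf("or=0x%x recoverable=%d\n", ored, !!(ored & FL_RECOVERABLE));
            return 0;
    }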
--- cluster/rgmanager/src/resources/script.sh	2006/08/18 15:26:23	1.8
+++ cluster/rgmanager/src/resources/script.sh	2007/05/31 18:58:46	1.8.2.1
@@ -115,5 +115,5 @@
 declare -i rv=$?
 if [ $rv -ne 0 ]; then
 	ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)"
-	return $OCF_ERR_GENERIC
+	exit $OCF_ERR_GENERIC
 fi

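For completeness: restree.c above reads the per-resource attribute
%s/@__independent_subtree and accepts any positive integer or "yes", so a
service along these lines (names and values are illustrative, echoing the
test configurations rather than this patch) gets an fs subtree that can be
recovered inline without disturbing the sibling ip resource:

    <service name="example_svc">
      <fs name="data" device="/dev/sdb8" mountpoint="/mnt/cluster"
          fstype="ext3" __independent_subtree="1">
        <script name="initscript" file="/etc/init.d/myapp"/>
      </fs>
      <ip address="192.168.1.3" monitor_link="yes"/>
    </service>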


* [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h  ...
@ 2007-05-03 15:02 lhh
  0 siblings, 0 replies; 7+ messages in thread
From: lhh @ 2007-05-03 15:02 UTC (permalink / raw)
  To: cluster-devel.redhat.com

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL4
Changes by:	lhh at sourceware.org	2007-05-03 15:02:48

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: reslist.h 
	rgmanager/make : defines.mk.input 
	rgmanager/src/clulib: Makefile alloc.c clulog.c msgsimple.c 
	                      vft.c 
	rgmanager/src/daemons: groups.c rg_state.c 
	rgmanager/src/daemons/tests: delta-test004-test005.expected 
	                             delta-test005-test006.expected 
	                             delta-test006-test007.expected 
	                             delta-test007-test008.expected 
	                             delta-test008-test009.expected 
	                             delta-test009-test010.expected 
	                             delta-test010-test011.expected 
	                             delta-test011-test012.expected 
	                             delta-test012-test013.expected 
	                             delta-test013-test014.expected 
	                             delta-test014-test015.expected 
	                             delta-test015-test016.expected 
	                             delta-test016-test017.expected 
	                             test005.expected test006.expected 
	                             test007.expected test008.expected 
	                             test009.expected test010.expected 
	                             test011.expected test012.expected 
	                             test013.expected test014.expected 
	                             test015.expected test016.expected 
	                             test017.expected 
	rgmanager/src/resources: fs.sh nfsclient.sh nfsexport.sh 
	rgmanager/src/utils: clulog.c 
Added files:
	rgmanager/src/clulib: wrap_lock.c 

Log message:
	2007-05-03 Lon Hohberger <lhh@redhat.com>
	* Merge patch from Crosswalk development team:
	* Scott Cannata
	* Henry Harris
	* Leonard Maiorani
	* src/daemons/groups.c, rg_state.c: Apply patch from Andrey
	Mirkin to fix bug #237144; prevents exclusive services from
	being accidentally (or intentionally) started on the
	same node
	* src/daemons/restree.c: Don't print (null) when an attr
	isn't inherited
	* src/daemons/reslist.c: Try all direct ancestors while
	performing run-time inheritance resolution #231521
	* src/daemons/tests/*: Clean up test cases; don't look for (null)
	any more

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5.2.24&r2=1.5.2.25
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.8.2.6&r2=1.8.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/make/defines.mk.input.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.3&r2=1.3.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/wrap_lock.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.4.2.2&r2=1.4.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/alloc.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.3.2.4&r2=1.3.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/clulog.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.3&r2=1.2.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/msgsimple.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.4.2.1&r2=1.4.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.7.2.7&r2=1.7.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.8.2.18&r2=1.8.2.19
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.4.2.19&r2=1.4.2.20
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test004-test005.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test005-test006.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test006-test007.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test007-test008.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test008-test009.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test009-test010.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test010-test011.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test011-test012.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test012-test013.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test013-test014.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test014-test015.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test015-test016.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/delta-test016-test017.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test005.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test006.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test007.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test008.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test009.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test010.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test011.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test012.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test013.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test014.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test015.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test016.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/tests/test017.expected.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.1&r2=1.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/fs.sh.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.4.2.21&r2=1.4.2.22
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsclient.sh.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.3.2.11&r2=1.3.2.12
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsexport.sh.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.4.2.5&r2=1.4.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clulog.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.2&r2=1.1.2.3

--- cluster/rgmanager/ChangeLog	2006/11/03 16:26:18	1.5.2.24
+++ cluster/rgmanager/ChangeLog	2007/05/03 15:02:45	1.5.2.25
@@ -1,3 +1,21 @@
+2007-05-03 Lon Hohberger <lhh@redhat.com>
+	* Merge patch from Crosswalk development team:
+		* Scott Cannata
+		* Henry Harris
+		* Leonard Maiorani
+	* src/daemons/groups.c, rg_state.c: Apply patch from Andrey
+	Mirkin to fix bug #237144; prevents exclusive services from
+	being accidentally (or intentionally) started on the
+	same node
+	* src/daemons/restree.c: Don't print (null) when an attr
+	isn't inherited
+	* src/daemons/reslist.c: Try all direct ancestors while 
+	performing run-time inheritance resolution #231521
+	* src/daemons/tests/*: Clean up test cases; don't look for (null)
+	any more
+
+=====
+
 2006-11-03 Lon Hohberger <lhh@redhat.com>
 	* src/daemons/restree.c: Merge patch from Jeff Layton to instrument
 	res_exec more closely. (#213246)
--- cluster/rgmanager/include/reslist.h	2007/03/20 19:40:06	1.8.2.6
+++ cluster/rgmanager/include/reslist.h	2007/05/03 15:02:46	1.8.2.7
@@ -25,11 +25,6 @@
 #include <libxml/xpath.h>
 
 
-#define RA_PRIMARY	(1<<0)
-#define RA_UNIQUE	(1<<1)
-#define RA_REQUIRED	(1<<2)
-#define RA_INHERIT	(1<<3)
-
 #define RF_INLINE	(1<<0)
 #define RF_DEFINED	(1<<1)
 #define RF_NEEDSTART	(1<<2)	/** Used when adding/changing resources */
@@ -70,11 +65,24 @@
 #include <res-ocf.h>
 
 
+typedef enum {
+/*
+#define RA_PRIMARY	(1<<0)
+#define RA_UNIQUE	(1<<1)
+#define RA_REQUIRED	(1<<2)
+#define RA_INHERIT	(1<<3)
+ */
+	RA_PRIMARY = (1<<0),
+	RA_UNIQUE  = (1<<1),
+	RA_REQUIRED= (1<<2),
+	RA_INHERIT = (1<<3),
+	RA_SPEC    = (1<<4)
+} ra_flag_t;
+
 typedef struct _resource_attribute {
-	int	ra_flags;
-	/* XXX possible alignment problem on ia64 */
 	char	*ra_name;
 	char	*ra_value;
+	ra_flag_t ra_flags;
 } resource_attr_t;
 
 
--- cluster/rgmanager/make/Attic/defines.mk.input	2004/10/27 20:26:22	1.3
+++ cluster/rgmanager/make/Attic/defines.mk.input	2007/05/03 15:02:46	1.3.2.1
@@ -32,3 +32,11 @@
 
 # Default CFLAGS
 CFLAGS += -DSHAREDIR=\"@SHAREDIR@\" -Wall ${INCLUDE}
+
+#
+# Wrappers around pthread_mutex / pthread_rwlock calls for deadlock
+# detection (and other things)
+#
+#CFLAGS += -DSHAREDIR=\"/usr/share/cluster\" -Wall ${INCLUDE} -DWRAP_LOCKS -DMAX_DISPATCH_RETRIES=3
+#LDFLAGS += -Wl,-wrap,pthread_mutex_lock,-wrap,pthread_mutex_unlock,-wrap,pthread_rwlock_rdlock,-wrap,pthread_rwlock_wrlock,-wrap,pthread_rwlock_unlock
+
/cvs/cluster/cluster/rgmanager/src/clulib/wrap_lock.c,v  -->  standard output
revision 1.1.2.1
--- cluster/rgmanager/src/clulib/wrap_lock.c
+++ -	2007-05-03 15:02:50.265394000 +0000
@@ -0,0 +1,224 @@
+/*
+  Copyright Red Hat, Inc. 2007
+  Copyright Crosswalk 2006-2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the
+  Free Software Foundation; either version 2, or (at your option) any
+  later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+#ifdef WRAP_LOCKS
+#include <stdio.h>
+#include <sys/types.h>
+#include <gettid.h>
+#include <pthread.h>
+#include <string.h>
+#include <errno.h>
+#include <string.h>
+#include <signal.h>
+
+int __real_pthread_mutex_lock(pthread_mutex_t *lock);
+int
+__wrap_pthread_mutex_lock(pthread_mutex_t *lock)
+{
+	int status;
+	struct timespec delay;
+
+	while (1) {
+		status = __real_pthread_mutex_lock(lock);
+
+		switch(status) {
+		case EDEADLK:
+			/* Already own it: Note the error, but continue */
+			fprintf(stderr, "[%d] %s(%p): %s; continuing\n",
+				gettid(),
+				__FUNCTION__, lock, strerror(status));
+			/* deliberate fallthrough */
+		case 0:
+			return 0;
+		case EBUSY:
+			/* Try again */
+			break;
+		default:
+			/* Other return codes */
+			fprintf(stderr, "[%d] %s(%p): %s\n", gettid(),
+				__FUNCTION__, lock, strerror(status));
+			raise(SIGSEGV);
+			/* EINVAL? */
+			return 0;
+		}
+
+		delay.tv_sec = 0;
+		delay.tv_nsec = 100000;
+		nanosleep(&delay, NULL);
+	}
+
+	/* Not reached */
+	return 0;
+}
+
+
+int __real_pthread_mutex_unlock(pthread_mutex_t *lock);
+int
+__wrap_pthread_mutex_unlock(pthread_mutex_t *lock)
+{
+	int status;
+	struct timespec delay;
+
+	while (1) {
+		status = __real_pthread_mutex_unlock(lock);
+
+		switch(status) {
+		case EPERM:
+			/* Don't own it: Note the error, but continue */
+			fprintf(stderr, "[%d] %s(%p): %s; continuing\n",
+				gettid(),
+				__FUNCTION__, lock, strerror(status));
+			/* deliberate fallthrough */
+		case 0:
+			return 0;
+		default:
+			fprintf(stderr, "[%d] %s(%p): %s\n", gettid(),
+				__FUNCTION__, lock, strerror(status));
+			raise(SIGSEGV);
+			return 0;
+		}
+
+		delay.tv_sec = 0;
+		delay.tv_nsec = 100000;
+		nanosleep(&delay, NULL);
+	}
+
+	/* Not reached */
+	return 0;
+}
+
+
+int __real_pthread_rwlock_rdlock(pthread_rwlock_t *lock);
+int
+__wrap_pthread_rwlock_rdlock(pthread_rwlock_t *lock)
+{
+	int status;
+	struct timespec delay;
+
+	while (1) {
+		status = __real_pthread_rwlock_rdlock(lock);
+
+		switch(status) {
+		case EDEADLK:
+			/* Already own it: Note the error, but continue */
+			fprintf(stderr, "[%d] %s(%p): %s; continuing\n",
+				gettid(),
+				__FUNCTION__, lock, strerror(status));
+			/* deliberate fallthrough */
+		case 0:
+			return 0;
+		case EBUSY:
+			/* Try again */
+			break;
+		default:
+			/* Other return codes */
+			fprintf(stderr, "[%d] %s(%p): %s\n", gettid(),
+				__FUNCTION__, lock, strerror(status));
+			raise(SIGSEGV);
+			/* EINVAL? */
+			return 0;
+		}
+
+		delay.tv_sec = 0;
+		delay.tv_nsec = 100000;
+		nanosleep(&delay, NULL);
+	}
+
+	/* Not reached */
+	return 0;
+}
+
+
+int __real_pthread_rwlock_wrlock(pthread_rwlock_t *lock);
+int
+__wrap_pthread_rwlock_wrlock(pthread_rwlock_t *lock)
+{
+	int status;
+	struct timespec delay;
+
+	while (1) {
+		status = __real_pthread_rwlock_wrlock(lock);
+
+		switch(status) {
+		case EDEADLK:
+			/* Already own it: Note the error, but continue */
+			fprintf(stderr, "[%d] %s(%p): %s; continuing\n",
+				gettid(),
+				__FUNCTION__, lock, strerror(status));
+			/* deliberate fallthrough */
+		case 0:
+			return 0;
+		case EBUSY:
+			/* Try again */
+			break;
+		default:
+			/* Other return codes */
+			fprintf(stderr, "[%d] %s(%p): %s\n", gettid(),
+				__FUNCTION__, lock, strerror(status));
+			raise(SIGSEGV);
+			/* EINVAL? */
+			return 0;
+		}
+
+		delay.tv_sec = 0;
+		delay.tv_nsec = 100000;
+		nanosleep(&delay, NULL);
+	}
+
+	/* Not reached */
+	return 0;
+}
+
+
+int __real_pthread_rwlock_unlock(pthread_rwlock_t *lock);
+int
+__wrap_pthread_rwlock_unlock(pthread_rwlock_t *lock)
+{
+	int status;
+	struct timespec delay;
+
+	while (1) {
+		status = __real_pthread_rwlock_unlock(lock);
+
+		switch(status) {
+		case EPERM:
+			/* Don't own it: Note the error, but continue */
+			fprintf(stderr, "[%d] %s(%p): %s; continuing\n",
+				gettid(),
+				__FUNCTION__, lock, strerror(status));
+			/* deliberate fallthrough */
+		case 0:
+			return 0;
+		default:
+			fprintf(stderr, "[%d] %s(%p): %s\n", gettid(),
+				__FUNCTION__, lock, strerror(status));
+			raise(SIGSEGV);
+			return 0;
+		}
+
+		delay.tv_sec = 0;
+		delay.tv_nsec = 100000;
+		nanosleep(&delay, NULL);
+	}
+
+	/* Not reached */
+	return 0;
+}
+#endif
+
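wrap_lock.c depends on GNU ld's --wrap option (see the commented LDFLAGS in
defines.mk.input above): every call to symbol S is redirected to __wrap_S,
and the wrapper reaches the real function as __real_S. A minimal standalone
demonstration of the mechanism (a hypothetical demo, not part of the patch):

    /* build: gcc -Wl,-wrap,puts wrap_demo.c */
    #include <stdio.h>

    int __real_puts(const char *s);   /* resolved by ld to the real puts */

    int __wrap_puts(const char *s)
    {
            fprintf(stderr, "[wrapped] ");
            return __real_puts(s);    /* forward to the real function */
    }

    int main(void)
    {
            puts("hello");            /* actually calls __wrap_puts */
            return 0;
    }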
--- cluster/rgmanager/src/clulib/Makefile	2005/01/25 17:22:42	1.4.2.2
+++ cluster/rgmanager/src/clulib/Makefile	2007/05/03 15:02:46	1.4.2.3
@@ -30,7 +30,7 @@
 uninstall:
 
 libclulib.a: clulog.o daemon_init.o signals.o msgsimple.o \
-		vft.o gettid.o rg_strings.o
+		vft.o gettid.o rg_strings.o wrap_lock.o
 	${AR} cru $@ $^
 	ranlib $@
 
--- cluster/rgmanager/src/clulib/alloc.c	2006/01/20 16:25:24	1.3.2.4
+++ cluster/rgmanager/src/clulib/alloc.c	2007/05/03 15:02:46	1.3.2.5
@@ -155,7 +155,11 @@
 
 #ifndef NOPTHREADS
 #include <pthread.h>
+#ifdef WRAP_LOCKS
+static pthread_mutex_t _alloc_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+#else
 static pthread_mutex_t _alloc_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif /* WRAP_LOCKS */
 #else
 #define pthread_mutex_trylock(x) (0)
 #define pthread_mutex_lock(x)
--- cluster/rgmanager/src/clulib/clulog.c	2006/10/05 17:52:27	1.2.2.3
+++ cluster/rgmanager/src/clulib/clulog.c	2007/05/03 15:02:46	1.2.2.4
@@ -20,7 +20,7 @@
 /** @file
  * Library routines for communicating with the logging daemon.
  *
- *  $Id: clulog.c,v 1.2.2.3 2006/10/05 17:52:27 lhh Exp $
+ *  $Id: clulog.c,v 1.2.2.4 2007/05/03 15:02:46 lhh Exp $
  *
  *  Author: Jeff Moyer <moyer@missioncriticallinux.com>
  */
@@ -50,7 +50,7 @@
 #include <string.h>
 
 
-static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.3 $";
+static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.4 $";
 
 #ifdef DEBUG
 #include <assert.h>
@@ -70,7 +70,12 @@
 static int   syslog_facility = LOG_DAEMON;
 static char  *daemon_name = NULL;
 static pid_t daemon_pid = -1;
+
+#ifdef WRAP_LOCKS
+static pthread_mutex_t log_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+#else
 static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
 
 CODE logger_prioritynames[] = 
 { {"emerg", LOG_EMERG},
--- cluster/rgmanager/src/clulib/msgsimple.c	2006/05/12 21:28:31	1.4.2.1
+++ cluster/rgmanager/src/clulib/msgsimple.c	2007/05/03 15:02:46	1.4.2.2
@@ -51,6 +51,8 @@
 int
 msg_send_simple(int fd, int cmd, int arg1, int arg2)
 {
+	int       status;
+	int       my_errno;
 	generic_msg_hdr msg;
 
 	msg.gh_magic = GENERIC_HDR_MAGIC;
@@ -60,7 +62,25 @@
 	msg.gh_arg2 = arg2;
 	swab_generic_msg_hdr(&msg);
 
-	return msg_send(fd, (void *) &msg, sizeof (msg));
+	my_errno = 0;
+	status = msg_send(fd, (void *) &msg, sizeof (msg));
+	my_errno = errno;
+
+	if (status <= 0)
+	{
+		/*
+		 *  Should never be the case since msg_send() is 
+		 *  wrapped in a retry loop. Give msg_send() one more
+		 *  try for a few retryable errnos; return an error for
+		 *  the others, as they cannot and should not be retried.
+		 */
+		if ((my_errno == EINTR) || (my_errno == EAGAIN) ||
+		    (my_errno == ENOSPC)) {
+			status = msg_send(fd, (void *) &msg, sizeof (msg));
+		}
+	}
+
+	return (status);
 }
 
 
@@ -95,7 +115,7 @@
 			fprintf(stderr, "fd%d peek: %d/%d bytes\n", fd,
 			       ret, (int)sizeof (generic_msg_hdr));
 		else if (ret == 0)
-			errno = EAGAIN;
+			errno = ECONNRESET;
 		return -1;
 	}
 
@@ -110,7 +130,6 @@
 	 * allocate enough memory to receive the header + diff buffer
 	 */
 	*buf = malloc(peek_msg.gh_length);
-	memset(*buf, 0, peek_msg.gh_length);
 
 	if (!*buf) {
 		fprintf(stderr, "%s: malloc: %s", __FUNCTION__,
@@ -118,6 +137,8 @@
 		return -1;
 	}
 
+	memset(*buf, 0, peek_msg.gh_length);
+
 	/*
 	 * Now, do the real receive.  2 second timeout, if none specified.
 	 */
--- cluster/rgmanager/src/clulib/vft.c	2006/12/13 18:19:56	1.7.2.7
+++ cluster/rgmanager/src/clulib/vft.c	2007/05/03 15:02:46	1.7.2.8
@@ -54,8 +54,13 @@
  * TODO: We could make it thread safe, but this might be unnecessary work
  * Solution: Super-coarse-grained-bad-code-locking!
  */
+#ifdef WRAP_LOCKS
+static pthread_mutex_t key_list_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+static pthread_mutex_t vf_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+#else
 static pthread_mutex_t key_list_mutex = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t vf_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif /* WRAP_LOCKS */
 static pthread_t vf_thread = (pthread_t)-1;
 static int thread_ready = 0;
 static vf_vote_cb_t default_vote_cb = NULL;
@@ -332,6 +337,7 @@
 	struct timeval tv;
 	fd_set rfds;
 	int nready, x;
+	int  myerrno = 0;
 
 	/* Set up for the select */
 	tv.tv_sec = timeout;
@@ -351,12 +357,30 @@
 			FD_SET(peer_fds[x], &rfds);
 
 		nready = select(MAX_FDS, &rfds, NULL, NULL, &tv);
-		if (nready <= -1) {
-			if (nready == 0)
-				printf("VF Abort: Timed out!\n");
-			else
-				printf("VF Abort: %s\n",
-				       strerror(errno));
+
+		/*
+		 *  Fix the erroneous nready==0 check and the
+		 *  missing retry on EINTR.
+		 */
+		if (nready < 0) 
+		{
+			myerrno = errno;
+			fprintf(stderr, "VF Abort: %s\n", strerror(myerrno));
+
+			if (myerrno == EINTR) {
+				tv.tv_sec = timeout;
+				tv.tv_usec = 0;
+				continue;
+			}
+
+			errno = myerrno;
+			return 0;
+		}
+
+		if (nready == 0) {
+			fprintf(stderr, 
+				"VF Abort: Timed out!\n");
+
 			return 0;
 		}
 
@@ -768,8 +792,13 @@
 	if (!fp)
 		return -1;
 
+#if defined(__sparc__) || defined(__hppa__) || defined(__sparc64__) || defined (__hppa64__)
+	rv = fscanf(fp,"%ld.%d %ld.%d\n", &tv->tv_sec, &tv->tv_usec,
+		    &junk.tv_sec, &junk.tv_usec);
+#else
 	rv = fscanf(fp,"%ld.%ld %ld.%ld\n", &tv->tv_sec, &tv->tv_usec,
 		    &junk.tv_sec, &junk.tv_usec);
+#endif
 	fclose(fp);
 	
 	if (rv != 4) {
@@ -830,6 +859,15 @@
 		free(key_node->kn_data);
 	key_node->kn_datalen = vnp->vn_datalen;
 	key_node->kn_data = malloc(vnp->vn_datalen);
+
+	/*
+	 *   Need to check return of malloc always
+	 */
+	if (key_node->kn_data == NULL) {
+		fprintf (stderr, "malloc fail err=%d\n", errno);
+		return -1;
+	}
+
 	memcpy(key_node->kn_data, vnp->vn_data, vnp->vn_datalen);
 
 	free(vnp);
@@ -844,15 +882,42 @@
 	struct timeval tv;
 	fd_set rfds;
 	generic_msg_hdr *hdrp = NULL;
+	int   myerrno = 0;
 
-	FD_ZERO(&rfds);
-	max = msg_fill_fdset(&rfds, MSG_ALL, MSGP_VFS);
+	while (1) {
+		FD_ZERO(&rfds);
+		max = msg_fill_fdset(&rfds, MSG_ALL, MSGP_VFS);
 
-	tv.tv_sec = 1;
-	tv.tv_usec = 0;
-	nready = select(max + 1, &rfds, NULL, NULL, &tv);
-	if (nready <= 0)
-		return;
+		tv.tv_sec = 1;
+		tv.tv_usec = 0;
+
+		nready = select(max + 1, &rfds, NULL, NULL, &tv);
+		if (nready < 0)
+		{
+			myerrno = errno;
+#ifdef NOT_NOW
+			/*
+			 *  i think red hat wants to return w/ EINTR
+			 */
+			if (myerrno == EINTR)
+			{
+				continue;
+			}
+#endif /* NOT_NOW */
+
+			errno = myerrno;
+			return;
+		}
+
+		if (nready == 0) {
+			return;
+		}
+
+		/*
+		 *  positive return value, break and process.
+		 */
+		break;
+	}
 
 	while (nready) {
 		fd = msg_next_fd(&rfds);
@@ -970,6 +1035,12 @@
 	}
 
 	va = malloc(sizeof(*va));
+
+	if (va == NULL) {
+		fprintf (stderr, "vf_init: malloc fail2 err=%d\n", errno);
+		return -1;
+	}
+
 	va->local_node_id = my_node_id;
 	va->port = my_port;
 
@@ -1071,6 +1142,13 @@
 	}
 
 	newnode = malloc(sizeof(*newnode));
+
+	if (newnode == NULL) {
+		fprintf(stderr, "malloc fail3 err=%d\n", errno);
+		pthread_mutex_unlock(&key_list_mutex);
+		return -1;
+	}
+
 	newnode->kn_data = NULL;
 	memset(newnode,0,sizeof(*newnode));
 	newnode->kn_keyid = strdup(keyid);
@@ -1178,6 +1256,8 @@
 	void *lockp = NULL;
 	int l;
 	char lock_name[256];
+	int my_status;
+	int ret_status = 0;
 
 	if (!data || !datalen || !keyid || !strlen(keyid) || !membership)
 		return -1;
@@ -1187,7 +1267,6 @@
 	snprintf(lock_name, sizeof(lock_name), "usrm::vf");
 	l = clu_lock(lock_name, CLK_EX, &lockp);
 	if (l < 0) {
-		clu_unlock(lock_name, lockp);
 		pthread_mutex_unlock(&vf_mutex);
 		return l;
 	}
@@ -1196,6 +1275,7 @@
 	count = sizeof(int) * (membership->cml_count + 1);
 	peer_fds = malloc(count);
 	if(!peer_fds) {
+		clu_unlock(lock_name, lockp);
 		pthread_mutex_unlock(&vf_mutex);
 		return -1;
 	}
@@ -1240,13 +1320,13 @@
 				goto retry_top;
 			if (flags & VFF_IGN_CONN_ERRORS)
 				continue;
+			close_all(peer_fds);
 			free(peer_fds);
 
 			clu_unlock(lock_name, lockp);
 			pthread_mutex_unlock(&vf_mutex);
 			return -1;
 		}
-
 		++y;
 	}
 
@@ -1289,19 +1369,30 @@
 	 */
 	for (x = 0; peer_fds[x] != -1; x++) {
 
-		if (msg_send(peer_fds[x], join_view, totallen) != totallen) {
-			vf_send_abort(peer_fds);
-			close_all(peer_fds);
-
-			free(join_view);
-			clu_unlock(lock_name, lockp);
-			pthread_mutex_unlock(&vf_mutex);
-			return -1;
-		} 
+		/*
+		 *  Still send msg to everyone, but then close
+		 *  all peers fds and cleanup - TBD JSC
+		 */
+		my_status = msg_send(peer_fds[x], join_view, totallen);
+		if (my_status != totallen) {
+			ret_status = -1;
+		}
 
 		remain++;
 	}
 
+	/*
+	 *  Now cleanup
+	 */
+	if (ret_status == -1) {
+		vf_send_abort(peer_fds);
+		close_all(peer_fds);
+		free(join_view);
+		clu_unlock(lock_name, lockp);
+		pthread_mutex_unlock(&vf_mutex);
+		return -1;
+	}
+
 #ifdef DEBUG
 	printf("VF: Checking for consensus...\n");
 #endif
@@ -1524,7 +1615,6 @@
 	snprintf(lock_name, sizeof(lock_name), "usrm::vf");
 	l = clu_lock(lock_name, CLK_EX, &lockp);
 	if (l < 0) {
-		clu_unlock(lock_name, lockp);
 		pthread_mutex_unlock(&vf_mutex);
 		printf("Couldn't lock %s\n", keyid);
 		return l;
--- cluster/rgmanager/src/daemons/groups.c	2006/11/27 22:21:30	1.8.2.18
+++ cluster/rgmanager/src/daemons/groups.c	2007/05/03 15:02:47	1.8.2.19
@@ -22,6 +22,7 @@
 #include <magma.h>
 #include <magmamsg.h>
 #include <resgroup.h>
+#include <reslist.h>
 #include <vf.h>
 #include <magma.h>
 #include <ccs.h>
@@ -132,6 +133,106 @@
 }
 
 
+int get_rg_state_local(char *, rg_state_t *);
+int
+count_resource_groups_local(cluster_member_t *mp)
+{
+	resource_t *res;
+	char *rgname, *val;
+	rg_state_t st;
+
+	mp->cm_svccount = 0;
+	mp->cm_svcexcl = 0;
+
+	pthread_rwlock_rdlock(&resource_lock);
+
+	list_do(&_resources, res) {
+		if (res->r_rule->rr_root == 0)
+			continue;
+
+		rgname = res->r_attrs[0].ra_value;
+
+		if (get_rg_state_local(rgname, &st) < 0) {
+			continue;
+		}
+
+		if (st.rs_state != RG_STATE_STARTED &&
+		     st.rs_state != RG_STATE_STARTING)
+			continue;
+
+		if (mp->cm_id != st.rs_owner)
+			continue;
+
+		++mp->cm_svccount;
+
+		val = res_attr_value(res, "exclusive");
+		if (val && ((!strcmp(val, "yes") ||
+				     (atoi(val)>0))) ) {
+			++mp->cm_svcexcl;
+		}
+
+	} while (!list_done(&_resources, res));
+
+	pthread_rwlock_unlock(&resource_lock);
+
+	return 0;
+}
+
+
+int
+have_exclusive_resources()
+{
+	resource_t *res;
+	char *val;
+
+	pthread_rwlock_rdlock(&resource_lock);
+
+	list_do(&_resources, res) {
+		val = res_attr_value(res, "exclusive");
+		if (val && ((!strcmp(val, "yes") ||
+				     (atoi(val)>0))) ) {
+			pthread_rwlock_unlock(&resource_lock);
+			return 1;
+		}
+
+	} while (!list_done(&_resources, res));
+
+	pthread_rwlock_unlock(&resource_lock);
+
+	return 0;
+}
+
+
+int
+check_exclusive_resources(cluster_member_list_t *membership, char *svcName)
+{
+	cluster_member_t *mp;
+	int exclusive, count; 
+	resource_t *res;
+	char *val;
+
+	mp = memb_id_to_p(membership, my_id());
+	assert(mp);
+	count_resource_groups_local(mp);
+	exclusive = mp->cm_svcexcl;
+	count = mp->cm_svccount;
+	pthread_rwlock_rdlock(&resource_lock);
+	res = find_root_by_ref(&_resources, svcName);
+	if (!res) {
+		pthread_rwlock_unlock(&resource_lock);
+		return FAIL;
+	}
+	val = res_attr_value(res, "exclusive");
+	pthread_rwlock_unlock(&resource_lock);
+	if (exclusive || (count && val && 
+			(!strcmp(val, "yes") || (atoi(val)>0)))) {
+		return 1;
+	}
+
+	return 0;
+}
+
+
 /**
    Find the best target node for a service *besides* the current service
    owner.  Takes into account:
@@ -576,7 +677,6 @@
   @param rgname		Resource group name whose state we want to send.
   @see send_rg_states
  */
-int get_rg_state_local(char *, rg_state_t *);
 void
 send_rg_state(int fd, char *rgname, int fast)
 {
--- cluster/rgmanager/src/daemons/rg_state.c	2007/02/20 19:52:36	1.4.2.19
+++ cluster/rgmanager/src/daemons/rg_state.c	2007/05/03 15:02:47	1.4.2.20
@@ -41,6 +41,8 @@
 int set_rg_state(char *servicename, rg_state_t *svcblk);
 int get_rg_state(char *servicename, rg_state_t *svcblk);
 void get_recovery_policy(char *rg_name, char *buf, size_t buflen);
+int have_exclusive_resources();
+int check_exclusive_resources(cluster_member_list_t *membership, char *svcName);
 
 
 uint64_t
@@ -603,6 +605,10 @@
 			ret = 1;
 			break;
 		}
+		if (req == RG_START_RECOVER) {
+			ret = 1;
+			break;
+		}
 
 		clulog(LOG_DEBUG, "Not starting disabled RG %s\n",
 		       svcName);
@@ -1308,6 +1314,7 @@
 }
 
 
+pthread_mutex_t exclusive_mutex = PTHREAD_MUTEX_INITIALIZER;
 /**
  * handle_start_req - Handle a generic start request from a user or during
  * service manager boot.
@@ -1323,6 +1330,7 @@
 {
 	int ret, tolerance = FOD_BEST;
 	cluster_member_list_t *membership = member_list();
+	int need_check = have_exclusive_resources();
 
 	/*
 	 * When a service request is from a user application (eg, clusvcadm),
@@ -1338,6 +1346,18 @@
 		cml_free(membership);
 		return FAIL;
 	}
+	if (need_check) {
+		pthread_mutex_lock(&exclusive_mutex);
+		ret = check_exclusive_resources(membership, svcName);
+		if (ret != 0) {
+			cml_free(membership);
+			pthread_mutex_unlock(&exclusive_mutex);
+			if (ret > 0)
+				goto relocate;
+			else
+				return FAIL;
+		}
+	}
 	cml_free(membership);
 	
 	/*
@@ -1345,6 +1365,8 @@
 	 * mask here - so that we can try all nodes if necessary.
 	 */
 	ret = svc_start(svcName, req);
+	if (need_check)
+		pthread_mutex_unlock(&exclusive_mutex);
 
 	/* 
 	   If services are locked, return the error 
@@ -1384,6 +1406,7 @@
 		return RG_EABORT;
 	}
 	
+relocate:
 	/*
 	 * OK, it failed to start - but succeeded to stop.  Now,
 	 * we should relocate the service.
@@ -1421,6 +1444,7 @@
 	int x;
 	uint64_t me = my_id();
 	cluster_member_list_t *membership = member_list();
+	int need_check = have_exclusive_resources();
 
 	/* XXX ok, so we need to say "should I start this if I was the
 	   only cluster member online */
@@ -1441,9 +1465,19 @@
 		cml_free(membership);
 		return FAIL;
 	}
+ 	if (need_check) {
+ 		pthread_mutex_lock(&exclusive_mutex);
+ 		if (check_exclusive_resources(membership, svcName) != 0) {
+ 			pthread_mutex_unlock(&exclusive_mutex);
+ 			cml_free(membership);
+ 			return FAIL;
+ 		}
+ 	}
 	cml_free(membership);
 
 	x = svc_start(svcName, req);
+	if (need_check)
+ 		pthread_mutex_unlock(&exclusive_mutex);
 	if (x == 0)
 		return 0;
 	if (x == RG_ERUN)
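
[Editor's note: two smaller rg_state.c changes ride along. RG_START_RECOVER requests now also break out of the "Not starting disabled RG" path with ret = 1, and handle_start_req() gains a relocate: label so a positive result from check_exclusive_resources() falls through to relocation instead of failing the start outright. The long run of tests/*.expected updates that follows is mechanical: attributes that are unset are no longer dumped as "(null)", so those placeholder lines simply disappear from the expected resource-tree output.]
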
--- cluster/rgmanager/src/daemons/tests/delta-test004-test005.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test004-test005.expected	2007/05/03 15:02:47	1.1.2.4
@@ -49,7 +49,6 @@
   ip [ NEEDSTART ] {
     address = "192.168.1.2";
     monitor_link = "1";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test005-test006.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test005-test006.expected	2007/05/03 15:02:47	1.1.2.4
@@ -48,7 +48,6 @@
   ip [ NEEDSTOP ] {
     address = "192.168.1.2";
     monitor_link = "1";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -62,7 +61,6 @@
   ip [ NEEDSTART ] {
     address = "192.168.1.2";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test006-test007.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test006-test007.expected	2007/05/03 15:02:47	1.1.2.4
@@ -48,7 +48,6 @@
   ip [ NEEDSTOP ] {
     address = "192.168.1.2";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -62,7 +61,6 @@
   ip [ NEEDSTART ] {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test007-test008.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test007-test008.expected	2007/05/03 15:02:47	1.1.2.4
@@ -58,7 +58,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -72,7 +71,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test008-test009.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test008-test009.expected	2007/05/03 15:02:47	1.1.2.4
@@ -68,7 +68,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -84,12 +83,10 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
   }
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test009-test010.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test009-test010.expected	2007/05/03 15:02:47	1.1.2.4
@@ -79,12 +79,10 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
   }
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -100,12 +98,10 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
   }
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test010-test011.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test010-test011.expected	2007/05/03 15:02:47	1.1.2.4
@@ -138,12 +138,10 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
   }
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -159,27 +157,20 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport [ NEEDSTART ] {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
     }
@@ -187,7 +178,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test011-test012.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test011-test012.expected	2007/05/03 15:02:47	1.1.2.4
@@ -188,27 +188,20 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
     }
@@ -216,7 +209,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -232,35 +224,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient [ NEEDSTART ] {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
     }
@@ -268,7 +251,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test012-test013.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test012-test013.expected	2007/05/03 15:02:47	1.1.2.4
@@ -188,35 +188,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient [ NEEDSTOP ] {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
     }
@@ -224,7 +215,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -240,35 +230,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient [ NEEDSTART ] {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -276,7 +257,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test013-test014.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test013-test014.expected	2007/05/03 15:02:47	1.1.2.4
@@ -212,35 +212,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -248,7 +239,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -264,35 +254,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -300,7 +281,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -315,35 +295,26 @@
     mountpoint = "/mnt/cluster2";
     device = "/dev/sdb9";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb9";
       path = "/mnt/cluster2";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -351,7 +322,6 @@
   ip {
     address = "192.168.1.4";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test014-test015.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test014-test015.expected	2007/05/03 15:02:47	1.1.2.4
@@ -236,35 +236,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient [ NEEDSTOP ] {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -272,7 +263,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -287,35 +277,26 @@
     mountpoint = "/mnt/cluster2";
     device = "/dev/sdb9";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb9";
       path = "/mnt/cluster2";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient [ NEEDSTOP ] {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -323,7 +304,6 @@
   ip {
     address = "192.168.1.4";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -339,35 +319,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient [ NEEDSTART ] {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -375,7 +346,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -390,35 +360,26 @@
     mountpoint = "/mnt/cluster2";
     device = "/dev/sdb9";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb9";
       path = "/mnt/cluster2";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient [ NEEDSTART ] {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -426,7 +387,6 @@
   ip {
     address = "192.168.1.4";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test015-test016.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/delta-test015-test016.expected	2007/05/03 15:02:47	1.1.2.4
@@ -237,35 +237,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -273,7 +264,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -288,35 +278,26 @@
     mountpoint = "/mnt/cluster2";
     device = "/dev/sdb9";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb9";
       path = "/mnt/cluster2";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -324,7 +305,6 @@
   ip {
     address = "192.168.1.4";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -340,35 +320,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -376,7 +347,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -391,35 +361,26 @@
     mountpoint = "/mnt/cluster2";
     device = "/dev/sdb9";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb9";
       path = "/mnt/cluster2";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -427,7 +388,6 @@
   ip {
     address = "192.168.1.4";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/delta-test016-test017.expected	2007/03/22 23:16:43	1.1.2.1
+++ cluster/rgmanager/src/daemons/tests/delta-test016-test017.expected	2007/05/03 15:02:47	1.1.2.2
@@ -251,35 +251,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -287,7 +278,6 @@
   ip [ NEEDSTOP ] {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -302,35 +292,26 @@
     mountpoint = "/mnt/cluster2";
     device = "/dev/sdb9";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb9";
       path = "/mnt/cluster2";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -338,7 +319,6 @@
   ip [ NEEDSTOP ] {
     address = "192.168.1.4";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -354,35 +334,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -402,42 +373,32 @@
     ip [ NEEDSTART ] {
       address = "192.168.1.3";
       monitor_link = "yes";
-      nfslock = "(null)";
     }
     fs [ NEEDSTART ] {
       name = "mount2";
       mountpoint = "/mnt/cluster2";
       device = "/dev/sdb9";
       fstype = "ext3";
-      nfslock = "(null)";
       nfsexport {
         name = "Dummy Export";
         device = "/dev/sdb9";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         nfsclient {
           name = "Admin group";
           target = "@admin";
           path = "/mnt/cluster2";
-          fsid = "(null)";
-          nfslock = "(null)";
           options = "rw";
         }
         nfsclient {
           name = "User group";
           target = "@users";
           path = "/mnt/cluster2";
-          fsid = "(null)";
-          nfslock = "(null)";
           options = "rw,sync";
         }
         nfsclient {
           name = "red";
           target = "red";
           path = "/mnt/cluster2";
-          fsid = "(null)";
-          nfslock = "(null)";
           options = "rw";
         }
       }
@@ -445,12 +406,10 @@
     script [ NEEDSTART ] {
       name = "script2";
       file = "/etc/init.d/script2";
-      service_name = "(null)";
     }
     ip [ NEEDSTART ] {
       address = "192.168.1.4";
       monitor_link = "yes";
-      nfslock = "(null)";
     }
   }
   script [ NEEDSTART ] {
--- cluster/rgmanager/src/daemons/tests/test005.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test005.expected	2007/05/03 15:02:47	1.1.2.4
@@ -26,7 +26,6 @@
   ip {
     address = "192.168.1.2";
     monitor_link = "1";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test006.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test006.expected	2007/05/03 15:02:47	1.1.2.4
@@ -26,7 +26,6 @@
   ip {
     address = "192.168.1.2";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test007.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test007.expected	2007/05/03 15:02:47	1.1.2.4
@@ -26,7 +26,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test008.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test008.expected	2007/05/03 15:02:47	1.1.2.4
@@ -36,7 +36,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test009.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test009.expected	2007/05/03 15:02:47	1.1.2.4
@@ -38,12 +38,10 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
   }
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test010.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test010.expected	2007/05/03 15:02:47	1.1.2.4
@@ -47,12 +47,10 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
   }
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test011.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test011.expected	2007/05/03 15:02:47	1.1.2.4
@@ -97,27 +97,20 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
     }
@@ -125,7 +118,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test012.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test012.expected	2007/05/03 15:02:47	1.1.2.4
@@ -97,35 +97,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
     }
@@ -133,7 +124,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test013.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test013.expected	2007/05/03 15:02:47	1.1.2.4
@@ -97,35 +97,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -133,7 +124,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test014.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test014.expected	2007/05/03 15:02:47	1.1.2.4
@@ -121,35 +121,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -157,7 +148,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -172,35 +162,26 @@
     mountpoint = "/mnt/cluster2";
     device = "/dev/sdb9";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb9";
       path = "/mnt/cluster2";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "ro";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -208,7 +189,6 @@
   ip {
     address = "192.168.1.4";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test015.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test015.expected	2007/05/03 15:02:47	1.1.2.4
@@ -121,35 +121,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -157,7 +148,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -172,35 +162,26 @@
     mountpoint = "/mnt/cluster2";
     device = "/dev/sdb9";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb9";
       path = "/mnt/cluster2";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -208,7 +189,6 @@
   ip {
     address = "192.168.1.4";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test016.expected	2007/03/20 19:40:06	1.1.2.3
+++ cluster/rgmanager/src/daemons/tests/test016.expected	2007/05/03 15:02:47	1.1.2.4
@@ -122,35 +122,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -158,7 +149,6 @@
   ip {
     address = "192.168.1.3";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
@@ -173,35 +163,26 @@
     mountpoint = "/mnt/cluster2";
     device = "/dev/sdb9";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb9";
       path = "/mnt/cluster2";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -209,7 +190,6 @@
   ip {
     address = "192.168.1.4";
     monitor_link = "yes";
-    nfslock = "(null)";
   }
   script {
     name = "initscript";
--- cluster/rgmanager/src/daemons/tests/test017.expected	2007/03/22 23:16:43	1.1.2.1
+++ cluster/rgmanager/src/daemons/tests/test017.expected	2007/05/03 15:02:47	1.1.2.2
@@ -135,35 +135,26 @@
     mountpoint = "/mnt/cluster";
     device = "/dev/sdb8";
     fstype = "ext3";
-    nfslock = "(null)";
     nfsexport {
       name = "Dummy Export";
       device = "/dev/sdb8";
       path = "/mnt/cluster";
-      fsid = "(null)";
-      nfslock = "(null)";
       nfsclient {
         name = "Admin group";
         target = "@admin";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
       nfsclient {
         name = "User group";
         target = "@users";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw,sync";
       }
       nfsclient {
         name = "red";
         target = "red";
         path = "/mnt/cluster";
-        fsid = "(null)";
-        nfslock = "(null)";
         options = "rw";
       }
     }
@@ -183,42 +174,32 @@
     ip {
       address = "192.168.1.3";
       monitor_link = "yes";
-      nfslock = "(null)";
     }
     fs {
       name = "mount2";
       mountpoint = "/mnt/cluster2";
       device = "/dev/sdb9";
       fstype = "ext3";
-      nfslock = "(null)";
       nfsexport {
         name = "Dummy Export";
         device = "/dev/sdb9";
         path = "/mnt/cluster2";
-        fsid = "(null)";
-        nfslock = "(null)";
         nfsclient {
           name = "Admin group";
           target = "@admin";
           path = "/mnt/cluster2";
-          fsid = "(null)";
-          nfslock = "(null)";
           options = "rw";
         }
         nfsclient {
           name = "User group";
           target = "@users";
           path = "/mnt/cluster2";
-          fsid = "(null)";
-          nfslock = "(null)";
           options = "rw,sync";
         }
         nfsclient {
           name = "red";
           target = "red";
           path = "/mnt/cluster2";
-          fsid = "(null)";
-          nfslock = "(null)";
           options = "rw";
         }
       }
@@ -226,12 +207,10 @@
     script {
       name = "script2";
       file = "/etc/init.d/script2";
-      service_name = "(null)";
     }
     ip {
       address = "192.168.1.4";
       monitor_link = "yes";
-      nfslock = "(null)";
     }
   }
   script {
--- cluster/rgmanager/src/resources/fs.sh	2007/02/15 22:46:00	1.4.2.21
+++ cluster/rgmanager/src/resources/fs.sh	2007/05/03 15:02:47	1.4.2.22
@@ -145,7 +145,7 @@
 	    <content type="boolean"/>
 	</parameter>
 
-	<parameter name="nfslock" inherit="nfslock">
+	<parameter name="nfslock" inherit="service%nfslock">
 	    <longdesc lang="en">
 	        If set and unmounting the file system fails, the node will
 		try to kill lockd and issue reclaims across all remaining
--- cluster/rgmanager/src/resources/nfsclient.sh	2007/02/21 20:54:51	1.3.2.11
+++ cluster/rgmanager/src/resources/nfsclient.sh	2007/05/03 15:02:47	1.3.2.12
@@ -95,7 +95,7 @@
             <content type="string"/>
         </parameter>
 
-        <parameter name="nfslock" inherit="nfsexport%nfslock">
+        <parameter name="nfslock" inherit="service%nfslock">
             <longdesc lang="en">
 	    	This tells us whether the service in question has the
 		NFS lock workarounds enabled.  If so, we always unexport
--- cluster/rgmanager/src/resources/nfsexport.sh	2006/06/16 19:57:52	1.4.2.5
+++ cluster/rgmanager/src/resources/nfsexport.sh	2007/05/03 15:02:47	1.4.2.6
@@ -98,7 +98,7 @@
 	    <content type="string"/>
         </parameter>
 
-        <parameter name="nfslock" inherit="nfslock">
+        <parameter name="nfslock" inherit="service%nfslock">
             <longdesc lang="en">
                 If you can see this, your GUI is broken.
 		This inherits an unspecified nfslock parameter so that
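
[Editor's note: the three resource-agent changes above all retarget nfslock inheritance. Instead of inheriting the attribute from whatever the immediate parent happens to provide, the inherit="service%nfslock" form names the resource type to inherit from, so setting nfslock once on the top-level service is picked up by fs, nfsexport and nfsclient at any nesting depth. A toy resolver for that "type%attr" notation might look like the following; this is entirely hypothetical code, the real lookup lives in rgmanager's restree.c/resrules.c.]

#include <stdio.h>
#include <string.h>

struct node {
    const char *type;
    const char *nfslock;        /* the only attribute in this toy */
    struct node *parent;
};

/* Walk up the tree until a node of the type named before '%'
 * carries the (single, hard-coded) attribute. */
static const char *inherit_attr(struct node *n, const char *spec)
{
    char type[32];
    const char *pct = strchr(spec, '%');

    if (!pct)
        return NULL;
    snprintf(type, sizeof(type), "%.*s", (int)(pct - spec), spec);
    for (n = n->parent; n; n = n->parent)
        if (!strcmp(n->type, type) && n->nfslock)
            return n->nfslock;
    return NULL;
}

int main(void)
{
    struct node svc = { "service", "1", NULL };
    struct node fs  = { "fs", NULL, &svc };
    struct node cli = { "nfsclient", NULL, &fs };

    /* Prints "nfslock = 1": the value set on the service is found
     * two levels up from the nfsclient. */
    printf("nfslock = %s\n", inherit_attr(&cli, "service%nfslock"));
    return 0;
}
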
--- cluster/rgmanager/src/utils/clulog.c	2006/08/18 20:35:14	1.1.2.2
+++ cluster/rgmanager/src/utils/clulog.c	2007/05/03 15:02:47	1.1.2.3
@@ -123,6 +123,12 @@
     /* Add two bytes for linefeed and NULL terminator */
     len = strlen(argv[argc-1]) + 2;
     logmsg = (char*)malloc(strlen(argv[argc-1])+2);
+    if (logmsg == NULL) {
+        fprintf(stderr,
+            "clulog: malloc failed (errno=%d)\n", errno);
+        exit(1);
+    }
+
     snprintf(logmsg, len, "%s\n", argv[argc-1]);
 
     if (!cmdline_loglevel) {
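
[Editor's note: the same guarded-allocation idiom, rendered as a stand-alone program; the function name and message are illustrative, not from the patch.]

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Build "<msg>\n" in a freshly allocated buffer; on allocation
 * failure, report the error and exit nonzero so callers can tell
 * that logging actually failed. */
static char *build_log_line(const char *msg)
{
    size_t len = strlen(msg) + 2;      /* newline + NUL terminator */
    char *buf = malloc(len);

    if (buf == NULL) {
        fprintf(stderr, "malloc failed: %s\n", strerror(errno));
        exit(EXIT_FAILURE);
    }
    snprintf(buf, len, "%s\n", msg);
    return buf;
}

int main(int argc, char **argv)
{
    char *line = build_log_line(argc > 1 ? argv[argc - 1] : "hello");

    fputs(line, stdout);
    free(line);
    return 0;
}
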


