All of lore.kernel.org
 help / color / mirror / Atom feed
From: lhh@sourceware.org <lhh@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h  ...
Date: 31 May 2007 18:58:47 -0000	[thread overview]
Message-ID: <20070531185847.12042.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL5
Changes by:	lhh at sourceware.org	2007-05-31 18:58:46

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: reslist.h 
	rgmanager/src/daemons: groups.c resrules.c restree.c 
	rgmanager/src/resources: script.sh 

Log message:
	Fix bugzilla #229650; implement __independent_subtree feature

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.31.2.9&r2=1.31.2.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.2&r2=1.15.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.5&r2=1.25.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.16.2.4&r2=1.16.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.23.2.3&r2=1.23.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8&r2=1.8.2.1

--- cluster/rgmanager/ChangeLog	2007/05/31 18:38:44	1.31.2.9
+++ cluster/rgmanager/ChangeLog	2007/05/31 18:58:46	1.31.2.10
@@ -1,6 +1,8 @@
 2007-05-31 Lon Hohberger <lhh@redhat.com>
 	* src/daemons/resrules.c: Fix #234249 - ignore obvious backup files
 	in /usr/share/cluster when processing resource rules
+	* src/daemons/restree.c, src/daemons/groups.c, include/reslist.h: 
+	Implement independent subtrees, per bug #229650
 
 2007-05-22 Lon Hohberger <lhh@redhat.com>
 	* src/resources/SAPInstance, SAPDatabase: Add primary attrs
--- cluster/rgmanager/include/reslist.h	2007/03/23 00:06:34	1.15.2.2
+++ cluster/rgmanager/include/reslist.h	2007/05/31 18:58:46	1.15.2.3
@@ -35,6 +35,8 @@
 #define RF_NEEDSTART	(1<<2)	/** Used when adding/changing resources */
 #define RF_NEEDSTOP	(1<<3)  /** Used when deleting/changing resources */
 #define RF_COMMON	(1<<4)	/** " */
+#define RF_INDEPENDENT	(1<<5)  /** Define this for a resource if it is
+				  otherwise an independent subtree */
 
 #define RES_STOPPED	(0)
 #define RES_STARTED	(1)
@@ -56,10 +58,10 @@
 
 
 typedef struct _resource_attribute {
-	int	ra_flags;
-	/* XXX possible alignment problem on ia64 */
 	char	*ra_name;
 	char	*ra_value;
+	int	ra_flags;
+	int	_pad_;
 } resource_attr_t;
 
 
@@ -78,6 +80,7 @@
 	time_t	ra_last;
 	time_t	ra_interval;
 	int	ra_depth;
+	int 	_pad_;
 } resource_act_t;
 
 
--- cluster/rgmanager/src/daemons/groups.c	2007/05/10 16:23:43	1.25.2.5
+++ cluster/rgmanager/src/daemons/groups.c	2007/05/31 18:58:46	1.25.2.6
@@ -813,6 +813,7 @@
 	}
 	pthread_rwlock_unlock(&resource_lock);
 
+#if 0
 	/*
 	   Do NOT return error codes if we failed to stop for one of these
 	   reasons.  It didn't start, either, so it's safe to assume that
@@ -830,6 +831,7 @@
 			break;
 		}
 	}
+#endif
 
 	return ret;
 }
--- cluster/rgmanager/src/daemons/resrules.c	2007/05/31 18:37:50	1.16.2.4
+++ cluster/rgmanager/src/daemons/resrules.c	2007/05/31 18:58:46	1.16.2.5
@@ -262,6 +262,7 @@
 		acts[0].ra_depth = depth;
 		acts[0].ra_timeout = timeout;
 		acts[0].ra_interval = interval;
+		acts[0].ra_last = 0;
 		acts[1].ra_name = NULL;
 
 		*actsp = acts;
@@ -271,7 +272,7 @@
 	for (x = 0; acts[x].ra_name; x++) {
 		if (!strcmp(acts[x].ra_name, name) &&
 		    (depth == acts[x].ra_depth || depth == -1)) {
-			printf("Replacing action '%s' depth %d: ",
+			fprintf(stderr, "Replacing action '%s' depth %d: ",
 			       name, acts[x].ra_depth);
 			if (timeout >= 0) {
 				printf("timeout: %d->%d ",
@@ -306,6 +307,7 @@
 	acts[x].ra_depth = depth;
 	acts[x].ra_timeout = timeout;
 	acts[x].ra_interval = interval;
+	acts[x].ra_last = 0;
 
 	acts[x+1].ra_name = NULL;
 
--- cluster/rgmanager/src/daemons/restree.c	2007/05/03 15:14:16	1.23.2.3
+++ cluster/rgmanager/src/daemons/restree.c	2007/05/31 18:58:46	1.23.2.4
@@ -39,6 +39,9 @@
 void malloc_zap_mutex(void);
 #endif
 
+#define FL_FAILURE	0x1
+#define FL_RECOVERABLE	0x2
+
 
 /* XXX from resrules.c */
 int store_childtype(resource_child_t **childp, char *name, int start,
@@ -507,6 +510,19 @@
 	node->rn_resource = curres;
 	node->rn_state = RES_STOPPED;
 	node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
+
+	snprintf(tok, sizeof(tok), "%s/@__independent_subtree", base);
+#ifndef NO_CCS
+	if (ccs_get(ccsfd, tok, &ref) == 0) {
+#else
+	if (conf_get(tok, &ref) == 0) {
+#endif
+		if (atoi(ref) > 0 || strcasecmp(ref, "yes") == 0)
+			node->rn_flags |= RF_INDEPENDENT;
+		free(ref);
+	}
+
+
 	curres->r_refs++;
 
 	*newnode = node;
@@ -718,7 +734,6 @@
 		    resource_rule_t **rulelist,
 		    resource_t **reslist)
 {
-	resource_rule_t *curr;
 	resource_node_t *root = NULL;
 	char tok[512];
 
@@ -777,6 +792,8 @@
 				printf("NEEDSTART ");
 			if (node->rn_flags & RF_COMMON)
 				printf("COMMON ");
+			if (node->rn_flags & RF_INDEPENDENT)
+				printf("INDEPENDENT ");
 			printf("]");
 		}
 		printf(" {\n");
@@ -838,10 +855,11 @@
 #endif
 
 			/* Do op on all children at our level */
-			rv += _res_op(&node->rn_child, first,
+			rv |= _res_op(&node->rn_child, first,
 			     	     rule->rr_childtypes[x].rc_name, 
 		     		     ret, op);
-			if (rv != 0 && op != RS_STOP)
+
+			if (rv & FL_FAILURE && op != RS_STOP)
 				return rv;
 		}
 
@@ -853,46 +871,6 @@
 }
 
 
-#if 0
-static inline int
-_do_child_default_level(resource_node_t **tree, resource_t *first,
-			void *ret, int op)
-{
-	resource_node_t *node = *tree;
-	resource_t *res = node->rn_resource;
-	resource_rule_t *rule = res->r_rule;
-	int x, rv = 0, lev;
-
-	for (x = 0; rule->rr_childtypes &&
-	     rule->rr_childtypes[x].rc_name; x++) {
-
-		if(op == RS_STOP)
-			lev = rule->rr_childtypes[x].rc_stoplevel;
-		else
-			lev = rule->rr_childtypes[x].rc_startlevel;
-
-		if (lev)
-			continue;
-
-		/*
-		printf("%s children of %s type %s (default level)\n",
-		       agent_op_str(op),
-		       node->rn_resource->r_rule->rr_type,
-		       rule->rr_childtypes[x].rc_name);
-		 */
-
-		rv = _res_op(&node->rn_child, first,
-			     rule->rr_childtypes[x].rc_name, 
-			     ret, op);
-		if (rv != 0)
-			return rv;
-	}
-
-	return 0;
-}
-#endif
-
-
 static inline int
 _xx_child_internal(resource_node_t *node, resource_t *first,
 		   resource_node_t *child, void *ret, int op)
@@ -926,13 +904,14 @@
 
 	if (op == RS_START || op == RS_STATUS) {
 		list_for(&node->rn_child, child, y) {
-			rv = _xx_child_internal(node, first, child, ret, op);
-			if (rv)
+			rv |= _xx_child_internal(node, first, child, ret, op);
+
+			if (rv & FL_FAILURE)
 				return rv;
 		}
 	} else {
 		list_for_rev(&node->rn_child, child, y) {
-			rv += _xx_child_internal(node, first, child, ret, op);
+			rv |= _xx_child_internal(node, first, child, ret, op);
 		}
 	}
 
@@ -973,7 +952,7 @@
 
 	if (op == RS_START || op == RS_STATUS) {
 		rv =  _do_child_levels(tree, first, ret, op);
-	       	if (rv != 0)
+	       	if (rv & FL_FAILURE)
 			return rv;
 
 		/* Start default level after specified ones */
@@ -992,6 +971,22 @@
 }
 
 
+void
+mark_nodes(resource_node_t *node, int state, int flags)
+{
+	int x;
+	resource_node_t *child;
+
+	list_for(&node->rn_child, child, x) {
+		if (child->rn_child)
+			mark_nodes(child->rn_child, state, flags);
+	}
+
+	node->rn_state = state;
+	node->rn_flags |= (RF_NEEDSTART | RF_NEEDSTOP);
+}
+
+
 /**
    Do a status on a resource node.  This takes into account the last time the
    status operation was run and selects the highest possible resource depth
@@ -1123,130 +1118,6 @@
 			in the subtree).
    @see			_res_op_by_level res_exec
  */
-#if 0
-int
-_res_op(resource_node_t **tree, resource_t *first,
-	char *type, void * __attribute__((unused))ret, int realop)
-{
-	int rv, me;
-	resource_node_t *node;
-	int op;
-
-	list_do(tree, node) {
-
-		/* Restore default operation. */
-		op = realop;
-
-		/* If we're starting by type, do that funky thing. */
-		if (type && strlen(type) &&
-		    strcmp(node->rn_resource->r_rule->rr_type, type))
-			continue;
-
-		/* If the resource is found, all nodes in the subtree must
-		   have the operation performed as well. */
-		me = !first || (node->rn_resource == first);
-
-		/*
-		printf("begin %s: %s %s [0x%x]\n", agent_op_str(op),
-		       node->rn_resource->r_rule->rr_type,
-		       primary_attr_value(node->rn_resource),
-		       node->rn_flags);
-		 */
-
-		if (me) {
-			/*
-			   If we've been marked as a node which
-			   needs to be started or stopped, clear
-			   that flag and start/stop this resource
-			   and all resource babies.
-
-			   Otherwise, don't do anything; look for
-			   children with RF_NEEDSTART and
-			   RF_NEEDSTOP flags.
-
-			   CONDSTART and CONDSTOP are no-ops if
-			   the appropriate flag is not set.
-			 */
-		       	if ((op == RS_CONDSTART) &&
-			    (node->rn_flags & RF_NEEDSTART)) {
-				/*
-				printf("Node %s:%s - CONDSTART\n",
-				       node->rn_resource->r_rule->rr_type,
-				       primary_attr_value(node->rn_resource));
-				 */
-				op = RS_START;
-			}
-
-			if ((op == RS_CONDSTOP) &&
-			    (node->rn_flags & RF_NEEDSTOP)) {
-				/*
-				printf("Node %s:%s - CONDSTOP\n",
-				       node->rn_resource->r_rule->rr_type,
-				       primary_attr_value(node->rn_resource));
-				 */
-				op = RS_STOP;
-			}
-		}
-
-		/* Start starts before children */
-		if (me && (op == RS_START)) {
-			node->rn_flags &= ~RF_NEEDSTART;
-
-			rv = res_exec(node, agent_op_str(op), NULL, 0);
-			if (rv != 0) {
-				node->rn_state = RES_FAILED;
-				return rv;
-			}
-
-			set_time("start", 0, node);
-			clear_checks(node);
-
-			if (node->rn_state != RES_STARTED) {
-				++node->rn_resource->r_incarnations;
-				node->rn_state = RES_STARTED;
-			}
-		}
-
-		if (node->rn_child) {
-			rv = _res_op_by_level(&node, me?NULL:first, ret, op);
-			if (rv != 0)
-				return rv;
-		}
-
-		/* Stop/status/etc stops after children have stopped */
-		if (me && (op == RS_STOP)) {
-			node->rn_flags &= ~RF_NEEDSTOP;
-			rv = res_exec(node, agent_op_str(op), NULL, 0);
-
-			if (rv != 0) {
-				node->rn_state = RES_FAILED;
-				return rv;
-			}
-
-			if (node->rn_state != RES_STOPPED) {
-				--node->rn_resource->r_incarnations;
-				node->rn_state = RES_STOPPED;
-			}
-
-		} else if (me && (op == RS_STATUS)) {
-
-			rv = do_status(node);
-			if (rv != 0)
-				return rv;
-		}
-
-		/*
-		printf("end %s: %s %s\n", agent_op_str(op),
-		       node->rn_resource->r_rule->rr_type,
-		       primary_attr_value(node->rn_resource));
-		 */
-	} while (!list_done(tree, node));
-
-	return 0;
-}
-#endif
-
-
 static inline int
 _res_op_internal(resource_node_t **tree, resource_t *first,
 		 char *type, void *__attribute__((unused))ret, int realop,
@@ -1309,7 +1180,7 @@
 		rv = res_exec(node, agent_op_str(op), NULL, 0);
 		if (rv != 0) {
 			node->rn_state = RES_FAILED;
-			return rv;
+			return FL_FAILURE;
 		}
 
 		set_time("start", 0, node);
@@ -1322,14 +1193,43 @@
 	} else if (me && (op == RS_STATUS)) {
 		/* Check status before children*/
 		rv = do_status(node);
-		if (rv != 0)
-			return rv;
+		if (rv != 0) {
+			/*
+			   If this node's status has failed, all of its
+			   dependent children are failed, whether or not this
+			   node is independent or not.
+			 */
+			mark_nodes(node, RES_FAILED,
+				   RF_NEEDSTART | RF_NEEDSTOP);
+
+			/* If we're an independent subtree, return a flag
+			   stating that this section is recoverable apart
+			   from siblings in the resource tree.  All child
+			   resources of this node must be restarted,
+			   but siblings of this node are not affected. */
+			if (node->rn_flags & RF_INDEPENDENT)
+				return FL_RECOVERABLE;
+
+			return FL_FAILURE;
+		}
+
 	}
 
 	if (node->rn_child) {
 		rv = _res_op_by_level(&node, me?NULL:first, ret, op);
-		if (rv != 0)
-			return rv;
+		if (rv != 0) {
+			mark_nodes(node, RES_FAILED,
+				   RF_NEEDSTART | RF_NEEDSTOP);
+
+			/* If this node is independent of its siblings,
+			   that one of its dependent children failed
+			   does not matter: its dependent children must
+			   also be independent of this node's siblings. */
+			if (node->rn_flags & RF_INDEPENDENT)
+				return FL_RECOVERABLE;
+
+			return FL_FAILURE;
+		}
 	}
 
 	/* Stop should occur after children have stopped */
@@ -1339,7 +1239,7 @@
 
 		if (rv != 0) {
 			node->rn_state = RES_FAILED;
-			return rv;
+			return FL_FAILURE;
 		}
 
 		if (node->rn_state != RES_STOPPED) {
@@ -1378,24 +1278,31 @@
 	char *type, void * __attribute__((unused))ret, int realop)
 {
   	resource_node_t *node;
- 	int count = 0, rv;
+ 	int count = 0, rv = 0;
  	
  	if (realop == RS_STOP) {
  		list_for_rev(tree, node, count) {
- 			rv = _res_op_internal(tree, first, type, ret, realop,
- 					      node);
- 			if (rv != 0) 
- 				return rv;
+ 			rv |= _res_op_internal(tree, first, type, ret, realop,
+ 					       node);
  		}
  	} else {
  		list_for(tree, node, count) {
- 			rv = _res_op_internal(tree, first, type, ret, realop,
- 					      node);
- 			if (rv != 0) 
+ 			rv |= _res_op_internal(tree, first, type, ret, realop,
+ 					       node);
+
+			/* If we hit a problem during a 'status' op in an
+			   independent subtree, rv will have the
+			   FL_RECOVERABLE bit set, but not FL_FAILURE.
+			   If we ever hit FL_FAILURE during a status
+			   operation, we're *DONE* - even if the subtree
+			   is flagged w/ indy-subtree */
+			  
+ 			if (rv & FL_FAILURE) 
  				return rv;
  		}
  	}
-	return 0;
+
+	return rv;
 }
 
 /**
@@ -1464,7 +1371,30 @@
 int
 res_status(resource_node_t **tree, resource_t *res, void *ret)
 {
-	return _res_op(tree, res, NULL, ret, RS_STATUS);
+	int rv;
+	rv = _res_op(tree, res, NULL, ret, RS_STATUS);
+
+	if (rv & FL_FAILURE)
+		return rv;
+
+	clulog(LOG_WARNING, "Some independent resources in %s:%s failed; "
+	       "Attempting inline recovery\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+
+	rv = res_condstop(tree, res, ret);
+	if (rv & FL_FAILURE)
+		goto out_fail;
+	rv = res_condstart(tree, res, ret);
+	if (rv & FL_FAILURE)
+		goto out_fail;
+
+	clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+	return 0;
+out_fail:
+	clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n",
+	       res->r_rule->rr_type, res->r_attrs->ra_value);
+	return 1;
 }
 
 
--- cluster/rgmanager/src/resources/script.sh	2006/08/18 15:26:23	1.8
+++ cluster/rgmanager/src/resources/script.sh	2007/05/31 18:58:46	1.8.2.1
@@ -115,5 +115,5 @@
 declare -i rv=$?
 if [ $rv -ne 0 ]; then
 	ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)"
-	return $OCF_ERR_GENERIC
+	exit $OCF_ERR_GENERIC
 fi



             reply	other threads:[~2007-05-31 18:58 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-05-31 18:58 lhh [this message]
  -- strict thread matches above, loose matches on Subject: below --
2007-11-26 21:46 [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h lhh
2007-08-02 14:53 lhh
2007-08-02 14:47 lhh
2007-08-02 14:46 lhh
2007-05-31 19:08 lhh
2007-05-03 15:02 lhh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070531185847.12042.qmail@sourceware.org \
    --to=lhh@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.