From: lhh@sourceware.org <lhh@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h ...
Date: 31 May 2007 18:58:47 -0000 [thread overview]
Message-ID: <20070531185847.12042.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: lhh at sourceware.org 2007-05-31 18:58:46
Modified files:
rgmanager : ChangeLog
rgmanager/include: reslist.h
rgmanager/src/daemons: groups.c resrules.c restree.c
rgmanager/src/resources: script.sh
Log message:
Fix bugzilla #229650; implement __independent_subtree feature
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.31.2.9&r2=1.31.2.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.2&r2=1.15.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.5&r2=1.25.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.16.2.4&r2=1.16.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.23.2.3&r2=1.23.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8&r2=1.8.2.1
--- cluster/rgmanager/ChangeLog 2007/05/31 18:38:44 1.31.2.9
+++ cluster/rgmanager/ChangeLog 2007/05/31 18:58:46 1.31.2.10
@@ -1,6 +1,8 @@
2007-05-31 Lon Hohberger <lhh@redhat.com>
* src/daemons/resrules.c: Fix #234249 - ignore obvious backup files
in /usr/share/cluster when processing resource rules
+ * src/daemons/restree.c, src/daemons/groups.c, include/reslist.h:
+ Implement independent subtrees, per bug #229650
2007-05-22 Lon Hohberger <lhh@redhat.com>
* src/resources/SAPInstance, SAPDatabase: Add primary attrs
--- cluster/rgmanager/include/reslist.h 2007/03/23 00:06:34 1.15.2.2
+++ cluster/rgmanager/include/reslist.h 2007/05/31 18:58:46 1.15.2.3
@@ -35,6 +35,8 @@
#define RF_NEEDSTART (1<<2) /** Used when adding/changing resources */
#define RF_NEEDSTOP (1<<3) /** Used when deleting/changing resources */
#define RF_COMMON (1<<4) /** " */
+#define RF_INDEPENDENT (1<<5) /** Define this for a resource if it is
+ otherwise an independent subtree */
#define RES_STOPPED (0)
#define RES_STARTED (1)
@@ -56,10 +58,10 @@
typedef struct _resource_attribute {
- int ra_flags;
- /* XXX possible alignment problem on ia64 */
char *ra_name;
char *ra_value;
+ int ra_flags;
+ int _pad_;
} resource_attr_t;
@@ -78,6 +80,7 @@
time_t ra_last;
time_t ra_interval;
int ra_depth;
+ int _pad_;
} resource_act_t;
--- cluster/rgmanager/src/daemons/groups.c 2007/05/10 16:23:43 1.25.2.5
+++ cluster/rgmanager/src/daemons/groups.c 2007/05/31 18:58:46 1.25.2.6
@@ -813,6 +813,7 @@
}
pthread_rwlock_unlock(&resource_lock);
+#if 0
/*
Do NOT return error codes if we failed to stop for one of these
reasons. It didn't start, either, so it's safe to assume that
@@ -830,6 +831,7 @@
break;
}
}
+#endif
return ret;
}
--- cluster/rgmanager/src/daemons/resrules.c 2007/05/31 18:37:50 1.16.2.4
+++ cluster/rgmanager/src/daemons/resrules.c 2007/05/31 18:58:46 1.16.2.5
@@ -262,6 +262,7 @@
acts[0].ra_depth = depth;
acts[0].ra_timeout = timeout;
acts[0].ra_interval = interval;
+ acts[0].ra_last = 0;
acts[1].ra_name = NULL;
*actsp = acts;
@@ -271,7 +272,7 @@
for (x = 0; acts[x].ra_name; x++) {
if (!strcmp(acts[x].ra_name, name) &&
(depth == acts[x].ra_depth || depth == -1)) {
- printf("Replacing action '%s' depth %d: ",
+ fprintf(stderr, "Replacing action '%s' depth %d: ",
name, acts[x].ra_depth);
if (timeout >= 0) {
printf("timeout: %d->%d ",
@@ -306,6 +307,7 @@
acts[x].ra_depth = depth;
acts[x].ra_timeout = timeout;
acts[x].ra_interval = interval;
+ acts[x].ra_last = 0;
acts[x+1].ra_name = NULL;
--- cluster/rgmanager/src/daemons/restree.c 2007/05/03 15:14:16 1.23.2.3
+++ cluster/rgmanager/src/daemons/restree.c 2007/05/31 18:58:46 1.23.2.4
@@ -39,6 +39,9 @@
void malloc_zap_mutex(void);
#endif
+#define FL_FAILURE 0x1
+#define FL_RECOVERABLE 0x2
+
/* XXX from resrules.c */
int store_childtype(resource_child_t **childp, char *name, int start,
@@ -507,6 +510,19 @@
node->rn_resource = curres;
node->rn_state = RES_STOPPED;
node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
+
+ snprintf(tok, sizeof(tok), "%s/@__independent_subtree", base);
+#ifndef NO_CCS
+ if (ccs_get(ccsfd, tok, &ref) == 0) {
+#else
+ if (conf_get(tok, &ref) == 0) {
+#endif
+ if (atoi(ref) > 0 || strcasecmp(ref, "yes") == 0)
+ node->rn_flags |= RF_INDEPENDENT;
+ free(ref);
+ }
+
+
curres->r_refs++;
*newnode = node;
@@ -718,7 +734,6 @@
resource_rule_t **rulelist,
resource_t **reslist)
{
- resource_rule_t *curr;
resource_node_t *root = NULL;
char tok[512];
@@ -777,6 +792,8 @@
printf("NEEDSTART ");
if (node->rn_flags & RF_COMMON)
printf("COMMON ");
+ if (node->rn_flags & RF_INDEPENDENT)
+ printf("INDEPENDENT ");
printf("]");
}
printf(" {\n");
@@ -838,10 +855,11 @@
#endif
/* Do op on all children at our level */
- rv += _res_op(&node->rn_child, first,
+ rv |= _res_op(&node->rn_child, first,
rule->rr_childtypes[x].rc_name,
ret, op);
- if (rv != 0 && op != RS_STOP)
+
+ if (rv & FL_FAILURE && op != RS_STOP)
return rv;
}
@@ -853,46 +871,6 @@
}
-#if 0
-static inline int
-_do_child_default_level(resource_node_t **tree, resource_t *first,
- void *ret, int op)
-{
- resource_node_t *node = *tree;
- resource_t *res = node->rn_resource;
- resource_rule_t *rule = res->r_rule;
- int x, rv = 0, lev;
-
- for (x = 0; rule->rr_childtypes &&
- rule->rr_childtypes[x].rc_name; x++) {
-
- if(op == RS_STOP)
- lev = rule->rr_childtypes[x].rc_stoplevel;
- else
- lev = rule->rr_childtypes[x].rc_startlevel;
-
- if (lev)
- continue;
-
- /*
- printf("%s children of %s type %s (default level)\n",
- agent_op_str(op),
- node->rn_resource->r_rule->rr_type,
- rule->rr_childtypes[x].rc_name);
- */
-
- rv = _res_op(&node->rn_child, first,
- rule->rr_childtypes[x].rc_name,
- ret, op);
- if (rv != 0)
- return rv;
- }
-
- return 0;
-}
-#endif
-
-
static inline int
_xx_child_internal(resource_node_t *node, resource_t *first,
resource_node_t *child, void *ret, int op)
@@ -926,13 +904,14 @@
if (op == RS_START || op == RS_STATUS) {
list_for(&node->rn_child, child, y) {
- rv = _xx_child_internal(node, first, child, ret, op);
- if (rv)
+ rv |= _xx_child_internal(node, first, child, ret, op);
+
+ if (rv & FL_FAILURE)
return rv;
}
} else {
list_for_rev(&node->rn_child, child, y) {
- rv += _xx_child_internal(node, first, child, ret, op);
+ rv |= _xx_child_internal(node, first, child, ret, op);
}
}
@@ -973,7 +952,7 @@
if (op == RS_START || op == RS_STATUS) {
rv = _do_child_levels(tree, first, ret, op);
- if (rv != 0)
+ if (rv & FL_FAILURE)
return rv;
/* Start default level after specified ones */
@@ -992,6 +971,22 @@
}
+void
+mark_nodes(resource_node_t *node, int state, int flags)
+{
+ int x;
+ resource_node_t *child;
+
+ list_for(&node->rn_child, child, x) {
+ if (child->rn_child)
+ mark_nodes(child->rn_child, state, flags);
+ }
+
+ node->rn_state = state;
+ node->rn_flags |= (RF_NEEDSTART | RF_NEEDSTOP);
+}
+
+
/**
Do a status on a resource node. This takes into account the last time the
status operation was run and selects the highest possible resource depth
@@ -1123,130 +1118,6 @@
in the subtree).
@see _res_op_by_level res_exec
*/
-#if 0
-int
-_res_op(resource_node_t **tree, resource_t *first,
- char *type, void * __attribute__((unused))ret, int realop)
-{
- int rv, me;
- resource_node_t *node;
- int op;
-
- list_do(tree, node) {
-
- /* Restore default operation. */
- op = realop;
-
- /* If we're starting by type, do that funky thing. */
- if (type && strlen(type) &&
- strcmp(node->rn_resource->r_rule->rr_type, type))
- continue;
-
- /* If the resource is found, all nodes in the subtree must
- have the operation performed as well. */
- me = !first || (node->rn_resource == first);
-
- /*
- printf("begin %s: %s %s [0x%x]\n", agent_op_str(op),
- node->rn_resource->r_rule->rr_type,
- primary_attr_value(node->rn_resource),
- node->rn_flags);
- */
-
- if (me) {
- /*
- If we've been marked as a node which
- needs to be started or stopped, clear
- that flag and start/stop this resource
- and all resource babies.
-
- Otherwise, don't do anything; look for
- children with RF_NEEDSTART and
- RF_NEEDSTOP flags.
-
- CONDSTART and CONDSTOP are no-ops if
- the appropriate flag is not set.
- */
- if ((op == RS_CONDSTART) &&
- (node->rn_flags & RF_NEEDSTART)) {
- /*
- printf("Node %s:%s - CONDSTART\n",
- node->rn_resource->r_rule->rr_type,
- primary_attr_value(node->rn_resource));
- */
- op = RS_START;
- }
-
- if ((op == RS_CONDSTOP) &&
- (node->rn_flags & RF_NEEDSTOP)) {
- /*
- printf("Node %s:%s - CONDSTOP\n",
- node->rn_resource->r_rule->rr_type,
- primary_attr_value(node->rn_resource));
- */
- op = RS_STOP;
- }
- }
-
- /* Start starts before children */
- if (me && (op == RS_START)) {
- node->rn_flags &= ~RF_NEEDSTART;
-
- rv = res_exec(node, agent_op_str(op), NULL, 0);
- if (rv != 0) {
- node->rn_state = RES_FAILED;
- return rv;
- }
-
- set_time("start", 0, node);
- clear_checks(node);
-
- if (node->rn_state != RES_STARTED) {
- ++node->rn_resource->r_incarnations;
- node->rn_state = RES_STARTED;
- }
- }
-
- if (node->rn_child) {
- rv = _res_op_by_level(&node, me?NULL:first, ret, op);
- if (rv != 0)
- return rv;
- }
-
- /* Stop/status/etc stops after children have stopped */
- if (me && (op == RS_STOP)) {
- node->rn_flags &= ~RF_NEEDSTOP;
- rv = res_exec(node, agent_op_str(op), NULL, 0);
-
- if (rv != 0) {
- node->rn_state = RES_FAILED;
- return rv;
- }
-
- if (node->rn_state != RES_STOPPED) {
- --node->rn_resource->r_incarnations;
- node->rn_state = RES_STOPPED;
- }
-
- } else if (me && (op == RS_STATUS)) {
-
- rv = do_status(node);
- if (rv != 0)
- return rv;
- }
-
- /*
- printf("end %s: %s %s\n", agent_op_str(op),
- node->rn_resource->r_rule->rr_type,
- primary_attr_value(node->rn_resource));
- */
- } while (!list_done(tree, node));
-
- return 0;
-}
-#endif
-
-
static inline int
_res_op_internal(resource_node_t **tree, resource_t *first,
char *type, void *__attribute__((unused))ret, int realop,
@@ -1309,7 +1180,7 @@
rv = res_exec(node, agent_op_str(op), NULL, 0);
if (rv != 0) {
node->rn_state = RES_FAILED;
- return rv;
+ return FL_FAILURE;
}
set_time("start", 0, node);
@@ -1322,14 +1193,43 @@
} else if (me && (op == RS_STATUS)) {
/* Check status before children*/
rv = do_status(node);
- if (rv != 0)
- return rv;
+ if (rv != 0) {
+ /*
+ If this node's status has failed, all of its
+ dependent children are failed, whether or not this
+ node is independent.
+ */
+ mark_nodes(node, RES_FAILED,
+ RF_NEEDSTART | RF_NEEDSTOP);
+
+ /* If we're an independent subtree, return a flag
+ stating that this section is recoverable apart
+ from siblings in the resource tree. All child
+ resources of this node must be restarted,
+ but siblings of this node are not affected. */
+ if (node->rn_flags & RF_INDEPENDENT)
+ return FL_RECOVERABLE;
+
+ return FL_FAILURE;
+ }
+
}
if (node->rn_child) {
rv = _res_op_by_level(&node, me?NULL:first, ret, op);
- if (rv != 0)
- return rv;
+ if (rv != 0) {
+ mark_nodes(node, RES_FAILED,
+ RF_NEEDSTART | RF_NEEDSTOP);
+
+ /* If this node is independent of its siblings,
+ that one of its dependent children failed
+ does not matter: its dependent children must
+ also be independent of this node's siblings. */
+ if (node->rn_flags & RF_INDEPENDENT)
+ return FL_RECOVERABLE;
+
+ return FL_FAILURE;
+ }
}
/* Stop should occur after children have stopped */
@@ -1339,7 +1239,7 @@
if (rv != 0) {
node->rn_state = RES_FAILED;
- return rv;
+ return FL_FAILURE;
}
if (node->rn_state != RES_STOPPED) {
@@ -1378,24 +1278,31 @@
char *type, void * __attribute__((unused))ret, int realop)
{
resource_node_t *node;
- int count = 0, rv;
+ int count = 0, rv = 0;
if (realop == RS_STOP) {
list_for_rev(tree, node, count) {
- rv = _res_op_internal(tree, first, type, ret, realop,
- node);
- if (rv != 0)
- return rv;
+ rv |= _res_op_internal(tree, first, type, ret, realop,
+ node);
}
} else {
list_for(tree, node, count) {
- rv = _res_op_internal(tree, first, type, ret, realop,
- node);
- if (rv != 0)
+ rv |= _res_op_internal(tree, first, type, ret, realop,
+ node);
+
+ /* If we hit a problem during a 'status' op in an
+ independent subtree, rv will have the
+ FL_RECOVERABLE bit set, but not FL_FAILURE.
+ If we ever hit FL_FAILURE during a status
+ operation, we're *DONE* - even if the subtree
+ is flagged with __independent_subtree */
+
+ if (rv & FL_FAILURE)
return rv;
}
}
- return 0;
+
+ return rv;
}
/**
@@ -1464,7 +1371,30 @@
int
res_status(resource_node_t **tree, resource_t *res, void *ret)
{
- return _res_op(tree, res, NULL, ret, RS_STATUS);
+ int rv;
+ rv = _res_op(tree, res, NULL, ret, RS_STATUS);
+
+ if (rv & FL_FAILURE)
+ return rv;
+
+ clulog(LOG_WARNING, "Some independent resources in %s:%s failed; "
+ "Attempting inline recovery\n",
+ res->r_rule->rr_type, res->r_attrs->ra_value);
+
+ rv = res_condstop(tree, res, ret);
+ if (rv & FL_FAILURE)
+ goto out_fail;
+ rv = res_condstart(tree, res, ret);
+ if (rv & FL_FAILURE)
+ goto out_fail;
+
+ clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n",
+ res->r_rule->rr_type, res->r_attrs->ra_value);
+ return 0;
+out_fail:
+ clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n",
+ res->r_rule->rr_type, res->r_attrs->ra_value);
+ return 1;
}
--- cluster/rgmanager/src/resources/script.sh 2006/08/18 15:26:23 1.8
+++ cluster/rgmanager/src/resources/script.sh 2007/05/31 18:58:46 1.8.2.1
@@ -115,5 +115,5 @@
declare -i rv=$?
if [ $rv -ne 0 ]; then
ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)"
- return $OCF_ERR_GENERIC
+ exit $OCF_ERR_GENERIC
fi
next reply other threads:[~2007-05-31 18:58 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-05-31 18:58 lhh [this message]
-- strict thread matches above, loose matches on Subject: below --
2007-11-26 21:46 [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h lhh
2007-08-02 14:53 lhh
2007-08-02 14:47 lhh
2007-08-02 14:46 lhh
2007-05-31 19:08 lhh
2007-05-03 15:02 lhh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070531185847.12042.qmail@sourceware.org \
--to=lhh@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.