From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 31 May 2007 19:08:15 -0000 Subject: [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h ... Message-ID: <20070531190815.24718.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: lhh at sourceware.org 2007-05-31 19:08:14 Modified files: rgmanager : ChangeLog rgmanager/include: reslist.h rgmanager/src/daemons: groups.c resrules.c restree.c rgmanager/src/resources: script.sh Log message: Fix 234249, 229650 Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.43&r2=1.44 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.19&r2=1.20 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.32&r2=1.33 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&r1=1.21&r2=1.22 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&r1=1.30&r2=1.31 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&r1=1.9&r2=1.10 --- cluster/rgmanager/ChangeLog 2007/05/22 17:01:08 1.43 +++ cluster/rgmanager/ChangeLog 2007/05/31 19:08:13 1.44 @@ -1,3 +1,9 @@ +2007-05-31 Lon Hohberger + * src/daemons/resrules.c: Fix #234249 - ignore obvious backup files + in /usr/share/cluster when processing resource rules + * src/daemons/restree.c, src/daemons/groups.c, include/reslist.h: + Implement independent subtrees, per bug #229650 + 2007-05-22 Lon Hohberger * src/resources/SAPInstance, SAPDatabase: Add primary attrs --- cluster/rgmanager/include/reslist.h 2007/03/22 23:46:58 1.19 +++ cluster/rgmanager/include/reslist.h 2007/05/31 19:08:14 1.20 @@ -35,6 +35,8 @@ #define RF_NEEDSTART (1<<2) /** Used when adding/changing resources */ #define RF_NEEDSTOP (1<<3) /** Used when deleting/changing resources */ #define RF_COMMON (1<<4) /** " */ +#define RF_INDEPENDENT (1<<5) /** Define this for a resource if it is + otherwise an independent subtree */ #define RES_STOPPED (0) #define RES_STARTED (1) --- cluster/rgmanager/src/daemons/groups.c 2007/04/27 18:10:10 1.32 +++ cluster/rgmanager/src/daemons/groups.c 2007/05/31 19:08:14 1.33 @@ -816,6 +816,7 @@ } pthread_rwlock_unlock(&resource_lock); +#if 0 /* Do NOT return error codes if we failed to stop for one of these reasons. It didn't start, either, so it's safe to assume that @@ -833,6 +834,7 @@ break; } } +#endif return ret; } --- cluster/rgmanager/src/daemons/resrules.c 2007/04/04 19:22:29 1.21 +++ cluster/rgmanager/src/daemons/resrules.c 2007/05/31 19:08:14 1.22 @@ -1025,7 +1025,7 @@ { DIR *dir; struct dirent *de; - char *fn;//, *dot; + char *fn, *dot; char path[2048]; struct stat st_buf; @@ -1040,10 +1040,23 @@ if (!fn) continue; + /* Ignore files with common backup extension */ if ((fn != NULL) && (strlen(fn) > 0) && (fn[strlen(fn)-1] == '~')) continue; + dot = strrchr(fn, '.'); + if (dot) { + /* Ignore RPM installed save files, patches, + diffs, etc. */ + if (!strncasecmp(dot, ".rpm", 4)) { + fprintf(stderr, "Warning: " + "Ignoring %s/%s: Bad extension %s\n", + rpath, de->d_name, dot); + continue; + } + } + snprintf(path, sizeof(path), "%s/%s", rpath, de->d_name); @@ -1053,8 +1066,10 @@ if (S_ISDIR(st_buf.st_mode)) continue; - if (st_buf.st_mode & (S_IXUSR|S_IXOTH|S_IXGRP)) - load_resource_rulefile(path, rules); + if (st_buf.st_mode & (S_IXUSR|S_IXOTH|S_IXGRP)) { + printf("Loading resource rule from %s\n", path); + load_resource_rulefile(path, rules); + } } xmlCleanupParser(); --- cluster/rgmanager/src/daemons/restree.c 2007/05/03 15:15:17 1.30 +++ cluster/rgmanager/src/daemons/restree.c 2007/05/31 19:08:14 1.31 @@ -39,6 +39,9 @@ void malloc_zap_mutex(void); #endif +#define FL_FAILURE 0x1 +#define FL_RECOVERABLE 0x2 + /* XXX from resrules.c */ int store_childtype(resource_child_t **childp, char *name, int start, @@ -507,6 +510,19 @@ node->rn_resource = curres; node->rn_state = RES_STOPPED; node->rn_actions = (resource_act_t *)act_dup(curres->r_actions); + + snprintf(tok, sizeof(tok), "%s/@__independent_subtree", base); +#ifndef NO_CCS + if (ccs_get(ccsfd, tok, &ref) == 0) { +#else + if (conf_get(tok, &ref) == 0) { +#endif + if (atoi(ref) > 0 || strcasecmp(ref, "yes") == 0) + node->rn_flags |= RF_INDEPENDENT; + free(ref); + } + + curres->r_refs++; *newnode = node; @@ -777,6 +793,8 @@ printf("NEEDSTART "); if (node->rn_flags & RF_COMMON) printf("COMMON "); + if (node->rn_flags & RF_INDEPENDENT) + printf("INDEPENDENT "); printf("]"); } printf(" {\n"); @@ -841,10 +859,11 @@ #endif /* Do op on all children at our level */ - rv += _res_op(&node->rn_child, first, + rv |= _res_op(&node->rn_child, first, rule->rr_childtypes[x].rc_name, ret, op); - if (rv != 0 && op != RS_STOP) + + if (rv & FL_FAILURE && op != RS_STOP) return rv; } @@ -856,46 +875,6 @@ } -#if 0 -static inline int -_do_child_default_level(resource_node_t **tree, resource_t *first, - void *ret, int op) -{ - resource_node_t *node = *tree; - resource_t *res = node->rn_resource; - resource_rule_t *rule = res->r_rule; - int x, rv = 0, lev; - - for (x = 0; rule->rr_childtypes && - rule->rr_childtypes[x].rc_name; x++) { - - if(op == RS_STOP) - lev = rule->rr_childtypes[x].rc_stoplevel; - else - lev = rule->rr_childtypes[x].rc_startlevel; - - if (lev) - continue; - - /* - printf("%s children of %s type %s (default level)\n", - agent_op_str(op), - node->rn_resource->r_rule->rr_type, - rule->rr_childtypes[x].rc_name); - */ - - rv = _res_op(&node->rn_child, first, - rule->rr_childtypes[x].rc_name, - ret, op); - if (rv != 0) - return rv; - } - - return 0; -} -#endif - - static inline int _xx_child_internal(resource_node_t *node, resource_t *first, resource_node_t *child, void *ret, int op) @@ -929,13 +908,14 @@ if (op == RS_START || op == RS_STATUS) { list_for(&node->rn_child, child, y) { - rv = _xx_child_internal(node, first, child, ret, op); - if (rv) + rv |= _xx_child_internal(node, first, child, ret, op); + + if (rv & FL_FAILURE) return rv; } } else { list_for_rev(&node->rn_child, child, y) { - rv += _xx_child_internal(node, first, child, ret, op); + rv |= _xx_child_internal(node, first, child, ret, op); } } @@ -976,7 +956,7 @@ if (op == RS_START || op == RS_STATUS) { rv = _do_child_levels(tree, first, ret, op); - if (rv != 0) + if (rv & FL_FAILURE) return rv; /* Start default level after specified ones */ @@ -995,6 +975,22 @@ } +void +mark_nodes(resource_node_t *node, int state, int flags) +{ + int x; + resource_node_t *child; + + list_for(&node->rn_child, child, x) { + if (child->rn_child) + mark_nodes(child->rn_child, state, flags); + } + + node->rn_state = state; + node->rn_flags |= (RF_NEEDSTART | RF_NEEDSTOP); +} + + /** Do a status on a resource node. This takes into account the last time the status operation was run and selects the highest possible resource depth @@ -1223,7 +1219,7 @@ rv = res_exec(node, agent_op_str(op), NULL, 0); if (rv != 0) { node->rn_state = RES_FAILED; - return rv; + return FL_FAILURE; } set_time("start", 0, node); @@ -1236,14 +1232,43 @@ } else if (me && (op == RS_STATUS)) { /* Check status before children*/ rv = do_status(node); - if (rv != 0) - return rv; + if (rv != 0) { + /* + If this node's status has failed, all of its + dependent children are failed, whether or not this + node is independent or not. + */ + mark_nodes(node, RES_FAILED, + RF_NEEDSTART | RF_NEEDSTOP); + + /* If we're an independent subtree, return a flag + stating that this section is recoverable apart + from siblings in the resource tree. All child + resources of this node must be restarted, + but siblings of this node are not affected. */ + if (node->rn_flags & RF_INDEPENDENT) + return FL_RECOVERABLE; + + return FL_FAILURE; + } + } if (node->rn_child) { rv = _res_op_by_level(&node, me?NULL:first, ret, op); - if (rv != 0) - return rv; + if (rv != 0) { + mark_nodes(node, RES_FAILED, + RF_NEEDSTART | RF_NEEDSTOP); + + /* If this node is independent of its siblings, + that one of its dependent children failed + does not matter: its dependent children must + also be independent of this node's siblings. */ + if (node->rn_flags & RF_INDEPENDENT) + return FL_RECOVERABLE; + + return FL_FAILURE; + } } /* Stop should occur after children have stopped */ @@ -1253,7 +1278,7 @@ if (rv != 0) { node->rn_state = RES_FAILED; - return rv; + return FL_FAILURE; } if (node->rn_state != RES_STOPPED) { @@ -1292,24 +1317,31 @@ char *type, void * __attribute__((unused))ret, int realop) { resource_node_t *node; - int count = 0, rv; + int count = 0, rv = 0; if (realop == RS_STOP) { list_for_rev(tree, node, count) { - rv = _res_op_internal(tree, first, type, ret, realop, - node); - if (rv != 0) - return rv; + rv |= _res_op_internal(tree, first, type, ret, realop, + node); } } else { list_for(tree, node, count) { - rv = _res_op_internal(tree, first, type, ret, realop, - node); - if (rv != 0) + rv |= _res_op_internal(tree, first, type, ret, realop, + node); + + /* If we hit a problem during a 'status' op in an + independent subtree, rv will have the + FL_RECOVERABLE bit set, but not FL_FAILURE. + If we ever hit FL_FAILURE during a status + operation, we're *DONE* - even if the subtree + is flagged w/ indy-subtree */ + + if (rv & FL_FAILURE) return rv; } } - return 0; + + return rv; } /** @@ -1378,7 +1410,30 @@ int res_status(resource_node_t **tree, resource_t *res, void *ret) { - return _res_op(tree, res, NULL, ret, RS_STATUS); + int rv; + rv = _res_op(tree, res, NULL, ret, RS_STATUS); + + if (rv & FL_FAILURE) + return rv; + + clulog(LOG_WARNING, "Some independent resources in %s:%s failed; " + "Attempting inline recovery\n", + res->r_rule->rr_type, res->r_attrs->ra_value); + + rv = res_condstop(tree, res, ret); + if (rv & FL_FAILURE) + goto out_fail; + rv = res_condstart(tree, res, ret); + if (rv & FL_FAILURE) + goto out_fail; + + clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n", + res->r_rule->rr_type, res->r_attrs->ra_value); + return 0; +out_fail: + clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n", + res->r_rule->rr_type, res->r_attrs->ra_value); + return 1; } --- cluster/rgmanager/src/resources/script.sh 2007/04/05 15:08:20 1.9 +++ cluster/rgmanager/src/resources/script.sh 2007/05/31 19:08:14 1.10 @@ -118,5 +118,5 @@ declare -i rv=$? if [ $rv -ne 0 ]; then ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)" - return $OCF_ERR_GENERIC + exit $OCF_ERR_GENERIC fi