From mboxrd@z Thu Jan 1 00:00:00 1970
From: Lon Hohberger
Date: Wed, 29 Aug 2007 17:39:07 -0400
Subject: [Cluster-devel] [patch] Fix independent subtree status checking
Message-ID: <20070829213907.GQ17551@redhat.com>
List-Id:
To: cluster-devel.redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

Independent subtree recovery in rgmanager is not correct without this patch.

-- 
Lon Hohberger - Software Engineer - Red Hat, Inc.
-------------- next part --------------
Index: restree.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/restree.c,v
retrieving revision 1.23.2.10
diff -u -r1.23.2.10 restree.c
--- restree.c	2 Aug 2007 14:46:51 -0000	1.23.2.10
+++ restree.c	29 Aug 2007 21:37:58 -0000
@@ -954,20 +954,17 @@
 		return _res_op(&node->rn_child, first, NULL, ret, op);
 
 	if (op == RS_START || op == RS_STATUS) {
-		rv = _do_child_levels(tree, first, ret, op);
+		rv |= _do_child_levels(tree, first, ret, op);
 		if (rv & SFL_FAILURE)
 			return rv;
 
 		/* Start default level after specified ones */
-		rv = _do_child_default_level(tree, first, ret, op);
+		rv |= _do_child_default_level(tree, first, ret, op);
 
 	} /* stop */
 	else {
-		rv = _do_child_default_level(tree, first, ret, op);
-		if (rv != 0)
-			return rv;
-
-		rv = _do_child_levels(tree, first, ret, op);
+		rv |= _do_child_default_level(tree, first, ret, op);
+		rv |= _do_child_levels(tree, first, ret, op);
 	}
 
 	return rv;
@@ -1245,22 +1242,8 @@
 
 		}
 
-		if (node->rn_child) {
-			rv = _res_op_by_level(&node, me?NULL:first, ret, op);
-			if (rv != 0) {
-				mark_nodes(node, RES_FAILED,
-					   RF_NEEDSTART | RF_NEEDSTOP);
-
-				/* If this node is independent of its siblings,
-				   that one of its dependent children failed
-				   does not matter: its dependent children must
-				   also be independent of this node's siblings. */
-				if (node->rn_flags & RF_INDEPENDENT)
-					return SFL_RECOVERABLE;
-
-				return SFL_FAILURE;
-			}
-		}
+		if (node->rn_child)
+			rv |= _res_op_by_level(&node, me?NULL:first, ret, op);
 
 		/* Stop should occur after children have stopped */
 		if (me && (op == RS_STOP)) {
Index: rg_state.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_state.c,v
retrieving revision 1.24.2.12
diff -u -r1.24.2.12 rg_state.c
--- rg_state.c	2 Aug 2007 14:46:51 -0000	1.24.2.12
+++ rg_state.c	29 Aug 2007 21:37:58 -0000
@@ -1130,10 +1130,13 @@
 static inline int
 handle_started_status(char *svcName, int ret, rg_state_t *svcStatus)
 {
+	int newowner;
+
 	if (ret & SFL_FAILURE) {
-		ret = msvc_check_cluster(svcName);
-		if (ret >= 0)
-			return 1;
+		newowner = msvc_check_cluster(svcName);
+		if (newowner >= 0)
+			return 0; /* running but not here */
+		return ret;	 /* not running anymore */
 	}
 
 	/* Ok, we have a recoverable service. Try to perform
Index: test.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/test.c,v
retrieving revision 1.6.2.5
diff -u -r1.6.2.5 test.c
--- test.c	31 Jul 2007 17:54:54 -0000	1.6.2.5
+++ test.c	29 Aug 2007 21:37:58 -0000
@@ -238,7 +238,7 @@
 
 		ret = res_status(&tree, curres, NULL);
 		if (ret) {
-			printf("Status check of %s failed\n", argv[3]);
+			printf("Status check of %s failed, ret=%d\n", argv[3], ret);
 			goto out;
 		}
 		printf("Status of %s is good\n", argv[3]);