From mboxrd@z Thu Jan 1 00:00:00 1970
From: lhh@sourceware.org
Date: 27 Jun 2007 14:03:53 -0000
Subject: [Cluster-devel] cluster/rgmanager ChangeLog include/resgroup.h ...
Message-ID: <20070627140353.7187.qmail@sourceware.org>
List-Id:
To: cluster-devel.redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

CVSROOT: /cvs/cluster
Module name: cluster
Changes by: lhh at sourceware.org 2007-06-27 14:03:52

Modified files:
	rgmanager : ChangeLog
	rgmanager/include: resgroup.h reslist.h
	rgmanager/src/clulib: rg_strings.c
	rgmanager/src/daemons: groups.c main.c nodeevent.c restree.c rg_state.c rg_thread.c test.c
	rgmanager/src/resources: vm.sh

Log message:
	Merge from RHEL5 branch

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.48&r2=1.49
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&r1=1.22&r2=1.23
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.20&r2=1.21
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/rg_strings.c.diff?cvsroot=cluster&r1=1.8&r2=1.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.33&r2=1.34
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.39&r2=1.40
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&r1=1.6&r2=1.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&r1=1.33&r2=1.34
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.35&r2=1.36
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&r1=1.21&r2=1.22
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/test.c.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&r1=1.4&r2=1.5

--- cluster/rgmanager/ChangeLog	2007/06/21 18:39:08	1.48
+++ cluster/rgmanager/ChangeLog	2007/06/27 14:03:51	1.49
@@ -1,3 +1,30 @@
+2007-06-27 Lon Hohberger
+	* Merge from RHEL5 branch.
+	* src/daemons/vm.sh: Un-break migrate (#231692). Make status
+	checks happen every 30 seconds instead of 30 minutes.
+	* include/resgroup.h: Move inline recovery flags to a header file,
+	add RG_STATUS_INQUIRY for locating virtual machines which may have
+	migrated.
+	* include/reslist.h: Change res_exec() back to using agent_op_str()
+	inline so we can squelch errors while performing RG_STATUS_INQUIRY
+	* src/clulib/rg_strings.c: Add new strings for new error code /
+	request types
+	* src/daemons/groups.c: Change group_migrate() to use the correct
+	calling semantics
+	* src/daemons/main.c, nodeevent.c: Clean up cases which could cause
+	#244143
+	* src/daemons/resrules.c: Clear up noise
+	* src/daemons/restree.c: Squelch errors during RG_STATUS_INQUIRY
+	Patch up inline service recovery (#229650)
+	* src/daemons/rg_state.c: Don't let migrations or relocations to a
+	node running exclusive services occur in the first place and return
+	a useful error. (goes with #237144). Locate virtual machines (or
+	generally, services with the 'migrate' ability) elsewhere in the
+	cluster prior to trying to start one.
Detect if someone migrates + such a service without using the cluster tools (#232300) + * src/daemons/test.c: Make rg_test do the right thing for migrate + operations + 2007-06-21 Fabio M. Di Nitto * rgmanager/src/clulib/alloc.c: Undefine DEBUG when building on IA64. The __builtin_address functionality should be taken from libunwind --- cluster/rgmanager/include/resgroup.h 2007/06/14 19:08:57 1.22 +++ cluster/rgmanager/include/resgroup.h 2007/06/27 14:03:51 1.23 @@ -98,6 +98,7 @@ #define RG_MIGRATE 22 #define RG_FREEZE 23 #define RG_UNFREEZE 24 +#define RG_STATUS_INQUIRY 25 #define RG_NONE 999 const char *rg_req_str(int req); @@ -143,6 +144,7 @@ int svc_start(char *svcName, int req); int svc_stop(char *svcName, int error); int svc_status(char *svcName); +int svc_status_inquiry(char *svcName); int svc_disable(char *svcName); int svc_fail(char *svcName); int svc_freeze(char *svcName); @@ -188,6 +190,8 @@ int my_id(void); /* Return codes */ +#define RG_EFENCE -13 /* Fencing operation pending */ +#define RG_ENODE -12 /* Node is dead/nonexistent */ #define RG_EFROZEN -11 /* Service is frozen */ #define RG_ERUN -10 /* Service is already running */ #define RG_EQUORUM -9 /* Operation requires quorum */ @@ -221,6 +225,12 @@ #define FOD_RESTRICTED (1<<1) #define FOD_NOFAILBACK (1<<2) +/* + Status tree flags + */ +#define SFL_FAILURE (1<<0) +#define SFL_RECOVERABLE (1<<1) + //#define DEBUG #ifdef DEBUG --- cluster/rgmanager/include/reslist.h 2007/05/31 19:08:14 1.20 +++ cluster/rgmanager/include/reslist.h 2007/06/27 14:03:51 1.21 @@ -144,7 +144,7 @@ int res_status(resource_node_t **tree, resource_t *res, void *ret); int res_condstart(resource_node_t **tree, resource_t *res, void *ret); int res_condstop(resource_node_t **tree, resource_t *res, void *ret); -int res_exec(resource_node_t *node, const char *op, const char *arg, int depth); +int res_exec(resource_node_t *node, int op, const char *arg, int depth); /*int res_resinfo(resource_node_t **tree, resource_t *res, void *ret);*/ int expand_time(char *val); int store_action(resource_act_t **actsp, char *name, int depth, int timeout, int interval); --- cluster/rgmanager/src/clulib/rg_strings.c 2007/04/27 18:10:10 1.8 +++ cluster/rgmanager/src/clulib/rg_strings.c 2007/06/27 14:03:51 1.9 @@ -26,6 +26,8 @@ const struct string_val rg_error_strings[] = { + { RG_EFENCE, "Fencing operation pending; try again later" }, + { RG_ENODE, "Target node dead / nonexistent" }, { RG_ERUN, "Service is already running" }, { RG_EQUORUM, "Operation requires quorum" }, { RG_EINVAL, "Invalid operation for resource" }, @@ -68,6 +70,7 @@ {RG_UNLOCK, "unlocking"}, {RG_QUERY_LOCK, "lock status inquiry"}, {RG_MIGRATE, "migrate"}, + {RG_STATUS_INQUIRY, "out of band service status inquiry"}, {RG_NONE, "none"}, {0, NULL} }; @@ -182,5 +185,6 @@ const char * agent_op_str(int val) { + printf("searching agent_ops for %d\n", val); return rg_search_table(agent_ops, val); } --- cluster/rgmanager/src/daemons/groups.c 2007/05/31 19:08:14 1.33 +++ cluster/rgmanager/src/daemons/groups.c 2007/06/27 14:03:51 1.34 @@ -896,7 +896,7 @@ } clulog(LOG_NOTICE, "Migrating %s to %s\n", groupname, tgt_name); - ret = res_exec(rn, agent_op_str(RS_MIGRATE), tgt_name, 0); + ret = res_exec(rn, RS_MIGRATE, tgt_name, 0); if (ret == 0) { clulog(LOG_NOTICE, "Migration of %s to %s completed\n", --- cluster/rgmanager/src/daemons/main.c 2007/06/14 15:06:51 1.39 +++ cluster/rgmanager/src/daemons/main.c 2007/06/27 14:03:51 1.40 @@ -617,10 +617,12 @@ clulog(LOG_WARNING, "#67: Shutting down uncleanly\n"); 
rg_set_inquorate(); rg_doall(RG_INIT, 1, "Emergency stop of %s"); + rg_set_uninitialized(); #if defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2 /* cman_replyto_shutdown() */ #endif - exit(0); + running = 0; + break; } return ret; @@ -700,6 +702,9 @@ } } + if (!running) + return 0; + if (need_reconfigure || check_config_update()) { need_reconfigure = 0; configure_logging(-1, 0); @@ -985,7 +990,8 @@ } } - cleanup(cluster_ctx); + if (rg_initialized()) + cleanup(cluster_ctx); clulog(LOG_NOTICE, "Shutdown complete, exiting\n"); clu_lock_finished(rgmanager_lsname); cman_finish(clu); --- cluster/rgmanager/src/daemons/nodeevent.c 2007/03/27 19:33:20 1.6 +++ cluster/rgmanager/src/daemons/nodeevent.c 2007/06/27 14:03:51 1.7 @@ -72,8 +72,10 @@ if (local) { /* Local Node Event */ - if (nodeStatus == 0) + if (nodeStatus == 0) { + clulog(LOG_ERR, "Exiting uncleanly\n"); hard_exit(); + } if (!rg_initialized()) { if (init_resource_groups(0) != 0) { --- cluster/rgmanager/src/daemons/restree.c 2007/06/13 20:32:41 1.33 +++ cluster/rgmanager/src/daemons/restree.c 2007/06/27 14:03:51 1.34 @@ -39,10 +39,6 @@ void malloc_zap_mutex(void); #endif -#define FL_FAILURE 0x1 -#define FL_RECOVERABLE 0x2 - - /* XXX from resrules.c */ int store_childtype(resource_child_t **childp, char *name, int start, int stop, int forbid, int flags); @@ -335,12 +331,13 @@ @see build_env */ int -res_exec(resource_node_t *node, const char *op, const char *arg, int depth) +res_exec(resource_node_t *node, int op, const char *arg, int depth) { int childpid, pid; int ret = 0; char **env = NULL; resource_t *res = node->rn_resource; + const char *op_str = agent_op_str(op); char fullpath[2048]; if (!res->r_rule->rr_agent) @@ -354,7 +351,7 @@ #ifdef NO_CCS if (_no_op_mode_) { - printf("[%s] %s:%s\n", op, res->r_rule->rr_type, + printf("[%s] %s:%s\n", op_str, res->r_rule->rr_type, res->r_attrs->ra_value); return 0; } @@ -392,9 +389,9 @@ restore_signals(); if (arg) - execle(fullpath, fullpath, op, arg, NULL, env); + execle(fullpath, fullpath, op_str, arg, NULL, env); else - execle(fullpath, fullpath, op, NULL, env); + execle(fullpath, fullpath, op_str, NULL, env); } #ifdef DEBUG @@ -411,10 +408,16 @@ ret = WEXITSTATUS(ret); +#ifndef NO_CCS + if ((op == RS_STATUS && + node->rn_state == RES_STARTED && ret) || + (op != RS_STATUS && ret)) { +#else if (ret) { +#endif clulog(LOG_NOTICE, "%s on %s \"%s\" returned %d (%s)\n", - op, res->r_rule->rr_type, + op_str, res->r_rule->rr_type, res->r_attrs->ra_value, ret, ocf_strerror(ret)); } @@ -864,7 +867,7 @@ rule->rr_childtypes[x].rc_name, ret, op); - if (rv & FL_FAILURE && op != RS_STOP) + if (rv & SFL_FAILURE && op != RS_STOP) return rv; } @@ -911,7 +914,7 @@ list_for(&node->rn_child, child, y) { rv |= _xx_child_internal(node, first, child, ret, op); - if (rv & FL_FAILURE) + if (rv & SFL_FAILURE) return rv; } } else { @@ -957,7 +960,7 @@ if (op == RS_START || op == RS_STATUS) { rv = _do_child_levels(tree, first, ret, op); - if (rv & FL_FAILURE) + if (rv & SFL_FAILURE) return rv; /* Start default level after specified ones */ @@ -1016,12 +1019,6 @@ if (strcmp(node->rn_actions[x].ra_name, "status")) continue; - /* If a status check has never been done, reset its status. 
*/ - if (!node->rn_actions[x].ra_last) { - node->rn_actions[x].ra_last = now; - continue; - } - delta = now - node->rn_actions[x].ra_last; /* @@ -1067,7 +1064,8 @@ node->rn_actions[idx].ra_depth, (int)node->rn_actions[idx].ra_interval);*/ - if ((x = res_exec(node, agent_op_str(RS_STATUS), NULL, + node->rn_actions[idx].ra_last = now; + if ((x = res_exec(node, RS_STATUS, NULL, node->rn_actions[idx].ra_depth)) == 0) return 0; @@ -1075,7 +1073,7 @@ return x; /* Strange/failed status. Try to recover inline. */ - if ((x = res_exec(node, agent_op_str(RS_RECOVER), NULL, 0)) == 0) + if ((x = res_exec(node, RS_RECOVER, NULL, 0)) == 0) return 0; return x; @@ -1163,7 +1161,7 @@ char *type, void *__attribute__((unused))ret, int realop, resource_node_t *node) { - int rv, me, op; + int rv = 0, me, op; /* Restore default operation. */ op = realop; @@ -1217,10 +1215,10 @@ if (me && (op == RS_START)) { node->rn_flags &= ~RF_NEEDSTART; - rv = res_exec(node, agent_op_str(op), NULL, 0); + rv = res_exec(node, op, NULL, 0); if (rv != 0) { node->rn_state = RES_FAILED; - return FL_FAILURE; + return SFL_FAILURE; } set_time("start", 0, node); @@ -1248,9 +1246,9 @@ resources of this node must be restarted, but siblings of this node are not affected. */ if (node->rn_flags & RF_INDEPENDENT) - return FL_RECOVERABLE; + return SFL_RECOVERABLE; - return FL_FAILURE; + return SFL_FAILURE; } } @@ -1266,20 +1264,20 @@ does not matter: its dependent children must also be independent of this node's siblings. */ if (node->rn_flags & RF_INDEPENDENT) - return FL_RECOVERABLE; + return SFL_RECOVERABLE; - return FL_FAILURE; + return SFL_FAILURE; } } /* Stop should occur after children have stopped */ if (me && (op == RS_STOP)) { node->rn_flags &= ~RF_NEEDSTOP; - rv = res_exec(node, agent_op_str(op), NULL, 0); + rv = res_exec(node, op, NULL, 0); if (rv != 0) { node->rn_state = RES_FAILED; - return FL_FAILURE; + return SFL_FAILURE; } if (node->rn_state != RES_STOPPED) { @@ -1292,7 +1290,7 @@ //node->rn_resource->r_rule->rr_type, //primary_attr_value(node->rn_resource)); - return 0; + return rv; } @@ -1332,12 +1330,12 @@ /* If we hit a problem during a 'status' op in an independent subtree, rv will have the - FL_RECOVERABLE bit set, but not FL_FAILURE. - If we ever hit FL_FAILURE during a status + SFL_RECOVERABLE bit set, but not SFL_FAILURE. 
+ If we ever hit SFL_FAILURE during a status operation, we're *DONE* - even if the subtree is flagged w/ indy-subtree */ - if (rv & FL_FAILURE) + if (rv & SFL_FAILURE) return rv; } } @@ -1411,33 +1409,7 @@ int res_status(resource_node_t **tree, resource_t *res, void *ret) { - int rv; - rv = _res_op(tree, res, NULL, ret, RS_STATUS); - - if (rv == 0) - return 0; - - if (rv & FL_FAILURE) - return rv; - - clulog(LOG_WARNING, "Some independent resources in %s:%s failed; " - "Attempting inline recovery\n", - res->r_rule->rr_type, res->r_attrs->ra_value); - - rv = res_condstop(tree, res, ret); - if (rv & FL_FAILURE) - goto out_fail; - rv = res_condstart(tree, res, ret); - if (rv & FL_FAILURE) - goto out_fail; - - clulog(LOG_NOTICE, "Inline recovery of %s:%s successful\n", - res->r_rule->rr_type, res->r_attrs->ra_value); - return 0; -out_fail: - clulog(LOG_WARNING, "Inline recovery of %s:%s failed\n", - res->r_rule->rr_type, res->r_attrs->ra_value); - return 1; + return _res_op(tree, res, NULL, ret, RS_STATUS); } --- cluster/rgmanager/src/daemons/rg_state.c 2007/06/25 16:49:28 1.35 +++ cluster/rgmanager/src/daemons/rg_state.c 2007/06/27 14:03:51 1.36 @@ -36,6 +36,10 @@ #include #include +/* XXX - copied :( */ +#define cn_svccount cn_address.cna_address[0] /* Theses are uint8_t size */ +#define cn_svcexcl cn_address.cna_address[1] + int node_should_start_safe(uint32_t, cluster_member_list_t *, char *); int next_node_id(cluster_member_list_t *membership, int me); @@ -50,6 +54,10 @@ int group_migratory(char *servicename, int lock); int have_exclusive_resources(void); int check_exclusive_resources(cluster_member_list_t *membership, char *svcName); +static int msvc_check_cluster(char *svcName); +static inline int handle_started_status(char *svcName, int ret, rg_state_t *svcStatus); +static inline int handle_migrate_status(char *svcName, int ret, rg_state_t *svcStatus); +int count_resource_groups_local(cman_node_t *mp); int @@ -837,10 +845,27 @@ struct dlm_lksb lockp; rg_state_t svcStatus; int ret; + cluster_member_list_t *membership; + cman_node_t *m; if (!group_migratory(svcName, 1)) return RG_EINVAL; + membership = member_list(); + m = memb_id_to_p(membership, target); + if (!m) { + free_member_list(membership); + return RG_EINVAL; + } + + count_resource_groups_local(m); + if (m->cn_svcexcl) { + free_member_list(membership); + return RG_EDEPEND; + } + free_member_list(membership); + + if (rg_lock(svcName, &lockp) < 0) { clulog(LOG_ERR, "#45: Unable to obtain cluster lock: %s\n", strerror(errno)); @@ -905,6 +930,129 @@ /** + * Ask the other nodes if they've seen this service. This can be used + * to allow users the ability to use non-rgmanager tools to migrate + * a virtual machine to another node in the cluster. + * + * Returns the node ID of the new owner, if any. -1 if no one in the + * cluster has seen the service. 
+ */ +int +get_new_owner(char *svcName) +{ + SmMessageSt msgp, response; + msgctx_t ctx; + cluster_member_list_t *membership; + int x, ret = -1, me = my_id(); + + /* Build message */ + msgp.sm_hdr.gh_magic = GENERIC_HDR_MAGIC; + msgp.sm_hdr.gh_command = RG_ACTION_REQUEST; + msgp.sm_hdr.gh_arg1 = RG_STATUS_INQUIRY; + msgp.sm_hdr.gh_length = sizeof(msgp); + msgp.sm_data.d_action = RG_STATUS_INQUIRY; + strncpy(msgp.sm_data.d_svcName, svcName, + sizeof(msgp.sm_data.d_svcName)); + msgp.sm_data.d_svcOwner = 0; + msgp.sm_data.d_ret = 0; + + swab_SmMessageSt(&msgp); + + membership = member_list(); + for (x = 0; x < membership->cml_count && ret < 0; x++) { + + /* don't query down members */ + if (!membership->cml_members[x].cn_member) + continue; + /* don't query self */ + if (membership->cml_members[x].cn_nodeid == me) + continue; + + if (msg_open(MSG_CLUSTER, membership->cml_members[x].cn_nodeid, + RG_PORT, &ctx, 2) < 0) { + /* failed to open: better to claim false successful + status rather than claim a failure and possibly + end up with a service on >1 node */ + goto out; + } + + msg_send(&ctx, &msgp, sizeof(msgp)); + msg_receive(&ctx, &response, sizeof (response), 5); + + swab_SmMessageSt(&response); + if (response.sm_data.d_ret == RG_SUCCESS) + ret = response.sm_data.d_svcOwner; + else + ret = -1; + + msg_close(&ctx); + } + +out: + free_member_list(membership); + + return ret; +} + + +/** + If a service is 'migratory' - that is, it has the 'migratory' attribute + and has no children, this will query other nodes in the cluster, checking + to see if the service has migrated to that node using a status inquiry + message. Note that this is a very inefficient thing to do; it would be + much, much better to simply use the cluster tools to migrate rather than + using the standard management tools for the service/virtual machine. + */ +static int +msvc_check_cluster(char *svcName) +{ + struct dlm_lksb lockp; + int newowner; + rg_state_t svcStatus; + + if (!group_migratory(svcName, 1)) + return -1; + + newowner = get_new_owner(svcName); + if (newowner < 0) { + clulog(LOG_DEBUG, "No other nodes have seen %s\n", svcName); + return -1; + } + + /* New owner found */ + clulog(LOG_NOTICE, "Migration: %s is running on %d\n", svcName, newowner); + + /* If the check succeeds (returns 0), then flip the state back to + 'started' - with a new owner */ + if (rg_lock(svcName, &lockp) < 0) { + clulog(LOG_ERR, "#451: Unable to obtain cluster lock: %s\n", + strerror(errno)); + return -1; + } + + if (get_rg_state(svcName, &svcStatus) != 0) { + rg_unlock(&lockp); + clulog(LOG_ERR, "#452: Failed getting status for RG %s\n", + svcName); + return -1; + } + + svcStatus.rs_state = RG_STATE_STARTED; + svcStatus.rs_owner = newowner; + + if (set_rg_state(svcName, &svcStatus) != 0) { + rg_unlock(&lockp); + clulog(LOG_ERR, "#453: Failed setting status for RG %s\n", + svcName); + return -1; + } + rg_unlock(&lockp); + + return newowner; +} + + +/** * Check status of a cluster service * * @param svcName Service name to check. 
@@ -946,14 +1094,58 @@ ret = group_op(svcName, RG_STATUS); - /* For running services, just check the return code */ + /* For running services, if the return code is 0, we're done*/ if (svcStatus.rs_state == RG_STATE_STARTED) - return ret; + return handle_started_status(svcName, ret, &svcStatus); + + return handle_migrate_status(svcName, ret, &svcStatus); +} + + +static inline int +handle_started_status(char *svcName, int ret, rg_state_t *svcStatus) +{ + if (ret & SFL_FAILURE) { + ret = msvc_check_cluster(svcName); + if (ret >= 0) + return 1; + } + + /* Ok, we have a recoverable service. Try to perform + inline recovery */ + if (ret & SFL_RECOVERABLE) { + + clulog(LOG_WARNING, "Some independent resources in %s failed; " + "Attempting inline recovery\n", svcName); + ret = group_op(svcName, RG_CONDSTOP); + if (!(ret & SFL_FAILURE)) { + ret = group_op(svcName, RG_CONDSTART); + } + + if (ret) { + clulog(LOG_WARNING, "Inline recovery of %s failed\n", + svcName); + } else { + clulog(LOG_NOTICE, + "Inline recovery of %s succeeded\n", + svcName); + return 0; + } + } + + return ret; +} + + +static inline int +handle_migrate_status(char *svcName, int ret, rg_state_t *svcStatus) +{ + struct dlm_lksb lockp; /* For service(s) migrating to the local node, ignore invalid return codes. XXX Should put a timeout on migrating services */ - if (ret < 0) + if (ret != 0) return 0; /* If the check succeeds (returns 0), then flip the state back to @@ -964,8 +1156,8 @@ return RG_EFAIL; } - svcStatus.rs_state = RG_STATE_STARTED; - if (set_rg_state(svcName, &svcStatus) != 0) { + svcStatus->rs_state = RG_STATE_STARTED; + if (set_rg_state(svcName, svcStatus) != 0) { rg_unlock(&lockp); clulog(LOG_ERR, "#46: Failed getting status for RG %s\n", svcName); @@ -1417,8 +1609,10 @@ int *new_owner) { cluster_member_list_t *allowed_nodes, *backup = NULL; + cman_node_t *m; int target = preferred_target, me = my_id(); int ret, x; + rg_state_t svcStatus; /* * Stop the service - if we haven't already done so. @@ -1436,9 +1630,22 @@ return RG_EFORWARD; } - if (preferred_target >= 0) { + if (preferred_target > 0) { allowed_nodes = member_list(); + m = memb_id_to_p(allowed_nodes, preferred_target); + if (!m) { + free_member_list(allowed_nodes); + return RG_EINVAL; + } + + /* Avoid even bothering the other node if we can */ + count_resource_groups_local(m); + if (m->cn_svcexcl) { + free_member_list(allowed_nodes); + return RG_EDEPEND; + } + /* Mark everyone except me and the preferred target DOWN for now If we can't start it on the preferred target, then we'll try @@ -1472,7 +1679,6 @@ if (target == me && me != preferred_target) goto exhausted; - if (target == me) { /* Relocate to self. 
Don't send a network request @@ -1508,7 +1714,7 @@ //count_resource_groups(allowed_nodes); } - if (preferred_target >= 0) + if (preferred_target > 0) memb_mark_down(allowed_nodes, preferred_target); memb_mark_down(allowed_nodes, me); @@ -1517,7 +1723,16 @@ if (target == me) goto exhausted; - switch (relocate_service(svcName, request, target)) { + ret = relocate_service(svcName, request, target); + switch (ret) { + case RG_ERUN: + /* Someone stole the service while we were + trying to relo it */ + get_rg_state_local(svcName, &svcStatus); + *new_owner = svcStatus.rs_owner; + free_member_list(allowed_nodes); + return 0; + case RG_EDEPEND: case RG_EFAIL: memb_mark_down(allowed_nodes, target); continue; @@ -1525,12 +1740,17 @@ svc_report_failure(svcName); free_member_list(allowed_nodes); return RG_EFAIL; + default: + /* deliberate fallthrough */ + clulog(LOG_ERR, + "#61: Invalid reply from member %d during" + " relocate operation!\n", target); case RG_NO: /* state uncertain */ free_member_list(allowed_nodes); - clulog(LOG_DEBUG, "State Uncertain: svc:%s " - "nid:%08x req:%d\n", svcName, - target, request); + clulog(LOG_CRIT, "State Uncertain: svc:%s " + "nid:%d req:%s ret:%d\n", svcName, + target, rg_req_str(request), ret); return 0; case 0: *new_owner = target; @@ -1538,10 +1758,6 @@ "on member %d\n", svcName, (int)target); free_member_list(allowed_nodes); return 0; - default: - clulog(LOG_ERR, - "#61: Invalid reply from member %d during" - " relocate operation!\n", target); } } free_member_list(allowed_nodes); @@ -1592,8 +1808,20 @@ handle_start_req(char *svcName, int req, int *new_owner) { int ret, tolerance = FOD_BEST; - cluster_member_list_t *membership = member_list(); - int need_check = have_exclusive_resources(); + cluster_member_list_t *membership; + int need_check, actual_failure = 0; + + /* When we get an enable req. for a migratory service, + check other nodes to see if they are already running + said service - and ignore failover domain constraints + */ + if ((ret = msvc_check_cluster(svcName)) >= 0) { + *new_owner = ret; + return RG_SUCCESS; + } + + need_check = have_exclusive_resources(); + membership = member_list(); /* * When a service request is from a user application (eg, clusvcadm), @@ -1672,14 +1900,16 @@ */ return RG_EABORT; } + actual_failure = 1; relocate: /* * OK, it failed to start - but succeeded to stop. Now, * we should relocate the service. 
*/ - clulog(LOG_WARNING, "#71: Relocating failed service %s\n", - svcName); + if (actual_failure) + clulog(LOG_WARNING, "#71: Relocating failed service %s\n", + svcName); ret = handle_relocate_req(svcName, RG_START_RECOVER, -1, new_owner); /* If we leave the service stopped, instead of disabled, someone @@ -1780,46 +2010,56 @@ return handle_start_req(svcName, RG_START_RECOVER, new_owner); } + int handle_fd_start_req(char *svcName, int request, int *new_owner) { - cluster_member_list_t *allowed_nodes; - int target, me = my_id(); - int ret; - - allowed_nodes = member_list(); - - while (memb_count(allowed_nodes)) { - target = best_target_node(allowed_nodes, -1, - svcName, 1); - if (target == me) { - ret = handle_start_remote_req(svcName, request); - } else if (target < 0) { - free_member_list(allowed_nodes); - return RG_EFAIL; - } else { - ret = relocate_service(svcName, request, target); - } - - switch(ret) { - case RG_ESUCCESS: - return RG_ESUCCESS; - case RG_ERUN: - return RG_ERUN; - case RG_EFAIL: - memb_mark_down(allowed_nodes, target); - continue; - case RG_EABORT: - svc_report_failure(svcName); - free_member_list(allowed_nodes); - return RG_EFAIL; - default: - clulog(LOG_ERR, - "#6X: Invalid reply [%d] from member %d during" - " relocate operation!\n", ret, target); - } - } + cluster_member_list_t *allowed_nodes; + int target, me = my_id(); + int ret = RG_EFAIL; + + /* When we get an enable req. for a migratory service, + check other nodes to see if they are already running + said service - and ignore failover domain constraints + */ + if ((ret = msvc_check_cluster(svcName)) >= 0) { + *new_owner = ret; + return RG_SUCCESS; + } + + allowed_nodes = member_list(); - free_member_list(allowed_nodes); - return RG_EFAIL; + while (memb_count(allowed_nodes)) { + target = best_target_node(allowed_nodes, -1, + svcName, 1); + if (target == me) { + ret = handle_start_remote_req(svcName, request); + } else if (target < 0) { + free_member_list(allowed_nodes); + return RG_EFAIL; + } else { + ret = relocate_service(svcName, request, target); + } + + switch(ret) { + case RG_ESUCCESS: + return RG_ESUCCESS; + case RG_ERUN: + return RG_ERUN; + case RG_EFAIL: + memb_mark_down(allowed_nodes, target); + continue; + case RG_EABORT: + svc_report_failure(svcName); + free_member_list(allowed_nodes); + return RG_EFAIL; + default: + clulog(LOG_ERR, + "#6X: Invalid reply [%d] from member %d during" + " relocate operation!\n", ret, target); + } + } + + free_member_list(allowed_nodes); + return RG_EFAIL; } --- cluster/rgmanager/src/daemons/rg_thread.c 2007/06/14 15:06:52 1.21 +++ cluster/rgmanager/src/daemons/rg_thread.c 2007/06/27 14:03:51 1.22 @@ -441,6 +441,19 @@ ret = RG_EFAIL; break; + case RG_STATUS_INQUIRY: + error = svc_status_inquiry(myname); + + if (error == 0) { + ret = RG_SUCCESS; + newowner = my_id(); + } else { + ret = RG_EFAIL; + newowner = -1; + } + + break; + default: printf("Unhandled request %d\n", req->rr_request); ret = RG_NONE; --- cluster/rgmanager/src/daemons/test.c 2007/03/22 23:46:58 1.9 +++ cluster/rgmanager/src/daemons/test.c 2007/06/27 14:03:51 1.10 @@ -217,7 +217,7 @@ } #endif - if (res_exec(rn, "migrate", argv[4], 0)) { + if (res_exec(rn, RS_MIGRATE, argv[4], 0)) { ret = -1; goto out; } @@ -226,9 +226,9 @@ } else if (!strcmp(argv[1], "status")) { printf("Checking status of %s...\n", argv[3]); - if (res_status(&tree, curres, NULL)) { + ret = res_status(&tree, curres, NULL); + if (ret) { printf("Status check of %s failed\n", argv[3]); - ret = -1; goto out; } printf("Status of %s is 
good\n", argv[3]); @@ -391,5 +391,5 @@ out: xmlCleanupParser(); malloc_dump_table(); - return 0; + return ret; } --- cluster/rgmanager/src/resources/vm.sh 2007/04/19 17:53:05 1.4 +++ cluster/rgmanager/src/resources/vm.sh 2007/06/27 14:03:51 1.5 @@ -182,9 +182,8 @@ - - - + + @@ -273,13 +272,15 @@ # Start a virtual machine given the parameters from # the environment. # -start() +do_start() { # Use /dev/null for the configuration file, if xmdefconfig # doesn't exist... # declare cmdline + do_status && return 0 + cmdline="`build_xm_cmdline`" echo "# xm command line: $cmdline" @@ -293,7 +294,7 @@ # Stop a VM. Try to shut it down. Wait a bit, and if it # doesn't shut down, destroy it. # -stop() +do_stop() { declare -i timeout=60 declare -i ret=1 @@ -307,7 +308,7 @@ while [ $timeout -gt 0 ]; do sleep 5 ((timeout -= 5)) - status || return 0 + do_status || return 0 while read dom state; do # # State is "stopped". Kill it. @@ -346,10 +347,27 @@ # Simple status check: Find the VM in the list of running # VMs # -status() +do_status() { declare line + xm list $OCF_RESKEY_name &> /dev/null + if [ $? -eq 0 ]; then + return $OCF_SUCCESS + fi + xm list migrating-$OCF_RESKEY_name &> /dev/null + if [ $? -eq 1 ]; then + return $OCF_NOT_RUNNING + fi + + return $OCF_ERR_GENERIC + +### NOT REACHED ### + + # virsh doesn't handle migrating domains right now + # When this gets fixed, we need to revisit this status + # function. + line=$(virsh domstate $OCF_RESKEY_name) if [ "$line" = "" ]; then return $OCF_NOT_RUNNING @@ -400,26 +418,26 @@ case $1 in start) - start + do_start exit $? ;; stop) - stop shutdown destroy + do_stop shutdown destroy exit $? ;; kill) - stop destroy + do_stop destroy exit $? ;; recover|restart) exit 0 ;; status|monitor) - status + do_status exit $? ;; migrate) - migrate $2 # Send VM to this node + do_migrate $2 # Send VM to this node exit $? ;; reload)