From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 2 Aug 2007 14:46:53 -0000 Subject: [Cluster-devel] cluster/rgmanager ChangeLog include/reslist.h ... Message-ID: <20070802144653.19499.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL5 Changes by: lhh at sourceware.org 2007-08-02 14:46:52 Modified files: rgmanager : ChangeLog rgmanager/include: reslist.h rgmanager/src/daemons: groups.c main.c nodeevent.c restree.c rg_forward.c rg_state.c rgmanager/src/resources: vm.sh Log message: Fix #248727, round 2 Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.31.2.23&r2=1.31.2.24 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.15.2.5&r2=1.15.2.6 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.11&r2=1.25.2.12 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.34.2.7&r2=1.34.2.8 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.2.4&r2=1.4.2.5 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.23.2.9&r2=1.23.2.10 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_forward.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8.2.2&r2=1.8.2.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.24.2.11&r2=1.24.2.12 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1.2.6&r2=1.1.2.7 --- 
cluster/rgmanager/ChangeLog 2007/07/31 17:54:54 1.31.2.23 +++ cluster/rgmanager/ChangeLog 2007/08/02 14:46:51 1.31.2.24 @@ -1,3 +1,16 @@ +2007-08-02 Lon Hohberger + * general: More fixes around #248727 + * include/reslist.h, src/daemons/restree.c: Make last-value be + returned for resources which have been checked recently + * src/daemons/groups.c: Make VMs use migrate semantics instead of + relocate semantics when employing failover domain rules + * src/daemons/nodeevent.c: Fix VMs ending up on wrong nodes when + simultaneous boot occurs + * src/daemons/rg_forward.c: Fix erroneous timeout + * src/daemons/rg_state.c: Handle RG_STATE_MIGRATE in svc_advise_* + Handle certain migration failures. + * src/resources/vm.sh: Handle certain migration failures + 2007-07-31 Lon Hohberger * general: Make VMs not change state when added/removed from the cluster config or bounce services/VMs when minor config changes --- cluster/rgmanager/include/reslist.h 2007/07/31 17:54:54 1.15.2.5 +++ cluster/rgmanager/include/reslist.h 2007/08/02 14:46:51 1.15.2.6 @@ -128,6 +128,10 @@ resource_act_t *rn_actions; int rn_state; /* State of this instance of rn_resource */ int rn_flags; + int rn_last_status; + int rn_last_depth; + int rn_checked; + int rn_pad; } resource_node_t; typedef struct _fod_node { --- cluster/rgmanager/src/daemons/groups.c 2007/07/31 17:54:54 1.25.2.11 +++ cluster/rgmanager/src/daemons/groups.c 2007/08/02 14:46:51 1.25.2.12 @@ -500,13 +500,14 @@ consider_relocate(char *svcName, rg_state_t *svcStatus, uint32_t nodeid, cluster_member_list_t *membership) { - int a, b; + int a, b, req = RG_RELOCATE; /* Service must be running locally in order to consider for a relocate */ - if (svcStatus->rs_state != RG_STATE_STARTED || + if ((svcStatus->rs_state != RG_STATE_STARTING && + svcStatus->rs_state != RG_STATE_STARTED) || svcStatus->rs_owner != my_id()) return; @@ -526,11 +527,16 @@ if (a <= b) return; - clulog(LOG_DEBUG, "Relocating group %s to better node %s\n", + if 
(group_migratory(svcName, 1)) { + req = RG_MIGRATE; + } + + clulog(LOG_NOTICE, "%s %s to better node %s\n", + req==RG_MIGRATE ? "Migrating":"Relocating", svcName, memb_id_to_name(membership, nodeid)); - rt_enqueue_request(svcName, RG_RELOCATE, NULL, 0, nodeid, 0, 0); + rt_enqueue_request(svcName, req, NULL, 0, nodeid, 0, 0); } --- cluster/rgmanager/src/daemons/main.c 2007/07/24 13:53:08 1.34.2.7 +++ cluster/rgmanager/src/daemons/main.c 2007/08/02 14:46:51 1.34.2.8 @@ -43,7 +43,7 @@ #ifdef WRAP_THREADS void dump_thread_states(FILE *); #endif -int configure_logging(int ccsfd, int debug); +int configure_rgmanager(int ccsfd, int debug); void node_event(int, int, int, int); void node_event_q(int, int, int, int); @@ -730,7 +730,7 @@ if (need_reconfigure || check_config_update()) { need_reconfigure = 0; - configure_logging(-1, 0); + configure_rgmanager(-1, 0); init_resource_groups(1); return 0; } @@ -789,7 +789,7 @@ * Configure logging based on data in cluster.conf */ int -configure_logging(int ccsfd, int dbg) +configure_rgmanager(int ccsfd, int dbg) { char *v; char internal = 0; @@ -812,6 +812,12 @@ free(v); } + if (ccs_get(ccsfd, "/cluster/rm/@transition_throttling", &v) == 0) { + if (!dbg) + set_transition_throttling(atoi(v)); + free(v); + } + if (internal) ccs_disconnect(ccsfd); @@ -956,7 +962,7 @@ We know we're quorate. At this point, we need to read the resource group trees from ccsd. 
*/ - configure_logging(-1, debug); + configure_rgmanager(-1, debug); clulog(LOG_NOTICE, "Resource Group Manager Starting\n"); if (init_resource_groups(0) != 0) { --- cluster/rgmanager/src/daemons/nodeevent.c 2007/07/24 13:53:08 1.4.2.4 +++ cluster/rgmanager/src/daemons/nodeevent.c 2007/08/02 14:46:51 1.4.2.5 @@ -42,6 +42,7 @@ #endif static nevent_t *event_queue = NULL; static pthread_t ne_thread = 0; +static int transition_throttling = 5; int ne_queue_request(int local, int nodeid, int state); void hard_exit(void); @@ -53,6 +54,15 @@ extern int shutdown_pending; +void +set_transition_throttling(int nsecs) +{ + if (nsecs < 0) + nsecs = 0; + transition_throttling = nsecs; +} + + /** Called to handle the transition of a cluster member from up->down or down->up. This handles initializing services (in the local node-up case), @@ -88,11 +98,16 @@ if (shutdown_pending) { clulog(LOG_NOTICE, "Processing delayed exit signal\n"); running = 0; + return; } setup_signal(SIGINT, flag_shutdown); setup_signal(SIGTERM, flag_shutdown); setup_signal(SIGHUP, flag_reconfigure); + /* Let things settle if we're booting multiple */ + if (transition_throttling) + sleep(transition_throttling); + eval_groups(1, nodeID, 1); return; } --- cluster/rgmanager/src/daemons/restree.c 2007/07/31 17:54:54 1.23.2.9 +++ cluster/rgmanager/src/daemons/restree.c 2007/08/02 14:46:51 1.23.2.10 @@ -665,8 +665,10 @@ } } /* No resource rule matching the child? Press on... */ - if (!flags) + if (!flags) { + free(ref); continue; + } flags = 0; /* Don't descend on anything we should have already picked @@ -686,11 +688,9 @@ break; } - if (flags == 2) { - free(ref); - continue; - } free(ref); + if (flags == 2) + continue; x = 1; switch(do_load_resource(ccsfd, tok, childrule, tree, @@ -1035,12 +1035,21 @@ } /* No check levels ready at the moment. 
*/ - if (idx == -1) + if (idx == -1) { + if (node->rn_checked) + return node->rn_last_status; return 0; + } - node->rn_actions[idx].ra_last = now; - if ((x = res_exec(node, RS_STATUS, NULL, - node->rn_actions[idx].ra_depth)) == 0) + + node->rn_actions[idx].ra_last = now; + x = res_exec(node, RS_STATUS, NULL, node->rn_actions[idx].ra_depth); + + node->rn_last_status = x; + node->rn_last_depth = node->rn_actions[idx].ra_depth; + node->rn_checked = 1; + + if (x == 0) return 0; if (!has_recover) @@ -1101,14 +1110,18 @@ now = res->r_started; - for (; node->rn_actions[x].ra_name; x++) { + for (; node->rn_actions[x].ra_name; x++) { - if (strcmp(node->rn_actions[x].ra_name, "monitor") && - strcmp(node->rn_actions[x].ra_name, "status")) + if (strcmp(node->rn_actions[x].ra_name, "monitor") && + strcmp(node->rn_actions[x].ra_name, "status")) continue; - node->rn_actions[x].ra_last = now; + node->rn_actions[x].ra_last = now; } + + node->rn_checked = 0; + node->rn_last_status = 0; + node->rn_last_depth = 0; } --- cluster/rgmanager/src/daemons/rg_forward.c 2007/07/24 13:53:08 1.8.2.2 +++ cluster/rgmanager/src/daemons/rg_forward.c 2007/08/02 14:46:51 1.8.2.3 @@ -122,10 +122,12 @@ m = NULL; continue; } - goto out_fail; + + if (ret == 0) + continue; } break; - } while(++retries < 60); /* old 60 second rule */ + } while(++retries < 60); /* old 600 second rule */ swab_SmMessageSt(&msg); --- cluster/rgmanager/src/daemons/rg_state.c 2007/07/24 13:53:08 1.24.2.11 +++ cluster/rgmanager/src/daemons/rg_state.c 2007/08/02 14:46:51 1.24.2.12 @@ -35,6 +35,7 @@ #include #include #include +#include /* XXX - copied :( */ #define cn_svccount cn_address.cna_address[0] /* Theses are uint8_t size */ @@ -467,6 +468,7 @@ case RG_STATE_CHECK: case RG_STATE_STARTING: case RG_STATE_RECOVER: + case RG_STATE_MIGRATE: if ((svcStatus->rs_owner != my_id()) && memb_online(membership, svcStatus->rs_owner)) { /* @@ -583,6 +585,10 @@ "#43: Service %s has failed; can not start.\n", svcName); break; + + case 
RG_STATE_MIGRATE: + ret = 4; + break; case RG_STATE_STOPPING: case RG_STATE_STARTED: @@ -892,16 +898,60 @@ ret = group_migrate(svcName, target); - if (ret == -1 || ret > 0) { + switch(ret) { + default: + case -1: + case OCF_RA_ERROR: + svc_fail(svcName); /* XXX run svc_status again here to see if it's still healthy; if it is, don't FAIL it; it could be that the target node simply died; in this case, set status back to started */ - /* if ret > 0 { svc_status... */ - svc_fail(svcName); + return RG_EFAIL; + break; + case OCF_RA_NOT_RUNNING: + /* For these two, the VM was either not running or + migration is simply impossible. */ + /* Don't mark the service as failed; since it's either + recoverable or still running. */ ret = RG_EFAIL; + break; + case OCF_RA_NOT_CONFIGURED: + ret = RG_EINVAL; + break; + case 0: + return 0; } + /* Ok, we've hit a recoverable condition. Since VMs and migratory + services are ... well, migratable, we can just flip the state + back to 'started' and error checking will fix it later. 
*/ + if (rg_lock(svcName, &lockp) < 0) { + clulog(LOG_ERR, "#45: Unable to obtain cluster lock: %s\n", + strerror(errno)); + return ret; + } + + if (get_rg_state(svcName, &svcStatus) != 0) { + rg_unlock(&lockp); + clulog(LOG_ERR, "#46: Failed getting status for RG %s\n", + svcName); + return ret; + } + + if (svcStatus.rs_last_owner != my_id() || + svcStatus.rs_owner != target || + svcStatus.rs_state != RG_STATE_MIGRATE) { + rg_unlock(&lockp); + return ret; + } + + svcStatus.rs_owner = my_id(); + svcStatus.rs_state = RG_STATE_STARTED; + + set_rg_state(svcName, &svcStatus); + rg_unlock(&lockp); + return ret; } @@ -954,7 +1004,8 @@ } msg_send(&ctx, &msgp, sizeof(msgp)); - msg_receive(&ctx, &response, sizeof (response), 5); + if (msg_receive(&ctx, &response, sizeof (response), 5) != sizeof(response)) + goto cont;; swab_SmMessageSt(&response); if (response.sm_data.d_ret == RG_SUCCESS) @@ -962,6 +1013,7 @@ else ret = -1; +cont: msg_close(&ctx); } @@ -1937,7 +1989,7 @@ allowed_nodes = member_list(); while (memb_count(allowed_nodes)) { - target = best_target_node(allowed_nodes, -1, + target = best_target_node(allowed_nodes, 0, svcName, 1); if (target == me) { ret = handle_start_remote_req(svcName, request); @@ -1947,7 +1999,7 @@ ret = RG_EFAIL; goto out; } else { - ret = relocate_service(svcName, request, target); + ret = relocate_service(svcName, RG_START_REMOTE, target); } switch(ret) { --- cluster/rgmanager/src/resources/vm.sh 2007/07/31 17:54:55 1.1.2.6 +++ cluster/rgmanager/src/resources/vm.sh 2007/08/02 14:46:52 1.1.2.7 @@ -22,6 +22,8 @@ export PATH +. $(dirname $0)/ocf-shellfuncs + # # Virtual Machine start/stop script (requires the xm command) # @@ -375,8 +377,22 @@ migrate() { declare target=$1 + declare errstr rv + + err=$(xm migrate $OCF_RESKEY_name $target 2>&1 | head -1) + rv=$? 
+ + if [ $rv -ne 0 ]; then + if [ "$err" != "${err/does not exist/}" ]; then + ocf_log warn "Trying to migrate '$OCF_RESKEY_name' - domain does not exist" + return $OCF_NOT_RUNNING + fi + if [ "$err" != "${err/Connection refused/}" ]; then + ocf_log warn "Trying to migrate '$OCF_RESKEY_name' - connect refused" + return $OCF_ERR_CONFIGURED + fi + fi - xm migrate $OCF_RESKEY_name $target return $? }