From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 19 Jul 2006 18:43:34 -0000 Subject: [Cluster-devel] cluster/rgmanager TODO include/msgsimple.h inc ... Message-ID: <20060719184334.25245.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: lhh at sourceware.org 2006-07-19 18:43:32 Modified files: rgmanager : TODO rgmanager/include: msgsimple.h resgroup.h reslist.h rg_queue.h rgmanager/src/daemons: fo_domain.c groups.c resrules.c restree.c rg_queue.c rg_state.c rg_thread.c test.c rgmanager/src/resources: ra-api-1-modified.dtd xenvm.sh rgmanager/src/utils: Makefile clusvcadm.c Log message: Add preliminary live-migration support (e.g. for Xen for FC6) Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/TODO.diff?cvsroot=cluster&r1=1.7&r2=1.8 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/msgsimple.h.diff?cvsroot=cluster&r1=1.3&r2=1.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&r1=1.11&r2=1.12 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.13&r2=1.14 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/rg_queue.h.diff?cvsroot=cluster&r1=1.5&r2=1.6 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/fo_domain.c.diff?cvsroot=cluster&r1=1.9&r2=1.10 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.19&r2=1.20 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&r1=1.12&r2=1.13 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&r1=1.20&r2=1.21 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_queue.c.diff?cvsroot=cluster&r1=1.4&r2=1.5 
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.17&r2=1.18 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&r1=1.13&r2=1.14 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/test.c.diff?cvsroot=cluster&r1=1.5&r2=1.6 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ra-api-1-modified.dtd.diff?cvsroot=cluster&r1=1.2&r2=1.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/xenvm.sh.diff?cvsroot=cluster&r1=1.2&r2=1.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/Makefile.diff?cvsroot=cluster&r1=1.11&r2=1.12 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clusvcadm.c.diff?cvsroot=cluster&r1=1.9&r2=1.10 --- cluster/rgmanager/TODO 2006/07/12 17:15:14 1.7 +++ cluster/rgmanager/TODO 2006/07/19 18:43:32 1.8 @@ -2,3 +2,4 @@ can manually migrate Xen VMs to other nodes without telling the cluster about it. That is, the cluster should be able to acquire running VMs and update its state accordingly. 
+* Test against a working Xen build and shake out bugs --- cluster/rgmanager/include/msgsimple.h 2006/06/02 17:37:10 1.3 +++ cluster/rgmanager/include/msgsimple.h 2006/07/19 18:43:32 1.4 @@ -11,7 +11,7 @@ uint32_t gh_command; uint32_t gh_arg1; uint32_t gh_arg2; - /* XXX alignment issue on ia64 */ + uint32_t gh_arg3; } generic_msg_hdr; #define swab_generic_msg_hdr(ptr)\ @@ -29,9 +29,8 @@ char d_svcName[64]; uint32_t d_action; uint32_t d_svcState; - uint64_t d_svcOwner; + uint32_t d_svcOwner; int32_t d_ret; - uint32_t d_pad; } sm_data; } SmMessageSt; @@ -40,7 +39,7 @@ swab_generic_msg_hdr(&((ptr)->sm_hdr));\ swab32((ptr)->sm_data.d_action);\ swab32((ptr)->sm_data.d_svcState);\ - swab64((ptr)->sm_data.d_svcOwner);\ + swab32((ptr)->sm_data.d_svcOwner);\ swab32((ptr)->sm_data.d_ret);\ } --- cluster/rgmanager/include/resgroup.h 2006/07/12 14:04:06 1.11 +++ cluster/rgmanager/include/resgroup.h 2006/07/19 18:43:32 1.12 @@ -27,8 +27,8 @@ */ typedef struct { char rs_name[64]; /**< Service name */ - uint64_t rs_owner; /**< Member ID running service. */ - uint64_t rs_last_owner; /**< Last member to run the service. */ + uint32_t rs_owner; /**< Member ID running service. */ + uint32_t rs_last_owner; /**< Last member to run the service. */ uint32_t rs_state; /**< State of service. 
*/ uint32_t rs_restarts; /**< Number of cluster-induced restarts */ @@ -39,8 +39,8 @@ #define swab_rg_state_t(ptr) \ {\ - swab64((ptr)->rs_owner);\ - swab64((ptr)->rs_last_owner);\ + swab32((ptr)->rs_owner);\ + swab32((ptr)->rs_last_owner);\ swab32((ptr)->rs_state);\ swab32((ptr)->rs_restarts);\ swab64((ptr)->rs_transition);\ @@ -56,6 +56,7 @@ #define RG_ACTION_REQUEST /* Message header */ 0x138582 #define RG_EVENT 0x138583 +/* Requests */ #define RG_SUCCESS 0 #define RG_FAIL 1 #define RG_START 2 @@ -78,6 +79,7 @@ #define RG_LOCK 19 #define RG_UNLOCK 20 #define RG_QUERY_LOCK 21 +#define RG_MIGRATE 22 #define RG_NONE 999 extern const char *rg_req_strings[]; @@ -102,6 +104,7 @@ #define RG_STATE_ERROR 117 /** Recoverable error */ #define RG_STATE_RECOVER 118 /** Pending recovery */ #define RG_STATE_DISABLED 119 /** Resource not allowd to run */ +#define RG_STATE_MIGRATE 120 /** Resource migrating */ #define DEFAULT_CHECK_INTERVAL 10 @@ -121,7 +124,7 @@ int svc_fail(char *svcName); int rt_enqueue_request(const char *resgroupname, int request, msgctx_t *resp_ctx, - int max, uint64_t target, int arg0, int arg1); + int max, uint32_t target, int arg0, int arg1); void send_response(int ret, request_t *req); void send_ret(msgctx_t *ctx, char *name, int ret, int req); @@ -135,7 +138,7 @@ /* from rg_state.c */ int set_rg_state(char *name, rg_state_t *svcblk); int get_rg_state(char *servicename, rg_state_t *svcblk); -uint64_t best_target_node(cluster_member_list_t *allowed, uint64_t owner, +uint32_t best_target_node(cluster_member_list_t *allowed, uint32_t owner, char *rg_name, int lock); #ifdef DEBUG @@ -158,22 +161,19 @@ cluster_member_list_t *member_list(void); int my_id(void); -#define RG_EDEPEND -7 -#define RG_EAGAIN -6 -#define RG_EDEADLCK -5 -#define RG_ENOSERVICE -4 -#define RG_EFORWARD -3 -#define RG_EABORT -2 -#define RG_EFAIL -1 +/* Return codes */ +#define RG_EQUORUM -9 /* Operation requires quorum */ +#define RG_EINVAL -8 /* Invalid operation for resource */ 
+#define RG_EDEPEND -7 /* Operation violates dependency */ +#define RG_EAGAIN -6 /* Try again */ +#define RG_EDEADLCK -5 /* Aborted - would deadlock */ +#define RG_ENOSERVICE -4 /* Service does not exist */ +#define RG_EFORWARD -3 /* Service not mastered locally */ +#define RG_EABORT -2 /* Abort; service unrecoverable */ +#define RG_EFAIL -1 /* Generic failure */ #define RG_ESUCCESS 0 - - -#define FORWARD -3 -#define ABORT -2 -#define FAIL -1 -#define SUCCESS 0 -#define YES 1 -#define NO 2 +#define RG_YES 1 +#define RG_NO 2 /* * Fail-over domain states --- cluster/rgmanager/include/reslist.h 2005/12/06 18:31:37 1.13 +++ cluster/rgmanager/include/reslist.h 2006/07/19 18:43:32 1.14 @@ -53,6 +53,10 @@ #define RS_RECOVER (7) #define RS_CONDSTART (8) /** Start if flagged with RF_NEEDSTART */ #define RS_CONDSTOP (9) /** STOP if flagged with RF_NEEDSTOP */ +#define RS_MONITOR (10) +#define RS_META_DATA (11) +#define RS_VALIDATE (12) +#define RS_MIGRATE (13) #ifndef SHAREDIR @@ -100,7 +104,7 @@ char * rr_type; char * rr_agent; char * rr_version; /** agent XML spec version; OCF-ism */ - int rr_root; + int rr_flags; int rr_maxrefs; resource_attr_t * rr_attrs; resource_child_t * rr_childtypes; --- cluster/rgmanager/include/rg_queue.h 2006/06/02 17:37:10 1.5 +++ cluster/rgmanager/include/rg_queue.h 2006/07/19 18:43:32 1.6 @@ -16,9 +16,10 @@ uint32_t rr_request; /** Request */ uint32_t rr_errorcode; /** Error condition */ uint32_t rr_orig_request; /** Original request */ - uint64_t rr_target; /** Target node */ + uint32_t rr_target; /** Target node */ uint32_t rr_arg0; /** Integer argument */ uint32_t rr_arg1; /** Integer argument */ + uint32_t rr_arg3; /** Integer argument */ uint32_t rr_line; /** Line no */ msgctx_t * rr_resp_ctx; /** FD to send response */ char *rr_file; /** Who made req */ @@ -28,7 +29,7 @@ int _rq_queue_request(request_t **queue, char *name, uint32_t request, uint32_t err, uint32_t oldreq, msgctx_t *ctx, time_t when, - uint64_t target, uint32_t arg0, 
uint32_t arg1, char *file, + uint32_t target, uint32_t arg0, uint32_t arg1, char *file, int line); #define rq_queue_request(queue, name, request, err, oldreq,\ --- cluster/rgmanager/src/daemons/fo_domain.c 2006/07/11 23:52:41 1.9 +++ cluster/rgmanager/src/daemons/fo_domain.c 2006/07/19 18:43:32 1.10 @@ -333,8 +333,10 @@ fod_t *fod = NULL; int found = 0; int owned_by_node = 0, started = 0, no_owner = 0; +#ifndef NO_CCS rg_state_t svc_state; struct dlm_lksb lockp; +#endif ENTER(); @@ -408,7 +410,7 @@ RETURN(FOD_BEST); } - if (get_rg_state(rg_name, &svc_state) == FAIL) { + if (get_rg_state(rg_name, &svc_state) == RG_EFAIL) { /* * Couldn't get the service state, thats odd */ --- cluster/rgmanager/src/daemons/groups.c 2006/07/11 23:52:41 1.19 +++ cluster/rgmanager/src/daemons/groups.c 2006/07/19 18:43:32 1.20 @@ -36,6 +36,7 @@ #define cn_svccount cn_address.cna_address[0] /* Theses are uint8_t size */ #define cn_svcexcl cn_address.cna_address[1] +extern char *res_ops[]; static int config_version = 0; static resource_t *_resources = NULL; static resource_rule_t *_rules = NULL; @@ -61,7 +62,7 @@ @see node_should_start */ int -node_should_start_safe(uint64_t nodeid, cluster_member_list_t *membership, +node_should_start_safe(uint32_t nodeid, cluster_member_list_t *membership, char *rg_name) { int ret; @@ -78,7 +79,7 @@ count_resource_groups(cluster_member_list_t *ml) { resource_t *res; - char *rgname, *val; + char rgname[64], *val; int x; rg_state_t st; struct dlm_lksb lockp; @@ -92,10 +93,8 @@ pthread_rwlock_rdlock(&resource_lock); list_do(&_resources, res) { - if (res->r_rule->rr_root == 0) - continue; - rgname = res->r_attrs[0].ra_value; + res_build_name(rgname, sizeof(rgname), res); if (rg_lock(rgname, &lockp) < 0) { clulog(LOG_ERR, "#XX: Unable to obtain cluster " @@ -143,14 +142,14 @@ - Failover domain (ordering / restricted policy) - Exclusive service policy */ -uint64_t -best_target_node(cluster_member_list_t *allowed, uint64_t owner, +uint32_t 
+best_target_node(cluster_member_list_t *allowed, uint32_t owner, char *rg_name, int lock) { int x; int highscore = 1; int score; - uint64_t highnode = owner, nodeid; + uint32_t highnode = owner, nodeid; char *val; resource_t *res; int exclusive; @@ -364,7 +363,7 @@ void -consider_relocate(char *svcName, rg_state_t *svcStatus, uint64_t nodeid, +consider_relocate(char *svcName, rg_state_t *svcStatus, uint32_t nodeid, cluster_member_list_t *membership) { int a, b; @@ -409,7 +408,7 @@ * @see node_event */ int -eval_groups(int local, uint64_t nodeid, int nodeStatus) +eval_groups(int local, uint32_t nodeid, int nodeStatus) { struct dlm_lksb lockp; char svcName[64], *nodeName; @@ -611,6 +610,56 @@ } +/** + Tells us if a resource group can be migrated. + */ +int +group_migratory(char *groupname) +{ + resource_node_t *rn; + resource_t *res; + int migrate = 0, x; + + pthread_rwlock_rdlock(&resource_lock); + + res = find_root_by_ref(&_resources, groupname); + if (!res) { + pthread_rwlock_unlock(&resource_lock); + /* Nonexistent or non-TL RG cannot be migrated */ + return 0; + } + + for (x = 0; res->r_rule->rr_actions[x].ra_name; x++) { + if (!strcmp(res->r_rule->rr_actions[x].ra_name, + "migrate")) { + migrate = 1; + break; + } + } + + if (!migrate) { + pthread_rwlock_unlock(&resource_lock); + /* resource rule missing 'migrate' command */ + return 0; + } + + list_do(&_tree, rn) { + if (rn->rn_resource == res && rn->rn_child) { + pthread_rwlock_unlock(&resource_lock); + /* TL service w/ children cannot be migrated */ + return 0; + } + } while (!list_done(&_tree, rn)); + + pthread_rwlock_unlock(&resource_lock); + + /* Ok, we have a migrate option to the resource group, + the resource group has no children, and the resource + group exists. We're all good */ + return 1; +} + + /** Perform an operation on a resource group. 
That is, walk down the @@ -678,6 +727,38 @@ } +int +group_migrate(char *groupname, int target) +{ + resource_t *res; + char *tgt_name; + int ret = RG_ENOSERVICE; + cluster_member_list_t *membership; + + membership = member_list(); + if (!membership) + return RG_EFAIL; + + pthread_rwlock_rdlock(&resource_lock); + + tgt_name = memb_id_to_name(membership, target); + res = find_root_by_ref(&_resources, groupname); + if (!res) + goto out; + + if (!tgt_name) { + ret = RG_EINVAL; + goto out; + } + ret = res_exec(res, res_ops[RG_MIGRATE], tgt_name); + +out: + pthread_rwlock_unlock(&resource_lock); + free_member_list(membership); + return ret; +} + + /** Gets an attribute of a resource group. --- cluster/rgmanager/src/daemons/resrules.c 2006/06/02 17:37:10 1.12 +++ cluster/rgmanager/src/daemons/resrules.c 2006/07/19 18:43:32 1.13 @@ -52,12 +52,6 @@ newrule->rr_type); return -1; } - if (newrule->rr_root && curr->rr_root) { - fprintf(stderr, "Error storing %s: root " - "resource type %s exists already\n", - newrule->rr_type, curr->rr_type); - return -1; - } } while (!list_done(rulelist, curr)); @@ -185,30 +179,6 @@ } -/** - Get and store the root attribute. - - @param doc Pre-parsed XML document pointer. - @param ctx Pre-allocated XML XPath context pointer. - @param base XPath prefix to search - @param rr Resource rule to store new information in. 
- */ -void -_get_root(xmlDocPtr doc, xmlXPathContextPtr ctx, char *base, - resource_rule_t *rr) -{ - char xpath[256]; - char *ret = NULL; - - snprintf(xpath, sizeof(xpath), "%s/attributes/@root", base); - ret = xpath_get_one(doc, ctx, xpath); - if (ret) { - rr->rr_root = 1; - free(ret); - } -} - - int expand_time(char *val) { @@ -355,8 +325,6 @@ free(act); } while (1); - - } @@ -486,10 +454,7 @@ { int x; - printf("Resource Rules for \"%s\"", rr->rr_type); - if (rr->rr_root) - printf(" [ROOT]"); - printf("\n"); + printf("Resource Rules for \"%s\"\n", rr->rr_type); if (rr->rr_version) printf("OCF API Version: %s\n", rr->rr_version); @@ -550,7 +515,7 @@ children: - printf("Recognized child resource types:\n"); + printf("Explicitly defined child resource types:\n"); if (!rr->rr_childtypes) { printf(" - None -\n\n"); return; @@ -917,12 +882,11 @@ snprintf(base, sizeof(base), "/resource-agent[%d]/special[@tag=\"rgmanager\"]", ruleid); - _get_root(doc, ctx, base, rr); _get_maxparents(doc, ctx, base, rr); rr->rr_agent = strdup(filename); /* - Second, add the allowable-children fields + Second, add the children fields */ _get_childtypes(doc, ctx, base, rr); --- cluster/rgmanager/src/daemons/restree.c 2006/07/11 23:52:41 1.20 +++ cluster/rgmanager/src/daemons/restree.c 2006/07/19 18:43:32 1.21 @@ -70,7 +70,8 @@ "condstop", "monitor", "meta-data", /* printenv */ - "validate-all" + "validate-all", + "migrate" }; @@ -215,14 +216,13 @@ Allocate and fill an environment variable array. @param node Node in resource tree to use for parameters - @param op Operation (start/stop/status/monitor/etc.) @param depth Depth (status/monitor/etc.) @return Newly allocated environment array or NULL if one could not be formed. 
@see kill_env res_exec add_ocf_stuff */ static char ** -build_env(resource_node_t *node, int op, int depth) +build_env(resource_node_t *node, int depth) { resource_t *res = node->rn_resource; char **env; @@ -327,7 +327,7 @@ @see build_env */ int -res_exec(resource_node_t *node, int op, int depth) +res_exec(resource_node_t *node, const char *op, const char *arg, int depth) { int childpid, pid; int ret = 0; @@ -339,7 +339,7 @@ return 0; #ifdef DEBUG - env = build_env(node, op); + env = build_env(node, depth); if (!env) return -errno; #endif @@ -360,7 +360,7 @@ #endif #ifndef DEBUG - env = build_env(node, op, depth); + env = build_env(node, depth); #endif if (!env) @@ -375,7 +375,10 @@ restore_signals(); - execle(fullpath, fullpath, res_ops[op], NULL, env); + if (arg) + execle(fullpath, fullpath, op, arg, NULL, env); + else + execle(fullpath, fullpath, op, NULL, env); } #ifdef DEBUG @@ -395,7 +398,7 @@ if (ret) { clulog(LOG_NOTICE, "%s on %s \"%s\" returned %d (%s)\n", - res_ops[op], res->r_rule->rr_type, + op, res->r_rule->rr_type, res->r_attrs->ra_value, ret, ocf_strerror(ret)); } @@ -584,9 +587,6 @@ /* Find and build the list of root nodes */ list_do(rulelist, curr) { - if (!curr->rr_root) - continue; - build_tree(ccsfd, &root, NULL, curr, rulelist, reslist, tok); } while (!list_done(rulelist, curr)); @@ -847,7 +847,7 @@ return 0; node->rn_actions[idx].ra_last = now; - if ((x = res_exec(node, RS_STATUS, + if ((x = res_exec(node, res_ops[RS_STATUS], NULL, node->rn_actions[idx].ra_depth)) == 0) return 0; @@ -855,7 +855,7 @@ return x; /* Strange/failed status. Try to recover inline. 
*/ - if ((x = res_exec(node, RS_RECOVER, 0)) == 0) + if ((x = res_exec(node, res_ops[RS_RECOVER], NULL, 0)) == 0) return 0; return x; @@ -1005,7 +1005,7 @@ if (me && (op == RS_START)) { node->rn_flags &= ~RF_NEEDSTART; - rv = res_exec(node, op, 0); + rv = res_exec(node, res_ops[op], NULL, 0); if (rv != 0) { node->rn_state = RES_FAILED; return rv; @@ -1029,7 +1029,7 @@ /* Stop/status/etc stops after children have stopped */ if (me && (op == RS_STOP)) { node->rn_flags &= ~RF_NEEDSTOP; - rv = res_exec(node, op, 0); + rv = res_exec(node, res_ops[op], NULL, 0); if (rv != 0) { node->rn_state = RES_FAILED; --- cluster/rgmanager/src/daemons/rg_queue.c 2006/06/02 17:37:10 1.4 +++ cluster/rgmanager/src/daemons/rg_queue.c 2006/07/19 18:43:32 1.5 @@ -26,7 +26,7 @@ int _rq_queue_request(request_t **queue, char *name, uint32_t request, uint32_t err, uint32_t oldreq, msgctx_t *ctx, time_t when, - uint64_t target, uint32_t arg0, uint32_t arg1, char *file, + uint32_t target, uint32_t arg0, uint32_t arg1, char *file, int line) { request_t *req; --- cluster/rgmanager/src/daemons/rg_state.c 2006/07/11 23:52:41 1.17 +++ cluster/rgmanager/src/daemons/rg_state.c 2006/07/19 18:43:32 1.18 @@ -32,9 +32,9 @@ #include #include -int node_should_start_safe(uint64_t, cluster_member_list_t *, char *); +int node_should_start_safe(uint32_t, cluster_member_list_t *, char *); -uint64_t next_node_id(cluster_member_list_t *membership, uint64_t me); +uint32_t next_node_id(cluster_member_list_t *membership, uint32_t me); int rg_exec_script(char *rgname, char *script, char *action); static int _svc_stop_finish(char *svcName, int failed, uint32_t newstate); @@ -43,13 +43,14 @@ int get_rg_state(char *servicename, rg_state_t *svcblk); void get_recovery_policy(char *rg_name, char *buf, size_t buflen); int check_depend_safe(char *servicename); +int group_migratory(char *servicename); -uint64_t -next_node_id(cluster_member_list_t *membership, uint64_t me) +uint32_t +next_node_id(cluster_member_list_t 
*membership, uint32_t me) { - uint64_t low = (uint64_t)(-1); - uint64_t next = me, curr; + uint32_t low = (uint32_t)(-1); + uint32_t next = me, curr; int x; for (x = 0; x < membership->cml_count; x++) { @@ -293,7 +294,7 @@ if (ret != VFR_OK) { free_member_list(membership); printf("Couldn't initialize rg %s!\n", name); - return FAIL; + return RG_EFAIL; } ret = vf_read(membership, res, &viewno, &data, &datalen); @@ -302,7 +303,7 @@ free(data); free_member_list(membership); printf("Couldn't reread rg %s! (%d)\n", name, ret); - return FAIL; + return RG_EFAIL; } } @@ -312,7 +313,7 @@ if (data) free(data); free_member_list(membership); - return FAIL; + return RG_EFAIL; } /* Copy out the data. */ @@ -353,7 +354,7 @@ svcblk->rs_transition = 0; strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name)); - return FAIL; + return RG_EFAIL; } /* Copy out the data. */ @@ -370,7 +371,7 @@ * @param svcStatus Current service status. * @param svcName Service name * @param req Specify request to perform - * @return 0 = DO NOT stop service, return FAIL + * @return 0 = DO NOT stop service, return RG_EFAIL * 1 = STOP service - return whatever it returns. * 2 = DO NOT stop service, return 0 (success) * 3 = DO NOT stop service, return RG_EFORWARD @@ -496,7 +497,7 @@ * @param svcName Service name * @param flags Specify whether or not it's legal to start a * disabled service, etc. - * @return 0 = DO NOT start service, return FAIL + * @return 0 = DO NOT start service, return RG_EFAIL * 1 = START service - return whatever it returns. 
* 2 = DO NOT start service, return 0 * 3 = DO NOT start service, return RG_EAGAIN @@ -563,7 +564,7 @@ } /* - * Service is running but owner is down -> FAILOVER + * Service is running but owner is down -> RG_EFAILOVER */ clulog(LOG_NOTICE, "Taking over service %s from down member %s\n", @@ -656,21 +657,21 @@ if (rg_lock(svcName, &lockp) < 0) { clulog(LOG_ERR, "#45: Unable to obtain cluster lock: %s\n", strerror(errno)); - return FAIL; + return RG_EFAIL; } if (get_rg_state(svcName, &svcStatus) != 0) { rg_unlock(&lockp); clulog(LOG_ERR, "#46: Failed getting status for RG %s\n", svcName); - return FAIL; + return RG_EFAIL; } /* LOCK HELD */ switch (svc_advise_start(&svcStatus, svcName, req)) { - case 0: /* Don't start service, return FAIL */ + case 0: /* Don't start service, return RG_EFAIL */ rg_unlock(&lockp); - return FAIL; + return RG_EFAIL; case 2: /* Don't start service, return 0 */ rg_unlock(&lockp); return 0; @@ -696,7 +697,7 @@ clulog(LOG_ERR, "#47: Failed changing service status\n"); rg_unlock(&lockp); - return FAIL; + return RG_EFAIL; } rg_unlock(&lockp); @@ -708,7 +709,7 @@ if (rg_lock(svcName, &lockp) < 0) { clulog(LOG_ERR, "#74: Unable to obtain cluster lock: %s\n", strerror(errno)); - return FAIL; + return RG_EFAIL; } svcStatus.rs_state = RG_STATE_STARTED; @@ -716,7 +717,7 @@ clulog(LOG_ERR, "#75: Failed changing service status\n"); rg_unlock(&lockp); - return FAIL; + return RG_EFAIL; } rg_unlock(&lockp); @@ -737,40 +738,138 @@ /** + * Migrate a service to another node. 
+ */ +int +svc_migrate(char *svcName, int target) +{ + struct dlm_lksb lockp; + rg_state_t svcStatus; + int ret; + + if (!group_migratory(svcName)) + return RG_EINVAL; + + if (rg_lock(svcName, &lockp) < 0) { + clulog(LOG_ERR, "#45: Unable to obtain cluster lock: %s\n", + strerror(errno)); + return RG_EFAIL; + } + + if (get_rg_state(svcName, &svcStatus) != 0) { + rg_unlock(&lockp); + clulog(LOG_ERR, "#46: Failed getting status for RG %s\n", + svcName); + return RG_EFAIL; + } + + if (svcStatus.rs_owner != my_id()) { + rg_unlock(&lockp); + return RG_EFORWARD; + } + + switch(svcStatus.rs_state) { + case RG_STATE_STARTED: + break; + case RG_STATE_STARTING: + case RG_STATE_STOPPING: + case RG_STATE_RECOVER: + case RG_STATE_MIGRATE: + case RG_STATE_ERROR: + rg_unlock(&lockp); + return RG_EAGAIN; + default: + rg_unlock(&lockp); + return RG_EFAIL; + } + + /* LOCK HELD */ + svcStatus.rs_owner = target; + svcStatus.rs_last_owner = my_id(); + svcStatus.rs_state = RG_STATE_MIGRATE; + svcStatus.rs_transition = (uint64_t)time(NULL); + + if (set_rg_state(svcName, &svcStatus) != 0) { + clulog(LOG_ERR, + "#75: Failed changing service status\n"); + rg_unlock(&lockp); + return RG_EFAIL; + } + rg_unlock(&lockp); + + ret = group_migrate(svcName, target); + return ret; +} + + +/** * Check status of a cluster service * * @param svcName Service name to check. 
- * @return RG_EFORWARD, FAIL, 0 + * @return RG_EFORWARD, RG_EFAIL, 0 */ int svc_status(char *svcName) { struct dlm_lksb lockp; rg_state_t svcStatus; + int ret; if (rg_lock(svcName, &lockp) < 0) { clulog(LOG_ERR, "#48: Unable to obtain cluster lock: %s\n", strerror(errno)); - return FAIL; + return RG_EFAIL; } if (get_rg_state(svcName, &svcStatus) != 0) { rg_unlock(&lockp); clulog(LOG_ERR, "#49: Failed getting status for RG %s\n", svcName); - return FAIL; + return RG_EFAIL; } rg_unlock(&lockp); if (svcStatus.rs_owner != my_id()) /* Don't check status for anything not owned */ - return SUCCESS; + return 0; - if (svcStatus.rs_state != RG_STATE_STARTED) + if (svcStatus.rs_state != RG_STATE_STARTED && + svcStatus.rs_state != RG_STATE_MIGRATE) /* Not-running RGs should not be checked either. */ - return SUCCESS; + return 0; - return group_op(svcName, RG_STATUS); + ret = group_op(svcName, RG_STATUS); + + /* For running services, just check the return code */ + if (svcStatus.rs_state == RG_STATE_STARTED) + return ret; + + /* For service(s) migrating to the local node, ignore invalid + return codes. 
+ XXX Should put a timeout on migrating services */ + if (ret < 0) + return 0; + + /* If the check succeeds (returns 0), then flip the state back to + 'started' - we now own the service */ + if (rg_lock(svcName, &lockp) < 0) { + clulog(LOG_ERR, "#45: Unable to obtain cluster lock: %s\n", + strerror(errno)); + return RG_EFAIL; + } + + svcStatus.rs_state = RG_STATE_STARTED; + if (set_rg_state(svcName, &svcStatus) != 0) { + rg_unlock(&lockp); + clulog(LOG_ERR, "#46: Failed getting status for RG %s\n", + svcName); + return RG_EFAIL; + } + rg_unlock(&lockp); + + clulog(LOG_NOTICE, "%s is now running locally\n", svcName); + + return 0; } @@ -795,17 +894,17 @@ return group_op(svcName, RG_STOP); } - if (rg_lock(svcName, &lockp) == FAIL) { + if (rg_lock(svcName, &lockp) == RG_EFAIL) { clulog(LOG_ERR, "#50: Unable to obtain cluster lock: %s\n", strerror(errno)); - return FAIL; + return RG_EFAIL; } if (get_rg_state(svcName, &svcStatus) != 0) { rg_unlock(&lockp); clulog(LOG_ERR, "#51: Failed getting status for RG %s\n", svcName); - return FAIL; + return RG_EFAIL; } switch (svc_advise_stop(&svcStatus, svcName, req)) { @@ -813,10 +912,10 @@ rg_unlock(&lockp); clulog(LOG_DEBUG, "Unable to stop RG %s in %s state\n", svcName, rg_state_str(svcStatus.rs_state)); - return FAIL; + return RG_EFAIL; case 2: rg_unlock(&lockp); - return SUCCESS; + return RG_ESUCCESS; case 3: rg_unlock(&lockp); return RG_EFORWARD; @@ -840,7 +939,7 @@ if (set_rg_state(svcName, &svcStatus) != 0) { rg_unlock(&lockp); clulog(LOG_ERR, "#52: Failed changing RG status\n"); - return FAIL; + return RG_EFAIL; } rg_unlock(&lockp); @@ -858,17 +957,17 @@ rg_state_t svcStatus; struct dlm_lksb lockp; - if (rg_lock(svcName, &lockp) == FAIL) { + if (rg_lock(svcName, &lockp) == RG_EFAIL) { clulog(LOG_ERR, "#53: Unable to obtain cluster lock: %s\n", strerror(errno)); - return FAIL; + return RG_EFAIL; } if (get_rg_state(svcName, &svcStatus) != 0) { rg_unlock(&lockp); clulog(LOG_ERR, "#54: Failed getting status for RG %s\n", 
svcName); - return FAIL; + return RG_EFAIL; } if ((svcStatus.rs_state != RG_STATE_STOPPING) && @@ -899,7 +998,7 @@ if (set_rg_state(svcName, &svcStatus) != 0) { rg_unlock(&lockp); clulog(LOG_ERR, "#55: Failed changing RG status\n"); - return FAIL; + return RG_EFAIL; } rg_unlock(&lockp); @@ -944,10 +1043,10 @@ struct dlm_lksb lockp; rg_state_t svcStatus; - if (rg_lock(svcName, &lockp) == FAIL) { + if (rg_lock(svcName, &lockp) == RG_EFAIL) { clulog(LOG_ERR, "#55: Unable to obtain cluster lock: %s\n", strerror(errno)); - return FAIL; + return RG_EFAIL; } clulog(LOG_DEBUG, "Handling failure request for RG %s\n", svcName); @@ -956,7 +1055,7 @@ rg_unlock(&lockp); clulog(LOG_ERR, "#56: Failed getting status for RG %s\n", svcName); - return FAIL; + return RG_EFAIL; } if ((svcStatus.rs_state == RG_STATE_STARTED) && @@ -964,7 +1063,7 @@ rg_unlock(&lockp); clulog(LOG_DEBUG, "Unable to disable RG %s in %s state\n", svcName, rg_state_str(svcStatus.rs_state)); - return FAIL; + return RG_EFAIL; } /* @@ -980,7 +1079,7 @@ if (set_rg_state(svcName, &svcStatus) != 0) { rg_unlock(&lockp); clulog(LOG_ERR, "#57: Failed changing RG status\n"); - return FAIL; + return RG_EFAIL; } rg_unlock(&lockp); @@ -994,7 +1093,7 @@ * Send a message to the target node to start the service. 
*/ static int -relocate_service(char *svcName, int request, uint64_t target) +relocate_service(char *svcName, int request, uint32_t target) { SmMessageSt msg_relo; int msg_ret; @@ -1049,7 +1148,7 @@ clulog(LOG_WARNING, "#XX: Cancelling relocation: Shutting down\n"); msg_close(&ctx); - return NO; + return RG_NO; } /* Check for node transition in the middle of a relocate */ @@ -1062,7 +1161,7 @@ "#XX: Cancelling relocation: Target node down\n"); free_member_list(ml); msg_close(&ctx); - return FAIL; + return RG_EFAIL; } while (1); if (msg_ret != sizeof (SmMessageSt)) { @@ -1110,7 +1209,7 @@ int *new_owner) { cluster_member_list_t *allowed_nodes, *backup = NULL; - uint64_t target = preferred_target, me = my_id(); + uint32_t target = preferred_target, me = my_id(); int ret, x; /* @@ -1118,9 +1217,9 @@ */ if (request != RG_START_RECOVER) { ret = _svc_stop(svcName, request, 0, RG_STATE_STOPPED); - if (ret == FAIL) { + if (ret == RG_EFAIL) { svc_fail(svcName); - return FAIL; + return RG_EFAIL; } if (ret == RG_EFORWARD) return RG_EFORWARD; @@ -1214,8 +1313,8 @@ case RG_EABORT: svc_report_failure(svcName); free_member_list(allowed_nodes); - return FAIL; - case NO: + return RG_EFAIL; + case RG_NO: /* state uncertain */ free_member_list(allowed_nodes); clulog(LOG_DEBUG, "State Uncertain: svc:%s " @@ -1242,7 +1341,7 @@ * We're DONE. */ if (request == RG_START_RECOVER) - return FAIL; + return RG_EFAIL; /* * All potential places for the service to start have been exhausted. @@ -1255,7 +1354,7 @@ svcName); if (svc_start(svcName, RG_START_RECOVER) == 0) { *new_owner = me; - return FAIL; + return RG_EFAIL; } } @@ -1264,7 +1363,7 @@ svc_report_failure(svcName); } - return FAIL; + return RG_EFAIL; } @@ -1296,7 +1395,7 @@ (node_should_start_safe(my_id(), membership, svcName) < tolerance)) { free_member_list(membership); - return FAIL; + return RG_EFAIL; } free_member_list(membership); @@ -1320,14 +1419,14 @@ /* * If we succeeded, then we're done. 
*/ - if (ret == SUCCESS) { + if (ret == RG_ESUCCESS) { *new_owner = my_id(); - return SUCCESS; + return RG_ESUCCESS; } /* Already running? */ - if (ret == NO) { - return SUCCESS; + if (ret == RG_NO) { + return RG_ESUCCESS; } /* @@ -1359,7 +1458,7 @@ /* If we leave the service stopped, instead of disabled, someone will try to start it after the next node transition */ - if (ret == FAIL) { + if (ret == RG_EFAIL) { if (svc_stop(svcName, RG_STOP) != 0) { svc_fail(svcName); svc_report_failure(svcName); @@ -1384,7 +1483,7 @@ { int tolerance = FOD_BEST; int x; - uint64_t me = my_id(); + uint32_t me = my_id(); cluster_member_list_t *membership = member_list(); /* XXX ok, so we need to say "should I start this if I was the @@ -1404,7 +1503,7 @@ */ if (node_should_start_safe(me, membership, svcName) < tolerance){ free_member_list(membership); - return FAIL; + return RG_EFAIL; } free_member_list(membership); @@ -1412,7 +1511,7 @@ return 0; if (svc_stop(svcName, RG_STOP_RECOVER) == 0) - return FAIL; + return RG_EFAIL; svc_fail(svcName); return RG_EABORT; --- cluster/rgmanager/src/daemons/rg_thread.c 2006/06/02 17:37:10 1.13 +++ cluster/rgmanager/src/daemons/rg_thread.c 2006/07/19 18:43:32 1.14 @@ -48,7 +48,7 @@ static resthread_t *find_resthread_byname(const char *resgroupname); static int spawn_if_needed(const char *resgroupname); int rt_enqueue_request(const char *resgroupname, int request, - msgctx_t *response_ctx, int max, uint64_t target, + msgctx_t *response_ctx, int max, uint32_t target, int arg0, int arg1); @@ -248,6 +248,26 @@ ret = RG_NONE; break; + case RG_MIGRATE: + error = svc_migrate(myname, req->rr_target); + + if (error == 0) { + ret = RG_SUCCESS; + + pthread_mutex_lock(&my_queue_mutex); + purge_status_checks(&my_queue); + pthread_mutex_unlock(&my_queue_mutex); + } else if (error == RG_EFORWARD) { + ret = RG_NONE; + break; + } else { + /* + * Bad news. 
+ */ + ret = RG_FAIL; + } + break; + case RG_INIT: /* Stop without changing shared state of it */ error = group_op(myname, RG_STOP); @@ -551,7 +571,7 @@ int rt_enqueue_request(const char *resgroupname, int request, msgctx_t *response_ctx, - int max, uint64_t target, int arg0, int arg1) + int max, uint32_t target, int arg0, int arg1) { request_t *curr; int count = 0, ret; --- cluster/rgmanager/src/daemons/test.c 2006/07/11 23:52:41 1.5 +++ cluster/rgmanager/src/daemons/test.c 2006/07/19 18:43:32 1.6 @@ -90,7 +90,7 @@ conf_setconfig(argv[1]); ccsfd = ccs_lock(); - if (ccsfd == FAIL) { + if (ccsfd < 0) { printf("Error parsing %s\n", argv[1]); goto out; } --- cluster/rgmanager/src/resources/ra-api-1-modified.dtd 2005/01/21 18:11:36 1.2 +++ cluster/rgmanager/src/resources/ra-api-1-modified.dtd 2006/07/19 18:43:32 1.3 @@ -42,7 +42,7 @@ + @@ -403,6 +404,15 @@ fi } + +migrate() +{ + declare target=$1 + + # XXX TODO; requires working Xen + return 1 +} + # # A Resource group is abstract, but the OCF RA API doesn't allow for abstract # resources, so here it is. @@ -427,6 +437,10 @@ status exit $? ;; + migrate) + migrate $2 # Send Xen VM to this node + exit $? 
+ ;; reload) exit 0 ;; --- cluster/rgmanager/src/utils/Makefile 2006/07/12 14:38:01 1.11 +++ cluster/rgmanager/src/utils/Makefile 2006/07/19 18:43:32 1.12 @@ -20,9 +20,9 @@ CFLAGS+= -g -Wstrict-prototypes -Wshadow -fPIC -D_GNU_SOURCE -CFLAGS+= -L${libdir} -DPACKAGE_VERSION=\"${RELEASE}\" +CFLAGS+= -DPACKAGE_VERSION=\"${RELEASE}\" -LDFLAGS+= -L../../../cman/lib -L../../../ccs/lib -lcman -lpthread -ldl -lncurses -L../clulib -lclulib -lccs +LDFLAGS+= -L${libdir} -L../../../cman/lib -L../../../ccs/lib -lcman -lpthread -ldl -lncurses -L../clulib -lclulib -lccs TARGETS=clubufflush clufindhostname clustat clusvcadm clulog clunfslock all: ${TARGETS} @@ -44,7 +44,7 @@ $(CC) -o $@ $^ $(INLUDE) $(CFLAGS) $(LDFLAGS) clufindhostname: clufindhostname.o - $(CC) -o $@ $^ $(INLUDE) $(CFLAGS) $(LDFLAGS) + $(CC) -o $@ $^ $(INLUDE) $(CFLAGS) clustat: clustat.o $(CC) -o $@ $^ $(INLUDE) $(CFLAGS) $(LDFLAGS) --- cluster/rgmanager/src/utils/clusvcadm.c 2006/07/11 23:52:41 1.9 +++ cluster/rgmanager/src/utils/clusvcadm.c 2006/07/19 18:43:32 1.10 @@ -158,6 +158,9 @@ " on \n", name); printf(" %s -r -m Relocate [to ]\n", name); +printf(" %s -M -m Migrate [to ]\n", + name); +printf(" (e.g. for live migration of Xen VMs)\n"); printf(" %s -q Quiet operation\n", name); printf(" %s -R Restart a group in place.\n", name); @@ -186,7 +189,7 @@ SmMessageSt msg; int action = RG_STATUS; int node_specified = 0; - int msgtarget, me, svctarget = 0; + int me, svctarget = 0; char *actionstr = NULL; cluster_member_list_t *membership; @@ -224,6 +227,12 @@ action = RG_RELOCATE; svcname = optarg; break; + case 'M': + /* MIGRATE */ + actionstr = "trying to migrate"; + action = RG_MIGRATE; + svcname = optarg; + break; case 's': /* stop */ actionstr = "stopping"; @@ -237,7 +246,6 @@ break; case 'm': /* member ... */ case 'n': /* node .. 
same thing */ - strncpy(nodename,optarg,sizeof(nodename)); node_specified = 1; break; @@ -277,15 +285,14 @@ me = get_my_nodeid(ch); if (node_specified) { - msgtarget = memb_name_to_id(membership, nodename); - if (msgtarget == 0) { + svctarget = memb_name_to_id(membership, nodename); + if (svctarget == 0) { fprintf(stderr, "Member %s not in membership list\n", nodename); return 1; } - svctarget = msgtarget; } else { - msgtarget = me; + svctarget = me; /* clu_local_nodename(RG_SERVICE_GROUP, nodename, sizeof(nodename)); @@ -295,7 +302,7 @@ strcpy(nodename,"me"); build_message(&msg, action, svcname, svctarget); - if (action != RG_RELOCATE) { + if (action != RG_RELOCATE && action != RG_MIGRATE) { printf("Member %s %s %s", nodename, actionstr, svcname); printf("..."); fflush(stdout); @@ -330,7 +337,7 @@ /* Decode */ swab_SmMessageSt(&msg); switch (msg.sm_data.d_ret) { - case SUCCESS: + case RG_ESUCCESS: printf("success\n"); break; case RG_EFAIL: