From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 20 Mar 2007 18:35:16 -0000 Subject: [Cluster-devel] cluster/rgmanager include/members.h include/re ... Message-ID: <20070320183516.24873.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL50 Changes by: lhh at sourceware.org 2007-03-20 18:35:13 Modified files: rgmanager/include: members.h res-ocf.h resgroup.h reslist.h rgmanager/man : Makefile rgmanager/src/clulib: lock.c rg_strings.c rgmanager/src/daemons: Makefile groups.c main.c nodeevent.c reslist.c resrules.c restree.c rg_state.c rg_thread.c test.c rgmanager/src/resources: ip.sh nfsclient.sh ocf-shellfuncs vm.sh rgmanager/src/utils: clustat.c clusvcadm.c Log message: Merge: VM migration patch from RHEL5 branch Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/members.h.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.3&r2=1.3.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/res-ocf.h.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.1&r2=1.1.18.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.15.4.2&r2=1.15.4.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.15&r2=1.15.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/man/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.4&r2=1.4.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/lock.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.3&r2=1.3.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/rg_strings.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.5.4.1&r2=1.5.4.2 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.14&r2=1.14.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.25.4.1&r2=1.25.4.2 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.34&r2=1.34.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.4&r2=1.4.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/reslist.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.14&r2=1.14.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.16&r2=1.16.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.23&r2=1.23.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.24.4.3&r2=1.24.4.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.15.4.2&r2=1.15.4.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/test.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.6&r2=1.6.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ip.sh.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.21&r2=1.21.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsclient.sh.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.13&r2=1.13.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ocf-shellfuncs.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.3&r2=1.3.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.1&r2=1.1.4.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.25.4.2&r2=1.25.4.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clusvcadm.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.12.4.2&r2=1.12.4.3 --- cluster/rgmanager/include/members.h 2006/09/27 16:28:41 1.3 +++ cluster/rgmanager/include/members.h 2007/03/20 18:35:12 1.3.4.1 @@ -15,10 +15,12 @@ cluster_member_list_t * get_member_list(cman_handle_t h); void free_member_list(cluster_member_list_t *ml); void member_set_state(int nodeid, int state); +int memb_count(cluster_member_list_t *ml); int member_online(int nodeid); int memb_online(cluster_member_list_t *ml, int nodeid); int memb_online_name(cluster_member_list_t *ml, char *name); int memb_name_to_id(cluster_member_list_t *ml, char *name); +int memb_mark_down(cluster_member_list_t *ml, int nodeid); char * memb_id_to_name(cluster_member_list_t *ml, int nodeid); cman_node_t * memb_id_to_p(cluster_member_list_t *ml, int nodeid); cman_node_t * memb_name_to_p(cluster_member_list_t *ml, char *name); --- cluster/rgmanager/include/res-ocf.h 2004/08/13 15:36:50 1.1 +++ cluster/rgmanager/include/res-ocf.h 2007/03/20 18:35:12 1.1.18.1 @@ -45,4 +45,22 @@ #define OCF_RA_NOT_RUNNING 7 #define OCF_RA_MAX 7 +/* + Resource operations - not ocf-specified + */ +#define RS_START (0) +#define RS_STOP (1) +#define RS_STATUS (2) +#define RS_RESINFO (3) +#define RS_RESTART (4) +#define RS_RELOAD (5) +#define RS_CONDRESTART (6) +#define RS_RECOVER (7) +#define RS_CONDSTART (8) /** Start if flagged with RF_NEEDSTART */ +#define RS_CONDSTOP (9) /** STOP if flagged with RF_NEEDSTOP */ +#define RS_MONITOR (10) +#define RS_META_DATA (11) +#define RS_VALIDATE (12) +#define RS_MIGRATE (13) + #endif --- cluster/rgmanager/include/resgroup.h 2006/12/14 22:17:20 1.15.4.2 +++ cluster/rgmanager/include/resgroup.h 2007/03/20 18:35:12 1.15.4.3 @@ -81,9 +81,7 @@ #define RG_MIGRATE 22 #define RG_NONE 999 -extern const char *rg_req_strings[]; - -#define rg_req_str(req) (rg_req_strings[req]) +const char *rg_req_str(int req); int handle_relocate_req(char *svcName, int request, int preferred_target, int *new_owner); @@ -107,9 +105,11 @@ #define DEFAULT_CHECK_INTERVAL 10 -extern const char *rg_state_strings[]; +const char *rg_state_str(int val); +const char *agent_op_str(int val); -#define rg_state_str(state) (rg_state_strings[state - RG_STATE_BASE]) +int eval_groups(int local, uint32_t nodeid, int nodeStatus); +int group_migrate(char *groupname, int target); int rg_status(const char *resgroupname); int group_op(char *rgname, int op); @@ -121,6 +121,7 @@ int svc_status(char *svcName); int svc_disable(char *svcName); int svc_fail(char *svcName); +int svc_migrate(char *svcName, int target); int rt_enqueue_request(const char *resgroupname, int request, msgctx_t *resp_ctx, int max, uint32_t target, int arg0, int arg1); @@ -175,7 +176,7 @@ #define RG_YES 1 #define RG_NO 2 -char *rg_strerror(int val); +const char *rg_strerror(int val); /* --- cluster/rgmanager/include/reslist.h 2006/09/27 16:28:41 1.15 +++ cluster/rgmanager/include/reslist.h 2007/03/20 18:35:12 1.15.4.1 @@ -40,25 +40,6 @@ #define RES_STARTED (1) #define RES_FAILED (2) -/* - Resource operations - */ -#define RS_START (0) -#define RS_STOP (1) -#define RS_STATUS (2) -#define RS_RESINFO (3) -#define RS_RESTART (4) -#define RS_RELOAD (5) -#define RS_CONDRESTART (6) -#define RS_RECOVER (7) -#define RS_CONDSTART (8) /** Start if flagged with RF_NEEDSTART */ -#define RS_CONDSTOP (9) /** STOP if flagged with RF_NEEDSTOP */ -#define RS_MONITOR (10) -#define RS_META_DATA (11) -#define RS_VALIDATE (12) -#define RS_MIGRATE (13) - - #ifndef SHAREDIR #define SHAREDIR "/usr/share/rgmanager" #endif @@ -159,7 +140,11 @@ int res_status(resource_node_t **tree, resource_t *res, void *ret); int res_condstart(resource_node_t **tree, resource_t *res, void *ret); int res_condstop(resource_node_t **tree, resource_t *res, void *ret); +int res_exec(resource_node_t *node, const char *op, const char *arg, int depth); /*int res_resinfo(resource_node_t **tree, resource_t *res, void *ret);*/ +int expand_time(char *val); +int store_action(resource_act_t **actsp, char *name, int depth, int timeout, int interval); + /* Calculate differences --- cluster/rgmanager/man/Makefile 2006/06/02 17:37:10 1.4 +++ cluster/rgmanager/man/Makefile 2007/03/20 18:35:13 1.4.4.1 @@ -12,8 +12,8 @@ ############################################################################### top_srcdir=.. UNINSTALL = ${top_srcdir}/scripts/uninstall.pl -TARGETS = clubufflush.8 clufindhostname.8 clulog.8 clunfsops.8 \ - clurgmgrd.8 clurmtabd.8 clushutdown.8 clustat.8 clusvcadm.8 +TARGETS = clubufflush.8 clufindhostname.8 clulog.8 \ + clurgmgrd.8 clurmtabd.8 clustat.8 clusvcadm.8 include ${top_srcdir}/make/defines.mk --- cluster/rgmanager/src/clulib/lock.c 2006/10/23 22:47:00 1.3 +++ cluster/rgmanager/src/clulib/lock.c 2007/03/20 18:35:13 1.3.4.1 @@ -162,7 +162,7 @@ int clu_close_lockspace(dlm_lshandle_t ls, const char *name) { - return dlm_release_lockspace(name, ls, 0); + return dlm_release_lockspace(name, ls, 1); } --- cluster/rgmanager/src/clulib/rg_strings.c 2006/12/14 22:17:20 1.5.4.1 +++ cluster/rgmanager/src/clulib/rg_strings.c 2007/03/20 18:35:13 1.5.4.2 @@ -16,9 +16,16 @@ Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include -struct { int val; char *str; } rg_error_strings[] = { +struct string_val { + int val; + char *str; +}; + + +const struct string_val rg_error_strings[] = { { RG_ERUN, "Service is already running" }, { RG_EQUORUM, "Operation requires quorum" }, { RG_EINVAL, "Invalid operation for resource" }, @@ -36,53 +43,107 @@ }; -char *rg_strerror(int err) +const struct string_val rg_req_strings[] = { + {RG_SUCCESS, "success" }, + {RG_FAIL, "fail"}, + {RG_START, "start"}, + {RG_STOP, "stop"}, + {RG_STATUS, "status"}, + {RG_DISABLE, "disable"}, + {RG_STOP_RECOVER, "stop (recovery)"}, + {RG_START_RECOVER, "start (recovery)"}, + {RG_RESTART, "restart"}, + {RG_EXITING, "exiting"}, + {RG_INIT, "initialize"}, + {RG_ENABLE, "enable"}, + {RG_STATUS_NODE, "status inquiry"}, + {RG_RELOCATE, "relocate"}, + {RG_CONDSTOP, "conditional stop"}, + {RG_CONDSTART, "conditional start"}, + {RG_START_REMOTE,"remote start"}, + {RG_STOP_USER, "user stop"}, + {RG_STOP_EXITING, "stop (shutdown)"}, + {RG_LOCK, "locking"}, + {RG_UNLOCK, "unlocking"}, + {RG_QUERY_LOCK, "lock status inquiry"}, + {RG_MIGRATE, "migrate"}, + {RG_NONE, "none"}, + {0, NULL} +}; + + +const struct string_val rg_state_strings[] = { + {RG_STATE_STOPPED, "stopped"}, + {RG_STATE_STARTING, "starting"}, + {RG_STATE_STARTED, "started"}, + {RG_STATE_STOPPING, "stopping"}, + {RG_STATE_FAILED, "failed"}, + {RG_STATE_UNINITIALIZED, "uninitialized"}, + {RG_STATE_CHECK, "checking"}, + {RG_STATE_ERROR, "recoverable"}, + {RG_STATE_RECOVER, "recovering"}, + {RG_STATE_DISABLED, "disabled"}, + {RG_STATE_MIGRATE, "migrating"}, + {0, NULL} +}; + + +const struct string_val agent_ops[] = { + {RS_START, "start"}, + {RS_STOP, "stop"}, + {RS_STATUS, "status"}, + {RS_RESINFO, "resinfo"}, + {RS_RESTART, "restart"}, + {RS_RELOAD, "reload"}, + {RS_CONDRESTART, "condrestart"}, /* Unused */ + {RS_RECOVER, "recover"}, + {RS_CONDSTART, "condstart"}, + {RS_CONDSTOP, "condstop"}, + {RS_MONITOR, "monitor"}, + {RS_META_DATA, "meta-data"}, /* printenv */ + {RS_VALIDATE, "validate-all"}, + {RS_MIGRATE, "migrate"}, + {0 , NULL} +}; + + +static inline const char * +rg_search_table(const struct string_val *table, int val) { int x; - for (x = 0; rg_error_strings[x].str != NULL; x++) { - if (rg_error_strings[x].val == err) { - return rg_error_strings[x].str; + for (x = 0; table[x].str != NULL; x++) { + if (table[x].val == val) { + return table[x].str; } } return "Unknown"; } + + +const char * +rg_strerror(int val) +{ + return rg_search_table(rg_error_strings, val); +} +const char * +rg_state_str(int val) +{ + return rg_search_table(rg_state_strings, val); +} -const char *rg_state_strings[] = { - "stopped", - "starting", - "started", - "stopping", - "failed", - "uninitialized", - "checking", - "recoverable", - "recovering", - "disabled", - "" -}; -const char *rg_req_strings[] = { - "success", - "fail", - "start", - "stop", - "status", - "disable", - "stop (recovery)", - "start (recovery)", - "restart", - "exiting", - "initialize", - "enable", - "status inquiry", - "relocate", - "conditional stop", - "conditional start", - "remote start", - "user stop", - "" -}; +const char * +rg_req_str(int val) +{ + return rg_search_table(rg_req_strings, val); +} + +const char * +agent_op_str(int val) +{ + return rg_search_table(agent_ops, val); +} --- cluster/rgmanager/src/daemons/Makefile 2006/07/12 14:38:01 1.14 +++ cluster/rgmanager/src/daemons/Makefile 2007/03/20 18:35:13 1.14.4.1 @@ -15,14 +15,11 @@ include ${top_srcdir}/make/defines.mk INCLUDE += -I $(top_srcdir)/include -I $(top_srcdir)/../cman/lib -I $(top_srcdir)/../ccs/lib -I $(top_srcdir)/../dlm/lib - -CFLAGS+= -g -I${incdir} -I/usr/include/libxml2 -L${libdir} +INCLUDE += -I${incdir} -I/usr/include/libxml2 CFLAGS+= -g -Wstrict-prototypes -Wshadow -fPIC -D_GNU_SOURCE -CFLAGS+= -L ../clulib - -LDFLAGS+= -lclulib -lxml2 -lpthread -ldl -L../../../cman/lib -L../../../ccs/lib -L../../../dlm/lib +LDFLAGS+= -L ../clulib -L../../../cman/lib -L../../../ccs/lib -L../../../dlm/lib -L${libdir} -lclulib -lxml2 -lpthread -ldl TARGETS=clurgmgrd clurmtabd rg_test all: ${TARGETS} @@ -59,7 +56,7 @@ # packages should run 'make check' as part of the build process. # rg_test: rg_locks-noccs.o test-noccs.o reslist-noccs.o \ - resrules.o restree-noccs.o fo_domain-noccs.o + resrules-noccs.o restree-noccs.o fo_domain-noccs.o $(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) -llalloc $(LDFLAGS) -lccs -lcman clurmtabd: clurmtabd.o clurmtabd_lib.o --- cluster/rgmanager/src/daemons/groups.c 2006/12/18 21:46:57 1.25.4.1 +++ cluster/rgmanager/src/daemons/groups.c 2007/03/20 18:35:13 1.25.4.2 @@ -36,7 +36,6 @@ #define cn_svccount cn_address.cna_address[0] /* Theses are uint8_t size */ #define cn_svcexcl cn_address.cna_address[1] -extern char *res_ops[]; static int config_version = 0; static resource_t *_resources = NULL; static resource_rule_t *_rules = NULL; @@ -617,17 +616,17 @@ Tells us if a resource group can be migrated. */ int -group_migratory(char *groupname) +group_migratory(char *groupname, int lock) { resource_node_t *rn; resource_t *res; - int migrate = 0, x; + int migrate = 0, x, ret = 0; - pthread_rwlock_rdlock(&resource_lock); + if (lock) + pthread_rwlock_rdlock(&resource_lock); res = find_root_by_ref(&_resources, groupname); if (!res) { - pthread_rwlock_unlock(&resource_lock); /* Nonexistent or non-TL RG cannot be migrated */ return 0; } @@ -640,26 +639,26 @@ } } - if (!migrate) { - pthread_rwlock_unlock(&resource_lock); - /* resource rule missing 'migrate' command */ - return 0; - } + if (!migrate) + goto out_unlock; list_do(&_tree, rn) { if (rn->rn_resource == res && rn->rn_child) { - pthread_rwlock_unlock(&resource_lock); /* TL service w/ children cannot be migrated */ - return 0; + goto out_unlock; } } while (!list_done(&_tree, rn)); - pthread_rwlock_unlock(&resource_lock); /* Ok, we have a migrate option to the resource group, the resource group has no children, and the resource group exists. We're all good */ - return 1; + ret = 1; + +out_unlock: + if (lock) + pthread_rwlock_unlock(&resource_lock); + return ret; } @@ -733,27 +732,69 @@ int group_migrate(char *groupname, int target) { + resource_node_t *rn = NULL, *tmp; resource_t *res; char *tgt_name; int ret = RG_ENOSERVICE; cluster_member_list_t *membership; + if (target <= 0) { + clulog(LOG_WARNING, + "Illegal node ID %d during migrate operation\n", + target); + return RG_EINVAL; + } + membership = member_list(); - if (!membership) + if (!membership) { + clulog(LOG_ERR, "Unable to determine membership during " + "migrate operation\n"); return RG_EFAIL; + } pthread_rwlock_rdlock(&resource_lock); tgt_name = memb_id_to_name(membership, target); + if (!tgt_name) { + clulog(LOG_WARNING, "Node ID %d not in membership during " + "migrate operation\n", target); + ret = RG_EINVAL; + goto out; + } + res = find_root_by_ref(&_resources, groupname); - if (!res) + if (!res) { + clulog(LOG_WARNING, + "Unable to find '%s' in resource list during" + "migrate operation\n", groupname); goto out; + } - if (!tgt_name) { - ret = RG_EINVAL; + list_do(&_tree, tmp) { + if (tmp->rn_resource == res) { + rn = tmp; + break; + } + } while (!list_done(&_tree, tmp)); + + if (!rn) { + clulog(LOG_WARNING, + "Unable to find '%s' it top level of resource " + "tree during migrate operation\n", groupname); goto out; } - ret = res_exec(res, res_ops[RG_MIGRATE], tgt_name); + + clulog(LOG_NOTICE, "Migrating %s to %s\n", groupname, tgt_name); + ret = res_exec(rn, agent_op_str(RS_MIGRATE), tgt_name, 0); + if (ret == 0) { + clulog(LOG_NOTICE, + "Migration of %s to %s completed\n", + groupname, tgt_name); + } else { + clulog(LOG_ERR, + "Migration of %s to %s failed; return code %d\n", + groupname, tgt_name, ret); + } out: pthread_rwlock_unlock(&resource_lock); @@ -1004,7 +1045,8 @@ } if (svcblk.rs_owner != my_id() || - svcblk.rs_state != RG_STATE_STARTED) + (svcblk.rs_state != RG_STATE_STARTED && + svcblk.rs_state != RG_STATE_MIGRATE)) continue; rt_enqueue_request(rg, RG_STATUS, --- cluster/rgmanager/src/daemons/main.c 2006/10/23 22:47:01 1.34 +++ cluster/rgmanager/src/daemons/main.c 2007/03/20 18:35:13 1.34.4.1 @@ -48,7 +48,6 @@ int init_resource_groups(int); void kill_resource_groups(void); void set_my_id(int); -int eval_groups(int, int, int); void flag_shutdown(int sig); void hard_exit(void); int send_rg_states(msgctx_t *, int); --- cluster/rgmanager/src/daemons/nodeevent.c 2006/09/27 16:28:41 1.4 +++ cluster/rgmanager/src/daemons/nodeevent.c 2007/03/20 18:35:13 1.4.4.1 @@ -137,7 +137,7 @@ { cman_handle_t ch; int fenced = 0; - int fence_time; + uint64_t fence_time; ch = cman_init(NULL); if (cman_get_fenceinfo(ch, nodeid, &fence_time, &fenced, NULL) < 0) --- cluster/rgmanager/src/daemons/reslist.c 2006/07/11 23:52:41 1.14 +++ cluster/rgmanager/src/daemons/reslist.c 2007/03/20 18:35:13 1.14.4.1 @@ -28,6 +28,9 @@ #include #include #include +#ifndef NO_CCS +#include +#endif char *attr_value(resource_node_t *node, char *attrname); @@ -360,12 +363,29 @@ /* Unique/primary is not unique */ - printf("Unique/primary not unique " - "type %s, %s=%s\n", +#ifdef NO_CCS + printf("Error: " + "%s attribute collision. " + "type=%s attr=%s value=%s\n", + (newres->r_attrs[x].ra_flags& + RA_PRIMARY)?"Primary": + "Unique", + newres->r_rule->rr_type, + newres->r_attrs[x].ra_name, + newres->r_attrs[x].ra_value + ); +#else + clulog(LOG_ERR, + "%s attribute collision. " + "type=%s attr=%s value=%s\n", + (newres->r_attrs[x].ra_flags& + RA_PRIMARY)?"Primary": + "Unique", newres->r_rule->rr_type, newres->r_attrs[x].ra_name, newres->r_attrs[x].ra_value ); +#endif return -1; } break; @@ -679,14 +699,21 @@ for (resID = 1; ; resID++) { snprintf(tok, sizeof(tok), RESOURCE_BASE "/%s[%d]", currule->rr_type, resID); - + newres = load_resource(ccsfd, currule, tok); if (!newres) break; if (store_resource(reslist, newres) != 0) { +#ifdef NO_CCS printf("Error storing %s resource\n", newres->r_rule->rr_type); +#else + clulog(LOG_ERR, + "Error storing %s resource\n", + newres->r_rule->rr_type); +#endif + destroy_resource(newres); } --- cluster/rgmanager/src/daemons/resrules.c 2006/10/20 20:59:49 1.16 +++ cluster/rgmanager/src/daemons/resrules.c 2007/03/20 18:35:13 1.16.4.1 @@ -31,7 +31,9 @@ #include #include #include +#ifndef NO_CCS #include +#endif /** @@ -49,8 +51,13 @@ list_do(rulelist, curr) { if (!strcmp(newrule->rr_type, curr->rr_type)) { +#ifdef NO_CCS fprintf(stderr, "Error storing %s: Duplicate\n", newrule->rr_type); +#else + clulog(LOG_ERR, "Error storing %s: Duplicate\n", + newrule->rr_type); +#endif return -1; } @@ -294,8 +301,8 @@ ret = xpath_get_one(doc, ctx, xpath); if (ret) { timeout = expand_time(ret); - if (interval < 0) - interval = 0; + if (timeout < 0) + timeout = 0; free(ret); } --- cluster/rgmanager/src/daemons/restree.c 2006/09/21 18:04:04 1.23 +++ cluster/rgmanager/src/daemons/restree.c 2007/03/20 18:35:13 1.23.4.1 @@ -60,23 +60,6 @@ time_t get_time(char *action, int depth, resource_node_t *node); -const char *res_ops[] = { - "start", - "stop", - "status", - "resinfo", - "restart", - "reload", - "condrestart", /* Unused */ - "recover", - "condstart", - "condstop", - "monitor", - "meta-data", /* printenv */ - "validate-all", - "migrate" -}; - const char *ocf_errors[] = { "success", // 0 @@ -97,14 +80,13 @@ const char * ocf_strerror(int ret) { - if (ret < OCF_RA_MAX) + if (ret >= 0 && ret < OCF_RA_MAX) return ocf_errors[ret]; return "unspecified"; } - /** Destroys an environment variable array. @@ -360,7 +342,7 @@ #endif #if 0 printf("Exec of script %s, action %s type %s\n", - res->r_rule->rr_agent, res_ops[op], + res->r_rule->rr_agent, agent_op_str(op), res->r_rule->rr_type); #endif @@ -701,7 +683,7 @@ #if 0 printf("%s children of %s type %s (level %d)\n", - res_ops[op], + agent_op_str(op), node->rn_resource->r_rule->rr_type, rule->rr_childtypes[x].rc_name, l); #endif @@ -744,7 +726,7 @@ /* printf("%s children of %s type %s (default level)\n", - res_ops[op], + agent_op_str(op), node->rn_resource->r_rule->rr_type, rule->rr_childtypes[x].rc_name); */ @@ -852,7 +834,7 @@ return 0; node->rn_actions[idx].ra_last = now; - if ((x = res_exec(node, res_ops[RS_STATUS], NULL, + if ((x = res_exec(node, agent_op_str(RS_STATUS), NULL, node->rn_actions[idx].ra_depth)) == 0) return 0; @@ -860,7 +842,7 @@ return x; /* Strange/failed status. Try to recover inline. */ - if ((x = res_exec(node, res_ops[RS_RECOVER], NULL, 0)) == 0) + if ((x = res_exec(node, agent_op_str(RS_RECOVER), NULL, 0)) == 0) return 0; return x; @@ -965,7 +947,7 @@ me = !first || (node->rn_resource == first); /* - printf("begin %s: %s %s [0x%x]\n", res_ops[op], + printf("begin %s: %s %s [0x%x]\n", agent_op_str(op), node->rn_resource->r_rule->rr_type, primary_attr_value(node->rn_resource), node->rn_flags); @@ -1010,7 +992,7 @@ if (me && (op == RS_START)) { node->rn_flags &= ~RF_NEEDSTART; - rv = res_exec(node, res_ops[op], NULL, 0); + rv = res_exec(node, agent_op_str(op), NULL, 0); if (rv != 0) { node->rn_state = RES_FAILED; return rv; @@ -1034,7 +1016,7 @@ /* Stop/status/etc stops after children have stopped */ if (me && (op == RS_STOP)) { node->rn_flags &= ~RF_NEEDSTOP; - rv = res_exec(node, res_ops[op], NULL, 0); + rv = res_exec(node, agent_op_str(op), NULL, 0); if (rv != 0) { node->rn_state = RES_FAILED; @@ -1054,7 +1036,7 @@ } /* - printf("end %s: %s %s\n", res_ops[op], + printf("end %s: %s %s\n", agent_op_str(op), node->rn_resource->r_rule->rr_type, primary_attr_value(node->rn_resource)); */ --- cluster/rgmanager/src/daemons/rg_state.c 2007/01/16 15:29:35 1.24.4.3 +++ cluster/rgmanager/src/daemons/rg_state.c 2007/03/20 18:35:13 1.24.4.4 @@ -47,7 +47,7 @@ int get_rg_state(char *servicename, rg_state_t *svcblk); void get_recovery_policy(char *rg_name, char *buf, size_t buflen); int check_depend_safe(char *servicename); -int group_migratory(char *servicename); +int group_migratory(char *servicename, int lock); int @@ -460,10 +460,6 @@ break; case RG_STATE_STOPPING: - printf("Service %s is stopping right now\n", svcName); - ret = 2; - break; - case RG_STATE_STARTED: case RG_STATE_CHECK: case RG_STATE_STARTING: @@ -819,7 +815,7 @@ rg_state_t svcStatus; int ret; - if (!group_migratory(svcName)) + if (!group_migratory(svcName, 1)) return RG_EINVAL; if (rg_lock(svcName, &lockp) < 0) { @@ -870,6 +866,17 @@ rg_unlock(&lockp); ret = group_migrate(svcName, target); + + if (ret == -1 || ret > 0) { + /* XXX run svc_status again here to see if it's still + healthy; if it is, don't FAIL it; it could be that + the target node simply died; in this case, set status + back to started */ + /* if ret > 0 { svc_status... */ + svc_fail(svcName); + ret = RG_EFAIL; + } + return ret; } --- cluster/rgmanager/src/daemons/rg_thread.c 2006/12/14 20:12:03 1.15.4.2 +++ cluster/rgmanager/src/daemons/rg_thread.c 2007/03/20 18:35:13 1.15.4.3 @@ -163,7 +163,7 @@ char myname[256]; resthread_t *myself; request_t *req; - uint32_t ret = RG_FAIL, error = 0; + int ret = RG_EFAIL, error = 0; rg_inc_threads(); @@ -274,7 +274,7 @@ /* * Bad news. */ - ret = RG_FAIL; + ret = RG_EFAIL; } break; @@ -289,7 +289,7 @@ if (error == 0) ret = RG_SUCCESS; else - ret = RG_FAIL; + ret = RG_EFAIL; break; case RG_CONDSTOP: @@ -321,7 +321,7 @@ /* * Bad news. */ - ret = RG_FAIL; + ret = RG_EFAIL; } break; @@ -340,7 +340,7 @@ /* * Bad news. */ - ret = RG_FAIL; + ret = RG_EFAIL; } pthread_mutex_lock(&my_queue_mutex); @@ -368,7 +368,7 @@ /* * Bad news. */ - ret = RG_FAIL; + ret = RG_EFAIL; } break; @@ -393,7 +393,7 @@ /* * Bad news. */ - ret = RG_FAIL; + ret = RG_EFAIL; } break; --- cluster/rgmanager/src/daemons/test.c 2006/07/19 18:43:32 1.6 +++ cluster/rgmanager/src/daemons/test.c 2007/03/20 18:35:13 1.6.4.1 @@ -83,7 +83,7 @@ fod_t *domains = NULL; resource_rule_t *rulelist = NULL, *currule; resource_t *reslist = NULL, *curres; - resource_node_t *tree = NULL; + resource_node_t *tree = NULL, *tmp, *rn = NULL; int ccsfd, ret = 0, rules = 0; fprintf(stderr,"Running in test mode.\n"); @@ -145,6 +145,13 @@ goto out; } + list_do(&tree, tmp) { + if (tmp->rn_resource == curres) { + rn = tmp; + break; + } + } while (!list_done(&tree, tmp)); + if (!strcmp(argv[1], "start")) { printf("Starting %s...\n", argv[3]); @@ -164,6 +171,23 @@ } printf("Stop of %s complete\n", argv[3]); goto out; + } else if (!strcmp(argv[1], "migrate")) { + printf("Migrating %s to %s...\n", argv[3], argv[4]); + + #if 0 + if (!group_migratory(curres)) { + printf("No can do\n"); + ret = -1; + goto out; + } + #endif + + if (res_exec(rn, "migrate", argv[4], 0)) { + ret = -1; + goto out; + } + printf("Migration of %s complete\n", argv[3]); + goto out; } else if (!strcmp(argv[1], "status")) { printf("Checking status of %s...\n", argv[3]); --- cluster/rgmanager/src/resources/ip.sh 2006/06/02 17:37:10 1.21 +++ cluster/rgmanager/src/resources/ip.sh 2007/03/20 18:35:13 1.21.4.1 @@ -884,6 +884,9 @@ exit 0 fi ip_op ${OCF_RESKEY_family} add ${OCF_RESKEY_address} + if [ $? -ne 0 ]; then + exit $OCF_ERR_GENERIC + fi if [ $NFS_TRICKS -eq 0 ]; then if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ --- cluster/rgmanager/src/resources/nfsclient.sh 2006/08/18 15:26:22 1.13 +++ cluster/rgmanager/src/resources/nfsclient.sh 2007/03/20 18:35:13 1.13.4.1 @@ -200,9 +200,9 @@ ;; no_all_squash) ;; - anonuid) + anonuid=*) ;; - anongid) + anongid=*) ;; *) ocf_log err "Export Option $o invalid" --- cluster/rgmanager/src/resources/ocf-shellfuncs 2006/06/02 17:37:10 1.3 +++ cluster/rgmanager/src/resources/ocf-shellfuncs 2007/03/20 18:35:13 1.3.4.1 @@ -1,10 +1,10 @@ # -# $Id: ocf-shellfuncs,v 1.3 2006/06/02 17:37:10 lhh Exp $ +# $Id: ocf-shellfuncs,v 1.3.4.1 2007/03/20 18:35:13 lhh Exp $ # # Common helper functions for the OCF Resource Agents supplied by # heartbeat. # -# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Br?e +# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Br???e # All Rights Reserved. # # Modified for linux-cluster 2005 by Lon Hohberger @@ -175,7 +175,7 @@ pretty_echo $__OCF_PRIO "$__OCF_MSG" clulog -p $__LOG_PID -n $__LOG_NAME \ - -l $__OCF_PRIO_N -s $__OCF_PRIO_N "$__OCF_MSG" + -s $__OCF_PRIO_N "$__OCF_MSG" } __ocf_set_defaults "$@" --- cluster/rgmanager/src/resources/vm.sh 2006/10/23 22:47:01 1.1 +++ cluster/rgmanager/src/resources/vm.sh 2007/03/20 18:35:13 1.1.4.1 @@ -367,14 +367,15 @@ { declare target=$1 - # XXX TODO - return 1 + xm migrate $OCF_RESKEY_name $target + return $? } # # A Resource group is abstract, but the OCF RA API doesn't allow for abstract # resources, so here it is. # + case $1 in start) start --- cluster/rgmanager/src/utils/clustat.c 2007/01/17 16:19:07 1.25.4.2 +++ cluster/rgmanager/src/utils/clustat.c 2007/03/20 18:35:13 1.25.4.3 @@ -20,6 +20,7 @@ #define FLAG_LOCAL 0x2 #define FLAG_RGMGR 0x4 #define FLAG_NOCFG 0x8 /* Shouldn't happen */ +#define FLAG_QDISK 0x10 #define RG_VERBOSE 0x1 @@ -53,8 +54,10 @@ struct timeval tv; - if (msg_open(MSG_SOCKET, 0, 0, &ctx, 10) < 0) + if (msg_open(MSG_SOCKET, 0, 0, &ctx, 10) < 0) { + perror("msg_open"); return; + } msg_send_simple(&ctx, RG_STATUS_NODE, 0, 0); @@ -144,6 +147,7 @@ struct timeval tv; if (msg_open(MSG_SOCKET, 0, 0, &ctx, 10) < 0) { + perror("msg_open"); return NULL; } @@ -342,8 +346,8 @@ cluster_member_list_t * add_missing(cluster_member_list_t *all, cluster_member_list_t *these) { - int x, y; - cman_node_t *m, *new; + int x, y, addflag; + cman_node_t *m, *nn; if (!these) return all; @@ -356,12 +360,8 @@ these->cml_members[x].cn_name)) m = &all->cml_members[y]; } - + if (!m) { - printf("%s not found\n", these->cml_members[x].cn_name); - /* WTF? It's not in our config */ - printf("realloc %d\n", (int)((all->cml_count+1) * - sizeof(cman_node_t))); all->cml_members = realloc(all->cml_members, (all->cml_count+1) * sizeof(cman_node_t)); @@ -370,15 +370,21 @@ exit(1); } - new = &all->cml_members[all->cml_count]; + nn = &all->cml_members[all->cml_count]; - memcpy(new, &these->cml_members[x], + memcpy(nn, &these->cml_members[x], sizeof(cman_node_t)); + + if (nn->cn_nodeid == 0) { /* quorum disk? */ + addflag = FLAG_QDISK; + } else { + addflag = FLAG_NOCFG; + } - if (new->cn_member) { - new->cn_member = FLAG_UP | FLAG_NOCFG; + if (nn->cn_member) { + nn->cn_member = FLAG_UP | addflag; } else { - new->cn_member = FLAG_NOCFG; + nn->cn_member = addflag; } ++all->cml_count; @@ -612,9 +618,12 @@ if (node->cn_member & FLAG_NOCFG) printf(", Estranged"); - + if (node->cn_member & FLAG_RGMGR) printf(", rgmanager"); + + if (node->cn_member & FLAG_QDISK) + printf(", Quorum Disk"); printf("\n"); @@ -626,12 +635,13 @@ xml_member_state(cman_node_t *node) { printf(" \n", + "estranged=\"%d\" rgmanager=\"%d\" qdisk=\"%d\" nodeid=\"0x%08x\"/>\n", node->cn_name, !!(node->cn_member & FLAG_UP), !!(node->cn_member & FLAG_LOCAL), !!(node->cn_member & FLAG_NOCFG), !!(node->cn_member & FLAG_RGMGR), + !!(node->cn_member & FLAG_QDISK), (uint32_t)((node->cn_nodeid )&0xffffffff)); } @@ -947,7 +957,8 @@ qs = cman_is_quorate(ch); membership = build_member_list(ch, &local_node_id); - rgs = rg_state_list(local_node_id, fast); + if (!member_name) + rgs = rg_state_list(local_node_id, fast); if (rgs) { flag_rgmanager_nodes(membership); } --- cluster/rgmanager/src/utils/clusvcadm.c 2006/12/14 22:17:21 1.12.4.2 +++ cluster/rgmanager/src/utils/clusvcadm.c 2007/03/20 18:35:13 1.12.4.3 @@ -159,9 +159,9 @@ " on \n", name); printf(" %s -r -m Relocate [to ]\n", name); -printf(" %s -M -m Migrate [to ]\n", +printf(" %s -M -m Migrate to \n", name); -printf(" (e.g. for live migration of Xen VMs)\n"); +printf(" (e.g. for live migration of VMs)\n"); printf(" %s -q Quiet operation\n", name); printf(" %s -R Restart a group in place.\n", name); @@ -240,7 +240,7 @@ return 1; } - while ((opt = getopt(argc, argv, "lSue:d:r:n:m:vR:s:qh?")) != EOF) { + while ((opt = getopt(argc, argv, "lSue:M:d:r:n:m:vR:s:qh?")) != EOF) { switch (opt) { case 'l': return do_lock(); @@ -265,13 +265,13 @@ break; case 'r': /* RELOCATE */ - actionstr = "trying to relocate"; + actionstr = "relocate"; action = RG_RELOCATE; svcname = optarg; break; case 'M': /* MIGRATE */ - actionstr = "trying to migrate"; + actionstr = "migrate"; action = RG_MIGRATE; svcname = optarg; break; @@ -310,6 +310,11 @@ return 1; } + if (action == RG_MIGRATE && !node_specified) { + printf("Migration requires a target cluster member\n"); + return 1; + } + if (!strchr(svcname,':')) { snprintf(realsvcname, sizeof(realsvcname), "service:%s", svcname); @@ -359,9 +364,9 @@ msg_open(MSG_SOCKET, 0, RG_PORT, &ctx, 5); } else { if (!svctarget) - printf("Trying to relocate %s", svcname); + printf("Trying to %s %s", actionstr, svcname); else - printf("Trying to relocate %s to %s", svcname, + printf("Trying to %s %s to %s", actionstr, svcname, nodename); printf("..."); fflush(stdout); @@ -392,12 +397,13 @@ swab_SmMessageSt(&msg); printf("%s\n", rg_strerror(msg.sm_data.d_ret)); - if (msg.sm_data.d_ret == RG_ERUN) return 0; + if (msg.sm_data.d_ret) + return msg.sm_data.d_ret; switch (action) { - case RG_MIGRATE: + /*case RG_MIGRATE:*/ case RG_RELOCATE: case RG_START: case RG_ENABLE: