From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 31 Jul 2007 18:00:28 -0000 Subject: [Cluster-devel] cluster/rgmanager ChangeLog include/res-ocf.h ... Message-ID: <20070731180028.15054.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: lhh at sourceware.org 2007-07-31 18:00:26 Modified files: rgmanager : ChangeLog rgmanager/include: res-ocf.h reslist.h rgmanager/src/clulib: rg_strings.c rgmanager/src/daemons: groups.c reslist.c resrules.c restree.c test.c rgmanager/src/resources: ra-api-1-modified.dtd service.sh vm.sh Log message: Fix bug #248727 Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.53&r2=1.54 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/res-ocf.h.diff?cvsroot=cluster&r1=1.2&r2=1.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.21&r2=1.22 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/rg_strings.c.diff?cvsroot=cluster&r1=1.9&r2=1.10 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.37&r2=1.38 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/reslist.c.diff?cvsroot=cluster&r1=1.18&r2=1.19 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/resrules.c.diff?cvsroot=cluster&r1=1.22&r2=1.23 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&r1=1.34&r2=1.35 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/test.c.diff?cvsroot=cluster&r1=1.10&r2=1.11 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ra-api-1-modified.dtd.diff?cvsroot=cluster&r1=1.4&r2=1.5 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/service.sh.diff?cvsroot=cluster&r1=1.10&r2=1.11 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/vm.sh.diff?cvsroot=cluster&r1=1.5&r2=1.6 --- cluster/rgmanager/ChangeLog 2007/07/23 20:49:13 1.53 +++ cluster/rgmanager/ChangeLog 2007/07/31 18:00:25 1.54 @@ -1,3 +1,23 @@ +2007-07-31 Lon Hohberger + * general: Make VMs not change state when added/removed from the + cluster config or bounce services/VMs when minor config changes + are made. (#248727) + * include/res-ocf.h: Add reconfigure operation + * include/reslist.h: Add flags allowing reconfigurable options in + resource agent metadata, and per-agent flags which allow resources + to be added w/o initialization / removed w/o kill. + * src/clulib/rg_strings.c: Add string definition for reconfigure op + * src/daemons/groups.c: Allow no-init-on-add and no-kill-on-delete + flags to work + * src/daemons/reslist.c: Allow resource comparison to take into + account reconfig flags and return a new value if the resource is + reconfigurable + * src/daemons/resrules.c: Code to support new flags + * src/daemons/restree.c: Code to support reconfiguration operation + * src/daemons/test.c: Code to support testing new reconfig ops and + behaviors + * src/resources/*: Add new flags to service and VM resource agents. + 2007-07-23 Lon Hohberger * general: make threads exit with pthread_exit() so we can wrap/track them. Add internal statedump (SIGUSR1) support. --- cluster/rgmanager/include/res-ocf.h 2007/03/10 00:20:54 1.2 +++ cluster/rgmanager/include/res-ocf.h 2007/07/31 18:00:25 1.3 @@ -62,5 +62,6 @@ #define RS_META_DATA (11) #define RS_VALIDATE (12) #define RS_MIGRATE (13) +#define RS_RECONFIG (14) #endif --- cluster/rgmanager/include/reslist.h 2007/06/27 14:03:51 1.21 +++ cluster/rgmanager/include/reslist.h 2007/07/31 18:00:25 1.22 @@ -25,10 +25,11 @@ #include -#define RA_PRIMARY (1<<0) -#define RA_UNIQUE (1<<1) -#define RA_REQUIRED (1<<2) -#define RA_INHERIT (1<<3) +#define RA_PRIMARY (1<<0) /** Primary key */ +#define RA_UNIQUE (1<<1) /** Unique for given type */ +#define RA_REQUIRED (1<<2) /** Required (or an error if not present */ +#define RA_INHERIT (1<<3) /** Inherit a parent resource's attr */ +#define RA_RECONFIG (1<<4) /** Allow inline reconfiguration */ #define RF_INLINE (1<<0) #define RF_DEFINED (1<<1) @@ -37,6 +38,15 @@ #define RF_COMMON (1<<4) /** " */ #define RF_INDEPENDENT (1<<5) /** Define this for a resource if it is otherwise an independent subtree */ +#define RF_RECONFIG (1<<6) + +#define RF_INIT (1<<7) /** Resource rule: Initialize this resource + class on startup */ +#define RF_DESTROY (1<<8) /** Resource rule flag: Destroy this + resource class if you delete it from + the configuration */ + + #define RES_STOPPED (0) #define RES_STARTED (1) --- cluster/rgmanager/src/clulib/rg_strings.c 2007/06/27 14:03:51 1.9 +++ cluster/rgmanager/src/clulib/rg_strings.c 2007/07/31 18:00:25 1.10 @@ -113,6 +113,7 @@ {RS_META_DATA, "meta-data"}, /* printenv */ {RS_VALIDATE, "validate-all"}, {RS_MIGRATE, "migrate"}, + {RS_RECONFIG, "reconfig"}, {0 , NULL} }; --- cluster/rgmanager/src/daemons/groups.c 2007/07/23 20:49:13 1.37 +++ cluster/rgmanager/src/daemons/groups.c 2007/07/31 18:00:25 1.38 @@ -1225,6 +1225,7 @@ do_condstops(void) { resource_node_t *curr; + struct dlm_lksb lockp; rg_state_t svcblk; int need_kill; char rg[64]; @@ -1252,6 +1253,24 @@ clulog(LOG_DEBUG, "Removing %s\n", rg); } + if (!curr->rn_child && ((curr->rn_resource->r_rule->rr_flags & + RF_DESTROY) == 0) && group_migratory(rg, 0) && + need_kill == 1) { + /* Do something smart here: flip state? */ + clulog(LOG_NOTICE, + "%s removed from the config, but I am not stopping it.\n", + rg); + if (rg_lock(rg, &lockp) != 0) + continue; + if (get_rg_state(rg, &svcblk) < 0) + goto cont; + svcblk.rs_state = RG_STATE_DISABLED; + set_rg_state(rg, &svcblk); +cont: + rg_unlock(&lockp); + continue; + } + rt_enqueue_request(rg, need_kill ? RG_DISABLE : RG_CONDSTOP, NULL, 0, 0, 0, 0); @@ -1285,7 +1304,8 @@ /* New RG. We'll need to initialize it. */ need_init = 0; - if (curr->rn_resource->r_flags & RF_NEEDSTART) + if (!(curr->rn_resource->r_flags & RF_RECONFIG) && + (curr->rn_resource->r_flags & RF_NEEDSTART)) need_init = 1; if (!need_init) { @@ -1308,7 +1328,17 @@ if (need_init) { ++new_groups; - clulog(LOG_DEBUG, "Initializing %s\n", rg); + clulog(LOG_NOTICE, "Initializing %s\n", rg); + } + + if (!curr->rn_child && ((curr->rn_resource->r_rule->rr_flags & + RF_INIT) == 0) && group_migratory(rg, 0) && + need_init == 1) { + /* Do something smart here? */ + clulog(LOG_NOTICE, + "%s was added to the config, but I am not initializing it.\n", + rg); + continue; } rt_enqueue_request(rg, need_init ? RG_INIT : RG_CONDSTART, --- cluster/rgmanager/src/daemons/reslist.c 2007/04/26 20:42:02 1.18 +++ cluster/rgmanager/src/daemons/reslist.c 2007/07/31 18:00:25 1.19 @@ -176,18 +176,29 @@ /** Compare two resources. + @param left Left resource + @param right Right resource + @return -1 on different resource, 0 if the same, 1 if different, + 2 if different, but only safe resources are different + */ int rescmp(resource_t *left, resource_t *right) { - int x, y = 0, found; + int x, y = 0, found = 0, ret = 0; + /* Completely different resource class... */ if (strcmp(left->r_rule->rr_type, right->r_rule->rr_type)) { - //printf("Er, wildly different resource type! "); return -1; } + /* + printf("Comparing %s:%s to %s:%s\n", + left->r_rule->rr_type, left->r_attrs[0].ra_value, + right->r_rule->rr_type, right->r_attrs[0].ra_value) + */ + for (x = 0; left->r_attrs && left->r_attrs[x].ra_name; x++) { found = 0; @@ -203,35 +214,52 @@ left->r_attrs[x].ra_flags) { /* Flags are different. Change in resource agents? */ - //printf("flags differ "); + /* + printf("* flags differ %08x vs %08x\n", + left->r_attrs[x].ra_flags, + right->r_attrs[y].ra_flags); + */ return 1; } if (strcmp(right->r_attrs[y].ra_value, left->r_attrs[x].ra_value)) { /* Different attribute value. */ - //printf("different value for attr '%s' ", - //right->r_attrs[y].ra_name); - return 1; + /* + printf("* different value for attr '%s':" + " '%s' vs '%s'", + right->r_attrs[y].ra_name, + left->r_attrs[x].ra_value, + right->r_attrs[y].ra_value); + */ + if (left->r_attrs[x].ra_flags & RA_RECONFIG) { + /* printf(" [SAFE]\n"); */ + ret = 2; + } else { + /* printf("\n"); */ + return 1; + } } } /* Attribute missing -> different attribute value. */ if (!found) { - //printf("Attribute %s deleted ", - //left->r_attrs[x].ra_name); + /* + printf("* Attribute '%s' deleted\n", + left->r_attrs[x].ra_name); + */ return 1; } } /* Different attribute count */ if (x != y) { - //printf("Attribute count differ (attributes added!) "); + /* printf("* Attribute count differ (attributes added!) "); */ return 1; } /* All the same */ - return 0; + return ret; } @@ -288,7 +316,7 @@ resource_t *curr; char ref_buf[128]; char *type; - char *name; + char *name = ref; int x; snprintf(ref_buf, sizeof(ref_buf), "%s", ref); @@ -544,6 +572,8 @@ printf(" [NEEDSTOP]"); if (res->r_flags & RF_COMMON) printf(" [COMMON]"); + if (res->r_flags & RF_RECONFIG) + printf(" [RECONFIG]"); printf("\n"); if (res->r_rule->rr_maxrefs) @@ -579,6 +609,8 @@ printf(" unique"); if (res->r_attrs[x].ra_flags & RA_REQUIRED) printf(" required"); + if (res->r_attrs[x].ra_flags & RA_RECONFIG) + printf(" reconfig"); if (res->r_attrs[x].ra_flags & RA_INHERIT) printf(" inherit(\"%s\")", res->r_attrs[x].ra_value); printf(" ]\n"); --- cluster/rgmanager/src/daemons/resrules.c 2007/05/31 19:08:14 1.22 +++ cluster/rgmanager/src/daemons/resrules.c 2007/07/31 18:00:25 1.23 @@ -175,6 +175,36 @@ /** + Get and store a bit field. + + @param doc Pre-parsed XML document pointer. + @param ctx Pre-allocated XML XPath context pointer. + @param base XPath prefix to search + @param rr Resource rule to store new information in. + */ +void +_get_rule_flag(xmlDocPtr doc, xmlXPathContextPtr ctx, char *base, + resource_rule_t *rr, char *flag, int bit) +{ + char xpath[256]; + char *ret = NULL; + + snprintf(xpath, sizeof(xpath), + "%s/attributes/@%s", + base, flag); + ret = xpath_get_one(doc, ctx, xpath); + if (ret) { + if (atoi(ret)) { + rr->rr_flags |= bit; + } else { + rr->rr_flags &= ~bit; + } + free(ret); + } +} + + +/** Get and store the version @param doc Pre-parsed XML document pointer. @@ -367,8 +397,8 @@ ret = xpath_get_one(doc, ctx, xpath); if (ret) { interval = expand_time(ret); - if (interval < 0) - interval = 0; + if (interval < 1) + interval = 1; free(ret); } @@ -529,6 +559,17 @@ printf("Max instances: %d\n", rr->rr_maxrefs); if (rr->rr_agent) printf("Agent: %s\n", basename(rr->rr_agent)); + + printf("Flags: "); + if (rr->rr_flags) { + if (rr->rr_flags & RF_INIT) + printf("init_on_add "); + if (rr->rr_flags & RF_DESTROY) + printf("destroy_on_delete "); + } else { + printf("(none)"); + } + printf("\n"); printf("Attributes:\n"); if (!rr->rr_attrs) { @@ -544,18 +585,25 @@ continue; } - printf(" ["); - if (rr->rr_attrs[x].ra_flags & RA_PRIMARY) - printf(" primary"); - if (rr->rr_attrs[x].ra_flags & RA_UNIQUE) - printf(" unique"); - if (rr->rr_attrs[x].ra_flags & RA_REQUIRED) - printf(" required"); - if (rr->rr_attrs[x].ra_flags & RA_INHERIT) - printf(" inherit"); - else if (rr->rr_attrs[x].ra_value) - printf(" default=\"%s\"", rr->rr_attrs[x].ra_value); - printf(" ]\n"); + if (rr->rr_attrs[x].ra_flags) { + printf(" ["); + if (rr->rr_attrs[x].ra_flags & RA_PRIMARY) + printf(" primary"); + if (rr->rr_attrs[x].ra_flags & RA_UNIQUE) + printf(" unique"); + if (rr->rr_attrs[x].ra_flags & RA_REQUIRED) + printf(" required"); + if (rr->rr_attrs[x].ra_flags & RA_INHERIT) + printf(" inherit"); + if (rr->rr_attrs[x].ra_flags & RA_RECONFIG) + printf(" reconfig"); + printf(" ]"); + } + + if (rr->rr_attrs[x].ra_value) + printf(" default=\"%s\"\n", rr->rr_attrs[x].ra_value); + else + printf("\n"); } actions: @@ -691,6 +739,18 @@ } /* + See if this can be reconfigured on the fly without a + stop/start + */ + snprintf(xpath, sizeof(xpath), "%s/parameter[%d]/@reconfig", + base, x); + if ((ret = xpath_get_one(doc,ctx,xpath))) { + if ((atoi(ret) != 0) || (ret[0] == 'y')) + flags |= RA_RECONFIG; + free(ret); + } + + /* See if this is supposed to be inherited */ snprintf(xpath, sizeof(xpath), "%s/parameter[%d]/@inherit", @@ -955,6 +1015,7 @@ break; memset(rr,0,sizeof(*rr)); + rr->rr_flags = RF_INIT | RF_DESTROY; rr->rr_type = type; snprintf(base, sizeof(base), "/resource-agent[%d]", ruleid); @@ -967,6 +1028,8 @@ "/resource-agent[%d]/special[@tag=\"rgmanager\"]", ruleid); _get_maxparents(doc, ctx, base, rr); + _get_rule_flag(doc, ctx, base, rr, "init_on_add", RF_INIT); + _get_rule_flag(doc, ctx, base, rr, "destroy_on_delete", RF_DESTROY); rr->rr_agent = strdup(filename); /* --- cluster/rgmanager/src/daemons/restree.c 2007/06/27 14:03:51 1.34 +++ cluster/rgmanager/src/daemons/restree.c 2007/07/31 18:00:25 1.35 @@ -1213,9 +1213,15 @@ /* Start starts before children */ if (me && (op == RS_START)) { - node->rn_flags &= ~RF_NEEDSTART; - rv = res_exec(node, op, NULL, 0); + if (node->rn_flags & RF_RECONFIG && + realop == RS_CONDSTART) { + rv = res_exec(node, RS_RECONFIG, NULL, 0); + op = realop; /* reset to CONDSTART */ + } else { + rv = res_exec(node, op, NULL, 0); + } + node->rn_flags &= ~(RF_NEEDSTART | RF_RECONFIG); if (rv != 0) { node->rn_state = RES_FAILED; return SFL_FAILURE; @@ -1435,6 +1441,7 @@ resource_delta(resource_t **leftres, resource_t **rightres) { resource_t *lc, *rc; + int ret; list_do(leftres, lc) { rc = find_resource_by_ref(rightres, lc->r_rule->rr_type, @@ -1447,10 +1454,25 @@ } /* Ok, see if the resource is the same */ - if (rescmp(lc, rc) == 0) { + ret = rescmp(lc, rc); + if (ret == 0) { rc->r_flags |= RF_COMMON; continue; } + + if (ret == 2) { + /* return of 2 from rescmp means + the two resources differ only + by reconfigurable bits */ + /* Do nothing on condstop phase; + do a "reconfig" instead of + "start" on conststart phase */ + rc->r_flags |= RF_COMMON; + rc->r_flags |= RF_NEEDSTART; + rc->r_flags |= RF_RECONFIG; + continue; + } + rc->r_flags |= RF_COMMON; /* Resource has changed. Flag it. */ @@ -1512,12 +1534,17 @@ or is new), then we don't really care about its children. */ + if (rn->rn_resource->r_flags & RF_NEEDSTART) { rn->rn_flags |= RF_NEEDSTART; - continue; + if ((rn->rn_resource->r_flags & RF_RECONFIG) == 0) + continue; } - if (rc == 0) { + if (rc == 0 || rc == 2) { + if (rc == 2) + rn->rn_flags |= RF_NEEDSTART | RF_RECONFIG; + /* Ok, same resource. Recurse. */ ln->rn_flags |= RF_COMMON; rn->rn_flags |= RF_COMMON; --- cluster/rgmanager/src/daemons/test.c 2007/06/27 14:03:51 1.10 +++ cluster/rgmanager/src/daemons/test.c 2007/07/31 18:00:25 1.11 @@ -33,6 +33,50 @@ #error "Can not be built with CCS support." #endif +/** + Tells us if a resource group can be migrated. + */ +int +group_migratory(resource_t **resources, resource_node_t **tree, char *groupname) +{ + resource_node_t *rn; + resource_t *res; + int migrate = 0, x, ret = 0; + + res = find_root_by_ref(resources, groupname); + if (!res) { + /* Nonexistent or non-TL RG cannot be migrated */ + return 0; + } + + for (x = 0; res->r_rule->rr_actions[x].ra_name; x++) { + if (!strcmp(res->r_rule->rr_actions[x].ra_name, + "migrate")) { + migrate = 1; + break; + } + } + + if (!migrate) + goto out_unlock; + + list_do(tree, rn) { + if (rn->rn_resource == res && rn->rn_child) { + /* TL service w/ children cannot be migrated */ + goto out_unlock; + } + } while (!list_done(tree, rn)); + + + /* Ok, we have a migrate option to the resource group, + the resource group has no children, and the resource + group exists. We're all good */ + ret = 1; + +out_unlock: + return ret; +} + #define shift() {++argv; --argc;} #define USAGE_TEST \ @@ -252,7 +296,9 @@ resource_rule_t *rulelist = NULL, *currule, *rulelist2 = NULL; resource_t *reslist = NULL, *curres, *reslist2 = NULL; resource_node_t *tree = NULL, *tree2 = NULL; - int ccsfd, ret = 0; + resource_node_t *tn; + int ccsfd, ret = 0, need_init, need_kill; + char rg[64]; if (argc < 2) { printf("Operation requires two arguments\n"); @@ -309,6 +355,57 @@ print_resource_tree(&tree); printf("=== New Resource Tree ===\n"); print_resource_tree(&tree2); + printf("=== Operations (down-phase) ===\n"); + list_do(&tree, tn) { + res_build_name(rg, sizeof(rg), tn->rn_resource); + /* Set state to uninitialized if we're killing a RG */ + need_init = 0; + + /* Set state to uninitialized if we're killing a RG */ + need_kill = 0; + if (tn->rn_resource->r_flags & RF_NEEDSTOP) { + need_kill = 1; + printf("[kill] "); + } + + if (!tn->rn_child && ((tn->rn_resource->r_rule->rr_flags & + RF_DESTROY) == 0) && group_migratory(&reslist, &tree, rg) && + need_kill == 1) { + /* Do something smart here: flip state? */ + printf("[no-op] %s was removed from the config, but I am not stopping it.\n", + rg); + continue; + } + + res_condstop(&tn, tn->rn_resource, NULL); + } while (!list_done(&tree, tn)); + printf("=== Operations (up-phase) ===\n"); + list_do(&tree2, tn) { + res_build_name(rg, sizeof(rg), tn->rn_resource); + /* New RG. We'll need to initialize it. */ + need_init = 0; + if (!(tn->rn_resource->r_flags & RF_RECONFIG) && + (tn->rn_resource->r_flags & RF_NEEDSTART)) + need_init = 1; + + if (need_init) { + printf("[init] "); + } + + if (!tn->rn_child && ((tn->rn_resource->r_rule->rr_flags & + RF_INIT) == 0) && group_migratory(&reslist2, &tree2, rg) && + need_init == 1) { + /* Do something smart here? */ + printf("[noop] %s was added, but I am not initializing it\n", rg); + continue; + } + + if (need_init) { + res_stop(&tn, tn->rn_resource, NULL); + } else { + res_condstart(&tn, tn->rn_resource, NULL); + } + } while (!list_done(&tree2, tn)); out: destroy_resource_tree(&tree2); @@ -368,6 +465,7 @@ goto out; } else if (!strcmp(argv[1], "delta")) { shift(); + _no_op_mode(1); ret = tree_delta_test(argc, argv); goto out; } else { --- cluster/rgmanager/src/resources/ra-api-1-modified.dtd 2006/08/18 15:26:22 1.4 +++ cluster/rgmanager/src/resources/ra-api-1-modified.dtd 2007/07/31 18:00:25 1.5 @@ -24,6 +24,7 @@ name CDATA #REQUIRED primary (1|0) "0" required (1|0) "0" + reconfig (1|0) "0" inherit CDATA "" unique (1|0) "0" reconfig (1|0) "0"> @@ -43,7 +44,7 @@ --- cluster/rgmanager/src/resources/service.sh 2007/07/23 20:49:13 1.10 +++ cluster/rgmanager/src/resources/service.sh 2007/07/31 18:00:25 1.11 @@ -56,7 +56,7 @@ - + Fail over domains define lists of cluster members to try in the event that a resource group fails. @@ -164,6 +164,7 @@ + @@ -227,6 +228,9 @@ validate-all) exit 0 ;; + reconfig) + exit 0 + ;; *) exit 0 ;; --- cluster/rgmanager/src/resources/vm.sh 2007/06/27 14:03:51 1.5 +++ cluster/rgmanager/src/resources/vm.sh 2007/07/31 18:00:25 1.6 @@ -53,7 +53,7 @@ - + Fail over domains define lists of cluster members to try in the event that the host of the virtual machine @@ -65,7 +65,7 @@ - + If set to yes, this resource group will automatically be started after the cluster forms a quorum. If set to no, this virtual @@ -200,7 +200,11 @@ - + + EOT