From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yang Hongyang Subject: Re: [PATCH v6 COLO 07/15] implement the cmdline for COLO Date: Thu, 25 Jun 2015 12:06:52 +0800 Message-ID: <558B7E5C.6050709@cn.fujitsu.com> References: <1433735159-26739-1-git-send-email-yanghy@cn.fujitsu.com> <1433735159-26739-8-git-send-email-yanghy@cn.fujitsu.com> <1434453583.13744.117.camel@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1434453583.13744.117.camel@citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Ian Campbell Cc: wei.liu2@citrix.com, wency@cn.fujitsu.com, andrew.cooper3@citrix.com, yunhong.jiang@intel.com, eddie.dong@intel.com, xen-devel@lists.xen.org, guijianfeng@cn.fujitsu.com, rshriram@cs.ubc.ca, ian.jackson@eu.citrix.com List-Id: xen-devel@lists.xenproject.org On 06/16/2015 07:19 PM, Ian Campbell wrote: > On Mon, 2015-06-08 at 11:45 +0800, Yang Hongyang wrote: >> From: Wen Congyang >> >> Add a new option -c to the command 'xl remus'. If you want >> to use COLO HA instead of Remus HA, please use -c option. >> >> Update man pages to reflect the addition of a new option to >> 'xl remus' command. >> >> Also add a new option -c to the internal command 'xl migrate-receive'. > > I asked about whether COLO was an extension or a peer to Remus in an > earlier patch. the answer may have an impact here too. We implemented COLO based on Remus, so we assume it is an extension to Remus. > >> @@ -498,6 +501,11 @@ Disable network output buffering. Requires enabling unsafe mode. >> >> Disable disk replication. Requires enabling unsafe mode. >> >> +=item B<-c> >> + >> +Enable COLO HA. It is conflict with B<-i> and B<-b>, and memory > > "It conflicts with" or "This conflicts with". > >> +checkpoint compression must be disabled. >> + >> =back >> >> =item B I >> diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c >> index 1145ae4..7df2466 100644 >> --- a/tools/libxl/libxl.c >> +++ b/tools/libxl/libxl.c >> @@ -811,6 +811,22 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, >> goto out; >> } >> >> + /* The caller must set this defbool */ >> + if (libxl_defbool_is_default(info->colo)) { >> + LOG(ERROR, "colo mode must be enabled/disabled"); > > As I wondered earlier -- this suggests it should not be a defbool, or > that the interfaces should split. > >> + rc = ERROR_FAIL; >> + goto out; >> + } >> + >> + if (libxl_defbool_val(info->colo)) { >> + libxl_defbool_setdefault(&info->compression, false); > > Assuming this isn't invalidated by the above comments, you should make > the existing: > libxl_defbool_setdefault(&info->compression, true); > into > libxl_defbool_setdefault(&info->compression, libxl_defbool_val(colo)); > > and then do an error check later. > >> + if (libxl_defbool_val(info->compression)) { >> + LOG(ERROR, "cannot use memory checkpoint compression in COLO mode"); >> + rc = ERROR_FAIL; >> + goto out; >> + } >> + } >> + >> libxl_defbool_setdefault(&info->allow_unsafe, false); >> libxl_defbool_setdefault(&info->blackhole, false); >> libxl_defbool_setdefault(&info->compression, true); >> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c >> index adfadd1..4bbadd3 100644 >> --- a/tools/libxl/xl_cmdimpl.c >> +++ b/tools/libxl/xl_cmdimpl.c >> @@ -4273,6 +4273,9 @@ static void migrate_receive(int debug, int daemonize, int monitor, >> dom_info.send_fd = send_fd; >> dom_info.migration_domname_r = &migration_domname; >> dom_info.checkpointed_stream = remus; >> + if (remus == LIBXL_CHECKPOINTED_STREAM_COLO) >> + /* COLO uses stdout to send control message to master */ >> + dom_info.quiet = 1; > > Please set a const char * to either "COLO" or "Remus" here and use it > everywhere you've currently got an open coded decision on that. > >> >> rc = create_domain(&dom_info); >> if (rc < 0) { >> @@ -4287,7 +4290,8 @@ static void migrate_receive(int debug, int daemonize, int monitor, >> /* If we are here, it means that the sender (primary) has crashed. >> * TODO: Split-Brain Check. >> */ >> - fprintf(stderr, "migration target: Remus Failover for domain %u\n", >> + fprintf(stderr, "migration target: %s Failover for domain %u\n", >> + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus", >> domid); >> >> /* >> @@ -4304,15 +4308,21 @@ static void migrate_receive(int debug, int daemonize, int monitor, >> rc = libxl_domain_rename(ctx, domid, migration_domname, >> common_domname); >> if (rc) >> - fprintf(stderr, "migration target (Remus): " >> + fprintf(stderr, "migration target (%s): " >> "Failed to rename domain from %s to %s:%d\n", >> + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus", >> migration_domname, common_domname, rc); >> } >> >> + if (remus == LIBXL_CHECKPOINTED_STREAM_COLO) >> + /* The guest is running after failover in COLO mode */ >> + exit(rc ? -ERROR_FAIL: 0); >> + >> rc = libxl_domain_unpause(ctx, domid); >> if (rc) >> - fprintf(stderr, "migration target (Remus): " >> + fprintf(stderr, "migration target (%s): " >> "Failed to unpause domain %s (id: %u):%d\n", >> + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus", >> common_domname, domid, rc); >> >> exit(rc ? -ERROR_FAIL: 0); >> @@ -4458,7 +4468,7 @@ int main_migrate_receive(int argc, char **argv) >> int debug = 0, daemonize = 1, monitor = 1, remus = 0; >> int opt; >> >> - SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) { >> + SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) { >> case 'F': >> daemonize = 0; >> break; >> @@ -4470,8 +4480,10 @@ int main_migrate_receive(int argc, char **argv) >> debug = 1; >> break; >> case 'r': >> - remus = 1; >> + remus = LIBXL_CHECKPOINTED_STREAM_REMUS; >> break; >> + case 'c': >> + remus = LIBXL_CHECKPOINTED_STREAM_COLO; >> } >> >> if (argc-optind != 0) { >> @@ -7958,15 +7970,18 @@ int main_remus(int argc, char **argv) >> pid_t child = -1; >> uint8_t *config_data; >> int config_len; >> + int interval = 0; >> >> memset(&r_info, 0, sizeof(libxl_domain_remus_info)); >> /* Defaults */ >> r_info.interval = 200; >> libxl_defbool_setdefault(&r_info.blackhole, false); >> + libxl_defbool_setdefault(&r_info.colo, false); >> >> - SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) { >> + SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) { >> case 'i': >> r_info.interval = atoi(optarg); >> + interval = 1; > > This duplication of r_info.interval and interval seems odd. Perhaps > interval is really "interval_was_set" but even so I'm not sure this > would be better achieved with some more refactoring. > >> break; >> case 'F': >> libxl_defbool_set(&r_info.allow_unsafe, true); >> @@ -7992,11 +8007,28 @@ int main_remus(int argc, char **argv) >> case 'e': >> daemonize = 0; >> break; >> + case 'c': >> + libxl_defbool_set(&r_info.colo, true); >> } >> >> domid = find_domain(argv[optind]); >> host = argv[optind + 1]; >> >> + if (libxl_defbool_val(r_info.colo)) { >> + if (!interval) >> + r_info.interval = 0; >> + >> + if (r_info.interval || libxl_defbool_val(r_info.blackhole)) { >> + perror("option -c is conflict with -i or -b"); > > "...-c conflicts with..." > >> + exit(-1); >> + } >> + >> + if (libxl_defbool_is_default(r_info.compression)) { >> + perror("option -u must be specified when using COLO"); > > Then enforce it using setdefault I think instead of making the user jump > through a pointless hoop. > > Ian. > > . > -- Thanks, Yang.