From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yang Hongyang Subject: Re: [PATCH v6 COLO 04/15] libxc/restore: support COLO restore Date: Mon, 8 Jun 2015 22:06:27 +0800 Message-ID: <5575A163.1050407@cn.fujitsu.com> References: <1433735159-26739-1-git-send-email-yanghy@cn.fujitsu.com> <1433735159-26739-5-git-send-email-yanghy@cn.fujitsu.com> <557570FF.10901@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <557570FF.10901@citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Andrew Cooper , xen-devel@lists.xen.org Cc: wei.liu2@citrix.com, ian.campbell@citrix.com, wency@cn.fujitsu.com, guijianfeng@cn.fujitsu.com, yunhong.jiang@intel.com, eddie.dong@intel.com, rshriram@cs.ubc.ca, ian.jackson@eu.citrix.com List-Id: xen-devel@lists.xenproject.org On 06/08/2015 06:39 PM, Andrew Cooper wrote: > On 08/06/15 04:45, Yang Hongyang wrote: >> call the callbacks resume/checkpoint/suspend while secondary vm >> status is consistent with primary. >> >> Signed-off-by: Yang Hongyang >> Signed-off-by: Wen Congyang >> CC: Andrew Cooper >> --- >> tools/libxc/xc_sr_common.h | 11 +++++-- >> tools/libxc/xc_sr_restore.c | 63 ++++++++++++++++++++++++++++++++++++- >> tools/libxc/xc_sr_restore_x86_hvm.c | 1 + >> 3 files changed, 72 insertions(+), 3 deletions(-) >> >> diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h >> index 565c5da..382bf76 100644 >> --- a/tools/libxc/xc_sr_common.h >> +++ b/tools/libxc/xc_sr_common.h >> @@ -132,8 +132,11 @@ struct xc_sr_restore_ops >> * >> * @return 0 for success, -1 for failure, or the sentinel value >> * RECORD_NOT_PROCESSED. >> + * BROKEN_CHANNEL: if we are under Remus/COLO, this means master may dead, >> + * we will failover. > > "this means that the master" Thanks. > >> */ >> #define RECORD_NOT_PROCESSED 1 >> +#define BROKEN_CHANNEL 2 >> int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec); >> >> /** >> @@ -205,8 +208,12 @@ struct xc_sr_context >> uint32_t guest_type; >> uint32_t guest_page_size; >> >> - /* Plain VM, or checkpoints over time. */ >> - bool checkpointed; >> + /* >> + * 0: Plain VM >> + * 1: Remus >> + * 2: COLO >> + */ >> + int checkpointed; > > I think this would be nicer as > > enum { > STREAM_PLAIN, > STREAM_REMUS, > STREAM_COLO, > } stream; > > perhaps? It would reduce the use of a magic 2 in the code. This is another place that I missed, good catch, and it's better, thank you. > >> >> /* Currently buffering records between a checkpoint */ >> bool buffer_all_records; >> diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c >> index 2d2edd3..982a70e 100644 >> --- a/tools/libxc/xc_sr_restore.c >> +++ b/tools/libxc/xc_sr_restore.c >> @@ -1,4 +1,5 @@ >> #include >> +#include >> >> #include "xc_sr_common.h" >> >> @@ -472,7 +473,7 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec); >> static int handle_checkpoint(struct xc_sr_context *ctx) >> { >> xc_interface *xch = ctx->xch; >> - int rc = 0; >> + int rc = 0, ret; >> unsigned i; >> >> if ( !ctx->restore.checkpointed ) >> @@ -498,6 +499,46 @@ static int handle_checkpoint(struct xc_sr_context *ctx) >> else >> ctx->restore.buffer_all_records = true; >> >> + if ( ctx->restore.checkpointed == 2 ) >> + { >> +#define HANDLE_CALLBACK_RETURN_VALUE(ret) \ > > I would ideally like to avoid macros like this in an effort to avoid the > code slipping back into the state that the legacy code was in, but at > least it is local to the area used. > >> + do { \ >> + if ( ret == 0 ) \ >> + { \ >> + /* Some internal error happens */ \ >> + rc = -1; \ >> + goto err; \ >> + } \ >> + else if ( ret == 2 ) \ >> + { \ >> + /* Reading/writing error, do failover */ \ >> + rc = BROKEN_CHANNEL; \ >> + goto err; \ >> + } \ >> + } while (0) > > This should have the logic inverted somewhat, to cover all possible > values of ret, including the negative half. yes, will fix in the next version. > > e.g. > > if ( ret == 1 ) > rc = 0; /* Success */ > else > { > if ( ret == 2 ) > rc = BROKEN_CHANNEL; > else > rc = -1; /* Some unspecified error */ > goto err; > } > >> + >> + /* COLO */ >> + >> + /* We need to resume guest */ >> + rc = ctx->restore.ops.stream_complete(ctx); >> + if ( rc ) >> + goto err; >> + >> + /* TODO: call restore_results */ >> + >> + /* Resume secondary vm */ >> + ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data); >> + HANDLE_CALLBACK_RETURN_VALUE(ret); >> + >> + /* wait for new checkpoint */ >> + ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data); >> + HANDLE_CALLBACK_RETURN_VALUE(ret); >> + >> + /* suspend secondary vm */ >> + ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data); >> + HANDLE_CALLBACK_RETURN_VALUE(ret); > > Please #undef HANDLE_CALLBACK_RETURN_VALUE here. OK. > >> + } >> + >> err: >> return rc; >> } >> @@ -678,6 +719,8 @@ static int restore(struct xc_sr_context *ctx) >> goto err; >> } >> } >> + else if ( rc == BROKEN_CHANNEL ) >> + goto remus_failover; >> else if ( rc ) >> goto err; >> } >> @@ -685,6 +728,15 @@ static int restore(struct xc_sr_context *ctx) >> } while ( rec.type != REC_TYPE_END ); >> >> remus_failover: >> + >> + if ( ctx->restore.checkpointed == 2 ) >> + { >> + /* With COLO, we have already called stream_complete */ >> + rc = 0; >> + IPRINTF("COLO Failover"); >> + goto done; >> + } >> + >> /* >> * With Remus, if we reach here, there must be some error on primary, >> * failover from the last checkpoint state. >> @@ -735,6 +787,15 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom, >> ctx.restore.checkpointed = checkpointed_stream; >> ctx.restore.callbacks = callbacks; >> >> + /* Sanity checks for callbacks. */ >> + if ( ctx.restore.checkpointed == 2 ) >> + { >> + /* this is COLO restore */ >> + assert(callbacks->suspend && >> + callbacks->checkpoint && >> + callbacks->postcopy); > > FWIW, I need to make the ->checkpoint() callback used even in the remus > case for qemu handling in libxl migration v2. So this should be move out in libxl migration v2 support. > >> + } >> + >> IPRINTF("In experimental %s", __func__); >> DPRINTF("fd %d, dom %u, hvm %u, pae %u, superpages %d" >> ", checkpointed_stream %d", io_fd, dom, hvm, pae, >> diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c >> index 06177e0..8e54c68 100644 >> --- a/tools/libxc/xc_sr_restore_x86_hvm.c >> +++ b/tools/libxc/xc_sr_restore_x86_hvm.c >> @@ -181,6 +181,7 @@ static int handle_qemu(struct xc_sr_context *ctx) >> if ( fp ) >> fclose(fp); >> free(qbuf); >> + ctx->x86_hvm.restore.qbuf = NULL; > > This looks like an unrelated bugfix. Yes, this is a bugfix. It won't trigger when in normal migration or Remus, but in colo case, this will cause error because handle_qemu will be called multiple times. > > ~Andrew > >> >> return rc; >> } > > . > -- Thanks, Yang.