* [PATCH Remus v6 1/3] libxc/save: refactor of send_domain_memory_live()
2015-05-15 10:17 [PATCH Remus v6 0/3] Remus support for Migration-v2 Yang Hongyang
@ 2015-05-15 10:17 ` Yang Hongyang
2015-05-15 12:56 ` Andrew Cooper
2015-05-15 10:17 ` [PATCH Remus v6 2/3] libxc/save: implement Remus checkpointed save Yang Hongyang
2015-05-15 10:17 ` [PATCH Remus v6 3/3] libxc/restore: implement Remus checkpointed restore Yang Hongyang
2 siblings, 1 reply; 8+ messages in thread
From: Yang Hongyang @ 2015-05-15 10:17 UTC (permalink / raw)
To: xen-devel
Cc: wei.liu2, ian.campbell, wency, andrew.cooper3, yunhong.jiang,
eddie.dong, guijianfeng, rshriram, ian.jackson
Split send_domain_memory_live() into three helper functions:
- send_memory_live() does the actual live send
- suspend_and_send_dirty() suspends the guest and sends dirty pages
- send_memory_verify()
The motivation for this is that when we send a checkpointed stream, we
will skip the actual live part.
Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
CC: Ian Campbell <Ian.Campbell@citrix.com>
CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
---
tools/libxc/xc_sr_save.c | 134 +++++++++++++++++++++++++++++++++--------------
1 file changed, 95 insertions(+), 39 deletions(-)
diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
index 1d0a46d..fb48df1 100644
--- a/tools/libxc/xc_sr_save.c
+++ b/tools/libxc/xc_sr_save.c
@@ -455,21 +455,15 @@ static int update_progress_string(struct xc_sr_context *ctx,
}
/*
- * Send all domain memory. This is the heart of the live migration loop.
+ * Send memory while guest is running.
*/
-static int send_domain_memory_live(struct xc_sr_context *ctx)
+static int send_memory_live(struct xc_sr_context *ctx)
{
xc_interface *xch = ctx->xch;
xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
char *progress_str = NULL;
unsigned x;
int rc = -1;
- DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
- &ctx->save.dirty_bitmap_hbuf);
-
- rc = enable_logdirty(ctx);
- if ( rc )
- goto out;
rc = update_progress_string(ctx, &progress_str, 0);
if ( rc )
@@ -485,7 +479,7 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
{
if ( xc_shadow_control(
xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
- HYPERCALL_BUFFER(dirty_bitmap), ctx->save.p2m_size,
+ &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
NULL, 0, &stats) != ctx->save.p2m_size )
{
PERROR("Failed to retrieve logdirty bitmap");
@@ -505,6 +499,26 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
goto out;
}
+ out:
+ xc_set_progress_prefix(xch, NULL);
+ free(progress_str);
+ return rc;
+}
+
+/*
+ * Suspend the domain and send dirty memory.
+ * This is the last iteration of the live migration and the
+ * heart of the checkpointed stream.
+ */
+static int suspend_and_send_dirty(struct xc_sr_context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
+ char *progress_str = NULL;
+ int rc = -1;
+ DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+ &ctx->save.dirty_bitmap_hbuf);
+
rc = suspend_domain(ctx);
if ( rc )
goto out;
@@ -519,9 +533,15 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
goto out;
}
- rc = update_progress_string(ctx, &progress_str, ctx->save.max_iterations);
- if ( rc )
- goto out;
+ if ( ctx->save.live )
+ {
+ rc = update_progress_string(ctx, &progress_str,
+ ctx->save.max_iterations);
+ if ( rc )
+ goto out;
+ }
+ else
+ xc_set_progress_prefix(xch, "Checkpointed save");
bitmap_or(dirty_bitmap, ctx->save.deferred_pages, ctx->save.p2m_size);
@@ -529,42 +549,78 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
if ( rc )
goto out;
- if ( ctx->save.debug )
+ out:
+ xc_set_progress_prefix(xch, NULL);
+ free(progress_str);
+ return rc;
+}
+
+static int send_memory_verify(struct xc_sr_context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
+ int rc = -1;
+ struct xc_sr_record rec =
{
- struct xc_sr_record rec =
- {
- .type = REC_TYPE_VERIFY,
- .length = 0,
- };
+ .type = REC_TYPE_VERIFY,
+ .length = 0,
+ };
- DPRINTF("Enabling verify mode");
+ DPRINTF("Enabling verify mode");
- rc = write_record(ctx, &rec);
- if ( rc )
- goto out;
+ rc = write_record(ctx, &rec);
+ if ( rc )
+ goto out;
- xc_set_progress_prefix(xch, "Memory verify");
- rc = send_all_pages(ctx);
- if ( rc )
- goto out;
+ xc_set_progress_prefix(xch, "Memory verify");
+ rc = send_all_pages(ctx);
+ if ( rc )
+ goto out;
- if ( xc_shadow_control(
- xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_PEEK,
- HYPERCALL_BUFFER(dirty_bitmap), ctx->save.p2m_size,
- NULL, 0, &stats) != ctx->save.p2m_size )
- {
- PERROR("Failed to retrieve logdirty bitmap");
- rc = -1;
- goto out;
- }
+ if ( xc_shadow_control(
+ xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_PEEK,
+ &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
+ NULL, 0, &stats) != ctx->save.p2m_size )
+ {
+ PERROR("Failed to retrieve logdirty bitmap");
+ rc = -1;
+ goto out;
+ }
+
+ DPRINTF(" Further stats: faults %u, dirty %u",
+ stats.fault_count, stats.dirty_count);
+
+ out:
+ return rc;
+}
+
+/*
+ * Send all domain memory. This is the heart of the live migration loop.
+ */
+static int send_domain_memory_live(struct xc_sr_context *ctx)
+{
+ int rc = -1;
+
+ rc = enable_logdirty(ctx);
+ if ( rc )
+ goto out;
- DPRINTF(" Further stats: faults %u, dirty %u",
- stats.fault_count, stats.dirty_count);
+ rc = send_memory_live(ctx);
+ if ( rc )
+ goto out;
+
+ rc = suspend_and_send_dirty(ctx);
+ if ( rc )
+ goto out;
+
+ if ( ctx->save.debug && !ctx->save.checkpointed )
+ {
+ rc = send_memory_verify(ctx);
+ if ( rc )
+ goto out;
}
out:
- xc_set_progress_prefix(xch, NULL);
- free(progress_str);
return rc;
}
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH Remus v6 2/3] libxc/save: implement Remus checkpointed save
2015-05-15 10:17 [PATCH Remus v6 0/3] Remus support for Migration-v2 Yang Hongyang
2015-05-15 10:17 ` [PATCH Remus v6 1/3] libxc/save: refactor of send_domain_memory_live() Yang Hongyang
@ 2015-05-15 10:17 ` Yang Hongyang
2015-05-15 12:59 ` Andrew Cooper
2015-05-15 10:17 ` [PATCH Remus v6 3/3] libxc/restore: implement Remus checkpointed restore Yang Hongyang
2 siblings, 1 reply; 8+ messages in thread
From: Yang Hongyang @ 2015-05-15 10:17 UTC (permalink / raw)
To: xen-devel
Cc: wei.liu2, ian.campbell, wency, andrew.cooper3, yunhong.jiang,
eddie.dong, guijianfeng, rshriram, ian.jackson
With Remus, the save flow should be:
live migration -> { periodic save (checkpointed save) }
Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
CC: Ian Campbell <Ian.Campbell@citrix.com>
CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
---
tools/libxc/xc_sr_save.c | 80 ++++++++++++++++++++++++++++++++++++------------
1 file changed, 61 insertions(+), 19 deletions(-)
diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
index fb48df1..d5a71b4 100644
--- a/tools/libxc/xc_sr_save.c
+++ b/tools/libxc/xc_sr_save.c
@@ -57,6 +57,16 @@ static int write_end_record(struct xc_sr_context *ctx)
}
/*
+ * Writes a CHECKPOINT record into the stream.
+ */
+static int write_checkpoint_record(struct xc_sr_context *ctx)
+{
+ struct xc_sr_record checkpoint = { REC_TYPE_CHECKPOINT, 0, NULL };
+
+ return write_record(ctx, &checkpoint);
+}
+
+/*
* Writes a batch of memory as a PAGE_DATA record into the stream. The batch
* is constructed in ctx->save.batch_pfns.
*
@@ -625,6 +635,14 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
}
/*
+ * Checkpointed save.
+ */
+static int send_domain_memory_checkpointed(struct xc_sr_context *ctx)
+{
+ return suspend_and_send_dirty(ctx);
+}
+
+/*
* Send all domain memory, pausing the domain first. Generally used for
* suspend-to-file.
*/
@@ -723,29 +741,53 @@ static int save(struct xc_sr_context *ctx, uint16_t guest_type)
if ( rc )
goto err;
- rc = ctx->save.ops.start_of_checkpoint(ctx);
- if ( rc )
- goto err;
+ do {
+ rc = ctx->save.ops.start_of_checkpoint(ctx);
+ if ( rc )
+ goto err;
- if ( ctx->save.live )
- rc = send_domain_memory_live(ctx);
- else
- rc = send_domain_memory_nonlive(ctx);
+ if ( ctx->save.live )
+ rc = send_domain_memory_live(ctx);
+ else if ( ctx->save.checkpointed )
+ rc = send_domain_memory_checkpointed(ctx);
+ else
+ rc = send_domain_memory_nonlive(ctx);
- if ( rc )
- goto err;
+ if ( rc )
+ goto err;
- if ( !ctx->dominfo.shutdown ||
- (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
- {
- ERROR("Domain has not been suspended");
- rc = -1;
- goto err;
- }
+ if ( !ctx->dominfo.shutdown ||
+ (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
+ {
+ ERROR("Domain has not been suspended");
+ rc = -1;
+ goto err;
+ }
- rc = ctx->save.ops.end_of_checkpoint(ctx);
- if ( rc )
- goto err;
+ rc = ctx->save.ops.end_of_checkpoint(ctx);
+ if ( rc )
+ goto err;
+
+ if ( ctx->save.checkpointed )
+ {
+ /*
+ * We have now completed the initial live portion of the checkpoint
+ * process. Therefore switch into periodically sending synchronous
+ * batches of pages.
+ */
+ ctx->save.live = false;
+
+ rc = write_checkpoint_record(ctx);
+ if ( rc )
+ goto err;
+
+ ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
+
+ rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
+ if ( rc <= 0 )
+ ctx->save.checkpointed = false;
+ }
+ } while ( ctx->save.checkpointed );
xc_report_progress_single(xch, "End of stream");
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH Remus v6 3/3] libxc/restore: implement Remus checkpointed restore
2015-05-15 10:17 [PATCH Remus v6 0/3] Remus support for Migration-v2 Yang Hongyang
2015-05-15 10:17 ` [PATCH Remus v6 1/3] libxc/save: refactor of send_domain_memory_live() Yang Hongyang
2015-05-15 10:17 ` [PATCH Remus v6 2/3] libxc/save: implement Remus checkpointed save Yang Hongyang
@ 2015-05-15 10:17 ` Yang Hongyang
2015-05-15 13:13 ` Andrew Cooper
2 siblings, 1 reply; 8+ messages in thread
From: Yang Hongyang @ 2015-05-15 10:17 UTC (permalink / raw)
To: xen-devel
Cc: wei.liu2, ian.campbell, wency, andrew.cooper3, yunhong.jiang,
eddie.dong, guijianfeng, rshriram, ian.jackson
With Remus, the restore flow should be:
the first full migration stream -> { periodic restore stream }
Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Ian Campbell <Ian.Campbell@citrix.com>
CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
---
tools/libxc/xc_sr_common.h | 15 +++++
tools/libxc/xc_sr_restore.c | 133 +++++++++++++++++++++++++++++++++++++++-----
2 files changed, 135 insertions(+), 13 deletions(-)
diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index f8121e7..565c5da 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -208,6 +208,21 @@ struct xc_sr_context
/* Plain VM, or checkpoints over time. */
bool checkpointed;
+ /* Currently buffering records between a checkpoint */
+ bool buffer_all_records;
+
+/*
+ * With Remus, we buffer the records sent by the primary at a checkpoint
+ * so that, in case the primary fails, we can recover from the last
+ * checkpoint state.
+ * This should be enough for most cases because the primary only sends
+ * dirty pages at a checkpoint.
+ */
+#define DEFAULT_BUF_RECORDS 1024
+ struct xc_sr_record *buffered_records;
+ unsigned allocated_rec_num;
+ unsigned buffered_rec_num;
+
/*
* Xenstore and Console parameters.
* INPUT: evtchn & domid
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index 9ab5760..fc47f43 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -468,6 +468,67 @@ static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
return rc;
}
+static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
+static int handle_checkpoint(struct xc_sr_context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ int rc = 0;
+ unsigned i;
+
+ if ( !ctx->restore.checkpointed )
+ {
+ ERROR("Found checkpoint in non-checkpointed stream");
+ rc = -1;
+ goto err;
+ }
+
+ if ( ctx->restore.buffer_all_records )
+ {
+ IPRINTF("All records buffered");
+
+ for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
+ {
+ rc = process_record(ctx, &ctx->restore.buffered_records[i]);
+ if ( rc )
+ goto err;
+ }
+ ctx->restore.buffered_rec_num = 0;
+ IPRINTF("All records processed");
+ }
+ else
+ ctx->restore.buffer_all_records = true;
+
+ err:
+ return rc;
+}
+
+static int buffer_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ unsigned new_alloc_num;
+ struct xc_sr_record *p;
+
+ if ( ctx->restore.buffered_rec_num >= ctx->restore.allocated_rec_num )
+ {
+ new_alloc_num = ctx->restore.allocated_rec_num + DEFAULT_BUF_RECORDS;
+ p = realloc(ctx->restore.buffered_records,
+ new_alloc_num * sizeof(struct xc_sr_record));
+ if ( !p )
+ {
+ ERROR("Failed to realloc memory for buffered records");
+ return -1;
+ }
+
+ ctx->restore.buffered_records = p;
+ ctx->restore.allocated_rec_num = new_alloc_num;
+ }
+
+ memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
+ rec, sizeof(*rec));
+
+ return 0;
+}
+
static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
xc_interface *xch = ctx->xch;
@@ -487,12 +548,17 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
ctx->restore.verify = true;
break;
+ case REC_TYPE_CHECKPOINT:
+ rc = handle_checkpoint(ctx);
+ break;
+
default:
rc = ctx->restore.ops.process_record(ctx, rec);
break;
}
free(rec->data);
+ rec->data = NULL;
if ( rc == RECORD_NOT_PROCESSED )
{
@@ -529,6 +595,15 @@ static int setup(struct xc_sr_context *ctx)
goto err;
}
+ ctx->restore.buffered_records = malloc(
+ DEFAULT_BUF_RECORDS * sizeof(struct xc_sr_record));
+ if ( !ctx->restore.buffered_records )
+ {
+ ERROR("Unable to allocate memory for buffered records");
+ rc = -1;
+ goto err;
+ }
+
err:
return rc;
}
@@ -536,7 +611,12 @@ static int setup(struct xc_sr_context *ctx)
static void cleanup(struct xc_sr_context *ctx)
{
xc_interface *xch = ctx->xch;
+ unsigned i;
+
+ for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
+ free(ctx->restore.buffered_records[i].data);
+ free(ctx->restore.buffered_records);
free(ctx->restore.populated_pfns);
if ( ctx->restore.ops.cleanup(ctx) )
PERROR("Failed to clean up");
@@ -564,23 +644,50 @@ static int restore(struct xc_sr_context *ctx)
{
rc = read_record(ctx, &rec);
if ( rc )
- goto err;
-
- rc = process_record(ctx, &rec);
- if ( rc )
- goto err;
-
- } while ( rec.type != REC_TYPE_END );
+ {
+ if ( ctx->restore.buffer_all_records )
+ goto remus_failover;
+ else
+ goto err;
+ }
#ifdef XG_LIBXL_HVM_COMPAT
- if ( ctx->dominfo.hvm )
- {
- rc = read_qemu(ctx);
- if ( rc )
- goto err;
- }
+ if ( ctx->dominfo.hvm &&
+ (rec.type == REC_TYPE_END || rec.type == REC_TYPE_CHECKPOINT) )
+ {
+ rc = read_qemu(ctx);
+ if ( rc )
+ {
+ if ( ctx->restore.buffer_all_records )
+ goto remus_failover;
+ else
+ goto err;
+ }
+ }
#endif
+ if ( ctx->restore.buffer_all_records &&
+ rec.type != REC_TYPE_END &&
+ rec.type != REC_TYPE_CHECKPOINT )
+ {
+ rc = buffer_record(ctx, &rec);
+ if ( rc )
+ goto err;
+ }
+ else
+ {
+ rc = process_record(ctx, &rec);
+ if ( rc )
+ goto err;
+ }
+
+ } while ( rec.type != REC_TYPE_END );
+
+ remus_failover:
+ /*
+ * With Remus, if we reach here there must have been an error on the
+ * primary, so fail over from the last checkpoint state.
+ */
rc = ctx->restore.ops.stream_complete(ctx);
if ( rc )
goto err;
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread