All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH Remus v4 0/2] Remus support for Migration-v2
@ 2015-05-14  8:56 Yang Hongyang
  2015-05-14  8:56 ` [PATCH Remus v4 1/2] libxc/save: implement Remus checkpointed save Yang Hongyang
  2015-05-14  8:56 ` [PATCH Remus v4 2/2] libxc/restore: implement Remus checkpointed restore Yang Hongyang
  0 siblings, 2 replies; 4+ messages in thread
From: Yang Hongyang @ 2015-05-14  8:56 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, andrew.cooper3, yunhong.jiang,
	eddie.dong, guijianfeng, rshriram, ian.jackson

This patchset implement the Remus support for Migration v2 but without
memory compressing.

Git tree available at:
    https://github.com/macrosheep/xen/tree/Remus-newmig-v4

This patchset is based on
    [PATCH v6 00/16] Misc patches to aid migration v2 Remus support
    https://github.com/macrosheep/xen/tree/misc-remus-v6

Yang Hongyang (2):
  libxc/save: implement Remus checkpointed save
  libxc/restore: implement Remus checkpointed restore

 tools/libxc/xc_sr_common.h  |  14 +++++
 tools/libxc/xc_sr_restore.c | 121 ++++++++++++++++++++++++++++++++++++++++----
 tools/libxc/xc_sr_save.c    |  80 ++++++++++++++++++++++-------
 3 files changed, 186 insertions(+), 29 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH Remus v4 1/2] libxc/save: implement Remus checkpointed save
  2015-05-14  8:56 [PATCH Remus v4 0/2] Remus support for Migration-v2 Yang Hongyang
@ 2015-05-14  8:56 ` Yang Hongyang
  2015-05-14  8:56 ` [PATCH Remus v4 2/2] libxc/restore: implement Remus checkpointed restore Yang Hongyang
  1 sibling, 0 replies; 4+ messages in thread
From: Yang Hongyang @ 2015-05-14  8:56 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, andrew.cooper3, yunhong.jiang,
	eddie.dong, guijianfeng, rshriram, ian.jackson

With Remus, the save flow should be:
live migration->{ periodically save(checkpointed save) }

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Ian Campbell <Ian.Campbell@citrix.com>
CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
---
 tools/libxc/xc_sr_save.c | 80 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 61 insertions(+), 19 deletions(-)

diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
index 1d0a46d..1c5d199 100644
--- a/tools/libxc/xc_sr_save.c
+++ b/tools/libxc/xc_sr_save.c
@@ -57,6 +57,16 @@ static int write_end_record(struct xc_sr_context *ctx)
 }
 
 /*
+ * Writes an CHECKPOINT record into the stream.
+ */
+static int write_checkpoint_record(struct xc_sr_context *ctx)
+{
+    struct xc_sr_record checkpoint = { REC_TYPE_CHECKPOINT, 0, NULL };
+
+    return write_record(ctx, &checkpoint);
+}
+
+/*
  * Writes a batch of memory as a PAGE_DATA record into the stream.  The batch
  * is constructed in ctx->save.batch_pfns.
  *
@@ -467,6 +477,14 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
     DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                     &ctx->save.dirty_bitmap_hbuf);
 
+    /*
+     * With Remus, we will enter checkpointed save after live migration.
+     * In checkpointed save loop, we skip the live part and pause straight
+     * away to send dirty pages between checkpoints.
+     */
+    if ( !ctx->save.live )
+        goto last_iter;
+
     rc = enable_logdirty(ctx);
     if ( rc )
         goto out;
@@ -505,6 +523,7 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
             goto out;
     }
 
+ last_iter:
     rc = suspend_domain(ctx);
     if ( rc )
         goto out;
@@ -667,29 +686,52 @@ static int save(struct xc_sr_context *ctx, uint16_t guest_type)
     if ( rc )
         goto err;
 
-    rc = ctx->save.ops.start_of_checkpoint(ctx);
-    if ( rc )
-        goto err;
+    do {
+        rc = ctx->save.ops.start_of_checkpoint(ctx);
+        if ( rc )
+            goto err;
 
-    if ( ctx->save.live )
-        rc = send_domain_memory_live(ctx);
-    else
-        rc = send_domain_memory_nonlive(ctx);
+        if ( ctx->save.live || ctx->save.checkpointed )
+            rc = send_domain_memory_live(ctx);
+        else
+            rc = send_domain_memory_nonlive(ctx);
 
-    if ( rc )
-        goto err;
+        if ( rc )
+            goto err;
 
-    if ( !ctx->dominfo.shutdown ||
-         (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
-    {
-        ERROR("Domain has not been suspended");
-        rc = -1;
-        goto err;
-    }
+        if ( !ctx->dominfo.shutdown ||
+            (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
+        {
+            ERROR("Domain has not been suspended");
+            rc = -1;
+            goto err;
+        }
 
-    rc = ctx->save.ops.end_of_checkpoint(ctx);
-    if ( rc )
-        goto err;
+        rc = ctx->save.ops.end_of_checkpoint(ctx);
+        if ( rc )
+            goto err;
+
+        if ( ctx->save.checkpointed )
+        {
+            if ( ctx->save.live )
+            {
+                /* End of live migration, we are sending checkpointed stream */
+                ctx->save.live = false;
+            }
+
+            rc = write_checkpoint_record(ctx);
+            if ( rc )
+                goto err;
+
+            ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
+
+            rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
+            if ( rc > 0 )
+                xc_report_progress_single(xch, "Checkpointed save");
+            else
+                ctx->save.checkpointed = false;
+        }
+    } while ( ctx->save.checkpointed );
 
     xc_report_progress_single(xch, "End of stream");
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH Remus v4 2/2] libxc/restore: implement Remus checkpointed restore
  2015-05-14  8:56 [PATCH Remus v4 0/2] Remus support for Migration-v2 Yang Hongyang
  2015-05-14  8:56 ` [PATCH Remus v4 1/2] libxc/save: implement Remus checkpointed save Yang Hongyang
@ 2015-05-14  8:56 ` Yang Hongyang
  2015-05-14  9:24   ` Andrew Cooper
  1 sibling, 1 reply; 4+ messages in thread
From: Yang Hongyang @ 2015-05-14  8:56 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, ian.campbell, wency, andrew.cooper3, yunhong.jiang,
	eddie.dong, guijianfeng, rshriram, ian.jackson

With Remus, the restore flow should be:
the first full migration stream -> { periodically restore stream }

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Ian Campbell <Ian.Campbell@citrix.com>
CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
---
 tools/libxc/xc_sr_common.h  |  14 +++++
 tools/libxc/xc_sr_restore.c | 121 ++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 125 insertions(+), 10 deletions(-)

diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index f8121e7..3bf27f1 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -208,6 +208,20 @@ struct xc_sr_context
             /* Plain VM, or checkpoints over time. */
             bool checkpointed;
 
+            /* Currently buffering records between a checkpoint */
+            bool buffer_all_records;
+
+/*
+ * With Remus, we buffer the records sent by the primary at checkpoint,
+ * in case the primary will fail, we can recover from the last
+ * checkpoint state.
+ * This should be enough because primary only send dirty pages at
+ * checkpoint.
+ */
+#define MAX_BUF_RECORDS 1024
+            struct xc_sr_record *buffered_records;
+            unsigned buffered_rec_num;
+
             /*
              * Xenstore and Console parameters.
              * INPUT:  evtchn & domid
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index 9ab5760..3c93406 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -468,10 +468,73 @@ static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
     return rc;
 }
 
+static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
+static int handle_checkpoint(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc = 0;
+    unsigned i;
+    struct xc_sr_record *rec;
+
+    if ( !ctx->restore.checkpointed )
+    {
+        ERROR("Found checkpoint in non-checkpointed stream");
+        rc = -1;
+        goto err;
+    }
+
+    if ( ctx->restore.buffer_all_records )
+    {
+        IPRINTF("All records buffered");
+
+        /*
+         * We need to set buffer_all_records to false in
+         * order to process records instead of buffer records.
+         * buffer_all_records should be set back to true after
+         * we successfully processed all records.
+         */
+        ctx->restore.buffer_all_records = false;
+        for ( i = 0; i < ctx->restore.buffered_rec_num; i++)
+        {
+            rec = ctx->restore.buffered_records +
+                                        i * sizeof(struct xc_sr_record);
+            rc = process_record(ctx, rec);
+            if ( rc )
+                goto err;
+        }
+        ctx->restore.buffered_rec_num = 0;
+        ctx->restore.buffer_all_records = true;
+        IPRINTF("All records processed");
+    }
+    else
+        ctx->restore.buffer_all_records = true;
+
+ err:
+    return rc;
+}
+
 static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
 {
     xc_interface *xch = ctx->xch;
     int rc = 0;
+    struct xc_sr_record *buf_rec;
+
+    if ( ctx->restore.buffer_all_records &&
+         rec->type != REC_TYPE_END &&
+         rec->type != REC_TYPE_CHECKPOINT )
+    {
+        if ( ctx->restore.buffered_rec_num >= MAX_BUF_RECORDS )
+        {
+            ERROR("There are too many records within a checkpoint");
+            return -1;
+        }
+
+        buf_rec = ctx->restore.buffered_records +
+                  ctx->restore.buffered_rec_num++ * sizeof(struct xc_sr_record);
+        memcpy(buf_rec, rec, sizeof(struct xc_sr_record));
+
+        return 0;
+    }
 
     switch ( rec->type )
     {
@@ -487,12 +550,17 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
         ctx->restore.verify = true;
         break;
 
+    case REC_TYPE_CHECKPOINT:
+        rc = handle_checkpoint(ctx);
+        break;
+
     default:
         rc = ctx->restore.ops.process_record(ctx, rec);
         break;
     }
 
     free(rec->data);
+    rec->data = NULL;
 
     if ( rc == RECORD_NOT_PROCESSED )
     {
@@ -529,6 +597,15 @@ static int setup(struct xc_sr_context *ctx)
         goto err;
     }
 
+    ctx->restore.buffered_records = malloc(
+        MAX_BUF_RECORDS * sizeof(struct xc_sr_record));
+    if ( !ctx->restore.buffered_records )
+    {
+        ERROR("Unable to allocate memory for buffered records");
+        rc = -1;
+        goto err;
+    }
+
  err:
     return rc;
 }
@@ -536,7 +613,15 @@ static int setup(struct xc_sr_context *ctx)
 static void cleanup(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
+    unsigned i;
+    struct xc_sr_record *rec;
 
+    for ( i = 0; i < ctx->restore.buffered_rec_num; i++)
+    {
+        rec = ctx->restore.buffered_records + i * sizeof(struct xc_sr_record);
+        free(rec->data);
+    }
+    free(ctx->restore.buffered_records);
     free(ctx->restore.populated_pfns);
     if ( ctx->restore.ops.cleanup(ctx) )
         PERROR("Failed to clean up");
@@ -564,7 +649,27 @@ static int restore(struct xc_sr_context *ctx)
     {
         rc = read_record(ctx, &rec);
         if ( rc )
-            goto err;
+        {
+            if ( ctx->restore.buffer_all_records )
+                goto remus_failover;
+            else
+                goto err;
+        }
+
+#ifdef XG_LIBXL_HVM_COMPAT
+        if ( ctx->dominfo.hvm &&
+             (rec.type == REC_TYPE_END || rec.type == REC_TYPE_CHECKPOINT) )
+        {
+            rc = read_qemu(ctx);
+            if ( rc )
+            {
+                if ( ctx->restore.buffer_all_records )
+                    goto remus_failover;
+                else
+                    goto err;
+            }
+        }
+#endif
 
         rc = process_record(ctx, &rec);
         if ( rc )
@@ -572,15 +677,11 @@ static int restore(struct xc_sr_context *ctx)
 
     } while ( rec.type != REC_TYPE_END );
 
-#ifdef XG_LIBXL_HVM_COMPAT
-    if ( ctx->dominfo.hvm )
-    {
-        rc = read_qemu(ctx);
-        if ( rc )
-            goto err;
-    }
-#endif
-
+ remus_failover:
+    /*
+     * With Remus, if we reach here, there must be some error on primary,
+     * failover from the last checkpoint state.
+     */
     rc = ctx->restore.ops.stream_complete(ctx);
     if ( rc )
         goto err;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH Remus v4 2/2] libxc/restore: implement Remus checkpointed restore
  2015-05-14  8:56 ` [PATCH Remus v4 2/2] libxc/restore: implement Remus checkpointed restore Yang Hongyang
@ 2015-05-14  9:24   ` Andrew Cooper
  0 siblings, 0 replies; 4+ messages in thread
From: Andrew Cooper @ 2015-05-14  9:24 UTC (permalink / raw)
  To: Yang Hongyang, xen-devel
  Cc: wei.liu2, ian.campbell, wency, guijianfeng, yunhong.jiang,
	eddie.dong, rshriram, ian.jackson

On 14/05/15 09:56, Yang Hongyang wrote:
> With Remus, the restore flow should be:
> the first full migration stream -> { periodically restore stream }
>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> CC: Ian Campbell <Ian.Campbell@citrix.com>
> CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
> CC: Wei Liu <wei.liu2@citrix.com>
> ---
>  tools/libxc/xc_sr_common.h  |  14 +++++
>  tools/libxc/xc_sr_restore.c | 121 ++++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 125 insertions(+), 10 deletions(-)
>
> diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
> index f8121e7..3bf27f1 100644
> --- a/tools/libxc/xc_sr_common.h
> +++ b/tools/libxc/xc_sr_common.h
> @@ -208,6 +208,20 @@ struct xc_sr_context
>              /* Plain VM, or checkpoints over time. */
>              bool checkpointed;
>  
> +            /* Currently buffering records between a checkpoint */
> +            bool buffer_all_records;
> +
> +/*
> + * With Remus, we buffer the records sent by the primary at checkpoint,
> + * in case the primary will fail, we can recover from the last
> + * checkpoint state.
> + * This should be enough because primary only send dirty pages at
> + * checkpoint.
> + */
> +#define MAX_BUF_RECORDS 1024
> +            struct xc_sr_record *buffered_records;
> +            unsigned buffered_rec_num;
> +
>              /*
>               * Xenstore and Console parameters.
>               * INPUT:  evtchn & domid
> diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
> index 9ab5760..3c93406 100644
> --- a/tools/libxc/xc_sr_restore.c
> +++ b/tools/libxc/xc_sr_restore.c
> @@ -468,10 +468,73 @@ static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
>      return rc;
>  }
>  
> +static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
> +static int handle_checkpoint(struct xc_sr_context *ctx)
> +{
> +    xc_interface *xch = ctx->xch;
> +    int rc = 0;
> +    unsigned i;
> +    struct xc_sr_record *rec;
> +
> +    if ( !ctx->restore.checkpointed )
> +    {
> +        ERROR("Found checkpoint in non-checkpointed stream");
> +        rc = -1;
> +        goto err;
> +    }
> +
> +    if ( ctx->restore.buffer_all_records )
> +    {
> +        IPRINTF("All records buffered");
> +
> +        /*
> +         * We need to set buffer_all_records to false in
> +         * order to process records instead of buffer records.
> +         * buffer_all_records should be set back to true after
> +         * we successfully processed all records.
> +         */
> +        ctx->restore.buffer_all_records = false;
> +        for ( i = 0; i < ctx->restore.buffered_rec_num; i++)

Space before closing bracket.

> +        {
> +            rec = ctx->restore.buffered_records +
> +                                        i * sizeof(struct xc_sr_record);

This pointer arithmetic looks wrong.

FWIW, "rec = &ctx->restore.buffered_records[i];" would be clearer,
although you don't even need to pull it into a variable as it is only
referenced once.

> +            rc = process_record(ctx, rec);
> +            if ( rc )
> +                goto err;
> +        }
> +        ctx->restore.buffered_rec_num = 0;
> +        ctx->restore.buffer_all_records = true;
> +        IPRINTF("All records processed");
> +    }
> +    else
> +        ctx->restore.buffer_all_records = true;
> +
> + err:
> +    return rc;
> +}
> +
>  static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
>  {
>      xc_interface *xch = ctx->xch;
>      int rc = 0;
> +    struct xc_sr_record *buf_rec;
> +
> +    if ( ctx->restore.buffer_all_records &&
> +         rec->type != REC_TYPE_END &&
> +         rec->type != REC_TYPE_CHECKPOINT )
> +    {
> +        if ( ctx->restore.buffered_rec_num >= MAX_BUF_RECORDS )
> +        {
> +            ERROR("There are too many records within a checkpoint");
> +            return -1;
> +        }
> +
> +        buf_rec = ctx->restore.buffered_records +
> +                  ctx->restore.buffered_rec_num++ * sizeof(struct xc_sr_record);

Ah - this is how the other bit of pointer arithmetic doesn’t break, but
it will wander off the array if the sender provides more than 32 records.

> +        memcpy(buf_rec, rec, sizeof(struct xc_sr_record));

As before,

memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
rec, sizeof(*rec));

might be a little more simple.

> +
> +        return 0;
> +    }
>  
>      switch ( rec->type )
>      {
> @@ -487,12 +550,17 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
>          ctx->restore.verify = true;
>          break;
>  
> +    case REC_TYPE_CHECKPOINT:
> +        rc = handle_checkpoint(ctx);
> +        break;
> +
>      default:
>          rc = ctx->restore.ops.process_record(ctx, rec);
>          break;
>      }
>  
>      free(rec->data);
> +    rec->data = NULL;
>  
>      if ( rc == RECORD_NOT_PROCESSED )
>      {
> @@ -529,6 +597,15 @@ static int setup(struct xc_sr_context *ctx)
>          goto err;
>      }
>  
> +    ctx->restore.buffered_records = malloc(
> +        MAX_BUF_RECORDS * sizeof(struct xc_sr_record));
> +    if ( !ctx->restore.buffered_records )
> +    {
> +        ERROR("Unable to allocate memory for buffered records");
> +        rc = -1;
> +        goto err;
> +    }
> +
>   err:
>      return rc;
>  }
> @@ -536,7 +613,15 @@ static int setup(struct xc_sr_context *ctx)
>  static void cleanup(struct xc_sr_context *ctx)
>  {
>      xc_interface *xch = ctx->xch;
> +    unsigned i;
> +    struct xc_sr_record *rec;
>  
> +    for ( i = 0; i < ctx->restore.buffered_rec_num; i++)

Style.

> +    {
> +        rec = ctx->restore.buffered_records + i * sizeof(struct xc_sr_record);
> +        free(rec->data);

More bad pointer arithmetic.


Other than the pointer arithmetic issues (and a few minor style issues),
this patch looks fine.

~Andrew

> +    }
> +    free(ctx->restore.buffered_records);
>      free(ctx->restore.populated_pfns);
>      if ( ctx->restore.ops.cleanup(ctx) )
>          PERROR("Failed to clean up");
> @@ -564,7 +649,27 @@ static int restore(struct xc_sr_context *ctx)
>      {
>          rc = read_record(ctx, &rec);
>          if ( rc )
> -            goto err;
> +        {
> +            if ( ctx->restore.buffer_all_records )
> +                goto remus_failover;
> +            else
> +                goto err;
> +        }
> +
> +#ifdef XG_LIBXL_HVM_COMPAT
> +        if ( ctx->dominfo.hvm &&
> +             (rec.type == REC_TYPE_END || rec.type == REC_TYPE_CHECKPOINT) )
> +        {
> +            rc = read_qemu(ctx);
> +            if ( rc )
> +            {
> +                if ( ctx->restore.buffer_all_records )
> +                    goto remus_failover;
> +                else
> +                    goto err;
> +            }
> +        }
> +#endif
>  
>          rc = process_record(ctx, &rec);
>          if ( rc )
> @@ -572,15 +677,11 @@ static int restore(struct xc_sr_context *ctx)
>  
>      } while ( rec.type != REC_TYPE_END );
>  
> -#ifdef XG_LIBXL_HVM_COMPAT
> -    if ( ctx->dominfo.hvm )
> -    {
> -        rc = read_qemu(ctx);
> -        if ( rc )
> -            goto err;
> -    }
> -#endif
> -
> + remus_failover:
> +    /*
> +     * With Remus, if we reach here, there must be some error on primary,
> +     * failover from the last checkpoint state.
> +     */
>      rc = ctx->restore.ops.stream_complete(ctx);
>      if ( rc )
>          goto err;

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-05-14  9:24 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-05-14  8:56 [PATCH Remus v4 0/2] Remus support for Migration-v2 Yang Hongyang
2015-05-14  8:56 ` [PATCH Remus v4 1/2] libxc/save: implement Remus checkpointed save Yang Hongyang
2015-05-14  8:56 ` [PATCH Remus v4 2/2] libxc/restore: implement Remus checkpointed restore Yang Hongyang
2015-05-14  9:24   ` Andrew Cooper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.