All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Yang Hongyang <yanghy@cn.fujitsu.com>, xen-devel@lists.xen.org
Cc: rshriram@cs.ubc.ca, Ian.Jackson@eu.citrix.com, Ian.Campbell@citrix.com
Subject: Re: [RFC PATCH 2/3] remus: implement remus checkpoint in v2 save
Date: Wed, 9 Jul 2014 11:53:07 +0100	[thread overview]
Message-ID: <53BD1F13.2040301@citrix.com> (raw)
In-Reply-To: <1404892050-24650-3-git-send-email-yanghy@cn.fujitsu.com>

On 09/07/14 08:47, Yang Hongyang wrote:
> implement remus checkpoint in v2 save
>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
> ---
>  tools/libxc/saverestore/common.h |  1 +
>  tools/libxc/saverestore/save.c   | 88 ++++++++++++++++++++++++----------------
>  2 files changed, 55 insertions(+), 34 deletions(-)
>
> diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h
> index 24ba95b..1dd9f51 100644
> --- a/tools/libxc/saverestore/common.h
> +++ b/tools/libxc/saverestore/common.h
> @@ -153,6 +153,7 @@ struct xc_sr_context
>  
>      xc_dominfo_t dominfo;
>      bool checkpointed;
> +    bool firsttime;

This is also only used on the save side.

>  
>      union
>      {
> diff --git a/tools/libxc/saverestore/save.c b/tools/libxc/saverestore/save.c
> index d2fa8a6..98a5c2f 100644
> --- a/tools/libxc/saverestore/save.c
> +++ b/tools/libxc/saverestore/save.c
> @@ -375,6 +375,8 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
>          goto out;
>      }
>  
> +    if ( ctx->checkpointed && !ctx->firsttime )
> +        goto lastiter;
>      /* This juggling is required if logdirty is already on, e.g. VRAM tracking */
>      if ( xc_shadow_control(xch, ctx->domid,
>                             XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
> @@ -436,6 +438,7 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
>              break;
>      }
>  
> +lastiter:
>      rc = suspend_domain(ctx);
>      if ( rc )
>          goto out;
> @@ -570,44 +573,60 @@ static int save(struct xc_sr_context *ctx, uint16_t guest_type)
>      if ( rc )
>          goto err;
>  
> -    rc = ctx->save.ops.start_of_stream(ctx);
> -    if ( rc )
> -        goto err;
> +    do {
> +        rc = ctx->save.ops.start_of_stream(ctx);
> +        if ( rc )
> +            goto err;

I am not sure start_of_stream() wants to be inside the loop.  For PV
guests, it sends the X86_PV_INFO which is only expected to be sent
once.  The X86_PV_P2M_FRAMES record is deliberately safe to send
multiple times (in the hope that someone might eventually fix the
ballooning issues), but is a waste of time to send like this, as its
content won't be changing.

>  
> -    if ( ctx->save.live )
> -    {
> -        DPRINTF("Starting live migrate");
> -        rc = send_domain_memory_live(ctx);
> -    }
> -    else
> -    {
> -        DPRINTF("Starting nonlive save");
> -        rc = send_domain_memory_nonlive(ctx);
> -    }
> +        if ( ctx->save.live )
> +        {
> +            DPRINTF("Starting live migrate");
> +            rc = send_domain_memory_live(ctx);
> +        }
> +        else
> +        {
> +            DPRINTF("Starting nonlive save");
> +            rc = send_domain_memory_nonlive(ctx);
> +        }
>  
> -    if ( rc )
> -        goto err;
> +        if ( rc )
> +            goto err;
>  
> -    /* Refresh domain information now it has paused. */
> -    if ( (xc_domain_getinfo(xch, ctx->domid, 1, &ctx->dominfo) != 1) ||
> -         (ctx->dominfo.domid != ctx->domid) )
> -    {
> -        PERROR("Unable to refresh domain information");
> -        rc = -1;
> -        goto err;
> -    }
> -    else if ( (!ctx->dominfo.shutdown ||
> -               ctx->dominfo.shutdown_reason != SHUTDOWN_suspend ) &&
> -              !ctx->dominfo.paused )
> -    {
> -        ERROR("Domain has not been suspended");
> -        rc = -1;
> -        goto err;
> -    }
> +        /* Refresh domain information now it has paused. */
> +        if ( (xc_domain_getinfo(xch, ctx->domid, 1, &ctx->dominfo) != 1) ||
> +             (ctx->dominfo.domid != ctx->domid) )
> +        {
> +            PERROR("Unable to refresh domain information");
> +            rc = -1;
> +            goto err;
> +        }
> +        else if ( (!ctx->dominfo.shutdown ||
> +                  ctx->dominfo.shutdown_reason != SHUTDOWN_suspend ) &&
> +                  !ctx->dominfo.paused )
> +        {
> +            ERROR("Domain has not been suspended");
> +            rc = -1;
> +            goto err;
> +        }
>  
> -    rc = ctx->save.ops.end_of_stream(ctx);
> -    if ( rc )
> -        goto err;
> +        rc = ctx->save.ops.end_of_stream(ctx);
> +        if ( rc )
> +            goto err;
> +
> +        if ( ctx->checkpointed ) {
> +            if ( ctx->firsttime )
> +                ctx->firsttime = false;
> +
> +            ctx->save.callbacks->postcopy(ctx->save.callbacks->data);

Can postcopy() fail?

~Andrew

> +
> +            rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
> +            if ( rc > 0 ) {
> +                IPRINTF("Next checkpoint\n");
> +            } else {
> +                ctx->checkpointed = false;
> +            }
> +        }
> +    } while ( ctx->checkpointed );
>  
>      rc = write_end_record(ctx);
>      if ( rc )
> @@ -653,6 +672,7 @@ int xc_domain_save2(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_ite
>      ctx.save.live  = !!(flags & XCFLAGS_LIVE);
>      ctx.save.debug = !!(flags & XCFLAGS_DEBUG);
>      ctx.checkpointed = !!(flags & XCFLAGS_CHECKPOINTED);
> +    ctx.firsttime = true;
>  
>      if ( ctx.checkpointed ) {
>          /* This is a checkpointed save, we need these callbacks */

  reply	other threads:[~2014-07-09 10:53 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-09  7:47 [RFC PATCH 0/3] Remus: add remus support for migration v2 Yang Hongyang
2014-07-09  7:47 ` [RFC PATCH 1/3] remus: add a bool var to indicate checkpointed stream Yang Hongyang
2014-07-09  9:45   ` Andrew Cooper
2014-07-09  9:53     ` Hongyang Yang
2014-07-09  7:47 ` [RFC PATCH 2/3] remus: implement remus checkpoint in v2 save Yang Hongyang
2014-07-09 10:53   ` Andrew Cooper [this message]
2014-07-10  3:25     ` Hongyang Yang
2014-07-10  8:49       ` Ian Campbell
2014-07-10  9:24       ` Andrew Cooper
2014-07-16 15:22   ` Shriram Rajagopalan
2014-07-16 15:38     ` Andrew Cooper
2014-07-16 16:02       ` Shriram Rajagopalan
2014-07-16 16:33         ` Andrew Cooper
2014-07-09  7:47 ` [RFC PATCH 3/3] remus: adjust x86 pv restore to support remus Yang Hongyang
2014-07-09 11:16   ` Andrew Cooper
2014-07-09 11:26     ` Andrew Cooper
2014-07-10  3:30       ` Hongyang Yang
2014-07-10  9:25         ` Andrew Cooper
2014-07-10  9:32           ` Hongyang Yang
2014-07-10  9:42             ` Andrew Cooper
2014-07-10  9:47               ` Hongyang Yang
2014-07-09  8:53 ` [RFC PATCH 0/3] Remus: add remus support for migration v2 Ian Campbell
2014-07-09  9:56   ` Hongyang Yang
2014-07-09  9:42 ` Andrew Cooper
2014-07-09 10:06   ` Hongyang Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53BD1F13.2040301@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=rshriram@cs.ubc.ca \
    --cc=xen-devel@lists.xen.org \
    --cc=yanghy@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.