xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Yang Hongyang <yanghy@cn.fujitsu.com>, xen-devel@lists.xen.org
Cc: rshriram@cs.ubc.ca, Ian.Jackson@eu.citrix.com, Ian.Campbell@citrix.com
Subject: Re: [RFC PATCH 3/3] remus: adjust x86 pv restore to support remus
Date: Wed, 9 Jul 2014 12:16:07 +0100	[thread overview]
Message-ID: <53BD2477.1070505@citrix.com> (raw)
In-Reply-To: <1404892050-24650-4-git-send-email-yanghy@cn.fujitsu.com>

On 09/07/14 08:47, Yang Hongyang wrote:
> cache vcpu context when restore, and set context when stream
> complete.

Can you explain why this is needed?  I can't see why it should be required.

>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
> ---
>  tools/libxc/saverestore/common.h         |  6 +++
>  tools/libxc/saverestore/restore_x86_pv.c | 67 ++++++++++++++++++++++----------
>  2 files changed, 53 insertions(+), 20 deletions(-)
>
> diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h
> index 1dd9f51..ba54f83 100644
> --- a/tools/libxc/saverestore/common.h
> +++ b/tools/libxc/saverestore/common.h
> @@ -235,6 +235,12 @@ struct xc_sr_context
>  
>              /* Read-only mapping of guests shared info page */
>              shared_info_any_t *shinfo;
> +
> +            /*
> +             * We need to cache vcpu context when doing checkpointed
> +             * restore, otherwise restore will fail
> +             */
> +            vcpu_guest_context_any_t *vcpu;

"vcpus" seems more appropriate.

>          } x86_pv;
>      };
>  };
> diff --git a/tools/libxc/saverestore/restore_x86_pv.c b/tools/libxc/saverestore/restore_x86_pv.c
> index 14fc020..7a54047 100644
> --- a/tools/libxc/saverestore/restore_x86_pv.c
> +++ b/tools/libxc/saverestore/restore_x86_pv.c
> @@ -428,8 +428,8 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>  {
>      xc_interface *xch = ctx->xch;
>      struct xc_sr_rec_x86_pv_vcpu_hdr *vhdr = rec->data;
> -    vcpu_guest_context_any_t vcpu;
> -    size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu.x64) : sizeof(vcpu.x32);
> +    vcpu_guest_context_any_t *vcpu;
> +    size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu->x64) : sizeof(vcpu->x32);

Constructs like this are used in several places.  A

static inline size_t pv_vcpu_size(const struct xc_sr_context *ctx)

helper in common_x86.h would be a neat improvement.

>      xen_pfn_t pfn, mfn;
>      unsigned long tmp;
>      unsigned i;
> @@ -455,22 +455,23 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>          goto err;
>      }
>  
> -    memcpy(&vcpu, &vhdr->context, vcpusz);
> +    vcpu = (void *)ctx->x86_pv.vcpu + vcpusz * vhdr->vcpu_id;

vhdr->vcpu_id is essentially untrusted input at this point.  It needs to
be validated against dominfo.max_vcpu_id if it is to be used like this.

It was safe before as the first place it was used was with the set
hypercall which would validate in Xen.

> +    memcpy(vcpu, &vhdr->context, vcpusz);

If you are caching the context, the rest of this function can also be
deferred until the end, so it is run once rather than N times.

>  
>      /* Vcpu 0 is special: Convert the suspend record to an mfn. */
>      if ( vhdr->vcpu_id == 0 )
>      {
> -        rc = process_start_info(ctx, &vcpu);
> +        rc = process_start_info(ctx, vcpu);
>          if ( rc )
>              return rc;
>          rc = -1;
>      }
>  
> -    SET_FIELD(&vcpu, flags,
> -              GET_FIELD(&vcpu, flags, ctx->x86_pv.width) | VGCF_online,
> +    SET_FIELD(vcpu, flags,
> +              GET_FIELD(vcpu, flags, ctx->x86_pv.width) | VGCF_online,
>                ctx->x86_pv.width);
>  
> -    tmp = GET_FIELD(&vcpu, gdt_ents, ctx->x86_pv.width);
> +    tmp = GET_FIELD(vcpu, gdt_ents, ctx->x86_pv.width);
>      if ( tmp > 8192 )
>      {
>          ERROR("GDT entry count (%lu) out of range", tmp);
> @@ -481,7 +482,7 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>      /* Convert GDT frames to mfns. */
>      for ( i = 0; (i * 512) < tmp; ++i )
>      {
> -        pfn = GET_FIELD(&vcpu, gdt_frames[i], ctx->x86_pv.width);
> +        pfn = GET_FIELD(vcpu, gdt_frames[i], ctx->x86_pv.width);
>          if ( pfn > ctx->x86_pv.max_pfn )
>          {
>              ERROR("GDT frame %u (pfn %#lx) out of range", i, pfn);
> @@ -502,11 +503,11 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>              goto err;
>          }
>  
> -        SET_FIELD(&vcpu, gdt_frames[i], mfn, ctx->x86_pv.width);
> +        SET_FIELD(vcpu, gdt_frames[i], mfn, ctx->x86_pv.width);
>      }
>  
>      /* Convert CR3 to an mfn. */
> -    pfn = cr3_to_mfn(ctx, GET_FIELD(&vcpu, ctrlreg[3], ctx->x86_pv.width));
> +    pfn = cr3_to_mfn(ctx, GET_FIELD(vcpu, ctrlreg[3], ctx->x86_pv.width));
>      if ( pfn > ctx->x86_pv.max_pfn )
>      {
>          ERROR("cr3 (pfn %#lx) out of range", pfn);
> @@ -529,12 +530,12 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>          goto err;
>      }
>  
> -    SET_FIELD(&vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86_pv.width);
> +    SET_FIELD(vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86_pv.width);
>  
>      /* 64bit guests: Convert CR1 (guest pagetables) to mfn. */
> -    if ( ctx->x86_pv.levels == 4 && (vcpu.x64.ctrlreg[1] & 1) )
> +    if ( ctx->x86_pv.levels == 4 && (vcpu->x64.ctrlreg[1] & 1) )
>      {
> -        pfn = vcpu.x64.ctrlreg[1] >> PAGE_SHIFT;
> +        pfn = vcpu->x64.ctrlreg[1] >> PAGE_SHIFT;
>  
>          if ( pfn > ctx->x86_pv.max_pfn )
>          {
> @@ -558,13 +559,7 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>              goto err;
>          }
>  
> -        vcpu.x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
> -    }
> -
> -    if ( xc_vcpu_setcontext(xch, ctx->domid, vhdr->vcpu_id, &vcpu) )
> -    {
> -        PERROR("Failed to set vcpu%u's basic info", vhdr->vcpu_id);
> -        goto err;
> +        vcpu->x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
>      }
>  
>      rc = 0;
> @@ -881,6 +876,7 @@ static int x86_pv_localise_page(struct xc_sr_context *ctx, uint32_t type, void *
>  static int x86_pv_setup(struct xc_sr_context *ctx)
>  {
>      xc_interface *xch = ctx->xch;
> +    size_t vcpusz;
>      int rc;
>  
>      if ( ctx->restore.guest_type != DHDR_TYPE_X86_PV )
> @@ -904,6 +900,9 @@ static int x86_pv_setup(struct xc_sr_context *ctx)
>      if ( rc )
>          return rc;
>  
> +    vcpusz = ctx->x86_pv.width == 8 ?
> +             sizeof(ctx->x86_pv.vcpu->x64) : sizeof(ctx->x86_pv.vcpu->x32);
> +    ctx->x86_pv.vcpu = calloc((ctx->dominfo.max_vcpu_id + 1) , vcpusz);

Extraneous brackets and space.

>      return rc;
>  }
>  
> @@ -946,6 +945,29 @@ static int x86_pv_process_record(struct xc_sr_context *ctx, struct xc_sr_record
>      }
>  }
>  
> +static int update_vcpu_context(struct xc_sr_context *ctx)
> +{
> +    xc_interface *xch = ctx->xch;
> +    vcpu_guest_context_any_t *vcpu;
> +    uint32_t vcpu_id;
> +    size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu->x64) : sizeof(vcpu->x32);
> +
> +    for ( vcpu_id = 0; vcpu_id <= ctx->dominfo.max_vcpu_id; vcpu_id++ )
> +    {
> +        vcpu = (void *)ctx->x86_pv.vcpu + vcpu_id * vcpusz;
> +        if ( !vcpu )
> +            continue;

This check can never fail.

> +
> +        if ( xc_vcpu_setcontext(xch, ctx->domid, vcpu_id, vcpu) )

There is a latent bug introduced here.  If X86_PV_VCPU_BASIC records
have not been seen for all vcpus, this code will attempt to set a
context of a block of zeroes, and fail because of a bad cr3.

~Andrew

> +        {
> +            PERROR("Failed to set vcpu%u's basic info", vcpu_id);
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  /*
>   * restore_ops function.  Pin the pagetables, rewrite the p2m and seed the
>   * grant table.
> @@ -963,6 +985,10 @@ static int x86_pv_stream_complete(struct xc_sr_context *ctx)
>      if ( rc )
>          return rc;
>  
> +    rc = update_vcpu_context(ctx);
> +    if ( rc )
> +        return rc;
> +
>      rc = xc_dom_gnttab_seed(xch, ctx->domid,
>                              ctx->restore.console_mfn,
>                              ctx->restore.xenstore_mfn,
> @@ -985,6 +1011,7 @@ static int x86_pv_cleanup(struct xc_sr_context *ctx)
>      free(ctx->x86_pv.p2m);
>      free(ctx->x86_pv.p2m_pfns);
>      free(ctx->x86_pv.pfn_types);
> +    free(ctx->x86_pv.vcpu);
>  
>      if ( ctx->x86_pv.m2p )
>          munmap(ctx->x86_pv.m2p, ctx->x86_pv.nr_m2p_frames * PAGE_SIZE);

  reply	other threads:[~2014-07-09 11:16 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-09  7:47 [RFC PATCH 0/3] Remus: add remus support for migration v2 Yang Hongyang
2014-07-09  7:47 ` [RFC PATCH 1/3] remus: add a bool var to indicate checkpointed stream Yang Hongyang
2014-07-09  9:45   ` Andrew Cooper
2014-07-09  9:53     ` Hongyang Yang
2014-07-09  7:47 ` [RFC PATCH 2/3] remus: implement remus checkpoint in v2 save Yang Hongyang
2014-07-09 10:53   ` Andrew Cooper
2014-07-10  3:25     ` Hongyang Yang
2014-07-10  8:49       ` Ian Campbell
2014-07-10  9:24       ` Andrew Cooper
2014-07-16 15:22   ` Shriram Rajagopalan
2014-07-16 15:38     ` Andrew Cooper
2014-07-16 16:02       ` Shriram Rajagopalan
2014-07-16 16:33         ` Andrew Cooper
2014-07-09  7:47 ` [RFC PATCH 3/3] remus: adjust x86 pv restore to support remus Yang Hongyang
2014-07-09 11:16   ` Andrew Cooper [this message]
2014-07-09 11:26     ` Andrew Cooper
2014-07-10  3:30       ` Hongyang Yang
2014-07-10  9:25         ` Andrew Cooper
2014-07-10  9:32           ` Hongyang Yang
2014-07-10  9:42             ` Andrew Cooper
2014-07-10  9:47               ` Hongyang Yang
2014-07-09  8:53 ` [RFC PATCH 0/3] Remus: add remus support for migration v2 Ian Campbell
2014-07-09  9:56   ` Hongyang Yang
2014-07-09  9:42 ` Andrew Cooper
2014-07-09 10:06   ` Hongyang Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53BD2477.1070505@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=rshriram@cs.ubc.ca \
    --cc=xen-devel@lists.xen.org \
    --cc=yanghy@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).