Re: [RFC PATCH 3/3] remus: adjust x86 pv restore to support remus

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Yang Hongyang <yanghy@cn.fujitsu.com>, xen-devel@lists.xen.org
Cc: rshriram@cs.ubc.ca, Ian.Jackson@eu.citrix.com, Ian.Campbell@citrix.com
Subject: Re: [RFC PATCH 3/3] remus: adjust x86 pv restore to support remus
Date: Wed, 9 Jul 2014 12:16:07 +0100	[thread overview]
Message-ID: <53BD2477.1070505@citrix.com> (raw)
In-Reply-To: <1404892050-24650-4-git-send-email-yanghy@cn.fujitsu.com>

On 09/07/14 08:47, Yang Hongyang wrote:
> cache vcpu context when restore, and set context when stream
> complete.

Can you explain why this is needed?  I can't see why it should be required.

>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
> ---
>  tools/libxc/saverestore/common.h         |  6 +++
>  tools/libxc/saverestore/restore_x86_pv.c | 67 ++++++++++++++++++++++----------
>  2 files changed, 53 insertions(+), 20 deletions(-)
>
> diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h
> index 1dd9f51..ba54f83 100644
> --- a/tools/libxc/saverestore/common.h
> +++ b/tools/libxc/saverestore/common.h
> @@ -235,6 +235,12 @@ struct xc_sr_context
>  
>              /* Read-only mapping of guests shared info page */
>              shared_info_any_t *shinfo;
> +
> +            /*
> +             * We need to cache vcpu context when doing checkpointed
> +             * restore, otherwise restore will fail
> +             */
> +            vcpu_guest_context_any_t *vcpu;

"vcpus" seems more appropriate.

>          } x86_pv;
>      };
>  };
> diff --git a/tools/libxc/saverestore/restore_x86_pv.c b/tools/libxc/saverestore/restore_x86_pv.c
> index 14fc020..7a54047 100644
> --- a/tools/libxc/saverestore/restore_x86_pv.c
> +++ b/tools/libxc/saverestore/restore_x86_pv.c
> @@ -428,8 +428,8 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>  {
>      xc_interface *xch = ctx->xch;
>      struct xc_sr_rec_x86_pv_vcpu_hdr *vhdr = rec->data;
> -    vcpu_guest_context_any_t vcpu;
> -    size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu.x64) : sizeof(vcpu.x32);
> +    vcpu_guest_context_any_t *vcpu;
> +    size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu->x64) : sizeof(vcpu->x32);

Constructs like this are used in several places.  A

static inline size_t pv_vcpu_size(const struct xc_sr_context *ctx)

helper in common_x86.h would be a neat improvement.

>      xen_pfn_t pfn, mfn;
>      unsigned long tmp;
>      unsigned i;
> @@ -455,22 +455,23 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>          goto err;
>      }
>  
> -    memcpy(&vcpu, &vhdr->context, vcpusz);
> +    vcpu = (void *)ctx->x86_pv.vcpu + vcpusz * vhdr->vcpu_id;

vhdr->vcpu_id is essentially untrusted input at this point.  It needs to
be validated against dominfo.max_vcpu_id if it is to be used like this.

It was safe before as the first place it was used was with the set
hypercall which would validate in Xen.

> +    memcpy(vcpu, &vhdr->context, vcpusz);

If you are caching the context, the rest of this function can also be
deferred until the end, so it is run once rather than N times.

>  
>      /* Vcpu 0 is special: Convert the suspend record to an mfn. */
>      if ( vhdr->vcpu_id == 0 )
>      {
> -        rc = process_start_info(ctx, &vcpu);
> +        rc = process_start_info(ctx, vcpu);
>          if ( rc )
>              return rc;
>          rc = -1;
>      }
>  
> -    SET_FIELD(&vcpu, flags,
> -              GET_FIELD(&vcpu, flags, ctx->x86_pv.width) | VGCF_online,
> +    SET_FIELD(vcpu, flags,
> +              GET_FIELD(vcpu, flags, ctx->x86_pv.width) | VGCF_online,
>                ctx->x86_pv.width);
>  
> -    tmp = GET_FIELD(&vcpu, gdt_ents, ctx->x86_pv.width);
> +    tmp = GET_FIELD(vcpu, gdt_ents, ctx->x86_pv.width);
>      if ( tmp > 8192 )
>      {
>          ERROR("GDT entry count (%lu) out of range", tmp);
> @@ -481,7 +482,7 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>      /* Convert GDT frames to mfns. */
>      for ( i = 0; (i * 512) < tmp; ++i )
>      {
> -        pfn = GET_FIELD(&vcpu, gdt_frames[i], ctx->x86_pv.width);
> +        pfn = GET_FIELD(vcpu, gdt_frames[i], ctx->x86_pv.width);
>          if ( pfn > ctx->x86_pv.max_pfn )
>          {
>              ERROR("GDT frame %u (pfn %#lx) out of range", i, pfn);
> @@ -502,11 +503,11 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>              goto err;
>          }
>  
> -        SET_FIELD(&vcpu, gdt_frames[i], mfn, ctx->x86_pv.width);
> +        SET_FIELD(vcpu, gdt_frames[i], mfn, ctx->x86_pv.width);
>      }
>  
>      /* Convert CR3 to an mfn. */
> -    pfn = cr3_to_mfn(ctx, GET_FIELD(&vcpu, ctrlreg[3], ctx->x86_pv.width));
> +    pfn = cr3_to_mfn(ctx, GET_FIELD(vcpu, ctrlreg[3], ctx->x86_pv.width));
>      if ( pfn > ctx->x86_pv.max_pfn )
>      {
>          ERROR("cr3 (pfn %#lx) out of range", pfn);
> @@ -529,12 +530,12 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>          goto err;
>      }
>  
> -    SET_FIELD(&vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86_pv.width);
> +    SET_FIELD(vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86_pv.width);
>  
>      /* 64bit guests: Convert CR1 (guest pagetables) to mfn. */
> -    if ( ctx->x86_pv.levels == 4 && (vcpu.x64.ctrlreg[1] & 1) )
> +    if ( ctx->x86_pv.levels == 4 && (vcpu->x64.ctrlreg[1] & 1) )
>      {
> -        pfn = vcpu.x64.ctrlreg[1] >> PAGE_SHIFT;
> +        pfn = vcpu->x64.ctrlreg[1] >> PAGE_SHIFT;
>  
>          if ( pfn > ctx->x86_pv.max_pfn )
>          {
> @@ -558,13 +559,7 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
>              goto err;
>          }
>  
> -        vcpu.x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
> -    }
> -
> -    if ( xc_vcpu_setcontext(xch, ctx->domid, vhdr->vcpu_id, &vcpu) )
> -    {
> -        PERROR("Failed to set vcpu%u's basic info", vhdr->vcpu_id);
> -        goto err;
> +        vcpu->x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
>      }
>  
>      rc = 0;
> @@ -881,6 +876,7 @@ static int x86_pv_localise_page(struct xc_sr_context *ctx, uint32_t type, void *
>  static int x86_pv_setup(struct xc_sr_context *ctx)
>  {
>      xc_interface *xch = ctx->xch;
> +    size_t vcpusz;
>      int rc;
>  
>      if ( ctx->restore.guest_type != DHDR_TYPE_X86_PV )
> @@ -904,6 +900,9 @@ static int x86_pv_setup(struct xc_sr_context *ctx)
>      if ( rc )
>          return rc;
>  
> +    vcpusz = ctx->x86_pv.width == 8 ?
> +             sizeof(ctx->x86_pv.vcpu->x64) : sizeof(ctx->x86_pv.vcpu->x32);
> +    ctx->x86_pv.vcpu = calloc((ctx->dominfo.max_vcpu_id + 1) , vcpusz);

Extraneous brackets and space.

>      return rc;
>  }
>  
> @@ -946,6 +945,29 @@ static int x86_pv_process_record(struct xc_sr_context *ctx, struct xc_sr_record
>      }
>  }
>  
> +static int update_vcpu_context(struct xc_sr_context *ctx)
> +{
> +    xc_interface *xch = ctx->xch;
> +    vcpu_guest_context_any_t *vcpu;
> +    uint32_t vcpu_id;
> +    size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu->x64) : sizeof(vcpu->x32);
> +
> +    for ( vcpu_id = 0; vcpu_id <= ctx->dominfo.max_vcpu_id; vcpu_id++ )
> +    {
> +        vcpu = (void *)ctx->x86_pv.vcpu + vcpu_id * vcpusz;
> +        if ( !vcpu )
> +            continue;

This check can never fail.

> +
> +        if ( xc_vcpu_setcontext(xch, ctx->domid, vcpu_id, vcpu) )

There is a latent bug introduced here.  If X86_PV_VCPU_BASIC records
have not been seen for all vcpus, this code will attempt to set a
context of a block of zeroes, and fail because of a bad cr3.

~Andrew

> +        {
> +            PERROR("Failed to set vcpu%u's basic info", vcpu_id);
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  /*
>   * restore_ops function.  Pin the pagetables, rewrite the p2m and seed the
>   * grant table.
> @@ -963,6 +985,10 @@ static int x86_pv_stream_complete(struct xc_sr_context *ctx)
>      if ( rc )
>          return rc;
>  
> +    rc = update_vcpu_context(ctx);
> +    if ( rc )
> +        return rc;
> +
>      rc = xc_dom_gnttab_seed(xch, ctx->domid,
>                              ctx->restore.console_mfn,
>                              ctx->restore.xenstore_mfn,
> @@ -985,6 +1011,7 @@ static int x86_pv_cleanup(struct xc_sr_context *ctx)
>      free(ctx->x86_pv.p2m);
>      free(ctx->x86_pv.p2m_pfns);
>      free(ctx->x86_pv.pfn_types);
> +    free(ctx->x86_pv.vcpu);
>  
>      if ( ctx->x86_pv.m2p )
>          munmap(ctx->x86_pv.m2p, ctx->x86_pv.nr_m2p_frames * PAGE_SIZE);

next prev parent reply	other threads:[~2014-07-09 11:16 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-09  7:47 [RFC PATCH 0/3] Remus: add remus support for migration v2 Yang Hongyang
2014-07-09  7:47 ` [RFC PATCH 1/3] remus: add a bool var to indicate checkpointed stream Yang Hongyang
2014-07-09  9:45   ` Andrew Cooper
2014-07-09  9:53     ` Hongyang Yang
2014-07-09  7:47 ` [RFC PATCH 2/3] remus: implement remus checkpoint in v2 save Yang Hongyang
2014-07-09 10:53   ` Andrew Cooper
2014-07-10  3:25     ` Hongyang Yang
2014-07-10  8:49       ` Ian Campbell
2014-07-10  9:24       ` Andrew Cooper
2014-07-16 15:22   ` Shriram Rajagopalan
2014-07-16 15:38     ` Andrew Cooper
2014-07-16 16:02       ` Shriram Rajagopalan
2014-07-16 16:33         ` Andrew Cooper
2014-07-09  7:47 ` [RFC PATCH 3/3] remus: adjust x86 pv restore to support remus Yang Hongyang
2014-07-09 11:16   ` Andrew Cooper [this message]
2014-07-09 11:26     ` Andrew Cooper
2014-07-10  3:30       ` Hongyang Yang
2014-07-10  9:25         ` Andrew Cooper
2014-07-10  9:32           ` Hongyang Yang
2014-07-10  9:42             ` Andrew Cooper
2014-07-10  9:47               ` Hongyang Yang
2014-07-09  8:53 ` [RFC PATCH 0/3] Remus: add remus support for migration v2 Ian Campbell
2014-07-09  9:56   ` Hongyang Yang
2014-07-09  9:42 ` Andrew Cooper
2014-07-09 10:06   ` Hongyang Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53BD2477.1070505@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=rshriram@cs.ubc.ca \
    --cc=xen-devel@lists.xen.org \
    --cc=yanghy@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.