From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Yang Hongyang <yanghy@cn.fujitsu.com>, xen-devel@lists.xen.org
Cc: rshriram@cs.ubc.ca, Ian.Jackson@eu.citrix.com, Ian.Campbell@citrix.com
Subject: Re: [RFC PATCH 3/3] remus: adjust x86 pv restore to support remus
Date: Wed, 9 Jul 2014 12:16:07 +0100 [thread overview]
Message-ID: <53BD2477.1070505@citrix.com> (raw)
In-Reply-To: <1404892050-24650-4-git-send-email-yanghy@cn.fujitsu.com>
On 09/07/14 08:47, Yang Hongyang wrote:
> cache vcpu context when restore, and set context when stream
> complete.
Can you explain why this is needed? I can't see why it should be required.
>
> Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
> ---
> tools/libxc/saverestore/common.h | 6 +++
> tools/libxc/saverestore/restore_x86_pv.c | 67 ++++++++++++++++++++++----------
> 2 files changed, 53 insertions(+), 20 deletions(-)
>
> diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h
> index 1dd9f51..ba54f83 100644
> --- a/tools/libxc/saverestore/common.h
> +++ b/tools/libxc/saverestore/common.h
> @@ -235,6 +235,12 @@ struct xc_sr_context
>
> /* Read-only mapping of guests shared info page */
> shared_info_any_t *shinfo;
> +
> + /*
> + * We need to cache vcpu context when doing checkpointed
> + * restore, otherwise restore will fail
> + */
> + vcpu_guest_context_any_t *vcpu;
"vcpus" seems more appropriate.
> } x86_pv;
> };
> };
> diff --git a/tools/libxc/saverestore/restore_x86_pv.c b/tools/libxc/saverestore/restore_x86_pv.c
> index 14fc020..7a54047 100644
> --- a/tools/libxc/saverestore/restore_x86_pv.c
> +++ b/tools/libxc/saverestore/restore_x86_pv.c
> @@ -428,8 +428,8 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
> {
> xc_interface *xch = ctx->xch;
> struct xc_sr_rec_x86_pv_vcpu_hdr *vhdr = rec->data;
> - vcpu_guest_context_any_t vcpu;
> - size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu.x64) : sizeof(vcpu.x32);
> + vcpu_guest_context_any_t *vcpu;
> + size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu->x64) : sizeof(vcpu->x32);
Constructs like this are used in several places. A
static inline size_t pv_vcpu_size(const struct xc_sr_context *ctx)
helper in common_x86.h would be a neat improvement.
> xen_pfn_t pfn, mfn;
> unsigned long tmp;
> unsigned i;
> @@ -455,22 +455,23 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
> goto err;
> }
>
> - memcpy(&vcpu, &vhdr->context, vcpusz);
> + vcpu = (void *)ctx->x86_pv.vcpu + vcpusz * vhdr->vcpu_id;
vhdr->vcpu_id is essentially untrusted input at this point. It needs to
be validated against dominfo.max_vcpu_id if it is to be used like this.
It was safe before as the first place it was used was with the set
hypercall which would validate in Xen.
> + memcpy(vcpu, &vhdr->context, vcpusz);
If you are caching the context, the rest of this function can also be
deferred until the end, so it is run once rather than N times.
>
> /* Vcpu 0 is special: Convert the suspend record to an mfn. */
> if ( vhdr->vcpu_id == 0 )
> {
> - rc = process_start_info(ctx, &vcpu);
> + rc = process_start_info(ctx, vcpu);
> if ( rc )
> return rc;
> rc = -1;
> }
>
> - SET_FIELD(&vcpu, flags,
> - GET_FIELD(&vcpu, flags, ctx->x86_pv.width) | VGCF_online,
> + SET_FIELD(vcpu, flags,
> + GET_FIELD(vcpu, flags, ctx->x86_pv.width) | VGCF_online,
> ctx->x86_pv.width);
>
> - tmp = GET_FIELD(&vcpu, gdt_ents, ctx->x86_pv.width);
> + tmp = GET_FIELD(vcpu, gdt_ents, ctx->x86_pv.width);
> if ( tmp > 8192 )
> {
> ERROR("GDT entry count (%lu) out of range", tmp);
> @@ -481,7 +482,7 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
> /* Convert GDT frames to mfns. */
> for ( i = 0; (i * 512) < tmp; ++i )
> {
> - pfn = GET_FIELD(&vcpu, gdt_frames[i], ctx->x86_pv.width);
> + pfn = GET_FIELD(vcpu, gdt_frames[i], ctx->x86_pv.width);
> if ( pfn > ctx->x86_pv.max_pfn )
> {
> ERROR("GDT frame %u (pfn %#lx) out of range", i, pfn);
> @@ -502,11 +503,11 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
> goto err;
> }
>
> - SET_FIELD(&vcpu, gdt_frames[i], mfn, ctx->x86_pv.width);
> + SET_FIELD(vcpu, gdt_frames[i], mfn, ctx->x86_pv.width);
> }
>
> /* Convert CR3 to an mfn. */
> - pfn = cr3_to_mfn(ctx, GET_FIELD(&vcpu, ctrlreg[3], ctx->x86_pv.width));
> + pfn = cr3_to_mfn(ctx, GET_FIELD(vcpu, ctrlreg[3], ctx->x86_pv.width));
> if ( pfn > ctx->x86_pv.max_pfn )
> {
> ERROR("cr3 (pfn %#lx) out of range", pfn);
> @@ -529,12 +530,12 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
> goto err;
> }
>
> - SET_FIELD(&vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86_pv.width);
> + SET_FIELD(vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86_pv.width);
>
> /* 64bit guests: Convert CR1 (guest pagetables) to mfn. */
> - if ( ctx->x86_pv.levels == 4 && (vcpu.x64.ctrlreg[1] & 1) )
> + if ( ctx->x86_pv.levels == 4 && (vcpu->x64.ctrlreg[1] & 1) )
> {
> - pfn = vcpu.x64.ctrlreg[1] >> PAGE_SHIFT;
> + pfn = vcpu->x64.ctrlreg[1] >> PAGE_SHIFT;
>
> if ( pfn > ctx->x86_pv.max_pfn )
> {
> @@ -558,13 +559,7 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context *ctx, struct xc_sr_reco
> goto err;
> }
>
> - vcpu.x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
> - }
> -
> - if ( xc_vcpu_setcontext(xch, ctx->domid, vhdr->vcpu_id, &vcpu) )
> - {
> - PERROR("Failed to set vcpu%u's basic info", vhdr->vcpu_id);
> - goto err;
> + vcpu->x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
> }
>
> rc = 0;
> @@ -881,6 +876,7 @@ static int x86_pv_localise_page(struct xc_sr_context *ctx, uint32_t type, void *
> static int x86_pv_setup(struct xc_sr_context *ctx)
> {
> xc_interface *xch = ctx->xch;
> + size_t vcpusz;
> int rc;
>
> if ( ctx->restore.guest_type != DHDR_TYPE_X86_PV )
> @@ -904,6 +900,9 @@ static int x86_pv_setup(struct xc_sr_context *ctx)
> if ( rc )
> return rc;
>
> + vcpusz = ctx->x86_pv.width == 8 ?
> + sizeof(ctx->x86_pv.vcpu->x64) : sizeof(ctx->x86_pv.vcpu->x32);
> + ctx->x86_pv.vcpu = calloc((ctx->dominfo.max_vcpu_id + 1) , vcpusz);
Extraneous brackets and space.
> return rc;
> }
>
> @@ -946,6 +945,29 @@ static int x86_pv_process_record(struct xc_sr_context *ctx, struct xc_sr_record
> }
> }
>
> +static int update_vcpu_context(struct xc_sr_context *ctx)
> +{
> + xc_interface *xch = ctx->xch;
> + vcpu_guest_context_any_t *vcpu;
> + uint32_t vcpu_id;
> + size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu->x64) : sizeof(vcpu->x32);
> +
> + for ( vcpu_id = 0; vcpu_id <= ctx->dominfo.max_vcpu_id; vcpu_id++ )
> + {
> + vcpu = (void *)ctx->x86_pv.vcpu + vcpu_id * vcpusz;
> + if ( !vcpu )
> + continue;
This check can never fail.
> +
> + if ( xc_vcpu_setcontext(xch, ctx->domid, vcpu_id, vcpu) )
There is a latent bug introduced here. If X86_PV_VCPU_BASIC records
have not been seen for all vcpus, this code will attempt to set a
context of a block of zeroes, and fail because of a bad cr3.
~Andrew
> + {
> + PERROR("Failed to set vcpu%u's basic info", vcpu_id);
> + return -1;
> + }
> + }
> +
> + return 0;
> +}
> +
> /*
> * restore_ops function. Pin the pagetables, rewrite the p2m and seed the
> * grant table.
> @@ -963,6 +985,10 @@ static int x86_pv_stream_complete(struct xc_sr_context *ctx)
> if ( rc )
> return rc;
>
> + rc = update_vcpu_context(ctx);
> + if ( rc )
> + return rc;
> +
> rc = xc_dom_gnttab_seed(xch, ctx->domid,
> ctx->restore.console_mfn,
> ctx->restore.xenstore_mfn,
> @@ -985,6 +1011,7 @@ static int x86_pv_cleanup(struct xc_sr_context *ctx)
> free(ctx->x86_pv.p2m);
> free(ctx->x86_pv.p2m_pfns);
> free(ctx->x86_pv.pfn_types);
> + free(ctx->x86_pv.vcpu);
>
> if ( ctx->x86_pv.m2p )
> munmap(ctx->x86_pv.m2p, ctx->x86_pv.nr_m2p_frames * PAGE_SIZE);
next prev parent reply other threads:[~2014-07-09 11:16 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-07-09 7:47 [RFC PATCH 0/3] Remus: add remus support for migration v2 Yang Hongyang
2014-07-09 7:47 ` [RFC PATCH 1/3] remus: add a bool var to indicate checkpointed stream Yang Hongyang
2014-07-09 9:45 ` Andrew Cooper
2014-07-09 9:53 ` Hongyang Yang
2014-07-09 7:47 ` [RFC PATCH 2/3] remus: implement remus checkpoint in v2 save Yang Hongyang
2014-07-09 10:53 ` Andrew Cooper
2014-07-10 3:25 ` Hongyang Yang
2014-07-10 8:49 ` Ian Campbell
2014-07-10 9:24 ` Andrew Cooper
2014-07-16 15:22 ` Shriram Rajagopalan
2014-07-16 15:38 ` Andrew Cooper
2014-07-16 16:02 ` Shriram Rajagopalan
2014-07-16 16:33 ` Andrew Cooper
2014-07-09 7:47 ` [RFC PATCH 3/3] remus: adjust x86 pv restore to support remus Yang Hongyang
2014-07-09 11:16 ` Andrew Cooper [this message]
2014-07-09 11:26 ` Andrew Cooper
2014-07-10 3:30 ` Hongyang Yang
2014-07-10 9:25 ` Andrew Cooper
2014-07-10 9:32 ` Hongyang Yang
2014-07-10 9:42 ` Andrew Cooper
2014-07-10 9:47 ` Hongyang Yang
2014-07-09 8:53 ` [RFC PATCH 0/3] Remus: add remus support for migration v2 Ian Campbell
2014-07-09 9:56 ` Hongyang Yang
2014-07-09 9:42 ` Andrew Cooper
2014-07-09 10:06 ` Hongyang Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=53BD2477.1070505@citrix.com \
--to=andrew.cooper3@citrix.com \
--cc=Ian.Campbell@citrix.com \
--cc=Ian.Jackson@eu.citrix.com \
--cc=rshriram@cs.ubc.ca \
--cc=xen-devel@lists.xen.org \
--cc=yanghy@cn.fujitsu.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).