All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4/8] HVM save restore: vcpu context support
@ 2007-01-11 14:10 Zhai, Edwin
  2007-01-11 17:38 ` Anthony Liguori
  2007-01-19 15:30 ` Petersson, Mats
  0 siblings, 2 replies; 7+ messages in thread
From: Zhai, Edwin @ 2007-01-11 14:10 UTC (permalink / raw)
  To: Ian Pratt, Keir Fraser; +Cc: xen-devel, edwin.zhai

[PATCH 4/8] HVM save restore: vcpu context support

Signed-off-by: Zhai Edwin <edwin.zhai@intel.com>

save/restore HVM vcpu context such as vmcs

diff -r ee20d1905bde xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/domain.c	Thu Jan 11 16:46:59 2007 +0800
@@ -573,6 +573,7 @@ int arch_set_info_guest(
     else
     {
         hvm_load_cpu_guest_regs(v, &v->arch.guest_context.user_regs);
+        hvm_load_cpu_context(v, &v->arch.guest_context.hvmcpu_ctxt);
     }
 
     if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
diff -r ee20d1905bde xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/domctl.c	Thu Jan 11 16:49:34 2007 +0800
@@ -322,8 +322,10 @@ void arch_get_info_guest(struct vcpu *v,
 
     if ( is_hvm_vcpu(v) )
     {
-        if ( !IS_COMPAT(v->domain) )
+        if ( !IS_COMPAT(v->domain) ) {
             hvm_store_cpu_guest_regs(v, &c.nat->user_regs, c.nat->ctrlreg);
+            hvm_save_cpu_context(v, &c.nat->hvmcpu_ctxt);
+        }
 #ifdef CONFIG_COMPAT
         else
         {
diff -r ee20d1905bde xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Jan 11 16:48:01 2007 +0800
@@ -429,6 +429,299 @@ static void vmx_store_cpu_guest_regs(
     vmx_vmcs_exit(v);
 }
 
+static int __get_instruction_length(void);
+int vmx_vmcs_save(struct vcpu *v, struct vmcs_data *c)
+{
+    unsigned long inst_len;
+
+    inst_len = __get_instruction_length();
+    c->eip = __vmread(GUEST_RIP);
+
+#ifdef HVM_DEBUG_SUSPEND
+    printk("vmx_vmcs_save: inst_len=0x%lx, eip=0x%"PRIx64".\n", 
+            inst_len, c->eip);
+#endif
+
+    c->esp = __vmread(GUEST_RSP);
+    c->eflags = __vmread(GUEST_RFLAGS);
+
+    c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
+    c->cr3 = v->arch.hvm_vmx.cpu_cr3;
+    c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
+
+#ifdef HVM_DEBUG_SUSPEND
+    printk("vmx_vmcs_save: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
+            c->cr3,
+            c->cr0,
+            c->cr4);
+#endif
+
+    c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
+    c->idtr_base = __vmread(GUEST_IDTR_BASE);
+
+    c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT);
+    c->gdtr_base = __vmread(GUEST_GDTR_BASE);
+
+    c->cs_sel = __vmread(GUEST_CS_SELECTOR);
+    c->cs_limit = __vmread(GUEST_CS_LIMIT);
+    c->cs_base = __vmread(GUEST_CS_BASE);
+    c->cs_arbytes = __vmread(GUEST_CS_AR_BYTES);
+
+    c->ds_sel = __vmread(GUEST_DS_SELECTOR);
+    c->ds_limit = __vmread(GUEST_DS_LIMIT);
+    c->ds_base = __vmread(GUEST_DS_BASE);
+    c->ds_arbytes = __vmread(GUEST_DS_AR_BYTES);
+
+    c->es_sel = __vmread(GUEST_ES_SELECTOR);
+    c->es_limit = __vmread(GUEST_ES_LIMIT);
+    c->es_base = __vmread(GUEST_ES_BASE);
+    c->es_arbytes = __vmread(GUEST_ES_AR_BYTES);
+
+    c->ss_sel = __vmread(GUEST_SS_SELECTOR);
+    c->ss_limit = __vmread(GUEST_SS_LIMIT);
+    c->ss_base = __vmread(GUEST_SS_BASE);
+    c->ss_arbytes = __vmread(GUEST_SS_AR_BYTES);
+
+    c->fs_sel = __vmread(GUEST_FS_SELECTOR);
+    c->fs_limit = __vmread(GUEST_FS_LIMIT);
+    c->fs_base = __vmread(GUEST_FS_BASE);
+    c->fs_arbytes = __vmread(GUEST_FS_AR_BYTES);
+
+    c->gs_sel = __vmread(GUEST_GS_SELECTOR);
+    c->gs_limit = __vmread(GUEST_GS_LIMIT);
+    c->gs_base = __vmread(GUEST_GS_BASE);
+    c->gs_arbytes = __vmread(GUEST_GS_AR_BYTES);
+
+    c->tr_sel = __vmread(GUEST_TR_SELECTOR);
+    c->tr_limit = __vmread(GUEST_TR_LIMIT);
+    c->tr_base = __vmread(GUEST_TR_BASE);
+    c->tr_arbytes = __vmread(GUEST_TR_AR_BYTES);
+
+    c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR);
+    c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT);
+    c->ldtr_base = __vmread(GUEST_LDTR_BASE);
+    c->ldtr_arbytes = __vmread(GUEST_LDTR_AR_BYTES);
+
+    c->sysenter_cs = __vmread(GUEST_SYSENTER_CS);
+    c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP);
+    c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP);
+
+    return 1;
+}
+
+int vmx_vmcs_restore(struct vcpu *v, struct vmcs_data *c)
+{
+    unsigned long mfn, old_base_mfn;
+
+    vmx_vmcs_enter(v);
+
+    __vmwrite(GUEST_RIP, c->eip);
+    __vmwrite(GUEST_RSP, c->esp);
+    __vmwrite(GUEST_RFLAGS, c->eflags);
+
+    v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
+    __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
+
+#ifdef HVM_DEBUG_SUSPEND
+    printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
+            c->cr3,
+            c->cr0,
+            c->cr4);
+#endif
+
+    if (!vmx_paging_enabled(v)) {
+        printk("vmx_vmcs_restore: paging not enabled.");
+        goto skip_cr3;
+    }
+
+    if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
+        /*
+         * This is simple TLB flush, implying the guest has
+         * removed some translation or changed page attributes.
+         * We simply invalidate the shadow.
+         */
+        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+        if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
+            goto bad_cr3;
+        }
+    } else {
+        /*
+         * If different, make a shadow. Check if the PDBR is valid
+         * first.
+         */
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3);
+        /* current!=vcpu as not called by arch_vmx_do_launch */
+        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+        if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
+            goto bad_cr3;
+        }
+        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+        v->arch.guest_table = pagetable_from_pfn(mfn);
+        if (old_base_mfn)
+             put_page(mfn_to_page(old_base_mfn));
+        /*
+         * arch.shadow_table should now hold the next CR3 for shadow
+         */
+        v->arch.hvm_vmx.cpu_cr3 = c->cr3;
+    }
+
+ skip_cr3:
+#if defined(__x86_64__)
+    if (vmx_long_mode_enabled(v)) {
+        unsigned long vm_entry_value;
+        vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
+        vm_entry_value |= VM_ENTRY_IA32E_MODE;
+        __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
+    }
+#endif
+
+    __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+    v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
+    __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
+
+    __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
+    __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
+
+    __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
+    __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
+
+    __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
+    __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
+    __vmwrite(GUEST_CS_BASE, c->cs_base);
+    __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes);
+
+    __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
+    __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
+    __vmwrite(GUEST_DS_BASE, c->ds_base);
+    __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes);
+
+    __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
+    __vmwrite(GUEST_ES_LIMIT, c->es_limit);
+    __vmwrite(GUEST_ES_BASE, c->es_base);
+    __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes);
+
+    __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
+    __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
+    __vmwrite(GUEST_SS_BASE, c->ss_base);
+    __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes);
+
+    __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
+    __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
+    __vmwrite(GUEST_FS_BASE, c->fs_base);
+    __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes);
+
+    __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
+    __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
+    __vmwrite(GUEST_GS_BASE, c->gs_base);
+    __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes);
+
+    __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
+    __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
+    __vmwrite(GUEST_TR_BASE, c->tr_base);
+    __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes);
+
+    __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
+    __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
+    __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
+    __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes);
+
+    __vmwrite(GUEST_SYSENTER_CS, c->sysenter_cs);
+    __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
+    __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
+
+    vmx_vmcs_exit(v);
+
+    shadow_update_paging_modes(v);
+    return 0;
+
+ bad_cr3:
+    gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"", c->cr3);
+    vmx_vmcs_exit(v);
+    return -EINVAL;
+}
+
+#ifdef HVM_DEBUG_SUSPEND
+static void dump_msr_state(struct vmx_msr_state *m)
+{
+    int i = 0;
+    printk("**** msr state ****\n");
+    printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
+    for (i = 0; i < VMX_MSR_COUNT; i++)
+        printk("0x%lx,", m->msrs[i]);
+    printk("\n");
+}
+#else
+static void dump_msr_state(struct vmx_msr_state *m)
+{
+}
+#endif
+        
+void vmx_save_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    struct vmcs_data *data = &ctxt->data;
+    struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
+    unsigned long guest_flags = guest_state->flags;
+    int i = 0;
+
+    data->shadow_gs = guest_state->shadow_gs;
+    data->vmxassist_enabled = v->arch.hvm_vmx.vmxassist_enabled;
+    /* save msrs */
+    data->flags = guest_flags;
+    for (i = 0; i < VMX_MSR_COUNT; i++)
+        data->msr_items[i] = guest_state->msrs[i];
+
+    dump_msr_state(guest_state);
+}
+
+void vmx_load_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    int i = 0;
+    struct vmcs_data *data = &ctxt->data;
+    struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
+
+    /* restore msrs */
+    guest_state->flags = data->flags;
+    for (i = 0; i < VMX_MSR_COUNT; i++)
+        guest_state->msrs[i] = data->msr_items[i];
+
+    guest_state->shadow_gs = data->shadow_gs;
+
+    /*XXX:no need to restore msrs, current!=vcpu as not called by arch_vmx_do_launch */
+/*    vmx_restore_guest_msrs(v);*/
+
+    v->arch.hvm_vmx.vmxassist_enabled = data->vmxassist_enabled;
+
+    dump_msr_state(guest_state);
+}
+
+void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    struct vmcs_data *data = &ctxt->data;
+
+    vmx_save_cpu_state(v, ctxt);
+
+    vmx_vmcs_enter(v);
+
+    vmx_vmcs_save(v, data);
+
+    vmx_vmcs_exit(v);
+
+}
+
+void vmx_load_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    vmx_load_cpu_state(v, ctxt);
+
+    if (vmx_vmcs_restore(v, &ctxt->data)) {
+        printk("vmx_vmcs restore failed!\n");
+        domain_crash(v->domain);
+    }
+
+    /* only load vmcs once */
+    ctxt->valid = 0;
+
+}
+
 /*
  * The VMX spec (section 4.3.1.2, Checks on Guest Segment
  * Registers) says that virtual-8086 mode guests' segment
@@ -750,6 +1043,9 @@ static void vmx_setup_hvm_funcs(void)
 
     hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
     hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
+
+    hvm_funcs.save_cpu_ctxt = vmx_save_vmcs_ctxt;
+    hvm_funcs.load_cpu_ctxt = vmx_load_vmcs_ctxt;
 
     hvm_funcs.paging_enabled = vmx_paging_enabled;
     hvm_funcs.long_mode_enabled = vmx_long_mode_enabled;
diff -r ee20d1905bde xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h	Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/asm-x86/hvm/hvm.h	Thu Jan 11 16:48:01 2007 +0800
@@ -79,6 +79,13 @@ struct hvm_function_table {
         struct vcpu *v, struct cpu_user_regs *r, unsigned long *crs);
     void (*load_cpu_guest_regs)(
         struct vcpu *v, struct cpu_user_regs *r);
+
+    /* save and load hvm guest cpu context for save/restore */
+    void (*save_cpu_ctxt)(
+        struct vcpu *v, struct hvmcpu_context *ctxt);
+    void (*load_cpu_ctxt)(
+        struct vcpu *v, struct hvmcpu_context *ctxt);
+
     /*
      * Examine specifics of the guest state:
      * 1) determine whether paging is enabled,
@@ -157,6 +164,35 @@ hvm_load_cpu_guest_regs(struct vcpu *v, 
     hvm_funcs.load_cpu_guest_regs(v, r);
 }
 
+void hvm_set_guest_time(struct vcpu *v, u64 gtime);
+u64 hvm_get_guest_time(struct vcpu *v);
+
+static inline void
+hvm_save_cpu_context(
+        struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    hvm_funcs.save_cpu_ctxt(v, ctxt);
+
+    /* save guest time */
+    ctxt->gtime = hvm_get_guest_time(v);
+
+    /* set valid flag to recover whole vmcs when restore */
+    ctxt->valid = 0x55885588;
+}
+
+static inline void
+hvm_load_cpu_context(
+        struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    if ( ctxt->valid != 0x55885588)
+        return;
+
+    hvm_funcs.load_cpu_ctxt(v, ctxt);
+
+    /* restore guest time*/
+    hvm_set_guest_time(v, ctxt->gtime);
+}
+
 static inline int
 hvm_paging_enabled(struct vcpu *v)
 {
@@ -222,8 +258,6 @@ void hvm_cpuid(unsigned int input, unsig
 void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                                    unsigned int *ecx, unsigned int *edx);
 void hvm_stts(struct vcpu *v);
-void hvm_set_guest_time(struct vcpu *v, u64 gtime);
-u64 hvm_get_guest_time(struct vcpu *v);
 void hvm_migrate_timers(struct vcpu *v);
 void hvm_do_resume(struct vcpu *v);
 
diff -r ee20d1905bde xen/include/public/arch-x86/xen.h
--- a/xen/include/public/arch-x86/xen.h	Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/public/arch-x86/xen.h	Thu Jan 11 16:51:03 2007 +0800
@@ -107,6 +107,70 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 
 typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+/*
+ * World vmcs state
+ */
+struct vmcs_data {
+    uint64_t  eip;        /* execution pointer */
+    uint64_t  esp;        /* stack pointer */
+    uint64_t  eflags;     /* flags register */
+    uint64_t  cr0;
+    uint64_t  cr3;        /* page table directory */
+    uint64_t  cr4;
+    uint32_t  idtr_limit; /* idt */
+    uint64_t  idtr_base;
+    uint32_t  gdtr_limit; /* gdt */
+    uint64_t  gdtr_base;
+    uint32_t  cs_sel;     /* cs selector */
+    uint32_t  cs_limit;
+    uint64_t  cs_base;
+    uint32_t  cs_arbytes;
+    uint32_t  ds_sel;     /* ds selector */
+    uint32_t  ds_limit;
+    uint64_t  ds_base;
+    uint32_t  ds_arbytes;
+    uint32_t  es_sel;     /* es selector */
+    uint32_t  es_limit;
+    uint64_t  es_base;
+    uint32_t  es_arbytes;
+    uint32_t  ss_sel;     /* ss selector */
+    uint32_t  ss_limit;
+    uint64_t  ss_base;
+    uint32_t  ss_arbytes;
+    uint32_t  fs_sel;     /* fs selector */
+    uint32_t  fs_limit;
+    uint64_t  fs_base;
+    uint32_t  fs_arbytes;
+    uint32_t  gs_sel;     /* gs selector */
+    uint32_t  gs_limit;
+    uint64_t  gs_base;
+    uint32_t  gs_arbytes;
+    uint32_t  tr_sel;     /* task selector */
+    uint32_t  tr_limit;
+    uint64_t  tr_base;
+    uint32_t  tr_arbytes;
+    uint32_t  ldtr_sel;   /* ldtr selector */
+    uint32_t  ldtr_limit;
+    uint64_t  ldtr_base;
+    uint32_t  ldtr_arbytes;
+    uint32_t  sysenter_cs;
+    uint64_t  sysenter_esp;
+    uint64_t  sysenter_eip;
+    /* msr for em64t */
+    uint64_t shadow_gs;
+    uint64_t flags;
+    /* same size as VMX_MSR_COUNT */
+    uint64_t msr_items[6];
+    uint64_t vmxassist_enabled;
+};
+typedef struct vmcs_data vmcs_data_t;
+
+struct hvmcpu_context {
+    uint32_t valid;
+    struct vmcs_data data;
+    uint64_t gtime;
+};
 
 /*
  * The following is all CPU context. Note that the fpu_ctxt block is filled 
@@ -154,6 +218,7 @@ struct vcpu_guest_context {
 #endif
 #endif
     unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
+    struct hvmcpu_context hvmcpu_ctxt;      /* whole vmcs region */
 #ifdef __x86_64__
     /* Segment base addresses. */
     uint64_t      fs_base;
diff -r ee20d1905bde xen/include/xlat.lst
--- a/xen/include/xlat.lst	Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/xlat.lst	Thu Jan 11 16:51:35 2007 +0800
@@ -8,6 +8,8 @@
 ?	vcpu_time_info			xen.h
 !	cpu_user_regs			arch-x86/xen-@arch@.h
 !	trap_info			arch-x86/xen.h
+!	hvmcpu_context			arch-x86/xen.h
+!	vmcs_data			arch-x86/xen.h
 !	vcpu_guest_context		arch-x86/xen.h
 ?	acm_getdecision			acm_ops.h
 !	ctl_cpumap			domctl.h

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/8] HVM save restore: vcpu context support
  2007-01-11 14:10 [PATCH 4/8] HVM save restore: vcpu context support Zhai, Edwin
@ 2007-01-11 17:38 ` Anthony Liguori
  2007-01-12  1:59   ` Zhai, Edwin
  2007-01-19 15:30 ` Petersson, Mats
  1 sibling, 1 reply; 7+ messages in thread
From: Anthony Liguori @ 2007-01-11 17:38 UTC (permalink / raw)
  To: Zhai, Edwin; +Cc: xen-devel

Zhai, Edwin wrote:
> [PATCH 4/8] HVM save restore: vcpu context support
> 
> Signed-off-by: Zhai Edwin <edwin.zhai@intel.com>
> 
>  typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
> +
> +/*
> + * World vmcs state
> + */
> +struct vmcs_data {
> +    uint64_t  eip;        /* execution pointer */
> +    uint64_t  esp;        /* stack pointer */
> +    uint64_t  eflags;     /* flags register */
> +    uint64_t  cr0;
> +    uint64_t  cr3;        /* page table directory */
> +    uint64_t  cr4;
> +    uint32_t  idtr_limit; /* idt */
> +    uint64_t  idtr_base;

If I read the code correctly, vmcs_data ends up becoming part of:

+
+#define HVM_CTXT_SIZE        6144
+typedef struct hvm_domain_context {
+    uint32_t cur;
+    uint32_t size;
+    uint8_t data[HVM_CTXT_SIZE];
+} hvm_domain_context_t;
+DEFINE_XEN_GUEST_HANDLE(hvm_domain_context_t);

Which then gets saved to disk.  My first concern would be that struct 
vmcs_data is not padding safe.  How idtr_limit gets padding may change 
in future versions of GCC which would break the save format.

The second is how HVM_CTXT_SIZE gets defined.  Not sure there's a great 
way to address though (although the first issue is definitely fixable).

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/8] HVM save restore: vcpu context support
  2007-01-11 17:38 ` Anthony Liguori
@ 2007-01-12  1:59   ` Zhai, Edwin
  0 siblings, 0 replies; 7+ messages in thread
From: Zhai, Edwin @ 2007-01-12  1:59 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: xen-devel, Zhai, Edwin

On Thu, Jan 11, 2007 at 11:38:34AM -0600, Anthony Liguori wrote:
> Zhai, Edwin wrote:
> >[PATCH 4/8] HVM save restore: vcpu context support
> >
> >Signed-off-by: Zhai Edwin <edwin.zhai@intel.com>
> >
> > typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
> >+
> >+/*
> >+ * World vmcs state
> >+ */
> >+struct vmcs_data {
> >+    uint64_t  eip;        /* execution pointer */
> >+    uint64_t  esp;        /* stack pointer */
> >+    uint64_t  eflags;     /* flags register */
> >+    uint64_t  cr0;
> >+    uint64_t  cr3;        /* page table directory */
> >+    uint64_t  cr4;
> >+    uint32_t  idtr_limit; /* idt */
> >+    uint64_t  idtr_base;
> 
> If I read the code correctly, vmcs_data ends up becoming part of:
> 
> +
> +#define HVM_CTXT_SIZE        6144
> +typedef struct hvm_domain_context {
> +    uint32_t cur;
> +    uint32_t size;
> +    uint8_t data[HVM_CTXT_SIZE];
> +} hvm_domain_context_t;
> +DEFINE_XEN_GUEST_HANDLE(hvm_domain_context_t);

vmcs_data ends up as part of vcpu_guest_context. hvm_domain_context is a long 
buffer for saving dev state in HV.

> 
> Which then gets saved to disk.  My first concern would be that struct 
> vmcs_data is not padding safe.  How idtr_limit gets padding may change 
> in future versions of GCC which would break the save format.
> 
> The second is how HVM_CTXT_SIZE gets defined.  Not sure there's a great 

i just define a big enough buffer for hvm context and handle overflow.

the data true length dynamically increase when more vcpus come out, so seems
hard to let control panel know it. 

> way to address though (although the first issue is definitely fixable).
> 
> Regards,
> 
> Anthony Liguori
> 

-- 
best rgds,
edwin

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH 4/8] HVM save restore: vcpu context support
  2007-01-11 14:10 [PATCH 4/8] HVM save restore: vcpu context support Zhai, Edwin
  2007-01-11 17:38 ` Anthony Liguori
@ 2007-01-19 15:30 ` Petersson, Mats
  2007-01-19 15:39   ` Tim Deegan
  1 sibling, 1 reply; 7+ messages in thread
From: Petersson, Mats @ 2007-01-19 15:30 UTC (permalink / raw)
  To: Zhai, Edwin, Ian Pratt, Keir Fraser; +Cc: xen-devel

I know this is late, because the patch has alreyad gone in, and
appologies if I've misunderstood something here... 

[snip

> +++ b/xen/include/public/arch-x86/xen.h	Thu Jan 11 
Xen.h is a generic file
> 16:51:03 2007 +0800
> @@ -107,6 +107,70 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t);
>  DEFINE_XEN_GUEST_HANDLE(trap_info_t);
>  
>  typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
> +
> +/*
> + * World vmcs state
> + */
> +struct vmcs_data {
The name vmcs data indicate that it's Intel architecture specific to me.


Even if we agree on a format that supports both AMD and Intel, shouldn't
this sort of info be in some HVM specific file? [I haven't yet looked at
the differences that may exisit between AMD and Intel]. 


> +    uint64_t  eip;        /* execution pointer */
> +    uint64_t  esp;        /* stack pointer */
> +    uint64_t  eflags;     /* flags register */
> +    uint64_t  cr0;
> +    uint64_t  cr3;        /* page table directory */
> +    uint64_t  cr4;
> +    uint32_t  idtr_limit; /* idt */
> +    uint64_t  idtr_base;
> +    uint32_t  gdtr_limit; /* gdt */
> +    uint64_t  gdtr_base;
> +    uint32_t  cs_sel;     /* cs selector */
> +    uint32_t  cs_limit;
> +    uint64_t  cs_base;
> +    uint32_t  cs_arbytes;
Defining a struct for the segment info that is then used for all
segments would make this repeat a whole lot less... 
> +    uint32_t  ds_sel;     /* ds selector */
> +    uint32_t  ds_limit;
> +    uint64_t  ds_base;
> +    uint32_t  ds_arbytes;
> +    uint32_t  es_sel;     /* es selector */
> +    uint32_t  es_limit;
> +    uint64_t  es_base;
> +    uint32_t  es_arbytes;
> +    uint32_t  ss_sel;     /* ss selector */
> +    uint32_t  ss_limit;
> +    uint64_t  ss_base;
> +    uint32_t  ss_arbytes;
> +    uint32_t  fs_sel;     /* fs selector */
> +    uint32_t  fs_limit;
> +    uint64_t  fs_base;
> +    uint32_t  fs_arbytes;
> +    uint32_t  gs_sel;     /* gs selector */
> +    uint32_t  gs_limit;
> +    uint64_t  gs_base;
> +    uint32_t  gs_arbytes;
> +    uint32_t  tr_sel;     /* task selector */
> +    uint32_t  tr_limit;
> +    uint64_t  tr_base;
> +    uint32_t  tr_arbytes;
> +    uint32_t  ldtr_sel;   /* ldtr selector */
> +    uint32_t  ldtr_limit;
> +    uint64_t  ldtr_base;
> +    uint32_t  ldtr_arbytes;
> +    uint32_t  sysenter_cs;
> +    uint64_t  sysenter_esp;
> +    uint64_t  sysenter_eip;
> +    /* msr for em64t */
> +    uint64_t shadow_gs;
> +    uint64_t flags;
> +    /* same size as VMX_MSR_COUNT */
> +    uint64_t msr_items[6];
> +    uint64_t vmxassist_enabled;
> +};
> +typedef struct vmcs_data vmcs_data_t;

Is vmcs_data_t used anywhere?
> +
> +struct hvmcpu_context {
> +    uint32_t valid;
> +    struct vmcs_data data;
> +    uint64_t gtime;
> +};

Surely this struct should also be in a HVM directory, as it's specific
to HVM domains?

--
Mats
>  
>  /*
>   * The following is all CPU context. Note that the fpu_ctxt 
> block is filled 
> @@ -154,6 +218,7 @@ struct vcpu_guest_context {
>  #endif
>  #endif
>      unsigned long vm_assist;                /* VMASST_TYPE_* 
> bitmap */
> +    struct hvmcpu_context hvmcpu_ctxt;      /* whole vmcs region */
>  #ifdef __x86_64__
>      /* Segment base addresses. */
>      uint64_t      fs_base;
> diff -r ee20d1905bde xen/include/xlat.lst
> --- a/xen/include/xlat.lst	Thu Jan 11 16:40:55 2007 +0800
> +++ b/xen/include/xlat.lst	Thu Jan 11 16:51:35 2007 +0800
> @@ -8,6 +8,8 @@
>  ?	vcpu_time_info			xen.h
>  !	cpu_user_regs			arch-x86/xen-@arch@.h
>  !	trap_info			arch-x86/xen.h
> +!	hvmcpu_context			arch-x86/xen.h
> +!	vmcs_data			arch-x86/xen.h
>  !	vcpu_guest_context		arch-x86/xen.h
>  ?	acm_getdecision			acm_ops.h
>  !	ctl_cpumap			domctl.h
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
> 
> 
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/8] HVM save restore: vcpu context support
  2007-01-19 15:30 ` Petersson, Mats
@ 2007-01-19 15:39   ` Tim Deegan
  2007-01-19 15:45     ` Petersson, Mats
  0 siblings, 1 reply; 7+ messages in thread
From: Tim Deegan @ 2007-01-19 15:39 UTC (permalink / raw)
  To: Petersson, Mats; +Cc: Ian Pratt, xen-devel, Zhai, Edwin

Hi, 

At 16:30 +0100 on 19 Jan (1169224231), Petersson, Mats wrote:
> The name vmcs data indicate that it's Intel architecture specific to me.
> 
> Even if we agree on a format that supports both AMD and Intel, shouldn't
> this sort of info be in some HVM specific file? [I haven't yet looked at
> the differences that may exisit between AMD and Intel]. 

I'll be committing some changes to the HVM save-restore in the next few
days.  This structure will be renamed and pulled out into a public
hvm-specific file (and reorganised quite a bit to make fields be aligned
nicely).  I hope that any AMD/Intel differences can be folded into
this structure -- can you let me know what fields you'll need?
 
Cheers,

Tim.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH 4/8] HVM save restore: vcpu context support
  2007-01-19 15:39   ` Tim Deegan
@ 2007-01-19 15:45     ` Petersson, Mats
  2007-01-19 15:54       ` Keir Fraser
  0 siblings, 1 reply; 7+ messages in thread
From: Petersson, Mats @ 2007-01-19 15:45 UTC (permalink / raw)
  To: Tim Deegan; +Cc: Ian Pratt, xen-devel, Zhai, Edwin

 

> -----Original Message-----
> From: Tim Deegan [mailto:Tim.Deegan@xensource.com] 
> Sent: 19 January 2007 15:39
> To: Petersson, Mats
> Cc: Zhai, Edwin; Ian Pratt; Keir Fraser; xen-devel@lists.xensource.com
> Subject: Re: [Xen-devel] [PATCH 4/8] HVM save restore: vcpu 
> context support
> 
> Hi, 
> 
> At 16:30 +0100 on 19 Jan (1169224231), Petersson, Mats wrote:
> > The name vmcs data indicate that it's Intel architecture 
> specific to me.
> > 
> > Even if we agree on a format that supports both AMD and 
> Intel, shouldn't
> > this sort of info be in some HVM specific file? [I haven't 
> yet looked at
> > the differences that may exisit between AMD and Intel]. 
> 
> I'll be committing some changes to the HVM save-restore in 
> the next few
> days.  This structure will be renamed and pulled out into a public
> hvm-specific file (and reorganised quite a bit to make fields 
> be aligned
> nicely).  I hope that any AMD/Intel differences can be folded into
> this structure -- can you let me know what fields you'll need?

Yes, of course. So I'll just add my fields to that struct if I need any
new ones, and you'll rename it later, is that the plan?

Of course, if you beat me to it with updating it, I'll just move along
with that. 

I only started looking at this a couple of hours ago... 

Is this supposed to be complete, or are there more patches coming?

--
Mats
>  
> Cheers,
> 
> Tim.
> 
> 
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/8] HVM save restore: vcpu context support
  2007-01-19 15:45     ` Petersson, Mats
@ 2007-01-19 15:54       ` Keir Fraser
  0 siblings, 0 replies; 7+ messages in thread
From: Keir Fraser @ 2007-01-19 15:54 UTC (permalink / raw)
  To: Petersson, Mats, Tim Deegan; +Cc: Ian Pratt, xen-devel, Zhai, Edwin




On 19/1/07 15:45, "Petersson, Mats" <Mats.Petersson@amd.com> wrote:

> Yes, of course. So I'll just add my fields to that struct if I need any
> new ones, and you'll rename it later, is that the plan?
> 
> Of course, if you beat me to it with updating it, I'll just move along
> with that. 
> 
> I only started looking at this a couple of hours ago...
> 
> Is this supposed to be complete, or are there more patches coming?

This code is very much in flux. I'm certain there'll be more changes beyond
Tim's initial large-scale cleanup. The initial effort is to get the
interfaces looking along the right lines so we can all pull in the same
direction.

 -- Keir

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2007-01-19 15:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-01-11 14:10 [PATCH 4/8] HVM save restore: vcpu context support Zhai, Edwin
2007-01-11 17:38 ` Anthony Liguori
2007-01-12  1:59   ` Zhai, Edwin
2007-01-19 15:30 ` Petersson, Mats
2007-01-19 15:39   ` Tim Deegan
2007-01-19 15:45     ` Petersson, Mats
2007-01-19 15:54       ` Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.