All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] TSC scaling for live migration between platforms with different TSC frequecies
@ 2009-06-18  2:56 Zhang, Xiantao
  2009-06-18  7:37 ` [PATCH] TSC scaling for live migration betweenplatforms " Jan Beulich
                   ` (4 more replies)
  0 siblings, 5 replies; 35+ messages in thread
From: Zhang, Xiantao @ 2009-06-18  2:56 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel@lists.xensource.com

[-- Attachment #1: Type: text/plain, Size: 790 bytes --]

Hi, Keir

    This patchset enables software TSC scaling for live migration between platforms with different TSC frequencies.  Once the target host's frequency is found to differ from the source host's, the hypervisor will trap and emulate all of the guest's rdtsc instructions at the frequency the guest expects.
    If the hardware's TSC frequency differs from the guest's expected frequency, the guest may behave abnormally, e.g. incorrect wallclock, soft lockups, or even hangs in some cases.  Therefore, this patchset is necessary to avoid such issues.

PATCH 0001-- Save guest's preferred TSC in image for save/restore and migration
PATCH 0002-- Move muldiv64 out as a library function.
PATCH 0003-- Scaling host TSC frequency patch.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Xiantao

[-- Attachment #2: 0001-save_tsc_frequency.patch --]
[-- Type: application/octet-stream, Size: 3964 bytes --]

# HG changeset patch
# User root@localhost.localdomain
# Date 1245206466 14400
# Node ID 4015e09394c1857a6e68e6d6909d11cf5ecba241
# Parent  f8187a343ad2bdbfe3166d7ee7e3d55a9f157fdc
save/restore : Save guest's preferred TSC frequency in image

For save/restore or live migration between two platforms with different TSC
frequencies, the guest's preferred TSC frequency is required to calculate the guest's TSC after restore, so save it in the image header.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>

diff -r f8187a343ad2 -r 4015e09394c1 xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/arch/x86/hvm/i8254.c	Tue Jun 16 22:41:06 2009 -0400
@@ -481,8 +481,6 @@ void pit_init(struct vcpu *v, unsigned l
     register_portio_handler(v->domain, PIT_BASE, 4, handle_pit_io);
     register_portio_handler(v->domain, 0x61, 1, handle_speaker_io);
 
-    ticks_per_sec(v) = cpu_khz * (int64_t)1000;
-
     pit_reset(v->domain);
 }
 
diff -r f8187a343ad2 -r 4015e09394c1 xen/arch/x86/hvm/save.c
--- a/xen/arch/x86/hvm/save.c	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/arch/x86/hvm/save.c	Tue Jun 16 22:41:06 2009 -0400
@@ -32,7 +32,8 @@ void arch_hvm_save(struct domain *d, str
     cpuid(1, &eax, &ebx, &ecx, &edx);
     hdr->cpuid = eax;
 
-    hdr->pad0 = 0;
+    /* Save guest's preferred TSC. */
+    hdr->gtsc_khz = d->arch.hvm_domain.gtsc_khz;
 }
 
 int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
@@ -59,6 +60,9 @@ int arch_hvm_load(struct domain *d, stru
         gdprintk(XENLOG_WARNING, "HVM restore: saved CPUID (%#"PRIx32") "
                "does not match host (%#"PRIx32").\n", hdr->cpuid, eax);
 
+    /* Restore guest's preferred TSC frequency. */
+    d->arch.hvm_domain.gtsc_khz = hdr->gtsc_khz;
+
     /* VGA state is not saved/restored, so we nobble the cache. */
     d->arch.hvm_domain.stdvga.cache = 0;
 
diff -r f8187a343ad2 -r 4015e09394c1 xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/arch/x86/hvm/vpt.c	Tue Jun 16 22:41:06 2009 -0400
@@ -32,6 +32,8 @@ void hvm_init_guest_time(struct domain *
     spin_lock_init(&pl->pl_time_lock);
     pl->stime_offset = -(u64)get_s_time();
     pl->last_guest_time = 0;
+
+    d->arch.hvm_domain.gtsc_khz = cpu_khz;
 }
 
 u64 hvm_get_guest_time(struct vcpu *v)
diff -r f8187a343ad2 -r 4015e09394c1 xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/include/asm-x86/hvm/domain.h	Tue Jun 16 22:41:06 2009 -0400
@@ -44,7 +44,8 @@ struct hvm_domain {
     struct hvm_ioreq_page  ioreq;
     struct hvm_ioreq_page  buf_ioreq;
 
-    s64                    tsc_frequency;
+    uint32_t               gtsc_khz; /* kHz */
+    uint32_t               pad0;
     struct pl_time         pl_time;
 
     struct hvm_io_handler  io_handler;
diff -r f8187a343ad2 -r 4015e09394c1 xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/include/asm-x86/hvm/vpt.h	Tue Jun 16 22:41:06 2009 -0400
@@ -136,8 +136,6 @@ struct pl_time {    /* platform time */
     spinlock_t pl_time_lock;
 };
 
-#define ticks_per_sec(v) (v->domain->arch.hvm_domain.tsc_frequency)
-
 void pt_save_timer(struct vcpu *v);
 void pt_restore_timer(struct vcpu *v);
 void pt_update_irq(struct vcpu *v);
diff -r f8187a343ad2 -r 4015e09394c1 xen/include/public/arch-x86/hvm/save.h
--- a/xen/include/public/arch-x86/hvm/save.h	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/include/public/arch-x86/hvm/save.h	Tue Jun 16 22:41:06 2009 -0400
@@ -38,7 +38,7 @@ struct hvm_save_header {
     uint32_t version;           /* File format version */
     uint64_t changeset;         /* Version of Xen that saved this file */
     uint32_t cpuid;             /* CPUID[0x01][%eax] on the saving machine */
-    uint32_t pad0;
+    uint32_t gtsc_khz;        /* Guest's TSC frequency in kHz */
 };
 
 DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);

[-- Attachment #3: 0002-move_multidiv64_out.patch --]
[-- Type: application/octet-stream, Size: 3360 bytes --]

# HG changeset patch
# User root@localhost.localdomain
# Date 1245207897 14400
# Node ID 09f3065668f20ebc1d6424d3c051c352baeba618
# Parent  4015e09394c1857a6e68e6d6909d11cf5ecba241
Move muldiv64 out and make it a public function.

muldiv64 is used to calculate u64*u32/u32, and we
will use it for TSC scaling.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>

diff -r 4015e09394c1 -r 09f3065668f2 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile	Tue Jun 16 22:41:06 2009 -0400
+++ b/xen/arch/x86/Makefile	Tue Jun 16 23:04:57 2009 -0400
@@ -53,6 +53,7 @@ obj-y += crash.o
 obj-y += crash.o
 obj-y += tboot.o
 obj-y += hpet.o
+obj-y += lib.o
 obj-y += bzimage.o
 
 obj-$(crash_debug) += gdbstub.o
diff -r 4015e09394c1 -r 09f3065668f2 xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c	Tue Jun 16 22:41:06 2009 -0400
+++ b/xen/arch/x86/hvm/i8254.c	Tue Jun 16 23:04:57 2009 -0400
@@ -56,30 +56,6 @@ static int handle_speaker_io(
 
 #define get_guest_time(v) \
    (is_hvm_vcpu(v) ? hvm_get_guest_time(v) : (u64)get_s_time())
-
-/* Compute with 96 bit intermediate result: (a*b)/c */
-static uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
-{
-    union {
-        uint64_t ll;
-        struct {
-#ifdef WORDS_BIGENDIAN
-            uint32_t high, low;
-#else
-            uint32_t low, high;
-#endif            
-        } l;
-    } u, res;
-    uint64_t rl, rh;
-
-    u.ll = a;
-    rl = (uint64_t)u.l.low * (uint64_t)b;
-    rh = (uint64_t)u.l.high * (uint64_t)b;
-    rh += (rl >> 32);
-    res.l.high = rh / c;
-    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
-    return res.ll;
-}
 
 static int pit_get_count(PITState *pit, int channel)
 {
diff -r 4015e09394c1 -r 09f3065668f2 xen/arch/x86/lib.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/lib.c	Tue Jun 16 23:04:57 2009 -0400
@@ -0,0 +1,44 @@
+#include <xen/ctype.h>
+#include <xen/lib.h>
+#include <xen/types.h>
+#include <asm/byteorder.h>
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+
+#ifdef __x86_64__
+ uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+ {
+     __asm__ __volatile__ ("mul %%rdx;"
+                            "div %%rcx;"
+                            : "=a"(a)
+                            : "0"(a), "d"(b), "c"(c)
+                            );
+    return a;
+}
+
+#else
+
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+    union {
+        uint64_t ll;
+        struct {
+#ifdef WORDS_BIGENDIAN
+            uint32_t high, low;
+#else
+            uint32_t low, high;
+#endif            
+        } l;
+    } u, res;
+    uint64_t rl, rh;
+
+    u.ll = a;
+    rl = (uint64_t)u.l.low * (uint64_t)b;
+    rh = (uint64_t)u.l.high * (uint64_t)b;
+    rh += (rl >> 32);
+    res.l.high = rh / c;
+    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+    return res.ll;
+}
+
+#endif
diff -r 4015e09394c1 -r 09f3065668f2 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h	Tue Jun 16 22:41:06 2009 -0400
+++ b/xen/include/xen/lib.h	Tue Jun 16 23:04:57 2009 -0400
@@ -91,6 +91,8 @@ unsigned long long simple_strtoull(
 
 unsigned long long parse_size_and_unit(const char *s, const char **ps);
 
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c);
+
 #define TAINT_UNSAFE_SMP                (1<<0)
 #define TAINT_MACHINE_CHECK             (1<<1)
 #define TAINT_BAD_PAGE                  (1<<2)

[-- Attachment #4: 0003-scaling_guest_tsc.patch --]
[-- Type: application/octet-stream, Size: 5774 bytes --]

# HG changeset patch
# User root@localhost.localdomain
# Date 1245206943 14400
# Node ID 824c4af117d8c39511db49cbd8aef091e512f52c
# Parent  e3d6e4bdb6341fa5b86a3b6c28e2acb7e0d31e9a
Scale the guest's TSC when the target machine's frequency differs from the frequency the guest expects.

For now, trap and emulate each of the guest's rdtsc instructions; this may be optimized later.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>

diff -r 09f3065668f2 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Tue Jun 16 23:04:57 2009 -0400
+++ b/xen/arch/x86/hvm/hvm.c	Wed Jun 17 21:58:11 2009 -0400
@@ -139,26 +139,62 @@ uint8_t hvm_combine_hw_exceptions(uint8_
     return TRAP_double_fault;
 }
 
+void hvm_enable_rdtsc_exiting(struct domain *d)
+{
+    struct vcpu *v;
+
+    for_each_vcpu ( d, v ) {
+        if ( hvm_funcs.enable_rdtsc_exiting )
+            hvm_funcs.enable_rdtsc_exiting(v);
+    }
+}
+
+int hvm_gtsc_need_scale(struct domain *d)
+{
+    uint32_t gtsc_khz;
+
+    gtsc_khz = d->arch.hvm_domain.gtsc_khz / 1000;
+
+    if ( gtsc_khz && gtsc_khz != (uint32_t)cpu_khz / 1000 ) {
+        d->arch.hvm_domain.tsc_scaled = 1;
+        return 1;
+    }
+
+    d->arch.hvm_domain.tsc_scaled = 0;
+    return 0;
+}
+
+static u64 hvm_h2g_scale_tsc(struct vcpu *v, u64 host_tsc)
+{
+    u32 gtsc_khz, scaled_htsc = host_tsc;
+
+    if ( v->domain->arch.hvm_domain.tsc_scaled ) {
+        gtsc_khz = v->domain->arch.hvm_domain.gtsc_khz;
+        scaled_htsc = muldiv64(host_tsc, gtsc_khz, cpu_khz);
+    }
+
+    return scaled_htsc;
+}
+
 void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
 {
-    u64 host_tsc;
+    u64 host_tsc, scaled_htsc;
 
     rdtscll(host_tsc);
-
-    v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - host_tsc;
+    scaled_htsc = hvm_h2g_scale_tsc(v, host_tsc);
+
+    v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - scaled_htsc;
     hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
 }
 
 u64 hvm_get_guest_tsc(struct vcpu *v)
 {
-    u64 host_tsc;
-
-    if ( opt_softtsc )
-        host_tsc = hvm_get_guest_time(v);
-    else
-        rdtscll(host_tsc);
-
-    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
+    u64 host_tsc, scaled_htsc;
+
+    rdtscll(host_tsc);
+    scaled_htsc = hvm_h2g_scale_tsc(v, host_tsc);
+
+    return scaled_htsc + v->arch.hvm_vcpu.cache_tsc_offset;
 }
 
 void hvm_migrate_timers(struct vcpu *v)
diff -r 09f3065668f2 xen/arch/x86/hvm/save.c
--- a/xen/arch/x86/hvm/save.c	Tue Jun 16 23:04:57 2009 -0400
+++ b/xen/arch/x86/hvm/save.c	Wed Jun 17 21:56:19 2009 -0400
@@ -63,6 +63,14 @@ int arch_hvm_load(struct domain *d, stru
     /* Restore guest's preferred TSC frequency. */
     d->arch.hvm_domain.gtsc_khz = hdr->gtsc_khz;
 
+    if ( hdr->gtsc_khz && hvm_gtsc_need_scale(d) ) {
+        hvm_enable_rdtsc_exiting(d);
+
+        printk("Migrate to a platform with different freq:%ldMhz, "
+            "expected freq:%dMhz, enable rdtsc exiting!\n",
+                    cpu_khz / 1000, hdr->gtsc_khz / 1000);
+    }
+
     /* VGA state is not saved/restored, so we nobble the cache. */
     d->arch.hvm_domain.stdvga.cache = 0;
 
diff -r 09f3065668f2 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Tue Jun 16 23:04:57 2009 -0400
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Wed Jun 17 21:56:19 2009 -0400
@@ -946,6 +946,14 @@ static void vmx_set_tsc_offset(struct vc
     vmx_vmcs_exit(v);
 }
 
+static void vmx_enable_rdtsc_exiting(struct vcpu *v)
+{
+    vmx_vmcs_enter(v);
+    v->arch.hvm_vmx.exec_control |= CPU_BASED_RDTSC_EXITING;
+    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+     vmx_vmcs_exit(v);
+ }
+
 void do_nmi(struct cpu_user_regs *);
 
 static void vmx_init_hypercall_page(struct domain *d, void *hypercall_page)
@@ -1371,7 +1379,8 @@ static struct hvm_function_table vmx_fun
     .msr_write_intercept  = vmx_msr_write_intercept,
     .invlpg_intercept     = vmx_invlpg_intercept,
     .set_uc_mode          = vmx_set_uc_mode,
-    .set_info_guest       = vmx_set_info_guest
+    .set_info_guest       = vmx_set_info_guest,
+    .enable_rdtsc_exiting = vmx_enable_rdtsc_exiting
 };
 
 static unsigned long *vpid_bitmap;
diff -r 09f3065668f2 xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c	Tue Jun 16 23:04:57 2009 -0400
+++ b/xen/arch/x86/hvm/vpt.c	Wed Jun 17 21:56:19 2009 -0400
@@ -34,6 +34,7 @@ void hvm_init_guest_time(struct domain *
     pl->last_guest_time = 0;
 
     d->arch.hvm_domain.gtsc_khz = cpu_khz;
+    d->arch.hvm_domain.tsc_scaled = 0;
 }
 
 u64 hvm_get_guest_time(struct vcpu *v)
diff -r 09f3065668f2 xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h	Tue Jun 16 23:04:57 2009 -0400
+++ b/xen/include/asm-x86/hvm/domain.h	Wed Jun 17 21:56:19 2009 -0400
@@ -45,7 +45,7 @@ struct hvm_domain {
     struct hvm_ioreq_page  buf_ioreq;
 
     uint32_t               gtsc_khz; /* kHz */
-    uint32_t               pad0;
+    uint32_t               tsc_scaled;
     struct pl_time         pl_time;
 
     struct hvm_io_handler  io_handler;
diff -r 09f3065668f2 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h	Tue Jun 16 23:04:57 2009 -0400
+++ b/xen/include/asm-x86/hvm/hvm.h	Wed Jun 17 21:56:19 2009 -0400
@@ -129,6 +129,7 @@ struct hvm_function_table {
     void (*invlpg_intercept)(unsigned long vaddr);
     void (*set_uc_mode)(struct vcpu *v);
     void (*set_info_guest)(struct vcpu *v);
+    void (*enable_rdtsc_exiting)(struct vcpu *v);
 };
 
 extern struct hvm_function_table hvm_funcs;
@@ -282,6 +283,9 @@ int hvm_event_needs_reinjection(uint8_t 
 
 uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2);
 
+void hvm_enable_rdtsc_exiting(struct domain *d);
+int hvm_gtsc_need_scale(struct domain *d);
+
 static inline int hvm_cpu_up(void)
 {
     if ( hvm_funcs.cpu_up )

[-- Attachment #5: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2009-06-24  1:18 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-06-18  2:56 [PATCH] TSC scaling for live migration between platforms with different TSC frequecies Zhang, Xiantao
2009-06-18  7:37 ` [PATCH] TSC scaling for live migration betweenplatforms " Jan Beulich
2009-06-18  8:52   ` Zhang, Xiantao
2009-06-18 15:40     ` Dan Magenheimer
2009-06-19  1:48       ` Zhang, Xiantao
2009-06-18  9:02 ` [PATCH] TSC scaling for live migration between platforms " Tim Deegan
2009-06-18  9:46   ` Zhang, Xiantao
2009-06-18  9:56     ` Tim Deegan
2009-06-18  9:10 ` Patrick Colp
2009-06-18  9:27   ` Tim Deegan
2009-06-18  9:47     ` Zhang, Xiantao
2009-06-18 15:45       ` Dan Magenheimer
2009-06-18 16:04         ` Tim Deegan
2009-06-18 20:07           ` Dan Magenheimer
2009-06-18 10:56 ` Ian Pratt
2009-06-18 15:58   ` Dan Magenheimer
2009-06-18 16:45     ` John Levon
2009-06-18 20:27       ` Dan Magenheimer
2009-06-18 20:45         ` John Levon
2009-06-18 20:57           ` Dan Magenheimer
2009-06-18 21:00             ` John Levon
2009-06-18 22:27               ` Dan Magenheimer
2009-06-19 13:36                 ` John Levon
2009-06-19  1:21               ` Zhang, Xiantao
2009-06-19 13:54                 ` John Levon
2009-06-18 23:49       ` Dong, Eddie
2009-06-19  2:25       ` Zhang, Xiantao
2009-06-19 13:53         ` John Levon
2009-06-19 15:07           ` Zhang, Xiantao
2009-06-19 20:44             ` Dan Magenheimer
2009-06-22  1:38               ` Zhang, Xiantao
2009-06-19  1:34   ` Zhang, Xiantao
2009-06-22  5:14 ` Zhang, Xiantao
2009-06-23 10:18   ` Keir Fraser
2009-06-24  1:18     ` Zhang, Xiantao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.