* [PATCH 0/4] paravirt clock series.
@ 2008-05-08 11:48 Gerd Hoffmann
2008-05-08 11:48 ` [PATCH 1/4] Add helper functions for paravirtual clocksources Gerd Hoffmann
` (3 more replies)
0 siblings, 4 replies; 9+ messages in thread
From: Gerd Hoffmann @ 2008-05-08 11:48 UTC (permalink / raw)
To: virtualization, kvm-devel; +Cc: Gerd Hoffmann
Respin of the paravirt clock patch series.
On the host side the kvm paravirt clock is made compatible with the
xen clock.
On the guest side some xen code has been factored out into a separate
source file shared by both kvm and xen clock implementations.
This time it should work ok for kvm smp guests ;)
cheers,
Gerd
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/4] Add helper functions for paravirtual clocksources.
2008-05-08 11:48 [PATCH 0/4] paravirt clock series Gerd Hoffmann
@ 2008-05-08 11:48 ` Gerd Hoffmann
2008-05-08 11:48 ` [PATCH 2/4] Make xen use the generic paravirt clocksource code Gerd Hoffmann
` (2 subsequent siblings)
3 siblings, 0 replies; 9+ messages in thread
From: Gerd Hoffmann @ 2008-05-08 11:48 UTC (permalink / raw)
To: virtualization, kvm-devel; +Cc: Jeremy Fitzhardinge, Gerd Hoffmann
The helper functions are intended to be used by both xen and kvm
paravirtual clock sources. The following patches in this series put
them to use. They are based on the xen code.
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
arch/x86/Kconfig | 4 +
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/pvclock.c | 148 +++++++++++++++++++++++++++++++++++++++++++++
include/asm-x86/pvclock.h | 6 ++
4 files changed, 159 insertions(+), 0 deletions(-)
create mode 100644 arch/x86/kernel/pvclock.c
create mode 100644 include/asm-x86/pvclock.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 845ea2b..b12e188 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -415,6 +415,10 @@ config PARAVIRT
over full virtualization. However, when run without a hypervisor
the kernel is theoretically slower and slightly larger.
+config PARAVIRT_CLOCK
+ bool
+ default n
+
endif
config MEMTEST_BOOTPARAM
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index bbdacb3..5d8e086 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
ifdef CONFIG_INPUT_PCSPKR
obj-y += pcspeaker.o
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 0000000..33e526f
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,148 @@
+/* paravirtual clock -- common code used by kvm/xen
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <asm/pvclock.h>
+
+/*
+ * These are periodically updated
+ * xen: magic shared_info page
+ * kvm: gpa registered via msr
+ * and then copied here.
+ */
+struct pvclock_shadow_time {
+ u64 tsc_timestamp; /* TSC at last update of time vals. */
+ u64 system_timestamp; /* Time, in nanosecs, since boot. */
+ u32 tsc_to_nsec_mul;
+ int tsc_shift;
+ u32 version;
+};
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+ u64 product;
+#ifdef __i386__
+ u32 tmp1, tmp2;
+#endif
+
+ if (shift < 0)
+ delta >>= -shift;
+ else
+ delta <<= shift;
+
+#ifdef __i386__
+ __asm__ (
+ "mul %5 ; "
+ "mov %4,%%eax ; "
+ "mov %%edx,%4 ; "
+ "mul %5 ; "
+ "xor %5,%5 ; "
+ "add %4,%%eax ; "
+ "adc %5,%%edx ; "
+ : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+ : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif __x86_64__
+ __asm__ (
+ "mul %%rdx ; shrd $32,%%rdx,%%rax"
+ : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+ return product;
+}
+
+static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+{
+ u64 delta = native_read_tsc() - shadow->tsc_timestamp;
+ return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+}
+
+/*
+ * Reads a consistent set of time-base values from hypervisor,
+ * into a shadow data area.
+ */
+static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+ struct kvm_vcpu_time_info *src)
+{
+ do {
+ dst->version = src->version;
+ rmb(); /* fetch version before data */
+ dst->tsc_timestamp = src->tsc_timestamp;
+ dst->system_timestamp = src->system_time;
+ dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
+ dst->tsc_shift = src->tsc_shift;
+ rmb(); /* test version after fetching data */
+ } while ((src->version & 1) || (dst->version != src->version));
+
+ return dst->version;
+}
+
+/*
+ * This is our read_clock function. The host puts a tsc timestamp each time
+ * it updates a new time. Without the tsc adjustment, we can have a situation
+ * in which a vcpu starts to run earlier (smaller system_time), but probes
+ * time later (compared to another vcpu), leading to backwards time
+ */
+
+cycle_t pvclock_clocksource_read(struct kvm_vcpu_time_info *src)
+{
+ struct pvclock_shadow_time shadow;
+ unsigned version;
+ cycle_t ret, offset;
+
+ do {
+ version = pvclock_get_time_values(&shadow, src);
+ barrier();
+ offset = pvclock_get_nsec_offset(&shadow);
+ ret = shadow.system_timestamp + offset;
+ barrier();
+ } while (version != src->version);
+
+ return ret;
+}
+
+void pvclock_read_wallclock(struct kvm_wall_clock *wall_clock,
+ struct kvm_vcpu_time_info *vcpu_time,
+ struct timespec *ts)
+{
+ u32 version;
+ u64 delta;
+ struct timespec now;
+
+ /* get wallclock at system boot */
+ do {
+ version = wall_clock->wc_version;
+ rmb(); /* fetch version before time */
+ now.tv_sec = wall_clock->wc_sec;
+ now.tv_nsec = wall_clock->wc_nsec;
+ rmb(); /* fetch time before checking version */
+ } while ((wall_clock->wc_version & 1) || (version != wall_clock->wc_version));
+
+ delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */
+ delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
+
+ now.tv_nsec = do_div(delta, NSEC_PER_SEC);
+ now.tv_sec = delta;
+
+ set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+}
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
new file mode 100644
index 0000000..2b9812f
--- /dev/null
+++ b/include/asm-x86/pvclock.h
@@ -0,0 +1,6 @@
+#include <linux/clocksource.h>
+#include <asm/kvm_para.h>
+cycle_t pvclock_clocksource_read(struct kvm_vcpu_time_info *src);
+void pvclock_read_wallclock(struct kvm_wall_clock *wall,
+ struct kvm_vcpu_time_info *vcpu,
+ struct timespec *ts);
--
1.5.4.1
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/4] Make xen use the generic paravirt clocksource code.
2008-05-08 11:48 [PATCH 0/4] paravirt clock series Gerd Hoffmann
2008-05-08 11:48 ` [PATCH 1/4] Add helper functions for paravirtual clocksources Gerd Hoffmann
@ 2008-05-08 11:48 ` Gerd Hoffmann
2008-05-08 11:48 ` [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen Gerd Hoffmann
2008-05-08 11:48 ` [PATCH 4/4] kvm/guest: fix paravirt clocksource to be compatible " Gerd Hoffmann
3 siblings, 0 replies; 9+ messages in thread
From: Gerd Hoffmann @ 2008-05-08 11:48 UTC (permalink / raw)
To: virtualization, kvm-devel; +Cc: Jeremy Fitzhardinge, Gerd Hoffmann
This patch switches the xen paravirt clock over to use the
generic paravirt clock code.
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
arch/x86/xen/Kconfig | 1 +
arch/x86/xen/time.c | 110 +++++---------------------------------------------
2 files changed, 12 insertions(+), 99 deletions(-)
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 2e641be..3a4f16a 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -5,6 +5,7 @@
config XEN
bool "Xen guest support"
select PARAVIRT
+ select PARAVIRT_CLOCK
depends on X86_32
depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
help
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c39e1a5..3d5f945 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -13,6 +13,7 @@
#include <linux/clockchips.h>
#include <linux/kernel_stat.h>
+#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
@@ -30,17 +31,6 @@
static cycle_t xen_clocksource_read(void);
-/* These are perodically updated in shared_info, and then copied here. */
-struct shadow_time_info {
- u64 tsc_timestamp; /* TSC at last update of time vals. */
- u64 system_timestamp; /* Time, in nanosecs, since boot. */
- u32 tsc_to_nsec_mul;
- int tsc_shift;
- u32 version;
-};
-
-static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
-
/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
@@ -230,95 +220,14 @@ unsigned long xen_cpu_khz(void)
return xen_khz;
}
-/*
- * Reads a consistent set of time-base values from Xen, into a shadow data
- * area.
- */
-static unsigned get_time_values_from_xen(void)
-{
- struct vcpu_time_info *src;
- struct shadow_time_info *dst;
-
- /* src is shared memory with the hypervisor, so we need to
- make sure we get a consistent snapshot, even in the face of
- being preempted. */
- src = &__get_cpu_var(xen_vcpu)->time;
- dst = &__get_cpu_var(shadow_time);
-
- do {
- dst->version = src->version;
- rmb(); /* fetch version before data */
- dst->tsc_timestamp = src->tsc_timestamp;
- dst->system_timestamp = src->system_time;
- dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
- dst->tsc_shift = src->tsc_shift;
- rmb(); /* test version after fetching data */
- } while ((src->version & 1) | (dst->version ^ src->version));
-
- return dst->version;
-}
-
-/*
- * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
- * yielding a 64-bit result.
- */
-static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
-{
- u64 product;
-#ifdef __i386__
- u32 tmp1, tmp2;
-#endif
-
- if (shift < 0)
- delta >>= -shift;
- else
- delta <<= shift;
-
-#ifdef __i386__
- __asm__ (
- "mul %5 ; "
- "mov %4,%%eax ; "
- "mov %%edx,%4 ; "
- "mul %5 ; "
- "xor %5,%5 ; "
- "add %4,%%eax ; "
- "adc %5,%%edx ; "
- : "=A" (product), "=r" (tmp1), "=r" (tmp2)
- : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
-#elif __x86_64__
- __asm__ (
- "mul %%rdx ; shrd $32,%%rdx,%%rax"
- : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
-#else
-#error implement me!
-#endif
-
- return product;
-}
-
-static u64 get_nsec_offset(struct shadow_time_info *shadow)
-{
- u64 now, delta;
- now = native_read_tsc();
- delta = now - shadow->tsc_timestamp;
- return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
-}
-
static cycle_t xen_clocksource_read(void)
{
- struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
+ struct vcpu_time_info *src;
cycle_t ret;
- unsigned version;
-
- do {
- version = get_time_values_from_xen();
- barrier();
- ret = shadow->system_timestamp + get_nsec_offset(shadow);
- barrier();
- } while (version != __get_cpu_var(xen_vcpu)->time.version);
-
- put_cpu_var(shadow_time);
+ src = &get_cpu_var(xen_vcpu)->time;
+ ret = pvclock_clocksource_read((void*)src);
+ put_cpu_var(xen_vcpu);
return ret;
}
@@ -349,9 +258,14 @@ static void xen_read_wallclock(struct timespec *ts)
unsigned long xen_get_wallclock(void)
{
+ const struct shared_info *s = HYPERVISOR_shared_info;
+ struct kvm_wall_clock *wall_clock = (void*)&(s->wc_version);
+ struct vcpu_time_info *vcpu_time;
struct timespec ts;
- xen_read_wallclock(&ts);
+ vcpu_time = &get_cpu_var(xen_vcpu)->time;
+ pvclock_read_wallclock(wall_clock, (void*)vcpu_time, &ts);
+ put_cpu_var(xen_vcpu);
return ts.tv_sec;
}
@@ -576,8 +490,6 @@ __init void xen_time_init(void)
{
int cpu = smp_processor_id();
- get_time_values_from_xen();
-
clocksource_register(&xen_clocksource);
if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
--
1.5.4.1
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen.
2008-05-08 11:48 [PATCH 0/4] paravirt clock series Gerd Hoffmann
2008-05-08 11:48 ` [PATCH 1/4] Add helper functions for paravirtual clocksources Gerd Hoffmann
2008-05-08 11:48 ` [PATCH 2/4] Make xen use the generic paravirt clocksource code Gerd Hoffmann
@ 2008-05-08 11:48 ` Gerd Hoffmann
2008-05-13 8:03 ` [kvm-devel] " Avi Kivity
2008-05-08 11:48 ` [PATCH 4/4] kvm/guest: fix paravirt clocksource to be compatible " Gerd Hoffmann
3 siblings, 1 reply; 9+ messages in thread
From: Gerd Hoffmann @ 2008-05-08 11:48 UTC (permalink / raw)
To: virtualization, kvm-devel; +Cc: Gerd Hoffmann
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
arch/x86/kvm/x86.c | 63 +++++++++++++++++++++++++++++++++++++++++++--------
1 files changed, 53 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 979f983..6906d54 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -493,7 +493,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
static int version;
struct kvm_wall_clock wc;
- struct timespec wc_ts;
+ struct timespec now,sys,boot;
if (!wall_clock)
return;
@@ -502,9 +502,16 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
- wc_ts = current_kernel_time();
- wc.wc_sec = wc_ts.tv_sec;
- wc.wc_nsec = wc_ts.tv_nsec;
+#if 0
+ /* Hmm, getboottime() isn't exported to modules ... */
+ getboottime(&boot);
+#else
+ now = current_kernel_time();
+ ktime_get_ts(&sys);
+ boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+#endif
+ wc.wc_sec = boot.tv_sec;
+ wc.wc_nsec = boot.tv_nsec;
wc.wc_version = version;
kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
@@ -537,20 +544,58 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
/*
* The interface expects us to write an even number signaling that the
* update is finished. Since the guest won't see the intermediate
- * state, we just write "2" at the end
+ * state, we just increase by 2 at the end.
*/
- vcpu->hv_clock.version = 2;
+ vcpu->hv_clock.version += 2;
shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
- sizeof(vcpu->hv_clock));
+ sizeof(vcpu->hv_clock));
kunmap_atomic(shared_kaddr, KM_USER0);
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
}
+static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
+{
+ uint32_t quotient, remainder;
+
+ __asm__ ( "divl %4"
+ : "=a" (quotient), "=d" (remainder)
+ : "0" (0), "1" (dividend), "r" (divisor) );
+ return quotient;
+}
+
+static void kvm_set_time_scale(uint32_t tsc_khz, struct kvm_vcpu_time_info *hv_clock)
+{
+ uint64_t nsecs = 1000000000LL;
+ int32_t shift = 0;
+ uint64_t tps64;
+ uint32_t tps32;
+
+ tps64 = tsc_khz * 1000LL;
+ while (tps64 > nsecs*2) {
+ tps64 >>= 1;
+ shift--;
+ }
+
+ tps32 = (uint32_t)tps64;
+ while (tps32 <= (uint32_t)nsecs) {
+ tps32 <<= 1;
+ shift++;
+ }
+
+ hv_clock->tsc_shift = shift;
+ hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
+
+#if 0
+ printk(KERN_DEBUG "%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
+ __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
+ hv_clock->tsc_to_system_mul);
+#endif
+}
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
@@ -599,9 +644,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
/* ...but clean it before doing the actual write */
vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
- vcpu->arch.hv_clock.tsc_to_system_mul =
- clocksource_khz2mult(tsc_khz, 22);
- vcpu->arch.hv_clock.tsc_shift = 22;
+ kvm_set_time_scale(tsc_khz, &vcpu->arch.hv_clock);
down_read(¤t->mm->mmap_sem);
vcpu->arch.time_page =
--
1.5.4.1
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4/4] kvm/guest: fix paravirt clocksource to be compatible with xen.
2008-05-08 11:48 [PATCH 0/4] paravirt clock series Gerd Hoffmann
` (2 preceding siblings ...)
2008-05-08 11:48 ` [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen Gerd Hoffmann
@ 2008-05-08 11:48 ` Gerd Hoffmann
3 siblings, 0 replies; 9+ messages in thread
From: Gerd Hoffmann @ 2008-05-08 11:48 UTC (permalink / raw)
To: virtualization, kvm-devel; +Cc: Gerd Hoffmann
This patch switches the kvm clocksource code over to use the
paravirt clock helpers, thereby making it compatible with xen.
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
arch/x86/Kconfig | 1 +
arch/x86/kernel/kvmclock.c | 84 ++++++++++++++++---------------------------
2 files changed, 32 insertions(+), 53 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b12e188..30feb9f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -388,6 +388,7 @@ config VMI
config KVM_CLOCK
bool "KVM paravirtualized clock"
select PARAVIRT
+ select PARAVIRT_CLOCK
depends on !(X86_VISWS || X86_VOYAGER)
help
Turning on this option will allow you to run a paravirtualized clock
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 4bc1be5..1c63f75 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
#include <linux/clocksource.h>
#include <linux/kvm_para.h>
+#include <asm/pvclock.h>
#include <asm/arch_hooks.h>
#include <asm/msr.h>
#include <asm/apic.h>
@@ -37,17 +38,9 @@ early_param("no-kvmclock", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
-
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
- int cpu = smp_processor_id();
- u64 delta = native_read_tsc() - last_tsc;
- return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
+
/*
* The wallclock is the time of day when we booted. Since then, some time may
* have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,35 +48,19 @@ static cycle_t kvm_clock_read(void);
*/
unsigned long kvm_get_wallclock(void)
{
- u32 wc_sec, wc_nsec;
- u64 delta;
+ struct kvm_vcpu_time_info *vcpu_time;
struct timespec ts;
- int version, nsec;
int low, high;
low = (int)__pa(&wall_clock);
high = ((u64)__pa(&wall_clock) >> 32);
+ native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
- delta = kvm_clock_read();
+ vcpu_time = &get_cpu_var(hv_clock);
+ pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+ put_cpu_var(hv_clock);
- native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
- do {
- version = wall_clock.wc_version;
- rmb();
- wc_sec = wall_clock.wc_sec;
- wc_nsec = wall_clock.wc_nsec;
- rmb();
- } while ((wall_clock.wc_version != version) || (version & 1));
-
- delta = kvm_clock_read() - delta;
- delta += wc_nsec;
- nsec = do_div(delta, NSEC_PER_SEC);
- set_normalized_timespec(&ts, wc_sec + delta, nsec);
- /*
- * Of all mechanisms of time adjustment I've tested, this one
- * was the champion!
- */
- return ts.tv_sec + 1;
+ return ts.tv_sec;
}
int kvm_set_wallclock(unsigned long now)
@@ -91,28 +68,17 @@ int kvm_set_wallclock(unsigned long now)
return 0;
}
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
static cycle_t kvm_clock_read(void)
{
- u64 last_tsc, now;
- int cpu;
+ struct kvm_vcpu_time_info *src;
+ cycle_t ret;
- preempt_disable();
- cpu = smp_processor_id();
-
- last_tsc = get_clock(cpu, tsc_timestamp);
- now = get_clock(cpu, system_time);
-
- now += kvm_get_delta(last_tsc);
- preempt_enable();
-
- return now;
+ src = &get_cpu_var(hv_clock);
+ ret = pvclock_clocksource_read(src);
+ put_cpu_var(hv_clock);
+ return ret;
}
+
static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_read,
@@ -123,13 +89,14 @@ static struct clocksource kvm_clock = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
{
int cpu = smp_processor_id();
int low, high;
low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+ printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+ cpu, high, low, txt);
return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
}
@@ -140,12 +107,20 @@ static void kvm_setup_secondary_clock(void)
* Now that the first cpu already had this clocksource initialized,
* we shouldn't fail.
*/
- WARN_ON(kvm_register_clock());
+ WARN_ON(kvm_register_clock("secondary cpu clock"));
/* ok, done with our trickery, call native */
setup_secondary_APIC_clock();
}
#endif
+#ifdef CONFIG_SMP
+void __init kvm_smp_prepare_boot_cpu(void)
+{
+ WARN_ON(kvm_register_clock("primary cpu clock"));
+ native_smp_prepare_boot_cpu();
+}
+#endif
+
/*
* After the clock is registered, the host will keep writing to the
* registered memory location. If the guest happens to shutdown, this memory
@@ -174,7 +149,7 @@ void __init kvmclock_init(void)
return;
if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
- if (kvm_register_clock())
+ if (kvm_register_clock("boot clock"))
return;
pv_time_ops.get_wallclock = kvm_get_wallclock;
pv_time_ops.set_wallclock = kvm_set_wallclock;
@@ -182,6 +157,9 @@ void __init kvmclock_init(void)
#ifdef CONFIG_X86_LOCAL_APIC
pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
#endif
+#ifdef CONFIG_SMP
+ smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+#endif
machine_ops.shutdown = kvm_shutdown;
#ifdef CONFIG_KEXEC
machine_ops.crash_shutdown = kvm_crash_shutdown;
--
1.5.4.1
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [kvm-devel] [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen.
2008-05-08 11:48 ` [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen Gerd Hoffmann
@ 2008-05-13 8:03 ` Avi Kivity
2008-05-16 7:47 ` Gerd Hoffmann
0 siblings, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2008-05-13 8:03 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: kvm-devel, virtualization
Gerd Hoffmann wrote:
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
> arch/x86/kvm/x86.c | 63 +++++++++++++++++++++++++++++++++++++++++++--------
> 1 files changed, 53 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 979f983..6906d54 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -493,7 +493,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
> {
> static int version;
> struct kvm_wall_clock wc;
> - struct timespec wc_ts;
> + struct timespec now,sys,boot;
>
Add spaces.
>
> if (!wall_clock)
> return;
> @@ -502,9 +502,16 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
>
> kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
>
> - wc_ts = current_kernel_time();
> - wc.wc_sec = wc_ts.tv_sec;
> - wc.wc_nsec = wc_ts.tv_nsec;
> +#if 0
> + /* Hmm, getboottime() isn't exported to modules ... */
> + getboottime(&boot);
> +#else
> + now = current_kernel_time();
> + ktime_get_ts(&sys);
> + boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
> +#endif
> + wc.wc_sec = boot.tv_sec;
> + wc.wc_nsec = boot.tv_nsec;
>
Please drop the #if 0.
> wc.wc_version = version;
>
> kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
> @@ -537,20 +544,58 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
> /*
> * The interface expects us to write an even number signaling that the
> * update is finished. Since the guest won't see the intermediate
> - * state, we just write "2" at the end
> + * state, we just increase by 2 at the end.
> */
> - vcpu->hv_clock.version = 2;
> + vcpu->hv_clock.version += 2;
>
> shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
>
> memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
> - sizeof(vcpu->hv_clock));
> + sizeof(vcpu->hv_clock));
>
> kunmap_atomic(shared_kaddr, KM_USER0);
>
> mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
> }
>
> +static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
> +{
> + uint32_t quotient, remainder;
> +
> + __asm__ ( "divl %4"
> + : "=a" (quotient), "=d" (remainder)
> + : "0" (0), "1" (dividend), "r" (divisor) );
> + return quotient;
> +}
>
do_div()?
> +
> +static void kvm_set_time_scale(uint32_t tsc_khz, struct kvm_vcpu_time_info *hv_clock)
> +{
> + uint64_t nsecs = 1000000000LL;
> + int32_t shift = 0;
> + uint64_t tps64;
> + uint32_t tps32;
> +
> + tps64 = tsc_khz * 1000LL;
> + while (tps64 > nsecs*2) {
> + tps64 >>= 1;
> + shift--;
> + }
> +
> + tps32 = (uint32_t)tps64;
> + while (tps32 <= (uint32_t)nsecs) {
> + tps32 <<= 1;
> + shift++;
> + }
> +
> + hv_clock->tsc_shift = shift;
> + hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
> +
> +#if 0
> + printk(KERN_DEBUG "%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
> + __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
> + hv_clock->tsc_to_system_mul);
> +#endif
> +}
>
pr_debug() or something?
>
> int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> {
> @@ -599,9 +644,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> /* ...but clean it before doing the actual write */
> vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
>
> - vcpu->arch.hv_clock.tsc_to_system_mul =
> - clocksource_khz2mult(tsc_khz, 22);
> - vcpu->arch.hv_clock.tsc_shift = 22;
> + kvm_set_time_scale(tsc_khz, &vcpu->arch.hv_clock);
>
What if the tsc frequency changes later on? We need to adjust the
multiplier, no?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen.
2008-05-13 8:03 ` [kvm-devel] " Avi Kivity
@ 2008-05-16 7:47 ` Gerd Hoffmann
2008-05-18 6:05 ` [kvm-devel] " Avi Kivity
2008-05-21 15:24 ` Avi Kivity
0 siblings, 2 replies; 9+ messages in thread
From: Gerd Hoffmann @ 2008-05-16 7:47 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel, virtualization
Avi Kivity wrote:
>> + struct timespec now,sys,boot;
>
> Add spaces.
Done.
>> +#if 0
>> + /* Hmm, getboottime() isn't exported to modules ... */
>> + getboottime(&boot);
>> +#else
>> + now = current_kernel_time();
>> + ktime_get_ts(&sys);
>> + boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
>> +#endif
>> + wc.wc_sec = boot.tv_sec;
>> + wc.wc_nsec = boot.tv_nsec;
>
> Please drop the #if 0.
Done, and added a comment for the calculation.
>> +static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
>> +{
>> + uint32_t quotient, remainder;
>> +
>> + __asm__ ( "divl %4"
>> + : "=a" (quotient), "=d" (remainder)
>> + : "0" (0), "1" (dividend), "r" (divisor) );
>> + return quotient;
>> +}
>>
>
> do_div()?
No, this one does something else. Already tried to get rid of that one
before ;)
> pr_debug() or something?
Done.
>> + kvm_set_time_scale(tsc_khz, &vcpu->arch.hv_clock);
>>
> What if the tsc frequency changes later on? we need to adjust the
> multiplier, no?
We'd better do that, yes.
New patch series prepared and tested, will be posted in a moment ...
cheers,
Gerd
--
http://kraxel.fedorapeople.org/xenner/
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [kvm-devel] [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen.
2008-05-16 7:47 ` Gerd Hoffmann
@ 2008-05-18 6:05 ` Avi Kivity
2008-05-21 15:24 ` Avi Kivity
1 sibling, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2008-05-18 6:05 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: kvm-devel, virtualization
Gerd Hoffmann wrote:
>
>>> +static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
>>> +{
>>> + uint32_t quotient, remainder;
>>> +
>>> + __asm__ ( "divl %4"
>>> + : "=a" (quotient), "=d" (remainder)
>>> + : "0" (0), "1" (dividend), "r" (divisor) );
>>> + return quotient;
>>> +}
>>>
>>>
>> do_div()?
>>
>
> No, this one does something else. Already tried to get rid of that one
> before ;)
>
>
There is actually a simpler way of coding this:
return dividend / divisor;
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [kvm-devel] [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen.
2008-05-16 7:47 ` Gerd Hoffmann
2008-05-18 6:05 ` [kvm-devel] " Avi Kivity
@ 2008-05-21 15:24 ` Avi Kivity
1 sibling, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2008-05-21 15:24 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: kvm, virtualization
Gerd Hoffmann wrote:
>
>>> +static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
>>> +{
>>> + uint32_t quotient, remainder;
>>> +
>>> + __asm__ ( "divl %4"
>>> + : "=a" (quotient), "=d" (remainder)
>>> + : "0" (0), "1" (dividend), "r" (divisor) );
>>> + return quotient;
>>> +}
>>>
>>>
>> do_div()?
>>
>
> No, this one does something else. Already tried to get rid of that one
> before ;)
>
>
Ah yes, it calculates (dividend << 32) / divisor.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2008-05-21 15:24 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-05-08 11:48 [PATCH 0/4] paravirt clock series Gerd Hoffmann
2008-05-08 11:48 ` [PATCH 1/4] Add helper functions for paravirtual clocksources Gerd Hoffmann
2008-05-08 11:48 ` [PATCH 2/4] Make xen use the generic paravirt clocksource code Gerd Hoffmann
2008-05-08 11:48 ` [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen Gerd Hoffmann
2008-05-13 8:03 ` [kvm-devel] " Avi Kivity
2008-05-16 7:47 ` Gerd Hoffmann
2008-05-18 6:05 ` [kvm-devel] " Avi Kivity
2008-05-21 15:24 ` Avi Kivity
2008-05-08 11:48 ` [PATCH 4/4] kvm/guest: fix paravirt clocksource to be compatible " Gerd Hoffmann
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).