virtualization.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] paravirt clock source patches, #4
@ 2008-06-03 13:07 Gerd Hoffmann
  2008-06-03 13:07 ` [PATCH 1/5] Add structs and functions for paravirt clocksource Gerd Hoffmann
                   ` (5 more replies)
  0 siblings, 6 replies; 15+ messages in thread
From: Gerd Hoffmann @ 2008-06-03 13:07 UTC (permalink / raw)
  To: kvm-owner, virtualization

paravirt clock source patches, next round.

There is now a pvclock-abi.h file with the structs and some longish
comments in it and everybody is switched over to use the stuff in
there.

cheers,
  Gerd

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/5] Add structs and functions for paravirt clocksource.
  2008-06-03 13:07 [PATCH 0/5] paravirt clock source patches, #4 Gerd Hoffmann
@ 2008-06-03 13:07 ` Gerd Hoffmann
  2008-06-03 13:34   ` Jeremy Fitzhardinge
  2008-06-03 13:07 ` [PATCH 2/5] Make xen use the paravirt clocksource structs and functions Gerd Hoffmann
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Gerd Hoffmann @ 2008-06-03 13:07 UTC (permalink / raw)
  To: kvm-owner, virtualization

This patch adds structs for the paravirt clocksource ABI
used by both xen and kvm (pvclock-abi.h).

It also adds some helper functions to read system time and
wall clock time from a paravirtual clocksource (pvclock.[ch]).
They are based on the xen code.  They are enabled using
CONFIG_PARAVIRT_CLOCK.

Subsequent patches in this series will put the code to use.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 arch/x86/Kconfig              |    4 +
 arch/x86/kernel/Makefile      |    1 +
 arch/x86/kernel/pvclock.c     |  141 +++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/pvclock-abi.h |   42 ++++++++++++
 include/asm-x86/pvclock.h     |   13 ++++
 5 files changed, 201 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/kernel/pvclock.c
 create mode 100644 include/asm-x86/pvclock-abi.h
 create mode 100644 include/asm-x86/pvclock.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index dcbec34..5fc46c1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -410,6 +410,10 @@ config PARAVIRT
 	  over full virtualization.  However, when run without a hypervisor
 	  the kernel is theoretically slower and slightly larger.
 
+config PARAVIRT_CLOCK
+	bool
+	default n
+
 endif
 
 config MEMTEST_BOOTPARAM
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3..77807d4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 0000000..05fbe9a
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,141 @@
+/*  paravirtual clock -- common code used by kvm/xen
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <asm/pvclock.h>
+
+/*
+ * These are periodically updated
+ *    xen: magic shared_info page
+ *    kvm: gpa registered via msr
+ * and then copied here.
+ */
+struct pvclock_shadow_time {
+	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
+	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
+	u32 tsc_to_nsec_mul;
+	int tsc_shift;
+	u32 version;
+};
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+	u64 product;
+#ifdef __i386__
+	u32 tmp1, tmp2;
+#endif
+
+	if (shift < 0)
+		delta >>= -shift;
+	else
+		delta <<= shift;
+
+#ifdef __i386__
+	__asm__ (
+		"mul  %5       ; "
+		"mov  %4,%%eax ; "
+		"mov  %%edx,%4 ; "
+		"mul  %5       ; "
+		"xor  %5,%5    ; "
+		"add  %4,%%eax ; "
+		"adc  %5,%%edx ; "
+		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
+		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif __x86_64__
+	__asm__ (
+		"mul %%rdx ; shrd $32,%%rdx,%%rax"
+		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+	return product;
+}
+
+static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+{
+	u64 delta = native_read_tsc() - shadow->tsc_timestamp;
+	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+}
+
+/*
+ * Reads a consistent set of time-base values from hypervisor,
+ * into a shadow data area.
+ */
+static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+					struct pvclock_vcpu_time_info *src)
+{
+	do {
+		dst->version = src->version;
+		rmb();		/* fetch version before data */
+		dst->tsc_timestamp     = src->tsc_timestamp;
+		dst->system_timestamp  = src->system_time;
+		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
+		dst->tsc_shift         = src->tsc_shift;
+		rmb();		/* test version after fetching data */
+	} while ((src->version & 1) || (dst->version != src->version));
+
+	return dst->version;
+}
+
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+{
+	struct pvclock_shadow_time shadow;
+	unsigned version;
+	cycle_t ret, offset;
+
+	do {
+		version = pvclock_get_time_values(&shadow, src);
+		barrier();
+		offset = pvclock_get_nsec_offset(&shadow);
+		ret = shadow.system_timestamp + offset;
+		barrier();
+	} while (version != src->version);
+
+	return ret;
+}
+
+void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
+			    struct pvclock_vcpu_time_info *vcpu_time,
+			    struct timespec *ts)
+{
+	u32 version;
+	u64 delta;
+	struct timespec now;
+
+	/* get wallclock at system boot */
+	do {
+		version = wall_clock->version;
+		rmb();		/* fetch version before time */
+		now.tv_sec  = wall_clock->sec;
+		now.tv_nsec = wall_clock->nsec;
+		rmb();		/* fetch time before checking version */
+	} while ((wall_clock->version & 1) || (version != wall_clock->version));
+
+	delta = pvclock_clocksource_read(vcpu_time);	/* time since system boot */
+	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
+
+	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
+	now.tv_sec = delta;
+
+	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+}
diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h
new file mode 100644
index 0000000..2adb0da
--- /dev/null
+++ b/include/asm-x86/pvclock-abi.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_X86_PVCLOCK_ABI_H_
+#define _ASM_X86_PVCLOCK_ABI_H_
+#ifndef __ASSEMBLY__
+
+/*
+ * These structs MUST NOT be changed.
+ * They are the ABI between hypervisor and guest OS.
+ * Both Xen and KVM are using this.
+ *
+ * pvclock_vcpu_time_info holds the system time and the tsc timestamp
+ * of the last update. So the guest can use the tsc delta to get a
+ * more precise system time.
+ *
+ * pvclock_wall_clock references the point in time when the system
+ * time was zero (usually boot time), thus the guest calculates the
+ * current wall clock by adding the system time.
+ *
+ * Protocol for the "version" fields is: hypervisor raises it (making
+ * it uneven) before it starts updating the fields and raises it again
+ * (making it even) when it is done.  Thus the guest can make sure the
+ * time values it got are consistent by checking the version before
+ * and after reading them.
+ */
+
+struct pvclock_vcpu_time_info {
+	uint32_t version;
+	uint32_t pad0;
+	uint64_t tsc_timestamp;
+	uint64_t system_time;
+	uint32_t tsc_to_system_mul;
+	int8_t   tsc_shift;
+	int8_t	 pad[3];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+	uint32_t version;
+	uint32_t sec;
+	uint32_t nsec;
+} __attribute__((__packed__));
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PVCLOCK_ABI_H_ */
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
new file mode 100644
index 0000000..85b1bba
--- /dev/null
+++ b/include/asm-x86/pvclock.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_X86_PVCLOCK_H_
+#define _ASM_X86_PVCLOCK_H_
+
+#include <linux/clocksource.h>
+#include <asm/pvclock-abi.h>
+
+/* some helper functions for xen and kvm pv clock sources */
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
+			    struct pvclock_vcpu_time_info *vcpu,
+			    struct timespec *ts);
+
+#endif /* _ASM_X86_PVCLOCK_H_ */
-- 
1.5.4.1

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/5] Make xen use the paravirt clocksource structs and functions.
  2008-06-03 13:07 [PATCH 0/5] paravirt clock source patches, #4 Gerd Hoffmann
  2008-06-03 13:07 ` [PATCH 1/5] Add structs and functions for paravirt clocksource Gerd Hoffmann
@ 2008-06-03 13:07 ` Gerd Hoffmann
  2008-06-03 13:41   ` Jeremy Fitzhardinge
  2008-06-03 13:07 ` [PATCH 3/5] Make kvm host use the paravirt clocksource structs Gerd Hoffmann
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Gerd Hoffmann @ 2008-06-03 13:07 UTC (permalink / raw)
  To: kvm-owner, virtualization

This patch updates the xen guest to use the pvclock structs
and helper functions.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 arch/x86/xen/Kconfig        |    1 +
 arch/x86/xen/time.c         |  130 ++++---------------------------------------
 include/xen/interface/xen.h |    5 +-
 3 files changed, 14 insertions(+), 122 deletions(-)

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 2e641be..3a4f16a 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -5,6 +5,7 @@
 config XEN
 	bool "Xen guest support"
 	select PARAVIRT
+	select PARAVIRT_CLOCK
 	depends on X86_32
 	depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
 	help
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c39e1a5..bf31fb5 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -13,6 +13,7 @@
 #include <linux/clockchips.h>
 #include <linux/kernel_stat.h>
 
+#include <asm/pvclock.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
@@ -30,17 +31,6 @@
 
 static cycle_t xen_clocksource_read(void);
 
-/* These are perodically updated in shared_info, and then copied here. */
-struct shadow_time_info {
-	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
-	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
-	u32 tsc_to_nsec_mul;
-	int tsc_shift;
-	u32 version;
-};
-
-static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
-
 /* runstate info updated by Xen */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
 
@@ -230,121 +220,26 @@ unsigned long xen_cpu_khz(void)
 	return xen_khz;
 }
 
-/*
- * Reads a consistent set of time-base values from Xen, into a shadow data
- * area.
- */
-static unsigned get_time_values_from_xen(void)
-{
-	struct vcpu_time_info   *src;
-	struct shadow_time_info *dst;
-
-	/* src is shared memory with the hypervisor, so we need to
-	   make sure we get a consistent snapshot, even in the face of
-	   being preempted. */
-	src = &__get_cpu_var(xen_vcpu)->time;
-	dst = &__get_cpu_var(shadow_time);
-
-	do {
-		dst->version = src->version;
-		rmb();		/* fetch version before data */
-		dst->tsc_timestamp     = src->tsc_timestamp;
-		dst->system_timestamp  = src->system_time;
-		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
-		dst->tsc_shift         = src->tsc_shift;
-		rmb();		/* test version after fetching data */
-	} while ((src->version & 1) | (dst->version ^ src->version));
-
-	return dst->version;
-}
-
-/*
- * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
- * yielding a 64-bit result.
- */
-static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
-{
-	u64 product;
-#ifdef __i386__
-	u32 tmp1, tmp2;
-#endif
-
-	if (shift < 0)
-		delta >>= -shift;
-	else
-		delta <<= shift;
-
-#ifdef __i386__
-	__asm__ (
-		"mul  %5       ; "
-		"mov  %4,%%eax ; "
-		"mov  %%edx,%4 ; "
-		"mul  %5       ; "
-		"xor  %5,%5    ; "
-		"add  %4,%%eax ; "
-		"adc  %5,%%edx ; "
-		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
-		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
-#elif __x86_64__
-	__asm__ (
-		"mul %%rdx ; shrd $32,%%rdx,%%rax"
-		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
-#else
-#error implement me!
-#endif
-
-	return product;
-}
-
-static u64 get_nsec_offset(struct shadow_time_info *shadow)
-{
-	u64 now, delta;
-	now = native_read_tsc();
-	delta = now - shadow->tsc_timestamp;
-	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
-}
-
 static cycle_t xen_clocksource_read(void)
 {
-	struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
+        struct vcpu_time_info *src;
 	cycle_t ret;
-	unsigned version;
-
-	do {
-		version = get_time_values_from_xen();
-		barrier();
-		ret = shadow->system_timestamp + get_nsec_offset(shadow);
-		barrier();
-	} while (version != __get_cpu_var(xen_vcpu)->time.version);
-
-	put_cpu_var(shadow_time);
 
+	src = &get_cpu_var(xen_vcpu)->time;
+	ret = pvclock_clocksource_read((void*)src);
+	put_cpu_var(xen_vcpu);
 	return ret;
 }
 
 static void xen_read_wallclock(struct timespec *ts)
 {
-	const struct shared_info *s = HYPERVISOR_shared_info;
-	u32 version;
-	u64 delta;
-	struct timespec now;
-
-	/* get wallclock at system boot */
-	do {
-		version = s->wc_version;
-		rmb();		/* fetch version before time */
-		now.tv_sec  = s->wc_sec;
-		now.tv_nsec = s->wc_nsec;
-		rmb();		/* fetch time before checking version */
-	} while ((s->wc_version & 1) | (version ^ s->wc_version));
+	struct shared_info *s = HYPERVISOR_shared_info;
+	struct pvclock_wall_clock *wall_clock = &(s->wc);
+        struct vcpu_time_info *vcpu_time;
 
-	delta = xen_clocksource_read();	/* time since system boot */
-	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
-
-	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
-	now.tv_sec = delta;
-
-	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+	vcpu_time = &get_cpu_var(xen_vcpu)->time;
+	pvclock_read_wallclock(wall_clock, (void*)vcpu_time, ts);
+	put_cpu_var(xen_vcpu);
 }
 
 unsigned long xen_get_wallclock(void)
@@ -352,7 +247,6 @@ unsigned long xen_get_wallclock(void)
 	struct timespec ts;
 
 	xen_read_wallclock(&ts);
-
 	return ts.tv_sec;
 }
 
@@ -576,8 +470,6 @@ __init void xen_time_init(void)
 {
 	int cpu = smp_processor_id();
 
-	get_time_values_from_xen();
-
 	clocksource_register(&xen_clocksource);
 
 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 9b018da..aeee096 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -10,6 +10,7 @@
 #define __XEN_PUBLIC_XEN_H__
 
 #include <asm/xen/interface.h>
+#include <asm/pvclock-abi.h>
 
 /*
  * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
@@ -384,9 +385,7 @@ struct shared_info {
 	 * Wallclock time: updated only by control software. Guests should base
 	 * their gettimeofday() syscall on this wallclock-base value.
 	 */
-	uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
-	uint32_t wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
-	uint32_t wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
+	struct pvclock_wall_clock wc;
 
 	struct arch_shared_info arch;
 
-- 
1.5.4.1

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/5] Make kvm host use the paravirt clocksource structs.
  2008-06-03 13:07 [PATCH 0/5] paravirt clock source patches, #4 Gerd Hoffmann
  2008-06-03 13:07 ` [PATCH 1/5] Add structs and functions for paravirt clocksource Gerd Hoffmann
  2008-06-03 13:07 ` [PATCH 2/5] Make xen use the paravirt clocksource structs and functions Gerd Hoffmann
@ 2008-06-03 13:07 ` Gerd Hoffmann
  2008-06-03 13:07 ` [PATCH 4/5] Make kvm guest use the paravirt clocksource structs and functions Gerd Hoffmann
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Gerd Hoffmann @ 2008-06-03 13:07 UTC (permalink / raw)
  To: kvm-owner, virtualization

This patch updates the kvm host code to use the pvclock structs.
It also makes the paravirt clock compatible with Xen.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 arch/x86/kvm/x86.c         |   75 ++++++++++++++++++++++++++++++++++++-------
 include/asm-x86/kvm_host.h |    4 ++-
 2 files changed, 65 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 77fb2bd..7a62860 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -499,8 +499,8 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
 	static int version;
-	struct kvm_wall_clock wc;
-	struct timespec wc_ts;
+	struct pvclock_wall_clock wc;
+	struct timespec now, sys, boot;
 
 	if (!wall_clock)
 		return;
@@ -509,10 +509,19 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 
 	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 
-	wc_ts = current_kernel_time();
-	wc.wc_sec = wc_ts.tv_sec;
-	wc.wc_nsec = wc_ts.tv_nsec;
-	wc.wc_version = version;
+	/*
+	 * The guest calculates current wall clock time by adding
+	 * system time (updated by kvm_write_guest_time below) to the
+	 * wall clock specified here.  guest system time equals host
+	 * system time for us, thus we must fill in host boot time here.
+	 */
+	now = current_kernel_time();
+	ktime_get_ts(&sys);
+	boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+
+	wc.sec = boot.tv_sec;
+	wc.nsec = boot.tv_nsec;
+	wc.version = version;
 
 	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
 
@@ -520,6 +529,45 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 }
 
+static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
+{
+	uint32_t quotient, remainder;
+
+	/* Don't try to replace with do_div(), this one calculates
+	 * "(dividend << 32) / divisor" */
+	__asm__ ( "divl %4"
+		  : "=a" (quotient), "=d" (remainder)
+		  : "0" (0), "1" (dividend), "r" (divisor) );
+	return quotient;
+}
+
+static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock)
+{
+	uint64_t nsecs = 1000000000LL;
+	int32_t  shift = 0;
+	uint64_t tps64;
+	uint32_t tps32;
+
+	tps64 = tsc_khz * 1000LL;
+	while (tps64 > nsecs*2) {
+		tps64 >>= 1;
+		shift--;
+	}
+
+	tps32 = (uint32_t)tps64;
+	while (tps32 <= (uint32_t)nsecs) {
+		tps32 <<= 1;
+		shift++;
+	}
+
+	hv_clock->tsc_shift = shift;
+	hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
+
+	pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
+		 __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
+		 hv_clock->tsc_to_system_mul);
+}
+
 static void kvm_write_guest_time(struct kvm_vcpu *v)
 {
 	struct timespec ts;
@@ -530,6 +578,11 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	if ((!vcpu->time_page))
 		return;
 
+	if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
+		kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
+		vcpu->hv_clock_tsc_khz = tsc_khz;
+	}
+	
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
 	kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
@@ -544,14 +597,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	/*
 	 * The interface expects us to write an even number signaling that the
 	 * update is finished. Since the guest won't see the intermediate
-	 * state, we just write "2" at the end
+	 * state, we just increase by 2 at the end.
 	 */
-	vcpu->hv_clock.version = 2;
+	vcpu->hv_clock.version += 2;
 
 	shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
 
 	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
-		sizeof(vcpu->hv_clock));
+	       sizeof(vcpu->hv_clock));
 
 	kunmap_atomic(shared_kaddr, KM_USER0);
 
@@ -639,10 +692,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
-		vcpu->arch.hv_clock.tsc_to_system_mul =
-					clocksource_khz2mult(tsc_khz, 22);
-		vcpu->arch.hv_clock.tsc_shift = 22;
-
 		down_read(&current->mm->mmap_sem);
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index cd50380..459461c 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -18,6 +18,7 @@
 #include <linux/kvm_para.h>
 #include <linux/kvm_types.h>
 
+#include <asm/pvclock-abi.h>
 #include <asm/desc.h>
 
 #define KVM_MAX_VCPUS 16
@@ -285,7 +286,8 @@ struct kvm_vcpu_arch {
 	struct x86_emulate_ctxt emulate_ctxt;
 
 	gpa_t time;
-	struct kvm_vcpu_time_info hv_clock;
+	struct pvclock_vcpu_time_info hv_clock;
+	unsigned int hv_clock_tsc_khz;
 	unsigned int time_offset;
 	struct page *time_page;
 
-- 
1.5.4.1

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/5] Make kvm guest use the paravirt clocksource structs and functions.
  2008-06-03 13:07 [PATCH 0/5] paravirt clock source patches, #4 Gerd Hoffmann
                   ` (2 preceding siblings ...)
  2008-06-03 13:07 ` [PATCH 3/5] Make kvm host use the paravirt clocksource structs Gerd Hoffmann
@ 2008-06-03 13:07 ` Gerd Hoffmann
  2008-06-03 13:07 ` [PATCH 5/5] Remove now unused structs from kvm_para.h Gerd Hoffmann
  2008-06-03 13:42 ` [PATCH 0/5] paravirt clock source patches, #4 Jeremy Fitzhardinge
  5 siblings, 0 replies; 15+ messages in thread
From: Gerd Hoffmann @ 2008-06-03 13:07 UTC (permalink / raw)
  To: kvm-owner, virtualization

This patch updates the kvm guest code to use the pvclock structs
and functions, thereby making it compatible with Xen.

The patch also fixes an initialization bug: on SMP systems the
per-cpu data has two different locations, one early at boot and
another after CPU bringup.  kvmclock must take that into account when
registering the physical address with the host.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 arch/x86/Kconfig           |    1 +
 arch/x86/kernel/kvmclock.c |   91 ++++++++++++++++---------------------------
 2 files changed, 35 insertions(+), 57 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5fc46c1..b631288 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -383,6 +383,7 @@ config VMI
 config KVM_CLOCK
 	bool "KVM paravirtualized clock"
 	select PARAVIRT
+	select PARAVIRT_CLOCK
 	depends on !(X86_VISWS || X86_VOYAGER)
 	help
 	  Turning on this option will allow you to run a paravirtualized clock
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 08a3098..927f0c5 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
 
 #include <linux/clocksource.h>
 #include <linux/kvm_para.h>
+#include <asm/pvclock.h>
 #include <asm/arch_hooks.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
@@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg)
 early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_wall_clock wall_clock;
 
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-	int cpu = smp_processor_id();
-	u64 delta = native_read_tsc() - last_tsc;
-	return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
-
-static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void);
  */
 static unsigned long kvm_get_wallclock(void)
 {
-	u32 wc_sec, wc_nsec;
-	u64 delta;
+	struct pvclock_vcpu_time_info *vcpu_time;
 	struct timespec ts;
-	int version, nsec;
 	int low, high;
 
 	low = (int)__pa(&wall_clock);
 	high = ((u64)__pa(&wall_clock) >> 32);
-
-	delta = kvm_clock_read();
-
 	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-	do {
-		version = wall_clock.wc_version;
-		rmb();
-		wc_sec = wall_clock.wc_sec;
-		wc_nsec = wall_clock.wc_nsec;
-		rmb();
-	} while ((wall_clock.wc_version != version) || (version & 1));
-
-	delta = kvm_clock_read() - delta;
-	delta += wc_nsec;
-	nsec = do_div(delta, NSEC_PER_SEC);
-	set_normalized_timespec(&ts, wc_sec + delta, nsec);
-	/*
-	 * Of all mechanisms of time adjustment I've tested, this one
-	 * was the champion!
-	 */
-	return ts.tv_sec + 1;
+
+	vcpu_time = &get_cpu_var(hv_clock);
+	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+	put_cpu_var(hv_clock);
+	
+	return ts.tv_sec;
 }
 
 static int kvm_set_wallclock(unsigned long now)
 {
-	return 0;
+	return -1;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-	u64 last_tsc, now;
-	int cpu;
-
-	preempt_disable();
-	cpu = smp_processor_id();
-
-	last_tsc = get_clock(cpu, tsc_timestamp);
-	now = get_clock(cpu, system_time);
+	struct pvclock_vcpu_time_info *src;
+	cycle_t ret;
 
-	now += kvm_get_delta(last_tsc);
-	preempt_enable();
-
-	return now;
+	src = &get_cpu_var(hv_clock);
+	ret = pvclock_clocksource_read(src);
+	put_cpu_var(hv_clock);
+	return ret;
 }
+
 static struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_read,
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();
 	int low, high;
 	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
 	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+	       cpu, high, low, txt);
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
 	 * Now that the first cpu already had this clocksource initialized,
 	 * we shouldn't fail.
 	 */
-	WARN_ON(kvm_register_clock());
+	WARN_ON(kvm_register_clock("secondary cpu clock"));
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
 #endif
 
+#ifdef CONFIG_SMP
+void __init kvm_smp_prepare_boot_cpu(void)
+{
+	WARN_ON(kvm_register_clock("primary cpu clock"));
+	native_smp_prepare_boot_cpu();
+}
+#endif
+
 /*
  * After the clock is registered, the host will keep writing to the
  * registered memory location. If the guest happens to shutdown, this memory
@@ -174,7 +148,7 @@ void __init kvmclock_init(void)
 		return;
 
 	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
-		if (kvm_register_clock())
+		if (kvm_register_clock("boot clock"))
 			return;
 		pv_time_ops.get_wallclock = kvm_get_wallclock;
 		pv_time_ops.set_wallclock = kvm_set_wallclock;
@@ -182,6 +156,9 @@ void __init kvmclock_init(void)
 #ifdef CONFIG_X86_LOCAL_APIC
 		pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
 #endif
+#ifdef CONFIG_SMP
+		smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+#endif
 		machine_ops.shutdown  = kvm_shutdown;
 #ifdef CONFIG_KEXEC
 		machine_ops.crash_shutdown  = kvm_crash_shutdown;
-- 
1.5.4.1

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 5/5] Remove now unused structs from kvm_para.h
  2008-06-03 13:07 [PATCH 0/5] paravirt clock source patches, #4 Gerd Hoffmann
                   ` (3 preceding siblings ...)
  2008-06-03 13:07 ` [PATCH 4/5] Make kvm guest use the paravirt clocksource structs and functions Gerd Hoffmann
@ 2008-06-03 13:07 ` Gerd Hoffmann
  2008-06-03 13:26   ` David Miller
  2008-06-03 13:42 ` [PATCH 0/5] paravirt clock source patches, #4 Jeremy Fitzhardinge
  5 siblings, 1 reply; 15+ messages in thread
From: Gerd Hoffmann @ 2008-06-03 13:07 UTC (permalink / raw)
  To: kvm-owner, virtualization

The kvm_* structs are obsoleted by the pvclock_* ones.
Now all users have been switched over and the old structs
can be dropped.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/asm-x86/kvm_para.h |   18 ------------------
 1 files changed, 0 insertions(+), 18 deletions(-)

diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 5098459..bfd9900 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -48,24 +48,6 @@ struct kvm_mmu_op_release_pt {
 #ifdef __KERNEL__
 #include <asm/processor.h>
 
-/* xen binary-compatible interface. See xen headers for details */
-struct kvm_vcpu_time_info {
-	uint32_t version;
-	uint32_t pad0;
-	uint64_t tsc_timestamp;
-	uint64_t system_time;
-	uint32_t tsc_to_system_mul;
-	int8_t   tsc_shift;
-	int8_t	 pad[3];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct kvm_wall_clock {
-	uint32_t wc_version;
-	uint32_t wc_sec;
-	uint32_t wc_nsec;
-} __attribute__((__packed__));
-
-
 extern void kvmclock_init(void);
 
 
-- 
1.5.4.1

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] Remove now unused structs from kvm_para.h
  2008-06-03 13:07 ` [PATCH 5/5] Remove now unused structs from kvm_para.h Gerd Hoffmann
@ 2008-06-03 13:26   ` David Miller
  0 siblings, 0 replies; 15+ messages in thread
From: David Miller @ 2008-06-03 13:26 UTC (permalink / raw)
  To: kraxel; +Cc: virtualization, kvm-owner


You sent these patches to "kvm-owner", i.e. the mailing list owner, and
not the list itself, which would be plain "kvm".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/5] Add structs and functions for paravirt clocksource.
  2008-06-03 13:07 ` [PATCH 1/5] Add structs and functions for paravirt clocksource Gerd Hoffmann
@ 2008-06-03 13:34   ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 15+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-03 13:34 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: virtualization, kvm-owner

Gerd Hoffmann wrote:
> +/*
> + * These structs MUST NOT be changed.
> + * They are the ABI between hypervisor and guest OS.
> + * Both Xen and KVM are using this.
> + *
> + * pvclock_vcpu_time_info holds the system time and the tsc timestamp
> + * of the last update. So the guest can use the tsc delta to get a
> + * more precise system time.
>   

You should note that these are per-cpu parameters.

> + *
> + * pvclock_wall_clock references the point in time when the system
> + * time was zero (usually boot time), thus the guest calculates the
> + * current wall clock by adding the system time.
> + *
> + * Protocol for the "version" fields is: hypervisor raises it (making
> + * it uneven) before it starts updating the fields and raises it again
> + * (making it even) when it is done.  Thus the guest can make sure the
> + * time values it got are consistent by checking the version before
> + * and after reading them.
> + */
> +
> +struct pvclock_vcpu_time_info {
> +	uint32_t version;
>   

Use the standard u32/u64 I think (or the __u32 forms, maybe).

> +	uint32_t pad0;
> +	uint64_t tsc_timestamp;
> +	uint64_t system_time;
> +	uint32_t tsc_to_system_mul;
> +	int8_t   tsc_shift;
> +	int8_t	 pad[3];
> +} __attribute__((__packed__)); /* 32 bytes */
> +
> +struct pvclock_wall_clock {
> +	uint32_t version;
> +	uint32_t sec;
> +	uint32_t nsec;
> +} __attribute__((__packed__));
> +
> +#endif /* __ASSEMBLY__ */
> +#endif /* _ASM_X86_PVCLOCK_ABI_H_ */
> diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
> new file mode 100644
> index 0000000..85b1bba
> --- /dev/null
> +++ b/include/asm-x86/pvclock.h
> @@ -0,0 +1,13 @@
> +#ifndef _ASM_X86_PVCLOCK_H_
> +#define _ASM_X86_PVCLOCK_H_
> +
> +#include <linux/clocksource.h>
> +#include <asm/pvclock-abi.h>
> +
> +/* some helper functions for xen and kvm pv clock sources */
> +cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
> +void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
> +			    struct pvclock_vcpu_time_info *vcpu,
> +			    struct timespec *ts);
> +
> +#endif /* _ASM_X86_PVCLOCK_H_ */
>   

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/5] Make xen use the paravirt clocksource structs and functions.
  2008-06-03 13:07 ` [PATCH 2/5] Make xen use the paravirt clocksource structs and functions Gerd Hoffmann
@ 2008-06-03 13:41   ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 15+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-03 13:41 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: virtualization, kvm-owner

Gerd Hoffmann wrote:
> This patch updates the xen guest to use the pvclock structs
> and helper functions.
>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  arch/x86/xen/Kconfig        |    1 +
>  arch/x86/xen/time.c         |  130 ++++---------------------------------------
>  include/xen/interface/xen.h |    5 +-
>  3 files changed, 14 insertions(+), 122 deletions(-)
>
> diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
> index 2e641be..3a4f16a 100644
> --- a/arch/x86/xen/Kconfig
> +++ b/arch/x86/xen/Kconfig
> @@ -5,6 +5,7 @@
>  config XEN
>  	bool "Xen guest support"
>  	select PARAVIRT
> +	select PARAVIRT_CLOCK
>  	depends on X86_32
>  	depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
>  	help
> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> index c39e1a5..bf31fb5 100644
> --- a/arch/x86/xen/time.c
> +++ b/arch/x86/xen/time.c
> @@ -13,6 +13,7 @@
>  #include <linux/clockchips.h>
>  #include <linux/kernel_stat.h>
>  
> +#include <asm/pvclock.h>
>  #include <asm/xen/hypervisor.h>
>  #include <asm/xen/hypercall.h>
>  
> @@ -30,17 +31,6 @@
>  
>  static cycle_t xen_clocksource_read(void);
>  
> -/* These are perodically updated in shared_info, and then copied here. */
> -struct shadow_time_info {
> -	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
> -	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
> -	u32 tsc_to_nsec_mul;
> -	int tsc_shift;
> -	u32 version;
> -};
> -
> -static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
> -
>  /* runstate info updated by Xen */
>  static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
>  
> @@ -230,121 +220,26 @@ unsigned long xen_cpu_khz(void)
>  	return xen_khz;
>  }
>  
> -/*
> - * Reads a consistent set of time-base values from Xen, into a shadow data
> - * area.
> - */
> -static unsigned get_time_values_from_xen(void)
> -{
> -	struct vcpu_time_info   *src;
> -	struct shadow_time_info *dst;
> -
> -	/* src is shared memory with the hypervisor, so we need to
> -	   make sure we get a consistent snapshot, even in the face of
> -	   being preempted. */
> -	src = &__get_cpu_var(xen_vcpu)->time;
> -	dst = &__get_cpu_var(shadow_time);
> -
> -	do {
> -		dst->version = src->version;
> -		rmb();		/* fetch version before data */
> -		dst->tsc_timestamp     = src->tsc_timestamp;
> -		dst->system_timestamp  = src->system_time;
> -		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
> -		dst->tsc_shift         = src->tsc_shift;
> -		rmb();		/* test version after fetching data */
> -	} while ((src->version & 1) | (dst->version ^ src->version));
> -
> -	return dst->version;
> -}
> -
> -/*
> - * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
> - * yielding a 64-bit result.
> - */
> -static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
> -{
> -	u64 product;
> -#ifdef __i386__
> -	u32 tmp1, tmp2;
> -#endif
> -
> -	if (shift < 0)
> -		delta >>= -shift;
> -	else
> -		delta <<= shift;
> -
> -#ifdef __i386__
> -	__asm__ (
> -		"mul  %5       ; "
> -		"mov  %4,%%eax ; "
> -		"mov  %%edx,%4 ; "
> -		"mul  %5       ; "
> -		"xor  %5,%5    ; "
> -		"add  %4,%%eax ; "
> -		"adc  %5,%%edx ; "
> -		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
> -		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
> -#elif __x86_64__
> -	__asm__ (
> -		"mul %%rdx ; shrd $32,%%rdx,%%rax"
> -		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
> -#else
> -#error implement me!
> -#endif
> -
> -	return product;
> -}
> -
> -static u64 get_nsec_offset(struct shadow_time_info *shadow)
> -{
> -	u64 now, delta;
> -	now = native_read_tsc();
> -	delta = now - shadow->tsc_timestamp;
> -	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
> -}
> -
>  static cycle_t xen_clocksource_read(void)
>  {
> -	struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
> +        struct vcpu_time_info *src;
>  	cycle_t ret;
> -	unsigned version;
> -
> -	do {
> -		version = get_time_values_from_xen();
> -		barrier();
> -		ret = shadow->system_timestamp + get_nsec_offset(shadow);
> -		barrier();
> -	} while (version != __get_cpu_var(xen_vcpu)->time.version);
> -
> -	put_cpu_var(shadow_time);
>  
> +	src = &get_cpu_var(xen_vcpu)->time;
> +	ret = pvclock_clocksource_read((void*)src);
> +	put_cpu_var(xen_vcpu);
>  	return ret;
>  }
>  
>  static void xen_read_wallclock(struct timespec *ts)
>  {
> -	const struct shared_info *s = HYPERVISOR_shared_info;
> -	u32 version;
> -	u64 delta;
> -	struct timespec now;
> -
> -	/* get wallclock at system boot */
> -	do {
> -		version = s->wc_version;
> -		rmb();		/* fetch version before time */
> -		now.tv_sec  = s->wc_sec;
> -		now.tv_nsec = s->wc_nsec;
> -		rmb();		/* fetch time before checking version */
> -	} while ((s->wc_version & 1) | (version ^ s->wc_version));
> +	struct shared_info *s = HYPERVISOR_shared_info;
> +	struct pvclock_wall_clock *wall_clock = &(s->wc);
> +        struct vcpu_time_info *vcpu_time;
>  
> -	delta = xen_clocksource_read();	/* time since system boot */
> -	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
> -
> -	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
> -	now.tv_sec = delta;
> -
> -	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
> +	vcpu_time = &get_cpu_var(xen_vcpu)->time;
> +	pvclock_read_wallclock(wall_clock, (void*)vcpu_time, ts);
>   

Why this cast here?  Because the corresponding struct in vcpu_info 
hasn't been updated to match?

    J

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] paravirt clock source patches, #4
  2008-06-03 13:07 [PATCH 0/5] paravirt clock source patches, #4 Gerd Hoffmann
                   ` (4 preceding siblings ...)
  2008-06-03 13:07 ` [PATCH 5/5] Remove now unused structs from kvm_para.h Gerd Hoffmann
@ 2008-06-03 13:42 ` Jeremy Fitzhardinge
  2008-06-03 13:46   ` Gerd Hoffmann
  2008-06-03 13:49   ` Gerd Hoffmann
  5 siblings, 2 replies; 15+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-03 13:42 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: virtualization, kvm-owner

Gerd Hoffmann wrote:
> paravirt clock source patches, next round.
>
> There is now a pvclock-abi.h file with the structs and some longish
> comments in it and everybody is switched over to use the stuff in
> there.

This all looks pretty good.  How do you want this to get into the kernel?

    J

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] paravirt clock source patches, #4
  2008-06-03 13:42 ` [PATCH 0/5] paravirt clock source patches, #4 Jeremy Fitzhardinge
@ 2008-06-03 13:46   ` Gerd Hoffmann
  2008-06-03 13:49   ` Gerd Hoffmann
  1 sibling, 0 replies; 15+ messages in thread
From: Gerd Hoffmann @ 2008-06-03 13:46 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: virtualization

Jeremy Fitzhardinge wrote:
> Gerd Hoffmann wrote:
>> paravirt clock source patches, next round.
>>
>> There is now a pvclock-abi.h file with the structs and some longish
>> comments in it and everybody is switched over to use the stuff in
>> there.
> 
> This all looks pretty good.  How do you want this to get into the kernel?
> 
>    J


-- 
http://kraxel.fedorapeople.org/xenner/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] paravirt clock source patches, #4
  2008-06-03 13:42 ` [PATCH 0/5] paravirt clock source patches, #4 Jeremy Fitzhardinge
  2008-06-03 13:46   ` Gerd Hoffmann
@ 2008-06-03 13:49   ` Gerd Hoffmann
  2008-06-04 14:01     ` Avi Kivity
  1 sibling, 1 reply; 15+ messages in thread
From: Gerd Hoffmann @ 2008-06-03 13:49 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: kvm, virtualization

Jeremy Fitzhardinge wrote:
> Gerd Hoffmann wrote:
>> paravirt clock source patches, next round.
>>
>> There is now a pvclock-abi.h file with the structs and some longish
>> comments in it and everybody is switched over to use the stuff in
>> there.
> 
> This all looks pretty good.  How do you want this to get into the kernel?

[ note: fixed up kvm list address: s/-owner// ]

Good question.  The kvm patches have dependencies on not-yet merged
bits, so they have to go through the kvm queue.  The first two can also
go through Ingo's x86 tree, I guess.

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 4/5] Make kvm guest use the paravirt clocksource structs and functions.
  2008-06-03 14:17 [PATCH 0/5] paravirt clock source patches, #5 Gerd Hoffmann
@ 2008-06-03 14:17 ` Gerd Hoffmann
  0 siblings, 0 replies; 15+ messages in thread
From: Gerd Hoffmann @ 2008-06-03 14:17 UTC (permalink / raw)
  To: kvm, virtualization; +Cc: Gerd Hoffmann

This patch updates the kvm guest code to use the pvclock structs
and functions, thereby making it compatible with Xen.

The patch also fixes an initialization bug: on SMP systems the
per-cpu area has two different locations, one early at boot and one
after CPU bringup.  kvmclock must take that into account when
registering the physical address with the host.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 arch/x86/Kconfig           |    1 +
 arch/x86/kernel/kvmclock.c |   91 ++++++++++++++++---------------------------
 2 files changed, 35 insertions(+), 57 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5fc46c1..b631288 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -383,6 +383,7 @@ config VMI
 config KVM_CLOCK
 	bool "KVM paravirtualized clock"
 	select PARAVIRT
+	select PARAVIRT_CLOCK
 	depends on !(X86_VISWS || X86_VOYAGER)
 	help
 	  Turning on this option will allow you to run a paravirtualized clock
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 08a3098..927f0c5 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
 
 #include <linux/clocksource.h>
 #include <linux/kvm_para.h>
+#include <asm/pvclock.h>
 #include <asm/arch_hooks.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
@@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg)
 early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_wall_clock wall_clock;
 
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-	int cpu = smp_processor_id();
-	u64 delta = native_read_tsc() - last_tsc;
-	return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
-
-static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void);
  */
 static unsigned long kvm_get_wallclock(void)
 {
-	u32 wc_sec, wc_nsec;
-	u64 delta;
+	struct pvclock_vcpu_time_info *vcpu_time;
 	struct timespec ts;
-	int version, nsec;
 	int low, high;
 
 	low = (int)__pa(&wall_clock);
 	high = ((u64)__pa(&wall_clock) >> 32);
-
-	delta = kvm_clock_read();
-
 	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-	do {
-		version = wall_clock.wc_version;
-		rmb();
-		wc_sec = wall_clock.wc_sec;
-		wc_nsec = wall_clock.wc_nsec;
-		rmb();
-	} while ((wall_clock.wc_version != version) || (version & 1));
-
-	delta = kvm_clock_read() - delta;
-	delta += wc_nsec;
-	nsec = do_div(delta, NSEC_PER_SEC);
-	set_normalized_timespec(&ts, wc_sec + delta, nsec);
-	/*
-	 * Of all mechanisms of time adjustment I've tested, this one
-	 * was the champion!
-	 */
-	return ts.tv_sec + 1;
+
+	vcpu_time = &get_cpu_var(hv_clock);
+	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+	put_cpu_var(hv_clock);
+	
+	return ts.tv_sec;
 }
 
 static int kvm_set_wallclock(unsigned long now)
 {
-	return 0;
+	return -1;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-	u64 last_tsc, now;
-	int cpu;
-
-	preempt_disable();
-	cpu = smp_processor_id();
-
-	last_tsc = get_clock(cpu, tsc_timestamp);
-	now = get_clock(cpu, system_time);
+	struct pvclock_vcpu_time_info *src;
+	cycle_t ret;
 
-	now += kvm_get_delta(last_tsc);
-	preempt_enable();
-
-	return now;
+	src = &get_cpu_var(hv_clock);
+	ret = pvclock_clocksource_read(src);
+	put_cpu_var(hv_clock);
+	return ret;
 }
+
 static struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_read,
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();
 	int low, high;
 	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
 	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+	       cpu, high, low, txt);
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
 	 * Now that the first cpu already had this clocksource initialized,
 	 * we shouldn't fail.
 	 */
-	WARN_ON(kvm_register_clock());
+	WARN_ON(kvm_register_clock("secondary cpu clock"));
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
 #endif
 
+#ifdef CONFIG_SMP
+void __init kvm_smp_prepare_boot_cpu(void)
+{
+	WARN_ON(kvm_register_clock("primary cpu clock"));
+	native_smp_prepare_boot_cpu();
+}
+#endif
+
 /*
  * After the clock is registered, the host will keep writing to the
  * registered memory location. If the guest happens to shutdown, this memory
@@ -174,7 +148,7 @@ void __init kvmclock_init(void)
 		return;
 
 	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
-		if (kvm_register_clock())
+		if (kvm_register_clock("boot clock"))
 			return;
 		pv_time_ops.get_wallclock = kvm_get_wallclock;
 		pv_time_ops.set_wallclock = kvm_set_wallclock;
@@ -182,6 +156,9 @@ void __init kvmclock_init(void)
 #ifdef CONFIG_X86_LOCAL_APIC
 		pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
 #endif
+#ifdef CONFIG_SMP
+		smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+#endif
 		machine_ops.shutdown  = kvm_shutdown;
 #ifdef CONFIG_KEXEC
 		machine_ops.crash_shutdown  = kvm_crash_shutdown;
-- 
1.5.4.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] paravirt clock source patches, #4
  2008-06-03 13:49   ` Gerd Hoffmann
@ 2008-06-04 14:01     ` Avi Kivity
  2008-06-16 13:53       ` Avi Kivity
  0 siblings, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2008-06-04 14:01 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Jeremy Fitzhardinge, kvm, virtualization, Ingo Molnar

Gerd Hoffmann wrote:
> Jeremy Fitzhardinge wrote:
>   
>> Gerd Hoffmann wrote:
>>     
>>> paravirt clock source patches, next round.
>>>
>>> There is now a pvclock-abi.h file with the structs and some longish
>>> comments in it and everybody is switched over to use the stuff in
>>> there.
>>>       
>> This all looks pretty good.  How do you want this to get into the kernel?
>>     
>
> [ note: fixed up kvm list address: s/-owner// ]
>
> Good question.  The kvm patches have dependencies on not-yet merged
> bits, so they have to go through the kvm queue.  The first two can also
> go through Ingos x86 tree I guess.
>
>   

Alternatively, if Ingo acks, I'll send all five through kvm.git.

Note the kvm specific patches need to be backported as we want them for 
2.6.26.  I can do that.

(Ingo: said patches are in 
http://thread.gmane.org/gmane.comp.emulators.kvm.devel/18149)

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] paravirt clock source patches, #4
  2008-06-04 14:01     ` Avi Kivity
@ 2008-06-16 13:53       ` Avi Kivity
  0 siblings, 0 replies; 15+ messages in thread
From: Avi Kivity @ 2008-06-16 13:53 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Jeremy Fitzhardinge, kvm, virtualization, Ingo Molnar

Avi Kivity wrote:     
>>> This all looks pretty good.  How do you want this to get into the 
>>> kernel?
>>>     
>>
>> [ note: fixed up kvm list address: s/-owner// ]
>>
>> Good question.  The kvm patches have dependencies on not-yet merged
>> bits, so they have to go through the kvm queue.  The first two can also
>> go through Ingos x86 tree I guess.
>>
>>   
>
> Alternativey, if Ingo acks, I'll send all five through kvm.git.
>
> Note the kvm specific patches need to be backported as we want them 
> for 2.6.26.  I can do that.
>
> (Ingo: said patches are in 
> http://thread.gmane.org/gmane.comp.emulators.kvm.devel/18149)
>

What's happening with this?  Is it going through x86.git or what?

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2008-06-16 13:53 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-03 13:07 [PATCH 0/5] paravirt clock source patches, #4 Gerd Hoffmann
2008-06-03 13:07 ` [PATCH 1/5] Add structs and functions for paravirt clocksource Gerd Hoffmann
2008-06-03 13:34   ` Jeremy Fitzhardinge
2008-06-03 13:07 ` [PATCH 2/5] Make xen use the paravirt clocksource structs and functions Gerd Hoffmann
2008-06-03 13:41   ` Jeremy Fitzhardinge
2008-06-03 13:07 ` [PATCH 3/5] Make kvm host use the paravirt clocksource structs Gerd Hoffmann
2008-06-03 13:07 ` [PATCH 4/5] Make kvm guest use the paravirt clocksource structs and functions Gerd Hoffmann
2008-06-03 13:07 ` [PATCH 5/5] Remove now unused structs from kvm_para.h Gerd Hoffmann
2008-06-03 13:26   ` David Miller
2008-06-03 13:42 ` [PATCH 0/5] paravirt clock source patches, #4 Jeremy Fitzhardinge
2008-06-03 13:46   ` Gerd Hoffmann
2008-06-03 13:49   ` Gerd Hoffmann
2008-06-04 14:01     ` Avi Kivity
2008-06-16 13:53       ` Avi Kivity
  -- strict thread matches above, loose matches on Subject: below --
2008-06-03 14:17 [PATCH 0/5] paravirt clock source patches, #5 Gerd Hoffmann
2008-06-03 14:17 ` [PATCH 4/5] Make kvm guest use the paravirt clocksource structs and functions Gerd Hoffmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).