* [PATCH 0/2 -v(many)] kvmclock
@ 2008-02-15 19:52 Glauber de Oliveira Costa
2008-02-15 19:52 ` [PATCH 1/2] [PATCH] kvmclock - the host part Glauber de Oliveira Costa
2008-02-17 9:48 ` [PATCH 0/2 -v(many)] kvmclock Avi Kivity
0 siblings, 2 replies; 4+ messages in thread
From: Glauber de Oliveira Costa @ 2008-02-15 19:52 UTC (permalink / raw)
To: kvm-devel; +Cc: chrisw, avi
I think this version addresses avi's last comments.
I'm not resending userspace since it is unchanged
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] [PATCH] kvmclock - the host part.
2008-02-15 19:52 [PATCH 0/2 -v(many)] kvmclock Glauber de Oliveira Costa
@ 2008-02-15 19:52 ` Glauber de Oliveira Costa
2008-02-15 19:52 ` [PATCH 2/2] [PATCH] kvmclock implementation, the guest part Glauber de Oliveira Costa
2008-02-17 9:48 ` [PATCH 0/2 -v(many)] kvmclock Avi Kivity
1 sibling, 1 reply; 4+ messages in thread
From: Glauber de Oliveira Costa @ 2008-02-15 19:52 UTC (permalink / raw)
To: kvm-devel; +Cc: chrisw, avi, Glauber de Oliveira Costa
This is the host part of kvm clocksource implementation. As it does
not include clockevents, it is a fairly simple implementation. We
only have to register a per-vcpu area, and start writting to it periodically.
The area is binary compatible with xen, as we use the same shadow_info structure.
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
---
arch/x86/kvm/x86.c | 96 ++++++++++++++++++++++++++++++++++++++++++++
include/asm-x86/kvm_host.h | 7 +++
include/asm-x86/kvm_para.h | 25 +++++++++++
include/linux/kvm.h | 1
4 files changed, 128 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0c910c7..5dfc21f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -19,6 +19,7 @@ #include "segment_descriptor.h"
#include "irq.h"
#include "mmu.h"
+#include <linux/clocksource.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
@@ -424,7 +425,7 @@ static u32 msrs_to_save[] = {
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
- MSR_IA32_TIME_STAMP_COUNTER,
+ MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
};
static unsigned num_msrs_to_save;
@@ -482,6 +483,70 @@ static int do_set_msr(struct kvm_vcpu *v
return kvm_set_msr(vcpu, index, *data);
}
+static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
+{
+ static int version;
+ struct kvm_wall_clock wc;
+ struct timespec wc_ts;
+
+ if (!wall_clock)
+ return
+
+ mutex_lock(&kvm->lock);
+
+ version++;
+ kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+
+ wc_ts = current_kernel_time();
+ wc.wc_sec = wc_ts.tv_sec;
+ wc.wc_nsec = wc_ts.tv_nsec;
+ wc.wc_version = version;
+ kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
+
+ version++;
+ kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+
+ mutex_unlock(&kvm->lock);
+}
+
+static void kvm_write_guest_time(struct kvm_vcpu *v)
+{
+ struct timespec ts;
+ unsigned long flags;
+ struct kvm_vcpu_arch *vcpu = &v->arch;
+ void *shared_kaddr;
+
+ if ((!vcpu->time_page))
+ return;
+
+ /* Keep irq disabled to prevent changes to the clock */
+ local_irq_save(flags);
+ kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
+ &vcpu->hv_clock.tsc_timestamp);
+ ktime_get_ts(&ts);
+ local_irq_restore(flags);
+
+ /* With all the info we got, fill in the values */
+
+ vcpu->hv_clock.system_time = ts.tv_nsec +
+ (NSEC_PER_SEC * (u64)ts.tv_sec);
+ /*
+ * The interface expects us to write an even number signaling that the
+ * update is finished. Since the guest won't see the intermediate
+ * state, we just write "2" at the end
+ */
+ vcpu->hv_clock.version = 2;
+
+ shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
+
+ memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock));
+
+ kunmap_atomic(shared_kaddr, KM_USER0);
+
+ mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+}
+
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
@@ -511,6 +576,27 @@ int kvm_set_msr_common(struct kvm_vcpu *
case MSR_IA32_MISC_ENABLE:
vcpu->arch.ia32_misc_enable_msr = data;
break;
+ case MSR_KVM_WALL_CLOCK:
+ vcpu->kvm->arch.wall_clock = data;
+ kvm_write_wall_clock(vcpu->kvm, data);
+ break;
+ case MSR_KVM_SYSTEM_TIME: {
+ vcpu->arch.time = data & PAGE_MASK;
+ vcpu->arch.time_offset = data & ~PAGE_MASK;
+
+ vcpu->arch.hv_clock.tsc_to_system_mul =
+ clocksource_khz2mult(tsc_khz, 22);
+ vcpu->arch.hv_clock.tsc_shift = 22;
+
+ down_read(¤t->mm->mmap_sem);
+ vcpu->arch.time_page =
+ gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
+ up_read(¤t->mm->mmap_sem);
+
+ if (is_error_page(vcpu->arch.time_page))
+ vcpu->arch.time_page = NULL;
+ break;
+ }
default:
pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
return 1;
@@ -569,6 +655,12 @@ int kvm_get_msr_common(struct kvm_vcpu *
case MSR_EFER:
data = vcpu->arch.shadow_efer;
break;
+ case MSR_KVM_WALL_CLOCK:
+ data = vcpu->kvm->arch.wall_clock;
+ break;
+ case MSR_KVM_SYSTEM_TIME:
+ data = vcpu->arch.time;
+ break;
default:
pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
return 1;
@@ -696,6 +788,7 @@ int kvm_dev_ioctl_check_extension(long e
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
+ case KVM_CAP_CLOCKSOURCE:
r = 1;
break;
case KVM_CAP_VAPIC:
@@ -771,6 +864,7 @@ out:
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
+ kvm_write_guest_time(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index da61255..0c429c8 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -261,6 +261,11 @@ #define VCPU_MP_STATE_HALTED
/* emulate context */
struct x86_emulate_ctxt emulate_ctxt;
+
+ gpa_t time;
+ struct kvm_vcpu_time_info hv_clock;
+ unsigned int time_offset;
+ struct page *time_page;
};
struct kvm_mem_alias {
@@ -287,6 +292,8 @@ struct kvm_arch{
int round_robin_prev_vcpu;
unsigned int tss_addr;
struct page *apic_access_page;
+
+ gpa_t wall_clock;
};
struct kvm_vm_stat {
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index c6f3fd8..481bf18 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -10,10 +10,35 @@ #define KVM_CPUID_SIGNATURE 0x40000000
* paravirtualization, the appropriate feature bit should be checked.
*/
#define KVM_CPUID_FEATURES 0x40000001
+#define KVM_FEATURE_CLOCKSOURCE 0
+
+#define MSR_KVM_WALL_CLOCK 0x11
+#define MSR_KVM_SYSTEM_TIME 0x12
#ifdef __KERNEL__
#include <asm/processor.h>
+/* xen binary-compatible interface. See xen headers for details */
+struct kvm_vcpu_time_info {
+ uint32_t version;
+ uint32_t pad0;
+ uint64_t tsc_timestamp;
+ uint64_t system_time;
+ uint32_t tsc_to_system_mul;
+ int8_t tsc_shift;
+ int8_t pad[3];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct kvm_wall_clock {
+ uint32_t wc_version;
+ uint32_t wc_sec;
+ uint32_t wc_nsec;
+};
+
+
+extern void kvmclock_init(void);
+
+
/* This instruction is vmcall. On non-VT architectures, it will generate a
* trap that we will then rewrite to the appropriate instruction.
*/
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c1ec04f..94540b3 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -233,6 +233,7 @@ #define KVM_CAP_USER_MEMORY 3
#define KVM_CAP_SET_TSS_ADDR 4
#define KVM_CAP_VAPIC 6
#define KVM_CAP_EXT_CPUID 7
+#define KVM_CAP_CLOCKSOURCE 8
/*
* ioctls for VM fds
--
1.4.2
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] [PATCH] kvmclock implementation, the guest part.
2008-02-15 19:52 ` [PATCH 1/2] [PATCH] kvmclock - the host part Glauber de Oliveira Costa
@ 2008-02-15 19:52 ` Glauber de Oliveira Costa
0 siblings, 0 replies; 4+ messages in thread
From: Glauber de Oliveira Costa @ 2008-02-15 19:52 UTC (permalink / raw)
To: kvm-devel; +Cc: chrisw, avi, Glauber de Oliveira Costa
This is the guest part of kvm clock implementation
It does not do tsc-only timing, as tsc can have deltas
between cpus, and it did not seem worthy to me to keep
adjusting them.
We do use it, however, for fine-grained adjustment.
Other than that, time comes from the host.
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
---
arch/x86/Kconfig | 10 +++
arch/x86/kernel/Makefile | 1
arch/x86/kernel/kvmclock.c | 161 ++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kernel/setup_32.c | 5 +
arch/x86/kernel/setup_64.c | 5 +
5 files changed, 182 insertions(+), 0 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3be2305..cc2bc37 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -372,6 +372,16 @@ config VMI
at the moment), by linking the kernel to a GPL-ed ROM module
provided by the hypervisor.
+config KVM_CLOCK
+ bool "KVM paravirtualized clock"
+ select PARAVIRT
+ help
+ Turning on this option will allow you to run a paravirtualized clock
+ when running over the KVM hypervisor. Instead of relying on a PIT
+ (or probably other) emulation by the underlying device model, the host
+ provides the guest with timing infrastructure such as time of day, and
+ system time
+
source "arch/x86/lguest/Kconfig"
config PARAVIRT
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 76ec0f8..5b91b82 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
+obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
ifdef CONFIG_INPUT_PCSPKR
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
new file mode 100644
index 0000000..b8da3bf
--- /dev/null
+++ b/arch/x86/kernel/kvmclock.c
@@ -0,0 +1,161 @@
+/* KVM paravirtual clock driver. A clocksource implementation
+ Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <linux/clocksource.h>
+#include <linux/kvm_para.h>
+#include <asm/arch_hooks.h>
+#include <asm/msr.h>
+#include <xen/interface/xen.h>
+
+#define KVM_SCALE 22
+
+static int kvmclock = 1;
+
+static int parse_no_kvmclock(char *arg)
+{
+ kvmclock = 0;
+ return 0;
+}
+early_param("no-kvmclock", parse_no_kvmclock);
+
+struct shared_info shared_info __attribute__((__aligned__(PAGE_SIZE)));
+
+/* The hypervisor will put information about time periodically here */
+static struct kvm_vcpu_time_info hv_clock[NR_CPUS];
+#define get_clock(cpu, field) hv_clock[cpu].field
+
+static inline u64 kvm_get_delta(u64 last_tsc)
+{
+ int cpu = smp_processor_id();
+ u64 delta = native_read_tsc() - last_tsc;
+ return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
+}
+
+static struct kvm_wall_clock wall_clock;
+static cycle_t kvm_clock_read(void);
+/*
+ * The wallclock is the time of day when we booted. Since then, some time may
+ * have elapsed since the hypervisor wrote the data. So we try to account for
+ * that with system time
+ */
+unsigned long kvm_get_wallclock(void)
+{
+ u32 wc_sec, wc_nsec;
+ u64 delta;
+ struct timespec ts;
+ int version, nsec;
+ int low, high;
+
+ low = (int)__pa(&wall_clock);
+ high = ((u64)__pa(&wall_clock) >> 32);
+
+ delta = kvm_clock_read();
+
+ native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
+ do {
+ version = wall_clock.wc_version;
+ rmb();
+ wc_sec = wall_clock.wc_sec;
+ wc_nsec = wall_clock.wc_nsec;
+ rmb();
+ } while ((wall_clock.wc_version != version) || (version & 1));
+
+ delta = kvm_clock_read() - delta;
+ delta += wc_nsec;
+ nsec = do_div(delta, NSEC_PER_SEC);
+ set_normalized_timespec(&ts, wc_sec + delta, nsec);
+ /*
+ * Of all mechanisms of time adjustment I've tested, this one
+ * was the champion!
+ */
+ return ts.tv_sec + 1;
+}
+
+int kvm_set_wallclock(unsigned long now)
+{
+ return 0;
+}
+
+/*
+ * This is our read_clock function. The host puts an tsc timestamp each time
+ * it updates a new time. Without the tsc adjustment, we can have a situation
+ * in which a vcpu starts to run earlier (smaller system_time), but probes
+ * time later (compared to another vcpu), leading to backwards time
+ */
+static cycle_t kvm_clock_read(void)
+{
+ u64 last_tsc, now;
+ int cpu;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+
+ last_tsc = get_clock(cpu, tsc_timestamp);
+ now = get_clock(cpu, system_time);
+
+ now += kvm_get_delta(last_tsc);
+ preempt_enable();
+
+ return now;
+}
+static struct clocksource kvm_clock = {
+ .name = "kvm-clock",
+ .read = kvm_clock_read,
+ .rating = 400,
+ .mask = CLOCKSOURCE_MASK(64),
+ .mult = 1 << KVM_SCALE,
+ .shift = KVM_SCALE,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int kvm_register_clock(void)
+{
+ int cpu = smp_processor_id();
+ int low, high;
+ low = (int)__pa(&hv_clock[cpu]);
+ high = ((u64)__pa(&hv_clock[cpu]) >> 32);
+
+ return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
+}
+
+static void kvm_setup_secondary_clock(void)
+{
+ /*
+ * Now that the first cpu already had this clocksource initialized,
+ * we shouldn't fail.
+ */
+ WARN_ON(kvm_register_clock());
+ /* ok, done with our trickery, call native */
+ setup_secondary_APIC_clock();
+}
+
+void __init kvmclock_init(void)
+{
+ if (!kvm_para_available())
+ return;
+
+ if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
+ if (kvm_register_clock())
+ return;
+ pv_time_ops.get_wallclock = kvm_get_wallclock;
+ pv_time_ops.set_wallclock = kvm_set_wallclock;
+ pv_time_ops.sched_clock = kvm_clock_read;
+ pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+ clocksource_register(&kvm_clock);
+ }
+}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 691ab4c..988da57 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -46,6 +46,7 @@ #include <linux/dmi.h>
#include <linux/pfn.h>
#include <linux/pci.h>
#include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
#include <video/edid.h>
@@ -772,6 +773,10 @@ #endif
if (mtrr_trim_uncached_memory(max_pfn))
max_low_pfn = setup_memory();
+#ifdef CONFIG_KVM_CLOCK
+ kvmclock_init();
+#endif
+
#ifdef CONFIG_VMI
/*
* Must be after max_low_pfn is determined, and before kernel
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c0d8208..cdf9578 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -41,6 +41,7 @@ #include <linux/dma-mapping.h>
#include <linux/ctype.h>
#include <linux/uaccess.h>
#include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
@@ -349,6 +350,10 @@ #endif
io_delay_init();
+#ifdef CONFIG_KVM_CLOCK
+ kvmclock_init();
+#endif
+
#ifdef CONFIG_SMP
/* setup to use the early static init tables during kernel startup */
x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
--
1.4.2
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 0/2 -v(many)] kvmclock
2008-02-15 19:52 [PATCH 0/2 -v(many)] kvmclock Glauber de Oliveira Costa
2008-02-15 19:52 ` [PATCH 1/2] [PATCH] kvmclock - the host part Glauber de Oliveira Costa
@ 2008-02-17 9:48 ` Avi Kivity
1 sibling, 0 replies; 4+ messages in thread
From: Avi Kivity @ 2008-02-17 9:48 UTC (permalink / raw)
To: Glauber de Oliveira Costa; +Cc: kvm-devel, chrisw
Glauber de Oliveira Costa wrote:
> I think this version addresses avi's last comments.
> I'm not resending userspace since it is unchanged
>
Applied, thanks. Added structure packing for kvm_wall_clock.
--
Any sufficiently difficult bug is indistinguishable from a feature.
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2008-02-17 9:48 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-02-15 19:52 [PATCH 0/2 -v(many)] kvmclock Glauber de Oliveira Costa
2008-02-15 19:52 ` [PATCH 1/2] [PATCH] kvmclock - the host part Glauber de Oliveira Costa
2008-02-15 19:52 ` [PATCH 2/2] [PATCH] kvmclock implementation, the guest part Glauber de Oliveira Costa
2008-02-17 9:48 ` [PATCH 0/2 -v(many)] kvmclock Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox