* [PATCH 0/2 -v(many)] kvmclock @ 2008-02-15 19:52 Glauber de Oliveira Costa 2008-02-15 19:52 ` [PATCH 1/2] [PATCH] kvmclock - the host part Glauber de Oliveira Costa 2008-02-17 9:48 ` [PATCH 0/2 -v(many)] kvmclock Avi Kivity 0 siblings, 2 replies; 4+ messages in thread From: Glauber de Oliveira Costa @ 2008-02-15 19:52 UTC (permalink / raw) To: kvm-devel; +Cc: chrisw, avi I think this version addresses avi's last comments. I'm not resending userspace since it is unchanged ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] [PATCH] kvmclock - the host part. 2008-02-15 19:52 [PATCH 0/2 -v(many)] kvmclock Glauber de Oliveira Costa @ 2008-02-15 19:52 ` Glauber de Oliveira Costa 2008-02-15 19:52 ` [PATCH 2/2] [PATCH] kvmclock implementation, the guest part Glauber de Oliveira Costa 2008-02-17 9:48 ` [PATCH 0/2 -v(many)] kvmclock Avi Kivity 1 sibling, 1 reply; 4+ messages in thread From: Glauber de Oliveira Costa @ 2008-02-15 19:52 UTC (permalink / raw) To: kvm-devel; +Cc: chrisw, avi, Glauber de Oliveira Costa This is the host part of kvm clocksource implementation. As it does not include clockevents, it is a fairly simple implementation. We only have to register a per-vcpu area, and start writting to it periodically. The area is binary compatible with xen, as we use the same shadow_info structure. Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> --- arch/x86/kvm/x86.c | 96 ++++++++++++++++++++++++++++++++++++++++++++ include/asm-x86/kvm_host.h | 7 +++ include/asm-x86/kvm_para.h | 25 +++++++++++ include/linux/kvm.h | 1 4 files changed, 128 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c910c7..5dfc21f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -19,6 +19,7 @@ #include "segment_descriptor.h" #include "irq.h" #include "mmu.h" +#include <linux/clocksource.h> #include <linux/kvm.h> #include <linux/fs.h> #include <linux/vmalloc.h> @@ -424,7 +425,7 @@ static u32 msrs_to_save[] = { #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TIME_STAMP_COUNTER, + MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, }; static unsigned num_msrs_to_save; @@ -482,6 +483,70 @@ static int do_set_msr(struct kvm_vcpu *v return kvm_set_msr(vcpu, index, *data); } +static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) +{ + static int version; + struct kvm_wall_clock wc; + struct timespec wc_ts; + + if (!wall_clock) + return + + mutex_lock(&kvm->lock); + + version++; + kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); + + wc_ts = current_kernel_time(); + wc.wc_sec = wc_ts.tv_sec; + wc.wc_nsec = wc_ts.tv_nsec; + wc.wc_version = version; + kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); + + version++; + kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); + + mutex_unlock(&kvm->lock); +} + +static void kvm_write_guest_time(struct kvm_vcpu *v) +{ + struct timespec ts; + unsigned long flags; + struct kvm_vcpu_arch *vcpu = &v->arch; + void *shared_kaddr; + + if ((!vcpu->time_page)) + return; + + /* Keep irq disabled to prevent changes to the clock */ + local_irq_save(flags); + kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, + &vcpu->hv_clock.tsc_timestamp); + ktime_get_ts(&ts); + local_irq_restore(flags); + + /* With all the info we got, fill in the values */ + + vcpu->hv_clock.system_time = ts.tv_nsec + + (NSEC_PER_SEC * (u64)ts.tv_sec); + /* + * The interface expects us to write an even number signaling that the + * update is finished. Since the guest won't see the intermediate + * state, we just write "2" at the end + */ + vcpu->hv_clock.version = 2; + + shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); + + memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); + + kunmap_atomic(shared_kaddr, KM_USER0); + + mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); +} + int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) { @@ -511,6 +576,27 @@ int kvm_set_msr_common(struct kvm_vcpu * case MSR_IA32_MISC_ENABLE: vcpu->arch.ia32_misc_enable_msr = data; break; + case MSR_KVM_WALL_CLOCK: + vcpu->kvm->arch.wall_clock = data; + kvm_write_wall_clock(vcpu->kvm, data); + break; + case MSR_KVM_SYSTEM_TIME: { + vcpu->arch.time = data & PAGE_MASK; + vcpu->arch.time_offset = data & ~PAGE_MASK; + + vcpu->arch.hv_clock.tsc_to_system_mul = + clocksource_khz2mult(tsc_khz, 22); + vcpu->arch.hv_clock.tsc_shift = 22; + + down_read(¤t->mm->mmap_sem); + vcpu->arch.time_page = + gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); + up_read(¤t->mm->mmap_sem); + + if (is_error_page(vcpu->arch.time_page)) + vcpu->arch.time_page = NULL; + break; + } default: pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); return 1; @@ -569,6 +655,12 @@ int kvm_get_msr_common(struct kvm_vcpu * case MSR_EFER: data = vcpu->arch.shadow_efer; break; + case MSR_KVM_WALL_CLOCK: + data = vcpu->kvm->arch.wall_clock; + break; + case MSR_KVM_SYSTEM_TIME: + data = vcpu->arch.time; + break; default: pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); return 1; @@ -696,6 +788,7 @@ int kvm_dev_ioctl_check_extension(long e case KVM_CAP_USER_MEMORY: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_CLOCKSOURCE: r = 1; break; case KVM_CAP_VAPIC: @@ -771,6 +864,7 @@ out: void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvm_x86_ops->vcpu_load(vcpu, cpu); + kvm_write_guest_time(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index da61255..0c429c8 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -261,6 +261,11 @@ #define VCPU_MP_STATE_HALTED /* emulate context */ struct x86_emulate_ctxt emulate_ctxt; + + gpa_t time; + struct kvm_vcpu_time_info hv_clock; + unsigned int time_offset; + struct page *time_page; }; struct kvm_mem_alias { @@ -287,6 +292,8 @@ struct kvm_arch{ int round_robin_prev_vcpu; unsigned int tss_addr; struct page *apic_access_page; + + gpa_t wall_clock; }; struct kvm_vm_stat { diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h index c6f3fd8..481bf18 100644 --- a/include/asm-x86/kvm_para.h +++ b/include/asm-x86/kvm_para.h @@ -10,10 +10,35 @@ #define KVM_CPUID_SIGNATURE 0x40000000 * paravirtualization, the appropriate feature bit should be checked. */ #define KVM_CPUID_FEATURES 0x40000001 +#define KVM_FEATURE_CLOCKSOURCE 0 + +#define MSR_KVM_WALL_CLOCK 0x11 +#define MSR_KVM_SYSTEM_TIME 0x12 #ifdef __KERNEL__ #include <asm/processor.h> +/* xen binary-compatible interface. See xen headers for details */ +struct kvm_vcpu_time_info { + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; + uint64_t system_time; + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad[3]; +} __attribute__((__packed__)); /* 32 bytes */ + +struct kvm_wall_clock { + uint32_t wc_version; + uint32_t wc_sec; + uint32_t wc_nsec; +}; + + +extern void kvmclock_init(void); + + /* This instruction is vmcall. On non-VT architectures, it will generate a * trap that we will then rewrite to the appropriate instruction. */ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index c1ec04f..94540b3 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -233,6 +233,7 @@ #define KVM_CAP_USER_MEMORY 3 #define KVM_CAP_SET_TSS_ADDR 4 #define KVM_CAP_VAPIC 6 #define KVM_CAP_EXT_CPUID 7 +#define KVM_CAP_CLOCKSOURCE 8 /* * ioctls for VM fds -- 1.4.2 ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] [PATCH] kvmclock implementation, the guest part. 2008-02-15 19:52 ` [PATCH 1/2] [PATCH] kvmclock - the host part Glauber de Oliveira Costa @ 2008-02-15 19:52 ` Glauber de Oliveira Costa 0 siblings, 0 replies; 4+ messages in thread From: Glauber de Oliveira Costa @ 2008-02-15 19:52 UTC (permalink / raw) To: kvm-devel; +Cc: chrisw, avi, Glauber de Oliveira Costa This is the guest part of kvm clock implementation It does not do tsc-only timing, as tsc can have deltas between cpus, and it did not seem worthy to me to keep adjusting them. We do use it, however, for fine-grained adjustment. Other than that, time comes from the host. Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> --- arch/x86/Kconfig | 10 +++ arch/x86/kernel/Makefile | 1 arch/x86/kernel/kvmclock.c | 161 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup_32.c | 5 + arch/x86/kernel/setup_64.c | 5 + 5 files changed, 182 insertions(+), 0 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3be2305..cc2bc37 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -372,6 +372,16 @@ config VMI at the moment), by linking the kernel to a GPL-ed ROM module provided by the hypervisor. +config KVM_CLOCK + bool "KVM paravirtualized clock" + select PARAVIRT + help + Turning on this option will allow you to run a paravirtualized clock + when running over the KVM hypervisor. Instead of relying on a PIT + (or probably other) emulation by the underlying device model, the host + provides the guest with timing infrastructure such as time of day, and + system time + source "arch/x86/lguest/Kconfig" config PARAVIRT diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 76ec0f8..5b91b82 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o +obj-$(CONFIG_KVM_CLOCK) += kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o ifdef CONFIG_INPUT_PCSPKR diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c new file mode 100644 index 0000000..b8da3bf --- /dev/null +++ b/arch/x86/kernel/kvmclock.c @@ -0,0 +1,161 @@ +/* KVM paravirtual clock driver. A clocksource implementation + Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include <linux/clocksource.h> +#include <linux/kvm_para.h> +#include <asm/arch_hooks.h> +#include <asm/msr.h> +#include <xen/interface/xen.h> + +#define KVM_SCALE 22 + +static int kvmclock = 1; + +static int parse_no_kvmclock(char *arg) +{ + kvmclock = 0; + return 0; +} +early_param("no-kvmclock", parse_no_kvmclock); + +struct shared_info shared_info __attribute__((__aligned__(PAGE_SIZE))); + +/* The hypervisor will put information about time periodically here */ +static struct kvm_vcpu_time_info hv_clock[NR_CPUS]; +#define get_clock(cpu, field) hv_clock[cpu].field + +static inline u64 kvm_get_delta(u64 last_tsc) +{ + int cpu = smp_processor_id(); + u64 delta = native_read_tsc() - last_tsc; + return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; +} + +static struct kvm_wall_clock wall_clock; +static cycle_t kvm_clock_read(void); +/* + * The wallclock is the time of day when we booted. Since then, some time may + * have elapsed since the hypervisor wrote the data. So we try to account for + * that with system time + */ +unsigned long kvm_get_wallclock(void) +{ + u32 wc_sec, wc_nsec; + u64 delta; + struct timespec ts; + int version, nsec; + int low, high; + + low = (int)__pa(&wall_clock); + high = ((u64)__pa(&wall_clock) >> 32); + + delta = kvm_clock_read(); + + native_write_msr(MSR_KVM_WALL_CLOCK, low, high); + do { + version = wall_clock.wc_version; + rmb(); + wc_sec = wall_clock.wc_sec; + wc_nsec = wall_clock.wc_nsec; + rmb(); + } while ((wall_clock.wc_version != version) || (version & 1)); + + delta = kvm_clock_read() - delta; + delta += wc_nsec; + nsec = do_div(delta, NSEC_PER_SEC); + set_normalized_timespec(&ts, wc_sec + delta, nsec); + /* + * Of all mechanisms of time adjustment I've tested, this one + * was the champion! + */ + return ts.tv_sec + 1; +} + +int kvm_set_wallclock(unsigned long now) +{ + return 0; +} + +/* + * This is our read_clock function. The host puts an tsc timestamp each time + * it updates a new time. Without the tsc adjustment, we can have a situation + * in which a vcpu starts to run earlier (smaller system_time), but probes + * time later (compared to another vcpu), leading to backwards time + */ +static cycle_t kvm_clock_read(void) +{ + u64 last_tsc, now; + int cpu; + + preempt_disable(); + cpu = smp_processor_id(); + + last_tsc = get_clock(cpu, tsc_timestamp); + now = get_clock(cpu, system_time); + + now += kvm_get_delta(last_tsc); + preempt_enable(); + + return now; +} +static struct clocksource kvm_clock = { + .name = "kvm-clock", + .read = kvm_clock_read, + .rating = 400, + .mask = CLOCKSOURCE_MASK(64), + .mult = 1 << KVM_SCALE, + .shift = KVM_SCALE, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static int kvm_register_clock(void) +{ + int cpu = smp_processor_id(); + int low, high; + low = (int)__pa(&hv_clock[cpu]); + high = ((u64)__pa(&hv_clock[cpu]) >> 32); + + return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); +} + +static void kvm_setup_secondary_clock(void) +{ + /* + * Now that the first cpu already had this clocksource initialized, + * we shouldn't fail. + */ + WARN_ON(kvm_register_clock()); + /* ok, done with our trickery, call native */ + setup_secondary_APIC_clock(); +} + +void __init kvmclock_init(void) +{ + if (!kvm_para_available()) + return; + + if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { + if (kvm_register_clock()) + return; + pv_time_ops.get_wallclock = kvm_get_wallclock; + pv_time_ops.set_wallclock = kvm_set_wallclock; + pv_time_ops.sched_clock = kvm_clock_read; + pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; + clocksource_register(&kvm_clock); + } +} diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 691ab4c..988da57 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -46,6 +46,7 @@ #include <linux/dmi.h> #include <linux/pfn.h> #include <linux/pci.h> #include <linux/init_ohci1394_dma.h> +#include <linux/kvm_para.h> #include <video/edid.h> @@ -772,6 +773,10 @@ #endif if (mtrr_trim_uncached_memory(max_pfn)) max_low_pfn = setup_memory(); +#ifdef CONFIG_KVM_CLOCK + kvmclock_init(); +#endif + #ifdef CONFIG_VMI /* * Must be after max_low_pfn is determined, and before kernel diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index c0d8208..cdf9578 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -41,6 +41,7 @@ #include <linux/dma-mapping.h> #include <linux/ctype.h> #include <linux/uaccess.h> #include <linux/init_ohci1394_dma.h> +#include <linux/kvm_para.h> #include <asm/mtrr.h> #include <asm/uaccess.h> @@ -349,6 +350,10 @@ #endif io_delay_init(); +#ifdef CONFIG_KVM_CLOCK + kvmclock_init(); +#endif + #ifdef CONFIG_SMP /* setup to use the early static init tables during kernel startup */ x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; -- 1.4.2 ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 0/2 -v(many)] kvmclock 2008-02-15 19:52 [PATCH 0/2 -v(many)] kvmclock Glauber de Oliveira Costa 2008-02-15 19:52 ` [PATCH 1/2] [PATCH] kvmclock - the host part Glauber de Oliveira Costa @ 2008-02-17 9:48 ` Avi Kivity 1 sibling, 0 replies; 4+ messages in thread From: Avi Kivity @ 2008-02-17 9:48 UTC (permalink / raw) To: Glauber de Oliveira Costa; +Cc: kvm-devel, chrisw Glauber de Oliveira Costa wrote: > I think this version addresses avi's last comments. > I'm not resending userspace since it is unchanged > Applied, thanks. Added structure packing for kvm_wall_clock. -- Any sufficiently difficult bug is indistinguishable from a feature. ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2008-02-17 9:48 UTC | newest] Thread overview: 4+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-02-15 19:52 [PATCH 0/2 -v(many)] kvmclock Glauber de Oliveira Costa 2008-02-15 19:52 ` [PATCH 1/2] [PATCH] kvmclock - the host part Glauber de Oliveira Costa 2008-02-15 19:52 ` [PATCH 2/2] [PATCH] kvmclock implementation, the guest part Glauber de Oliveira Costa 2008-02-17 9:48 ` [PATCH 0/2 -v(many)] kvmclock Avi Kivity
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox