* [patch 0/5] report stolen time via pvclock
@ 2009-10-16 4:08 Marcelo Tosatti
2009-10-16 4:08 ` [patch 1/5] KVM: x86: report stolen time Marcelo Tosatti
` (4 more replies)
0 siblings, 5 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2009-10-16 4:08 UTC (permalink / raw)
To: kvm; +Cc: riel
Stolen time can be useful diagnostic information when available to
guests. Xen has provided it for some time, so recent vmstat versions
already display it.
Reporting it also increases the accuracy of the guest's sched_clock.
^ permalink raw reply [flat|nested] 6+ messages in thread
* [patch 1/5] KVM: x86: report stolen time
2009-10-16 4:08 [patch 0/5] report stolen time via pvclock Marcelo Tosatti
@ 2009-10-16 4:08 ` Marcelo Tosatti
2009-10-16 4:08 ` [patch 2/5] pvclock: move code to pvclock.h Marcelo Tosatti
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2009-10-16 4:08 UTC (permalink / raw)
To: kvm; +Cc: riel, Marcelo Tosatti
[-- Attachment #1: kvm-stolen-time-host --]
[-- Type: text/plain, Size: 4967 bytes --]
Report stolen time (run_delay field from schedstat) to guests via
pvclock.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/include/asm/kvm_para.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm_para.h
+++ kvm/arch/x86/include/asm/kvm_para.h
@@ -15,9 +15,11 @@
#define KVM_FEATURE_CLOCKSOURCE 0
#define KVM_FEATURE_NOP_IO_DELAY 1
#define KVM_FEATURE_MMU_OP 2
+#define KVM_FEATURE_RUNTIME_INFO 3
#define MSR_KVM_WALL_CLOCK 0x11
#define MSR_KVM_SYSTEM_TIME 0x12
+#define MSR_KVM_RUN_TIME 0x13
#define KVM_MAX_MMU_OP_BATCH 32
@@ -50,6 +52,11 @@ struct kvm_mmu_op_release_pt {
#ifdef __KERNEL__
#include <asm/processor.h>
+struct kvm_vcpu_runtime_info {
+ u64 stolen_time; /* time spent starving */
+ u64 reserved[3]; /* for future use */
+};
+
extern void kvmclock_init(void);
Index: kvm/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm_host.h
+++ kvm/arch/x86/include/asm/kvm_host.h
@@ -354,6 +354,10 @@ struct kvm_vcpu_arch {
unsigned int time_offset;
struct page *time_page;
+ bool stolen_time_enable;
+ struct kvm_vcpu_runtime_info stolen_time;
+ unsigned int stolen_time_offset;
+
bool singlestep; /* guest is single stepped by KVM */
bool nmi_pending;
bool nmi_injected;
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -507,9 +507,9 @@ static inline u32 bit(int bitno)
* kvm-specific. Those are put in the beginning of the list.
*/
-#define KVM_SAVE_MSRS_BEGIN 2
+#define KVM_SAVE_MSRS_BEGIN 3
static u32 msrs_to_save[] = {
- MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+ MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_KVM_RUN_TIME,
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_K6_STAR,
#ifdef CONFIG_X86_64
@@ -679,6 +679,7 @@ static void kvm_write_guest_time(struct
struct kvm_vcpu_arch *vcpu = &v->arch;
void *shared_kaddr;
unsigned long this_tsc_khz;
+ struct task_struct *task = current;
if ((!vcpu->time_page))
return;
@@ -700,6 +701,9 @@ static void kvm_write_guest_time(struct
vcpu->hv_clock.system_time = ts.tv_nsec +
(NSEC_PER_SEC * (u64)ts.tv_sec);
+
+ vcpu->stolen_time.stolen_time = task->sched_info.run_delay;
+
/*
* The interface expects us to write an even number signaling that the
* update is finished. Since the guest won't see the intermediate
@@ -712,6 +716,10 @@ static void kvm_write_guest_time(struct
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
sizeof(vcpu->hv_clock));
+ if (vcpu->stolen_time_enable)
+ memcpy(shared_kaddr + vcpu->stolen_time_offset,
+ &vcpu->stolen_time, sizeof(vcpu->stolen_time));
+
kunmap_atomic(shared_kaddr, KM_USER0);
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
@@ -937,6 +945,35 @@ int kvm_set_msr_common(struct kvm_vcpu *
kvm_request_guest_time_update(vcpu);
break;
}
+ case MSR_KVM_RUN_TIME: {
+ struct page *page;
+ unsigned int stolen_time_offset;
+
+ if (!vcpu->arch.time_page)
+ return 1;
+
+ /* we verify if the enable bit is set... */
+ if (!(data & 1))
+ break;
+
+ /* ...but clean it before doing the actual write */
+ stolen_time_offset = data & ~(PAGE_MASK | 1);
+
+ /* that it matches the hvclock page */
+ page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
+ if (is_error_page(page)) {
+ kvm_release_page_clean(page);
+ return 1;
+ }
+ if (page != vcpu->arch.time_page) {
+ kvm_release_page_clean(page);
+ return 1;
+ }
+ kvm_release_page_clean(page);
+ vcpu->arch.stolen_time_offset = stolen_time_offset;
+ vcpu->arch.stolen_time_enable = 1;
+ break;
+ }
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
@@ -1246,6 +1283,7 @@ int kvm_dev_ioctl_check_extension(long e
case KVM_CAP_PIT2:
case KVM_CAP_PIT_STATE2:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
+ case KVM_CAP_PVCLOCK_RUNTIME:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
Index: kvm/arch/x86/kvm/Kconfig
===================================================================
--- kvm.orig/arch/x86/kvm/Kconfig
+++ kvm/arch/x86/kvm/Kconfig
@@ -28,6 +28,7 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
+ select SCHEDSTATS
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
Index: kvm/include/linux/kvm.h
===================================================================
--- kvm.orig/include/linux/kvm.h
+++ kvm/include/linux/kvm.h
@@ -436,6 +436,7 @@ struct kvm_ioeventfd {
#endif
#define KVM_CAP_IOEVENTFD 36
#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
+#define KVM_CAP_PVCLOCK_RUNTIME 38
#ifdef KVM_CAP_IRQ_ROUTING
^ permalink raw reply [flat|nested] 6+ messages in thread
* [patch 2/5] pvclock: move code to pvclock.h
2009-10-16 4:08 [patch 0/5] report stolen time via pvclock Marcelo Tosatti
2009-10-16 4:08 ` [patch 1/5] KVM: x86: report stolen time Marcelo Tosatti
@ 2009-10-16 4:08 ` Marcelo Tosatti
2009-10-16 4:08 ` [patch 3/5] kvmclock: stolen time aware sched_clock Marcelo Tosatti
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2009-10-16 4:08 UTC (permalink / raw)
To: kvm; +Cc: riel, Marcelo Tosatti
[-- Attachment #1: pvclock-export --]
[-- Type: text/plain, Size: 2739 bytes --]
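Export struct pvclock_shadow_time, pvclock_get_nsec_offset() and
pvclock_get_time_values() via pvclock.h so they can be used by kvmclock.c.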
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/include/asm/pvclock.h
===================================================================
--- kvm.orig/arch/x86/include/asm/pvclock.h
+++ kvm/arch/x86/include/asm/pvclock.h
@@ -4,6 +4,20 @@
#include <linux/clocksource.h>
#include <asm/pvclock-abi.h>
+/*
+ * These are periodically updated
+ * xen: magic shared_info page
+ * kvm: gpa registered via msr
+ * and then copied here.
+ */
+struct pvclock_shadow_time {
+ u64 tsc_timestamp; /* TSC at last update of time vals. */
+ u64 system_timestamp; /* Time, in nanosecs, since boot. */
+ u32 tsc_to_nsec_mul;
+ int tsc_shift;
+ u32 version;
+};
+
/* some helper functions for xen and kvm pv clock sources */
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
@@ -11,4 +25,8 @@ void pvclock_read_wallclock(struct pvclo
struct pvclock_vcpu_time_info *vcpu,
struct timespec *ts);
+u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow);
+unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+ struct pvclock_vcpu_time_info *src);
+
#endif /* _ASM_X86_PVCLOCK_H */
Index: kvm/arch/x86/kernel/pvclock.c
===================================================================
--- kvm.orig/arch/x86/kernel/pvclock.c
+++ kvm/arch/x86/kernel/pvclock.c
@@ -20,20 +20,6 @@
#include <asm/pvclock.h>
/*
- * These are perodically updated
- * xen: magic shared_info page
- * kvm: gpa registered via msr
- * and then copied here.
- */
-struct pvclock_shadow_time {
- u64 tsc_timestamp; /* TSC at last update of time vals. */
- u64 system_timestamp; /* Time, in nanosecs, since boot. */
- u32 tsc_to_nsec_mul;
- int tsc_shift;
- u32 version;
-};
-
-/*
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
* yielding a 64-bit result.
*/
@@ -71,7 +57,7 @@ static inline u64 scale_delta(u64 delta,
return product;
}
-static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
{
u64 delta = native_read_tsc() - shadow->tsc_timestamp;
return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
@@ -81,8 +67,8 @@ static u64 pvclock_get_nsec_offset(struc
* Reads a consistent set of time-base values from hypervisor,
* into a shadow data area.
*/
-static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
- struct pvclock_vcpu_time_info *src)
+unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+ struct pvclock_vcpu_time_info *src)
{
do {
dst->version = src->version;
^ permalink raw reply [flat|nested] 6+ messages in thread
* [patch 3/5] kvmclock: stolen time aware sched_clock
2009-10-16 4:08 [patch 0/5] report stolen time via pvclock Marcelo Tosatti
2009-10-16 4:08 ` [patch 1/5] KVM: x86: report stolen time Marcelo Tosatti
2009-10-16 4:08 ` [patch 2/5] pvclock: move code to pvclock.h Marcelo Tosatti
@ 2009-10-16 4:08 ` Marcelo Tosatti
2009-10-16 4:08 ` [patch 4/5] kvmclock: account stolen time Marcelo Tosatti
2009-10-16 4:08 ` [patch 5/5] qemu-kvm-x86: report pvclock runtime capability Marcelo Tosatti
4 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2009-10-16 4:08 UTC (permalink / raw)
To: kvm; +Cc: riel, Marcelo Tosatti
[-- Attachment #1: kvm-stolen-time --]
[-- Type: text/plain, Size: 3531 bytes --]
sched_clock() should measure only the time the vcpu actually ran, so
subtract stolen time from the real-time pvclock value.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/kernel/kvmclock.c
===================================================================
--- kvm.orig/arch/x86/kernel/kvmclock.c
+++ kvm/arch/x86/kernel/kvmclock.c
@@ -38,7 +38,16 @@ static int parse_no_kvmclock(char *arg)
early_param("no-kvmclock", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+struct time_info {
+ struct pvclock_vcpu_time_info hv_clock;
+ struct kvm_vcpu_runtime_info run_info;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct time_info, time_info);
+
+#define hv_clock time_info.hv_clock
+#define run_info time_info.run_info
+
static struct pvclock_wall_clock wall_clock;
/*
@@ -84,6 +93,40 @@ static cycle_t kvm_clock_get_cycles(stru
return kvm_clock_read();
}
+cycle_t kvm_runtime_read(struct pvclock_vcpu_time_info *src,
+ struct kvm_vcpu_runtime_info *rinfo)
+{
+ struct pvclock_shadow_time shadow;
+ unsigned version;
+ cycle_t ret, offset;
+ unsigned long long stolen;
+
+ do {
+ version = pvclock_get_time_values(&shadow, src);
+ barrier();
+ offset = pvclock_get_nsec_offset(&shadow);
+ stolen = rinfo->stolen_time;
+ ret = shadow.system_timestamp + offset - stolen;
+ barrier();
+ } while (version != src->version);
+
+ return ret;
+}
+
+static cycle_t kvm_clock_read_unstolen(void)
+{
+ struct pvclock_vcpu_time_info *src;
+ struct kvm_vcpu_runtime_info *rinfo;
+ cycle_t ret;
+
+ src = &get_cpu_var(hv_clock);
+ rinfo = &get_cpu_var(run_info);
+ ret = kvm_runtime_read(src, rinfo);
+ put_cpu_var(run_info);
+ put_cpu_var(hv_clock);
+ return ret;
+}
+
/*
* If we don't do that, there is the possibility that the guest
* will calibrate under heavy load - thus, getting a lower lpj -
@@ -133,14 +176,30 @@ static int kvm_register_clock(char *txt)
return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
}
+static int kvm_register_run_info(char *txt)
+{
+ int cpu = smp_processor_id();
+ int low, high;
+
+ low = (int) __pa(&per_cpu(run_info, cpu)) | 1;
+ high = ((u64)__pa(&per_cpu(run_info, cpu)) >> 32);
+ printk(KERN_INFO "kvm-runtime-info: cpu %d, msr %x:%x, %s\n",
+ cpu, high, low, txt);
+ return native_write_msr_safe(MSR_KVM_RUN_TIME, low, high);
+}
+
#ifdef CONFIG_X86_LOCAL_APIC
static void __cpuinit kvm_setup_secondary_clock(void)
{
+ char *txt = "secondary cpu clock";
+
/*
* Now that the first cpu already had this clocksource initialized,
* we shouldn't fail.
*/
- WARN_ON(kvm_register_clock("secondary cpu clock"));
+ WARN_ON(kvm_register_clock(txt));
+ if (kvm_para_has_feature(KVM_FEATURE_RUNTIME_INFO))
+ kvm_register_run_info(txt);
/* ok, done with our trickery, call native */
setup_secondary_APIC_clock();
}
@@ -149,7 +208,11 @@ static void __cpuinit kvm_setup_secondar
#ifdef CONFIG_SMP
static void __init kvm_smp_prepare_boot_cpu(void)
{
- WARN_ON(kvm_register_clock("primary cpu clock"));
+ char *txt = "primary cpu clock";
+
+ WARN_ON(kvm_register_clock(txt));
+ if (kvm_para_has_feature(KVM_FEATURE_RUNTIME_INFO))
+ kvm_register_run_info(txt);
native_smp_prepare_boot_cpu();
}
#endif
@@ -204,4 +267,6 @@ void __init kvmclock_init(void)
pv_info.paravirt_enabled = 1;
pv_info.name = "KVM";
}
+ if (kvm_para_has_feature(KVM_FEATURE_RUNTIME_INFO))
+ pv_time_ops.sched_clock = kvm_clock_read_unstolen;
}
^ permalink raw reply [flat|nested] 6+ messages in thread
* [patch 4/5] kvmclock: account stolen time
2009-10-16 4:08 [patch 0/5] report stolen time via pvclock Marcelo Tosatti
` (2 preceding siblings ...)
2009-10-16 4:08 ` [patch 3/5] kvmclock: stolen time aware sched_clock Marcelo Tosatti
@ 2009-10-16 4:08 ` Marcelo Tosatti
2009-10-16 4:08 ` [patch 5/5] qemu-kvm-x86: report pvclock runtime capability Marcelo Tosatti
4 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2009-10-16 4:08 UTC (permalink / raw)
To: kvm; +Cc: riel, Marcelo Tosatti
[-- Attachment #1: kvm-accounts-stolen --]
[-- Type: text/plain, Size: 2425 bytes --]
Account stolen time in the guest on every process tick, which makes
stolen time information available in procfs/vmstat.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/kernel/kvmclock.c
===================================================================
--- kvm.orig/arch/x86/kernel/kvmclock.c
+++ kvm/arch/x86/kernel/kvmclock.c
@@ -22,11 +22,14 @@
#include <asm/msr.h>
#include <asm/apic.h>
#include <linux/percpu.h>
+#include <linux/kernel_stat.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
+#include <asm/cputime.h>
#define KVM_SCALE 22
+#define NS_PER_TICK (1000000000LL / HZ)
static int kvmclock = 1;
@@ -50,6 +53,29 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(str
static struct pvclock_wall_clock wall_clock;
+static DEFINE_PER_CPU(u64, total_stolen);
+static DEFINE_PER_CPU(u64, residual_stolen);
+
+void kvm_account_steal_time(void)
+{
+ struct kvm_vcpu_runtime_info *rinfo;
+ cputime_t ticks;
+ u64 stolen_time, stolen_delta;
+
+ rinfo = &get_cpu_var(run_info);
+ stolen_time = rinfo->stolen_time;
+ stolen_delta = stolen_time - __get_cpu_var(total_stolen);
+
+ __get_cpu_var(total_stolen) = stolen_time;
+ put_cpu_var(rinfo);
+
+ stolen_delta += __get_cpu_var(residual_stolen);
+
+ ticks = iter_div_u64_rem(stolen_delta, NS_PER_TICK, &stolen_delta);
+ __get_cpu_var(residual_stolen) = stolen_delta;
+ account_steal_ticks(ticks);
+}
+
/*
* The wallclock is the time of day when we booted. Since then, some time may
* have elapsed since the hypervisor wrote the data. So we try to account for
Index: kvm/kernel/sched.c
===================================================================
--- kvm.orig/kernel/sched.c
+++ kvm/kernel/sched.c
@@ -74,6 +74,9 @@
#include <asm/tlb.h>
#include <asm/irq_regs.h>
+#ifdef CONFIG_KVM_CLOCK
+#include <asm/kvm_para.h>
+#endif
#include "sched_cpupri.h"
@@ -5102,6 +5105,9 @@ void account_process_tick(struct task_st
one_jiffy_scaled);
else
account_idle_time(cputime_one_jiffy);
+#ifdef CONFIG_KVM_CLOCK
+ kvm_account_steal_time();
+#endif
}
/*
Index: kvm/arch/x86/include/asm/kvm_para.h
===================================================================
--- kvm.orig/arch/x86/include/asm/kvm_para.h
+++ kvm/arch/x86/include/asm/kvm_para.h
@@ -58,6 +58,7 @@ struct kvm_vcpu_runtime_info {
};
extern void kvmclock_init(void);
+extern void kvm_account_steal_time(void);
/* This instruction is vmcall. On non-VT architectures, it will generate a
^ permalink raw reply [flat|nested] 6+ messages in thread
* [patch 5/5] qemu-kvm-x86: report pvclock runtime capability
2009-10-16 4:08 [patch 0/5] report stolen time via pvclock Marcelo Tosatti
` (3 preceding siblings ...)
2009-10-16 4:08 ` [patch 4/5] kvmclock: account stolen time Marcelo Tosatti
@ 2009-10-16 4:08 ` Marcelo Tosatti
4 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2009-10-16 4:08 UTC (permalink / raw)
To: kvm; +Cc: riel, Marcelo Tosatti
[-- Attachment #1: pvclock-runtime-qemukvm --]
[-- Type: text/plain, Size: 420 bytes --]
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index fffcfd8..0b8a858 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -1223,6 +1223,9 @@ struct kvm_para_features {
#ifdef KVM_CAP_CR3_CACHE
{ KVM_CAP_CR3_CACHE, KVM_FEATURE_CR3_CACHE },
#endif
+#ifdef KVM_CAP_PVCLOCK_RUNTIME
+ { KVM_CAP_PVCLOCK_RUNTIME, KVM_FEATURE_RUNTIME_INFO },
+#endif
{ -1, -1 }
};
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2009-10-16 4:15 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-10-16 4:08 [patch 0/5] report stolen time via pvclock Marcelo Tosatti
2009-10-16 4:08 ` [patch 1/5] KVM: x86: report stolen time Marcelo Tosatti
2009-10-16 4:08 ` [patch 2/5] pvclock: move code to pvclock.h Marcelo Tosatti
2009-10-16 4:08 ` [patch 3/5] kvmclock: stolen time aware sched_clock Marcelo Tosatti
2009-10-16 4:08 ` [patch 4/5] kvmclock: account stolen time Marcelo Tosatti
2009-10-16 4:08 ` [patch 5/5] qemu-kvm-x86: report pvclock runtime capability Marcelo Tosatti
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).