From: David Ahern <dsahern@gmail.com>
To: Ingo Molnar <mingo@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Gleb Natapov <gleb@redhat.com>
Cc: LKML <linux-kernel@vger.kernel.org>, KVM <kvm@vger.kernel.org>
Subject: RFC: paravirtualizing perf_clock
Date: Sun, 27 Oct 2013 19:27:27 -0600 [thread overview]
Message-ID: <526DBD7F.1010807@gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 837 bytes --]
Often when debugging performance problems in a virtualized environment
you need to correlate what is happening in the guest with what is
happening in the host. To correlate events you need a common time basis
(or the ability to directly correlate the two).
The attached patch paravirtualizes perf_clock, pulling the timestamps in
VMs from the host using an MSR read if the option is available (exposed
via KVM feature flag). I realize this is not the correct end code but it
illustrates what I would like to see -- host and guests using the same
perf_clock so timestamps directly correlate.
Any suggestions on how to do this and without impacting performance. I
noticed the MSR path seems to take about twice as long as the current
implementation (which I believe results in rdtsc in the VM for x86 with
stable TSC).
David
[-- Attachment #2: 0001-perf-kvm-x86-Paravirtualize-perf_clock-for-x86_64.patch --]
[-- Type: text/plain, Size: 4239 bytes --]
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 94dc8ca434e0..5a023ddf085e 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -24,6 +24,7 @@
#define KVM_FEATURE_STEAL_TIME 5
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
+#define KVM_FEATURE_PV_PERF_CLOCK 8
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -40,6 +41,7 @@
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
+#define MSR_KVM_PV_PERF_CLOCK 0x4b564d05
struct kvm_steal_time {
__u64 steal;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d8449158cf9..fb7824a64823 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -25,6 +25,7 @@
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/device.h>
+#include <linux/kvm_para.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -34,6 +35,7 @@
#include <asm/timer.h>
#include <asm/desc.h>
#include <asm/ldt.h>
+#include <asm/kvm_para.h>
#include "perf_event.h"
@@ -52,6 +54,38 @@ u64 __read_mostly hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+#ifdef CONFIG_PARAVIRT
+
+static int have_pv_perf_clock;
+
+static void __init perf_clock_init(void)
+{
+ if (kvm_para_available() &&
+ kvm_para_has_feature(KVM_FEATURE_PV_PERF_CLOCK)) {
+ have_pv_perf_clock = 1;
+ }
+}
+
+u64 perf_clock(void)
+{
+ if (have_pv_perf_clock)
+ return native_read_msr(MSR_KVM_PV_PERF_CLOCK);
+
+ /* otherwise return local_clock */
+ return local_clock();
+}
+
+#else
+u64 perf_clock(void)
+{
+ return local_clock();
+}
+
+static inline void __init perf_clock_init(void)
+{
+}
+#endif
/*
* Propagate event elapsed time into the generic event.
* Can only be executed on the CPU where the event is active.
@@ -1496,6 +1530,8 @@ static int __init init_hw_perf_events(void)
struct x86_pmu_quirk *quirk;
int err;
+ perf_clock_init();
+
pr_info("Performance Events: ");
switch (boot_cpu_data.x86_vendor) {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b110fe6c03d4..5b258a18f9c0 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -414,7 +414,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_ASYNC_PF) |
(1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
- (1 << KVM_FEATURE_PV_UNHALT);
+ (1 << KVM_FEATURE_PV_UNHALT) |
+ (1 << KVM_FEATURE_PV_PERF_CLOCK);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5ca72a5cdb6..61ec1f1c7d38 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2418,6 +2418,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_KVM_PV_EOI_EN:
data = vcpu->arch.pv_eoi.msr_val;
break;
+ case MSR_KVM_PV_PERF_CLOCK:
+ data = perf_clock();
+ break;
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MCG_CAP:
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c8ba627c1d60..c8a51954ea9e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -865,4 +865,7 @@ _name##_show(struct device *dev, \
\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
+#if defined(CONFIG_X86_64)
+u64 perf_clock(void);
+#endif
#endif /* _LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d49a9d29334c..b073975af05a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -290,10 +290,18 @@ extern __weak const char *perf_pmu_name(void)
return "pmu";
}
+#if defined(CONFIG_X86_64)
+__weak u64 perf_clock(void)
+{
+ return local_clock();
+}
+EXPORT_SYMBOL(perf_clock);
+#else
static inline u64 perf_clock(void)
{
return local_clock();
}
+#endif
static inline struct perf_cpu_context *
__get_cpu_context(struct perf_event_context *ctx)
--
1.7.12.4 (Apple Git-37)
next reply other threads:[~2013-10-28 1:27 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-10-28 1:27 David Ahern [this message]
2013-10-28 13:00 ` RFC: paravirtualizing perf_clock Gleb Natapov
2013-10-28 13:15 ` Peter Zijlstra
2013-10-29 2:58 ` David Ahern
2013-10-29 13:23 ` Peter Zijlstra
2013-10-30 5:59 ` Masami Hiramatsu
2013-10-30 14:03 ` David Ahern
2013-10-31 8:09 ` Masami Hiramatsu
2013-10-31 16:45 ` David Ahern
2013-10-30 14:20 ` Gleb Natapov
2013-10-30 14:31 ` David Ahern
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=526DBD7F.1010807@gmail.com \
--to=dsahern@gmail.com \
--cc=gleb@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.