From: Jiri Olsa <jolsa@redhat.com>
To: Ingo Molnar <mingo@kernel.org>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Andi Kleen <ak@linux.intel.com>
Cc: lkml <linux-kernel@vger.kernel.org>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>
Subject: [RFC,PATCH] VMWARE faults on accessing disabled counters
Date: Wed, 31 Aug 2016 14:03:58 +0200 [thread overview]
Message-ID: <20160831120358.GB9001@krava> (raw)
hi,
when booting under VMWARE we've got following dmesg lines:
[ 0.051567] perf_event_intel: CPUID marked event: 'cpu cycles' unavailable
[ 0.051567] perf_event_intel: CPUID marked event: 'instructions' unavailable
[ 0.051568] perf_event_intel: CPUID marked event: 'bus cycles' unavailable
[ 0.051568] perf_event_intel: CPUID marked event: 'cache references' unavailable
[ 0.051569] perf_event_intel: CPUID marked event: 'cache misses' unavailable
[ 0.051570] perf_event_intel: CPUID marked event: 'branch instructions' unavailable
[ 0.051570] perf_event_intel: CPUID marked event: 'branch misses' unavailable
that means all the architectural events are disabled by CPUID(0xa)
The kernel code zeroes the intel_perfmon_event_map entries to prevent
those events from being configured through the PERF_TYPE_HARDWARE pmu
type. However, they can still be configured via the
PERF_TYPE_RAW type.
We're getting a GP fault on VMWARE when reading the cycles PMC
configured through the PERF_TYPE_RAW interface:
#4 [ffff88007c603e10] do_general_protection at ffffffff8163da9e
#5 [ffff88007c603e40] general_protection at ffffffff8163d3a8
[exception RIP: native_read_pmc+6]
RIP: ffffffff81058d66 RSP: ffff88007c603ef0 RFLAGS: 00010083
RAX: ffffffff81957ee0 RBX: 0000000000000000 RCX: 0000000040000002
RDX: 000000000ff8f719 RSI: ffff88007c617fa8 RDI: 0000000040000002
RBP: ffff88007c603ef0 R8: 00007ffde5053150 R9: 0000000000000000
R10: 00007ffde5052530 R11: 00007fbb22aedc70 R12: ffffffff80000001
R13: ffff880079b74400 R14: ffff880079b74578 R15: 0000000000000010
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0000
#6 [ffff88007c603ef8] x86_perf_event_update at ffffffff81029e03
#7 [ffff88007c603f30] x86_pmu_read at ffffffff8102a079
#8 [ffff88007c603f40] __perf_event_read at ffffffff811590de
I couldn't find what real HW rdpmc does in this situation,
so I'm not sure if we actually want to prevent this.. the patch
below tries to catch this case.
thanks,
jirka
---
arch/x86/events/core.c | 8 ++++-
arch/x86/events/intel/core.c | 72 ++++++++++++++++++++++++++++++++------------
arch/x86/events/perf_event.h | 6 ++++
3 files changed, 65 insertions(+), 21 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 473519100b11..d836c5922b12 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -534,8 +534,14 @@ int x86_pmu_hw_config(struct perf_event *event)
if (!event->attr.exclude_kernel)
event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
- if (event->attr.type == PERF_TYPE_RAW)
+ if (event->attr.type == PERF_TYPE_RAW) {
+ u64 arch_config = event->attr.config & INTEL_ARCH_EVENT_MASK;
+
+ if (x86_pmu_event_disabled(arch_config))
+ return -ENOENT;
+
event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+ }
if (event->attr.sample_period && x86_pmu.limit_period) {
if (x86_pmu.limit_period(event, event->attr.sample_period) >
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9049d62f34ae..99a83529c7ff 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -23,16 +23,22 @@
/*
* Intel PerfMon, used on Core and later.
*/
-static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
-{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
- [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
- [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
+struct intel_perfmon_event {
+ u64 config;
+ bool disabled;
+ u64 replacement;
+};
+
+static struct intel_perfmon_event intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = { .config = 0x003c },
+ [PERF_COUNT_HW_INSTRUCTIONS] = { .config = 0x00c0 },
+ [PERF_COUNT_HW_CACHE_REFERENCES] = { .config = 0x4f2e },
+ [PERF_COUNT_HW_CACHE_MISSES] = { .config = 0x412e },
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { .config = 0x00c4 },
+ [PERF_COUNT_HW_BRANCH_MISSES] = { .config = 0x00c5 },
+ [PERF_COUNT_HW_BUS_CYCLES] = { .config = 0x013c },
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = { .config = 0x0300 }, /* pseudo-encoding */
};
static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -268,7 +274,31 @@ struct event_constraint intel_bdw_event_constraints[] = {
static u64 intel_pmu_event_map(int hw_event)
{
- return intel_perfmon_event_map[hw_event];
+ struct intel_perfmon_event *event = &intel_perfmon_event_map[hw_event];
+
+ if (event->disabled)
+ return 0; /* CPUID-disabled: report "no event", as the old zeroed entry did */
+ if (event->replacement)
+ return event->replacement;
+
+ return event->config;
+}
+
+static bool intel_pmu_event_disabled(int hw_event)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(intel_perfmon_event_map); i++) {
+ struct intel_perfmon_event *event = &intel_perfmon_event_map[i];
+
+ if (event->config != hw_event) /* hw_event is an event code, not a table index */
+ continue;
+
+ if (event->disabled)
+ return true;
+ }
+
+ return false;
}
/*
@@ -3165,6 +3195,7 @@ static __initconst const struct x86_pmu core_pmu = {
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
.event_map = intel_pmu_event_map,
+ .event_disabled = intel_pmu_event_disabled,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
.free_running_flags = PEBS_FREERUNNING_FLAGS,
@@ -3205,6 +3236,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
.event_map = intel_pmu_event_map,
+ .event_disabled = intel_pmu_event_disabled,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
.free_running_flags = PEBS_FREERUNNING_FLAGS,
@@ -3357,7 +3389,7 @@ static __init void intel_arch_events_quirk(void)
/* disable event that reported as not presend by cpuid */
for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
- intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
+ intel_perfmon_event_map[intel_arch_events_map[bit].id].disabled = true;
pr_warn("CPUID marked event: \'%s\' unavailable\n",
intel_arch_events_map[bit].name);
}
@@ -3375,7 +3407,7 @@ static __init void intel_nehalem_quirk(void)
* branch-misses, but it's still much better than the
* architectural event which is often completely bogus:
*/
- intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+ intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES].replacement = 0x7f89;
ebx.split.no_branch_misses_retired = 0;
x86_pmu.events_maskl = ebx.full;
pr_info("CPU erratum AAJ80 worked around\n");
@@ -3543,10 +3575,10 @@ __init int intel_pmu_init(void)
x86_pmu.cpu_events = nhm_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND].replacement =
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
intel_pmu_pebs_data_source_nhm();
@@ -3630,10 +3662,10 @@ __init int intel_pmu_init(void)
x86_pmu.cpu_events = nhm_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND].replacement =
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
intel_pmu_pebs_data_source_nhm();
@@ -3667,10 +3699,10 @@ __init int intel_pmu_init(void)
x86_pmu.cpu_events = snb_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND].replacement =
X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
pr_cont("SandyBridge events, ");
@@ -3704,7 +3736,7 @@ __init int intel_pmu_init(void)
x86_pmu.cpu_events = snb_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND].replacement =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
pr_cont("IvyBridge events, ");
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 01ddfeadaee6..69cca7dc8de4 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -514,6 +514,7 @@ struct x86_pmu {
int (*addr_offset)(int index, bool eventsel);
int (*rdpmc_index)(int index);
u64 (*event_map)(int);
+ bool (*event_disabled)(int);
int max_events;
int num_counters;
int num_counters_fixed;
@@ -715,6 +716,11 @@ static inline int x86_pmu_rdpmc_index(int index)
return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
}
+static inline bool x86_pmu_event_disabled(u64 config)
+{
+ return x86_pmu.event_disabled ? x86_pmu.event_disabled(config) : false;
+}
+
int x86_add_exclusive(unsigned int what);
void x86_del_exclusive(unsigned int what);
--
2.7.4
next reply other threads:[~2016-08-31 12:04 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-08-31 12:03 Jiri Olsa [this message]
2016-08-31 13:11 ` [RFC,PATCH] VMWARE faults on accessing disabled counters Peter Zijlstra
2016-08-31 13:19 ` Jiri Olsa
2016-08-31 13:41 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160831120358.GB9001@krava \
--to=jolsa@redhat.com \
--cc=a.p.zijlstra@chello.nl \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.