From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: mingo@elte.hu, linux-kernel@vger.kernel.org
Cc: paulus@samba.org, eranian@google.com, robert.richter@amd.com,
fweisbec@gmail.com, Masami Hiramatsu <mhiramat@redhat.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC][PATCH 10/11] perf, x86: use LBR for PEBS IP+1 fixup
Date: Wed, 03 Mar 2010 17:39:46 +0100 [thread overview]
Message-ID: <20100303164306.602529559@chello.nl> (raw)
In-Reply-To: 20100303163936.906011640@chello.nl
[-- Attachment #1: perf-pebs-lbr.patch --]
[-- Type: text/plain, Size: 6648 bytes --]
PEBS always reports IP+1, that is, the address of the instruction after
the one that got sampled. Cure this by using the LBR to reliably rewind
the instruction stream.
CC: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/x86/kernel/cpu/perf_event.c | 70 ++++++++++++-------------
arch/x86/kernel/cpu/perf_event_intel.c | 4 -
arch/x86/kernel/cpu/perf_event_intel_ds.c | 81 +++++++++++++++++++++++++++++-
3 files changed, 116 insertions(+), 39 deletions(-)
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -29,6 +29,41 @@
#include <asm/stacktrace.h>
#include <asm/nmi.h>
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+ unsigned long offset, addr = (unsigned long)from;
+ int type = in_nmi() ? KM_NMI : KM_IRQ0;
+ unsigned long size, len = 0;
+ struct page *page;
+ void *map;
+ int ret;
+
+ do {
+ ret = __get_user_pages_fast(addr, 1, 0, &page);
+ if (!ret)
+ break;
+
+ offset = addr & (PAGE_SIZE - 1);
+ size = min(PAGE_SIZE - offset, n - len);
+
+ map = kmap_atomic(page, type);
+ memcpy(to, map+offset, size);
+ kunmap_atomic(map, type);
+ put_page(page);
+
+ len += size;
+ to += size;
+ addr += size;
+
+ } while (len < n);
+
+ return len;
+}
+
static u64 perf_event_mask __read_mostly;
struct event_constraint {
@@ -1516,41 +1551,6 @@ perf_callchain_kernel(struct pt_regs *re
dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
}
-/*
- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
- */
-static unsigned long
-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
-{
- unsigned long offset, addr = (unsigned long)from;
- int type = in_nmi() ? KM_NMI : KM_IRQ0;
- unsigned long size, len = 0;
- struct page *page;
- void *map;
- int ret;
-
- do {
- ret = __get_user_pages_fast(addr, 1, 0, &page);
- if (!ret)
- break;
-
- offset = addr & (PAGE_SIZE - 1);
- size = min(PAGE_SIZE - offset, n - len);
-
- map = kmap_atomic(page, type);
- memcpy(to, map+offset, size);
- kunmap_atomic(map, type);
- put_page(page);
-
- len += size;
- to += size;
- addr += size;
-
- } while (len < n);
-
- return len;
-}
-
static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
unsigned long bytes;
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
@@ -547,7 +547,7 @@ static void intel_pmu_disable_event(stru
x86_pmu_disable_event(event);
if (unlikely(event->attr.precise))
- intel_pmu_pebs_disable(hwc);
+ intel_pmu_pebs_disable(event);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
intel_pmu_lbr_disable(event);
@@ -603,7 +603,7 @@ static void intel_pmu_enable_event(struc
}
if (unlikely(event->attr.precise))
- intel_pmu_pebs_enable(hwc);
+ intel_pmu_pebs_enable(event);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
intel_pmu_lbr_enable(event);
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_ds.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -331,26 +331,32 @@ intel_pebs_constraints(struct perf_event
return &emptyconstraint;
}
-static void intel_pmu_pebs_enable(struct hw_perf_event *hwc)
+static void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
u64 val = cpuc->pebs_enabled;
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
val |= 1ULL << hwc->idx;
wrmsrl(MSR_IA32_PEBS_ENABLE, val);
+
+ intel_pmu_lbr_enable(event);
}
-static void intel_pmu_pebs_disable(struct hw_perf_event *hwc)
+static void intel_pmu_pebs_disable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
u64 val = cpuc->pebs_enabled;
val &= ~(1ULL << hwc->idx);
wrmsrl(MSR_IA32_PEBS_ENABLE, val);
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+
+ intel_pmu_lbr_disable(event);
}
static void intel_pmu_pebs_enable_all(void)
@@ -415,6 +421,74 @@ do { \
#endif
+#include <asm/insn.h>
+
+#define MAX_INSN_SIZE 16
+
+static void intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
+{
+#if 0
+ /*
+ * Broken, makes the machine explode at times trying to
+ * dereference funny userspace addresses.
+ *
+ * Should we always fwd decode from @to, instead of trying
+ * to rewind as implemented?
+ */
+
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ unsigned long from = cpuc->lbr_entries[0].from;
+ unsigned long to = cpuc->lbr_entries[0].to;
+ unsigned long ip = regs->ip;
+ u8 buf[2*MAX_INSN_SIZE];
+ u8 *kaddr;
+ int i;
+
+ if (from && to) {
+ /*
+ * We sampled a branch insn, rewind using the LBR stack
+ */
+ if (ip == to) {
+ regs->ip = from;
+ return;
+ }
+ }
+
+ if (user_mode(regs)) {
+ int bytes = copy_from_user_nmi(buf,
+ (void __user *)(ip - MAX_INSN_SIZE),
+ 2*MAX_INSN_SIZE);
+
+ /*
+ * If we fail to copy the insn stream, give up
+ */
+ if (bytes != 2*MAX_INSN_SIZE)
+ return;
+
+ kaddr = buf;
+ } else
+ kaddr = (void *)(ip - MAX_INSN_SIZE);
+
+ /*
+ * Try to find the longest insn ending up at the given IP
+ */
+ for (i = MAX_INSN_SIZE; i > 0; i--) {
+ struct insn insn;
+
+ kernel_insn_init(&insn, kaddr + MAX_INSN_SIZE - i);
+ insn_get_length(&insn);
+ if (insn.length == i) {
+ regs->ip -= i;
+ return;
+ }
+ }
+
+ /*
+ * We failed to find a match for the previous insn.. give up
+ */
+#endif
+}
+
static int intel_pmu_save_and_restart(struct perf_event *event);
static void intel_pmu_disable_event(struct perf_event *event);
@@ -458,6 +532,8 @@ static void intel_pmu_drain_pebs_core(st
PEBS_TO_REGS(at, &regs);
+ intel_pmu_pebs_fixup_ip(&regs);
+
if (perf_event_overflow(event, 1, data, &regs))
intel_pmu_disable_event(event);
@@ -519,6 +595,7 @@ static void intel_pmu_drain_pebs_nhm(str
data->period = event->hw.last_period;
PEBS_TO_REGS(at, &regs);
+ intel_pmu_pebs_fixup_ip(&regs);
if (perf_event_overflow(event, 1, data, &regs))
intel_pmu_disable_event(event);
--
next prev parent reply other threads:[~2010-03-03 16:47 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-03 16:39 [RFC][PATCH 00/11] Another stab at PEBS and LBR support Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 01/11] perf, x86: Remove superfluous arguments to x86_perf_event_set_period() Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 02/11] perf, x86: Remove superfluous arguments to x86_perf_event_update() Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 03/11] perf, x86: Change x86_pmu.{enable,disable} calling convention Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 04/11] perf, x86: Use unlocked bitops Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 05/11] perf: Generic perf_sample_data initialization Peter Zijlstra
2010-03-03 16:49 ` David Miller
2010-03-03 21:14 ` Frederic Weisbecker
2010-03-05 8:44 ` Jean Pihet
2010-03-03 16:39 ` [RFC][PATCH 06/11] perf, x86: PEBS infrastructure Peter Zijlstra
2010-03-03 17:38 ` Robert Richter
2010-03-03 17:42 ` Peter Zijlstra
2010-03-04 8:50 ` Robert Richter
2010-03-03 16:39 ` [RFC][PATCH 07/11] perf: Provide PERF_SAMPLE_REGS Peter Zijlstra
2010-03-03 17:30 ` Stephane Eranian
2010-03-03 17:39 ` Peter Zijlstra
2010-03-03 17:49 ` Stephane Eranian
2010-03-03 17:55 ` David Miller
2010-03-03 18:18 ` Stephane Eranian
2010-03-03 19:18 ` Peter Zijlstra
2010-03-04 2:59 ` Ingo Molnar
2010-03-04 12:58 ` Arnaldo Carvalho de Melo
2010-03-03 22:02 ` Frederic Weisbecker
2010-03-04 8:58 ` Peter Zijlstra
2010-03-04 11:04 ` Ingo Molnar
2010-03-03 16:39 ` [RFC][PATCH 08/11] perf, x86: Implement simple LBR support Peter Zijlstra
2010-03-03 21:52 ` Stephane Eranian
2010-03-04 8:58 ` Peter Zijlstra
2010-03-03 21:57 ` Stephane Eranian
2010-03-04 8:58 ` Peter Zijlstra
2010-03-04 17:54 ` Stephane Eranian
2010-03-04 18:18 ` Peter Zijlstra
2010-03-04 20:23 ` Peter Zijlstra
2010-03-04 20:57 ` Stephane Eranian
2010-03-03 16:39 ` [RFC][PATCH 09/11] perf, x86: Implement PERF_SAMPLE_BRANCH_STACK Peter Zijlstra
2010-03-03 21:08 ` Frederic Weisbecker
2010-03-03 16:39 ` Peter Zijlstra [this message]
2010-03-03 18:05 ` [RFC][PATCH 10/11] perf, x86: use LBR for PEBS IP+1 fixup Masami Hiramatsu
2010-03-03 19:37 ` Peter Zijlstra
2010-03-03 21:11 ` Masami Hiramatsu
2010-03-03 21:50 ` Stephane Eranian
2010-03-04 8:57 ` Peter Zijlstra
2010-03-09 1:41 ` Stephane Eranian
2010-03-03 16:39 ` [RFC][PATCH 11/11] perf, x86: Clean up IA32_PERF_CAPABILITIES usage Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100303164306.602529559@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=eranian@google.com \
--cc=fweisbec@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mhiramat@redhat.com \
--cc=mingo@elte.hu \
--cc=paulus@samba.org \
--cc=robert.richter@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.