From: "tip-bot for Yan, Zheng" <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: mingo@kernel.org, tglx@linutronix.de,
linux-kernel@vger.kernel.org, kan.liang@intel.com,
vincent.weaver@maine.edu, zheng.z.yan@intel.com,
peterz@infradead.org, hpa@zytor.com, paulus@samba.org,
acme@kernel.org, luto@amacapital.net,
torvalds@linux-foundation.org
Subject: [tip:perf/core] perf/x86/intel: Add basic Haswell LBR call stack support
Date: Wed, 18 Feb 2015 09:14:49 -0800 [thread overview]
Message-ID: <tip-e9d7f7cd97c45e2c612d3b38be05b4cfb27939ee@git.kernel.org> (raw)
In-Reply-To: <1415156173-10035-5-git-send-email-kan.liang@intel.com>
Commit-ID: e9d7f7cd97c45e2c612d3b38be05b4cfb27939ee
Gitweb: http://git.kernel.org/tip/e9d7f7cd97c45e2c612d3b38be05b4cfb27939ee
Author: Yan, Zheng <zheng.z.yan@intel.com>
AuthorDate: Tue, 4 Nov 2014 21:56:00 -0500
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 18 Feb 2015 17:16:04 +0100
perf/x86/intel: Add basic Haswell LBR call stack support
Haswell has a new feature that utilizes the existing LBR facility to
record call chains. To enable this feature, bits (JCC, NEAR_IND_JMP,
NEAR_REL_JMP, FAR_BRANCH, EN_CALLSTACK) in LBR_SELECT must be set to 1,
bits (NEAR_REL_CALL, NEAR-IND_CALL, NEAR_RET) must be cleared. Due to
a hardware bug of Haswell, this feature doesn't work well with
FREEZE_LBRS_ON_PMI.
When the call stack feature is enabled, the LBR stack will capture
unfiltered call data normally, but as return instructions are executed,
the last captured branch record is flushed from the on-chip registers
in a last-in first-out (LIFO) manner. Thus, branch information relative
to leaf functions will not be captured, while preserving the call stack
information of the main line execution path.
This patch defines a separate lbr_sel map for Haswell. The map contains
a new entry for the call stack feature.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: eranian@google.com
Cc: jolsa@redhat.com
Link: http://lkml.kernel.org/r/1415156173-10035-5-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/kernel/cpu/perf_event.h | 14 ++++-
arch/x86/kernel/cpu/perf_event_intel.c | 2 +-
arch/x86/kernel/cpu/perf_event_intel_lbr.c | 91 ++++++++++++++++++++++--------
3 files changed, 83 insertions(+), 24 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 949d008..c9a62c5 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -517,7 +517,11 @@ struct x86_pmu {
};
enum {
- PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE = PERF_SAMPLE_BRANCH_MAX_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = PERF_SAMPLE_BRANCH_MAX_SHIFT,
+ PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE,
+
+ PERF_SAMPLE_BRANCH_CALL_STACK =
+ 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
};
#define x86_add_quirk(func_) \
@@ -551,6 +555,12 @@ static struct perf_pmu_events_attr event_attr_##v = { \
extern struct x86_pmu x86_pmu __read_mostly;
+static inline bool x86_pmu_has_lbr_callstack(void)
+{
+ return x86_pmu.lbr_sel_map &&
+ x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
+}
+
DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
int x86_perf_event_set_period(struct perf_event *event);
@@ -754,6 +764,8 @@ void intel_pmu_lbr_init_atom(void);
void intel_pmu_lbr_init_snb(void);
+void intel_pmu_lbr_init_hsw(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
int p4_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 424fbf7..a485ba1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2537,7 +2537,7 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_snb();
+ intel_pmu_lbr_init_hsw();
x86_pmu.event_constraints = intel_hsw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index c0e23c5..ac8279e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -39,6 +39,7 @@ static enum {
#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
#define LBR_FAR_BIT 8 /* do not capture far branches */
+#define LBR_CALL_STACK_BIT 9 /* enable call stack */
#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
#define LBR_USER (1 << LBR_USER_BIT)
@@ -49,6 +50,7 @@ static enum {
#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
#define LBR_FAR (1 << LBR_FAR_BIT)
+#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
#define LBR_PLM (LBR_KERNEL | LBR_USER)
@@ -74,24 +76,25 @@ static enum {
* x86control flow changes include branches, interrupts, traps, faults
*/
enum {
- X86_BR_NONE = 0, /* unknown */
-
- X86_BR_USER = 1 << 0, /* branch target is user */
- X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
-
- X86_BR_CALL = 1 << 2, /* call */
- X86_BR_RET = 1 << 3, /* return */
- X86_BR_SYSCALL = 1 << 4, /* syscall */
- X86_BR_SYSRET = 1 << 5, /* syscall return */
- X86_BR_INT = 1 << 6, /* sw interrupt */
- X86_BR_IRET = 1 << 7, /* return from interrupt */
- X86_BR_JCC = 1 << 8, /* conditional */
- X86_BR_JMP = 1 << 9, /* jump */
- X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
- X86_BR_IND_CALL = 1 << 11,/* indirect calls */
- X86_BR_ABORT = 1 << 12,/* transaction abort */
- X86_BR_IN_TX = 1 << 13,/* in transaction */
- X86_BR_NO_TX = 1 << 14,/* not in transaction */
+ X86_BR_NONE = 0, /* unknown */
+
+ X86_BR_USER = 1 << 0, /* branch target is user */
+ X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
+
+ X86_BR_CALL = 1 << 2, /* call */
+ X86_BR_RET = 1 << 3, /* return */
+ X86_BR_SYSCALL = 1 << 4, /* syscall */
+ X86_BR_SYSRET = 1 << 5, /* syscall return */
+ X86_BR_INT = 1 << 6, /* sw interrupt */
+ X86_BR_IRET = 1 << 7, /* return from interrupt */
+ X86_BR_JCC = 1 << 8, /* conditional */
+ X86_BR_JMP = 1 << 9, /* jump */
+ X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
+ X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+ X86_BR_ABORT = 1 << 12,/* transaction abort */
+ X86_BR_IN_TX = 1 << 13,/* in transaction */
+ X86_BR_NO_TX = 1 << 14,/* not in transaction */
+ X86_BR_CALL_STACK = 1 << 15,/* call stack */
};
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -373,7 +376,7 @@ void intel_pmu_lbr_read(void)
* - in case there is no HW filter
* - in case the HW filter has errata or limitations
*/
-static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
u64 br_type = event->attr.branch_sample_type;
int mask = 0;
@@ -410,11 +413,21 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
if (br_type & PERF_SAMPLE_BRANCH_COND)
mask |= X86_BR_JCC;
+ if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
+ if (!x86_pmu_has_lbr_callstack())
+ return -EOPNOTSUPP;
+ if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
+ return -EINVAL;
+ mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
+ X86_BR_CALL_STACK;
+ }
+
/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
*/
event->hw.branch_reg.reg = mask;
+ return 0;
}
/*
@@ -443,8 +456,12 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
reg = &event->hw.branch_reg;
reg->idx = EXTRA_REG_LBR;
- /* LBR_SELECT operates in suppress mode so invert mask */
- reg->config = ~mask & x86_pmu.lbr_sel_mask;
+ /*
+ * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
+ * in suppress mode. So LBR_SELECT should be set to
+ * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
+ */
+ reg->config = mask ^ x86_pmu.lbr_sel_mask;
return 0;
}
@@ -462,7 +479,9 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
/*
* setup SW LBR filter
*/
- intel_pmu_setup_sw_lbr_filter(event);
+ ret = intel_pmu_setup_sw_lbr_filter(event);
+ if (ret)
+ return ret;
/*
* setup HW LBR filter, if any
@@ -732,6 +751,20 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = {
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
};
+static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = {
+ [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
+ [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
+ [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
+ [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN,
+ [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
+ | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
+ [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
+ [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
+ | LBR_RETURN | LBR_CALL_STACK,
+};
+
/* core */
void __init intel_pmu_lbr_init_core(void)
{
@@ -788,6 +821,20 @@ void __init intel_pmu_lbr_init_snb(void)
pr_cont("16-deep LBR, ");
}
+/* haswell */
+void intel_pmu_lbr_init_hsw(void)
+{
+ x86_pmu.lbr_nr = 16;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
+
+ pr_cont("16-deep LBR, ");
+}
+
/* atom */
void __init intel_pmu_lbr_init_atom(void)
{
next prev parent reply other threads:[~2015-02-18 17:15 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-11-05 2:55 [PATCH V7 00/17] perf, x86: Haswell LBR call stack support Kan Liang
2014-11-05 2:55 ` [PATCH V7 01/17] perf, x86: Reduce lbr_sel_map size Kan Liang
2015-02-18 17:13 ` [tip:perf/core] perf/x86/intel: Reduce lbr_sel_map[] size tip-bot for Yan, Zheng
2014-11-05 2:55 ` [PATCH V7 02/17] perf, core: introduce pmu context switch callback Kan Liang
2015-02-18 17:14 ` [tip:perf/core] perf: Introduce " tip-bot for Yan, Zheng
2014-11-05 2:55 ` [PATCH V7 03/17] perf, x86: use context switch callback to flush LBR stack Kan Liang
2015-02-18 17:14 ` [tip:perf/core] perf/x86/intel: Use " tip-bot for Yan, Zheng
2014-11-05 2:56 ` [PATCH V7 04/17] perf, x86: Basic Haswell LBR call stack support Kan Liang
2015-02-18 17:14 ` tip-bot for Yan, Zheng [this message]
2014-11-05 2:56 ` [PATCH V7 05/17] perf, core: pmu specific data for perf task context Kan Liang
2015-02-18 17:15 ` [tip:perf/core] perf: Add " tip-bot for Yan, Zheng
2015-12-09 8:34 ` Peter Zijlstra
2015-12-09 14:59 ` Liang, Kan
2015-12-09 15:14 ` Peter Zijlstra
2015-12-09 15:25 ` Liang, Kan
2014-11-05 2:56 ` [PATCH V7 06/17] perf, core: always switch pmu specific data during context switch Kan Liang
2015-02-18 17:15 ` [tip:perf/core] perf: Always " tip-bot for Yan, Zheng
2014-11-05 2:56 ` [PATCH V7 07/17] perf, x86: allocate space for storing LBR stack Kan Liang
2015-02-18 17:15 ` [tip:perf/core] perf/x86/intel: Allocate " tip-bot for Yan, Zheng
2014-11-05 2:56 ` [PATCH V7 08/17] perf, x86: track number of events that use LBR callstack Kan Liang
2015-02-18 17:15 ` [tip:perf/core] perf/x86/intel: Track number of events that use the " tip-bot for Yan, Zheng
2014-11-05 2:56 ` [PATCH V7 09/17] perf, x86: Save/resotre LBR stack during context switch Kan Liang
2015-02-18 17:16 ` [tip:perf/core] perf/x86/intel: Save/ restore " tip-bot for Yan, Zheng
2014-11-05 2:56 ` [PATCH V7 10/17] perf, core: simplify need branch stack check Kan Liang
2015-02-18 17:16 ` [tip:perf/core] perf: Simplify the " tip-bot for Yan, Zheng
2014-11-05 2:56 ` [PATCH V7 11/17] perf, core: expose LBR call stack to user perf tool Kan Liang
2014-11-05 9:20 ` Peter Zijlstra
2014-11-05 2:56 ` [PATCH V7 12/17] perf, x86: re-organize code that implicitly enables LBR/PEBS Kan Liang
2015-02-18 17:16 ` [tip:perf/core] perf/x86/intel: Re-organize " tip-bot for Yan, Zheng
2014-11-05 2:56 ` [PATCH V7 13/17] perf, x86: enable LBR callstack when recording callchain Kan Liang
2014-11-05 9:21 ` Peter Zijlstra
2014-11-05 9:58 ` Stephane Eranian
2014-11-05 10:43 ` Peter Zijlstra
2014-11-05 10:57 ` Stephane Eranian
2014-11-05 12:49 ` Peter Zijlstra
2014-11-05 13:22 ` Stephane Eranian
2014-11-05 15:45 ` Peter Zijlstra
2014-11-05 15:53 ` Liang, Kan
2014-11-05 16:29 ` Peter Zijlstra
2014-11-05 17:52 ` Andi Kleen
2014-11-05 17:57 ` Andi Kleen
2014-11-05 17:40 ` Andi Kleen
2014-11-05 2:56 ` [PATCH V7 14/17] perf, x86: disable FREEZE_LBRS_ON_PMI when LBR operates in callstack mode Kan Liang
2015-02-18 17:17 ` [tip:perf/core] perf/x86/intel: Disable " tip-bot for Yan, Zheng
2014-11-05 2:56 ` [PATCH V7 15/17] perf, x86: Discard zero length call entries in LBR call stack Kan Liang
2015-02-18 17:17 ` [tip:perf/core] perf/x86/intel: " tip-bot for Yan, Zheng
2014-11-05 2:56 ` [PATCH V7 16/17] perf tools: handle LBR call stack data Kan Liang
2014-11-05 2:56 ` [PATCH V7 17/17] perf tools: choose to dump callchain from LBR and FP Kan Liang
2014-11-05 9:37 ` [PATCH V7 00/17] perf, x86: Haswell LBR call stack support Peter Zijlstra
2014-11-05 16:22 ` Liang, Kan
2014-11-05 16:27 ` Peter Zijlstra
2014-11-05 17:02 ` Liang, Kan
2015-02-18 17:17 ` [tip:perf/core] perf/x86/intel: Expose LBR callstack to user space tooling tip-bot for Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=tip-e9d7f7cd97c45e2c612d3b38be05b4cfb27939ee@git.kernel.org \
--to=tipbot@zytor.com \
--cc=acme@kernel.org \
--cc=hpa@zytor.com \
--cc=kan.liang@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=luto@amacapital.net \
--cc=mingo@kernel.org \
--cc=paulus@samba.org \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=vincent.weaver@maine.edu \
--cc=zheng.z.yan@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox