* [PATCH 1/2] perf, x86: Revamp PEBS event selection
@ 2014-07-12 0:01 Andi Kleen
2014-07-12 0:01 ` [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store Andi Kleen
2014-07-14 22:10 ` [PATCH 1/2] perf, x86: Revamp PEBS event selection Stephane Eranian
0 siblings, 2 replies; 17+ messages in thread
From: Andi Kleen @ 2014-07-12 0:01 UTC (permalink / raw)
To: peterz; +Cc: linux-kernel, eranian, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
The basic idea is that it does not make sense to list all PEBS
events individually. The list is very long, sometimes outdated
and the hardware doesn't need it. If an event does not support
PEBS it will just not count; there is no security issue.
This vastly simplifies the PEBS event selection. It also
speeds up the scheduling because the scheduler doesn't
have to walk as many constraints.
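Each constraint entry is effectively a mask/value pair: a raw config matches
when (config & cmask) == code, and the table is scanned in order with the
first match winning. A rough standalone model of the new two-entry tables
(plain C, not the kernel's code; the HSW_IN_TX bits are omitted):

	#include <stdio.h>

	#define EDGE		(1ULL << 18)
	#define PIN_CONTROL	(1ULL << 19)
	#define ANY		(1ULL << 21)
	#define INV		(1ULL << 23)
	#define CMASK		(0xffULL << 24)
	#define ALL_FLAGS	(EDGE | PIN_CONTROL | ANY | INV | CMASK)
	#define EVENT_UMASK	0xffffULL	/* event select + umask fields */

	struct constraint {
		unsigned long long code;	/* required value under cmask */
		unsigned long long cmask;	/* config bits that must match */
		const char *name;
	};

	static const struct constraint slm[] = {
		/* UOPS_RETIRED.ALL, inv=1, cmask=16: the cycles:p alias */
		{ 0x108001c2ULL, EVENT_UMASK | ALL_FLAGS, "cycles:p alias" },
		/* any event/umask, provided no qualifier flags are set */
		{ 0x0ULL, ALL_FLAGS, "generic PEBS" },
	};

	static const char *pebs_match(unsigned long long config)
	{
		for (unsigned int i = 0; i < sizeof(slm) / sizeof(slm[0]); i++)
			if ((config & slm[i].cmask) == slm[i].code)
				return slm[i].name;
		return "no PEBS constraint";
	}

	int main(void)
	{
		printf("%s\n", pebs_match(0x108001c2ULL));	/* cycles:p alias */
		printf("%s\n", pebs_match(0x000001c2ULL));	/* generic PEBS */
		printf("%s\n", pebs_match(0x000401c2ULL));	/* no PEBS: edge set */
		return 0;
	}

Here 0x108001c2 decodes as event 0xc2, umask 0x01, inv (bit 23) set and
cmask 16 in bits 24-31, i.e. the cycles:p alias noted in the table comments.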
Bugs fixed:
- We do not allow setting forbidden flags with PEBS anymore
(SDM 18.9.4), except for the special cycle event.
This is done using a new constraint macro that also
matches on the event flags.
- We now allow DataLA on all Haswell events, not just
a small subset. In general all PEBS events that tag memory
accesses support DataLA on Haswell. Otherwise the reported
address is just zero. This allows address profiling
on vastly more events.
- We did not allow all PEBS events on Haswell:
We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
MEM_LOAD_UOPS_L3_HIT_RETIRED.*)
This includes the changes proposed by Stephane earlier and obsoletes
his patchkit (except for some changes on pre-Sandy Bridge/Silvermont
CPUs).
I only did Sandy Bridge and Silvermont and later so far, mostly because these
are the parts whose hardware behavior I could directly confirm with hardware
architects. Also I do not believe the older CPUs have any
missing events in their PEBS list, so there's no pressing
need to change them.
I did not implement the flag proposed by Peter to allow
setting forbidden flags. If really needed this could
be implemented on top of this patch.
Cc: eranian@google.com
v2: Fix broken store events on SNB/IVB (Stephane Eranian)
v3: More fixes. Rename some arguments (Stephane Eranian)
Update description.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/include/asm/perf_event.h | 8 +++
arch/x86/kernel/cpu/perf_event.h | 18 +++++--
arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++++++------------------------
3 files changed, 39 insertions(+), 75 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8249df4..8dfc9fd 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -51,6 +51,14 @@
ARCH_PERFMON_EVENTSEL_EDGE | \
ARCH_PERFMON_EVENTSEL_INV | \
ARCH_PERFMON_EVENTSEL_CMASK)
+#define X86_ALL_EVENT_FLAGS \
+ (ARCH_PERFMON_EVENTSEL_EDGE | \
+ ARCH_PERFMON_EVENTSEL_INV | \
+ ARCH_PERFMON_EVENTSEL_CMASK | \
+ ARCH_PERFMON_EVENTSEL_ANY | \
+ ARCH_PERFMON_EVENTSEL_PIN_CONTROL | \
+ HSW_IN_TX | \
+ HSW_IN_TX_CHECKPOINTED)
#define AMD64_RAW_EVENT_MASK \
(X86_RAW_EVENT_MASK | \
AMD64_EVENTSEL_EVENT)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a22a34e9..8f32af0 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -262,16 +262,24 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
#define INTEL_PLD_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
#define INTEL_PST_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
-/* DataLA version of store sampling without extra enable bit. */
-#define INTEL_PST_HSW_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
+ EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
+
+/* Same as above, but enable DataLA */
+#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(code, n) \
+ __EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970c..64b4be9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
};
struct event_constraint intel_slm_pebs_event_constraints[] = {
- INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
- INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
- INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
- INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
- INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
- INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
- INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
- INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
- INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
EVENT_CONSTRAINT_END
};
@@ -624,68 +606,34 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
- INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
EVENT_CONSTRAINT_END
};
struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
- INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
EVENT_CONSTRAINT_END
};
struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
- INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
- INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
- INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
- /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
- /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
- INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
- /* MEM_UOPS_RETIRED.SPLIT_STORES */
- INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
- INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
- INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
- INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
- INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
- /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
- INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
- /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
- INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
- /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
- INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
- /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
- INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
- INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
-
+ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ /* We allow DATALA for all PEBS events, will be 0 if not supported */
+ INTEL_ALL_EVENT_CONSTRAINT_DATALA(0, 0xf),
EVENT_CONSTRAINT_END
};
--
1.9.3
* [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-12 0:01 [PATCH 1/2] perf, x86: Revamp PEBS event selection Andi Kleen
@ 2014-07-12 0:01 ` Andi Kleen
2014-07-14 18:04 ` Stephane Eranian
2014-07-14 22:10 ` [PATCH 1/2] perf, x86: Revamp PEBS event selection Stephane Eranian
1 sibling, 1 reply; 17+ messages in thread
From: Andi Kleen @ 2014-07-12 0:01 UTC (permalink / raw)
To: peterz; +Cc: linux-kernel, eranian, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
Haswell supports reporting the data address for a range
of PEBS events, including:
UOPS_RETIRED.ALL
MEM_UOPS_RETIRED.STLB_MISS_LOADS
MEM_UOPS_RETIRED.STLB_MISS_STORES
MEM_UOPS_RETIRED.LOCK_LOADS
MEM_UOPS_RETIRED.SPLIT_LOADS
MEM_UOPS_RETIRED.SPLIT_STORES
MEM_UOPS_RETIRED.ALL_LOADS
MEM_UOPS_RETIRED.ALL_STORES
MEM_LOAD_UOPS_RETIRED.L1_HIT
MEM_LOAD_UOPS_RETIRED.L2_HIT
MEM_LOAD_UOPS_RETIRED.L3_HIT
MEM_LOAD_UOPS_RETIRED.L1_MISS
MEM_LOAD_UOPS_RETIRED.L2_MISS
MEM_LOAD_UOPS_RETIRED.L3_MISS
MEM_LOAD_UOPS_RETIRED.HIT_LFB
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE
MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM
This facility was already enabled earlier with the original Haswell
perf changes.
However, these addresses were always reported as stores by perf, which is wrong,
as they could be loads too. The hardware does not distinguish loads and stores
for these instructions, so there's no (cheap) way for the profiler
to find out.
Change the type to PERF_MEM_OP_NA instead.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 64b4be9..13baa7c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -114,7 +114,7 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
dse.val = 0;
- dse.mem_op = PERF_MEM_OP_STORE;
+ dse.mem_op = PERF_MEM_OP_NA;
dse.mem_lvl = PERF_MEM_LVL_NA;
/*
--
1.9.3
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-12 0:01 ` [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store Andi Kleen
@ 2014-07-14 18:04 ` Stephane Eranian
2014-07-14 19:24 ` Andi Kleen
0 siblings, 1 reply; 17+ messages in thread
From: Stephane Eranian @ 2014-07-14 18:04 UTC (permalink / raw)
To: Andi Kleen; +Cc: Peter Zijlstra, LKML, Andi Kleen
On Sat, Jul 12, 2014 at 2:01 AM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Haswell supports reporting the data address for a range
> of PEBS events, including:
>
> UOPS_RETIRED.ALL
> MEM_UOPS_RETIRED.STLB_MISS_LOADS
> MEM_UOPS_RETIRED.STLB_MISS_STORES
> MEM_UOPS_RETIRED.LOCK_LOADS
> MEM_UOPS_RETIRED.SPLIT_LOADS
> MEM_UOPS_RETIRED.SPLIT_STORES
> MEM_UOPS_RETIRED.ALL_LOADS
> MEM_UOPS_RETIRED.ALL_STORES
> MEM_LOAD_UOPS_RETIRED.L1_HIT
> MEM_LOAD_UOPS_RETIRED.L2_HIT
> MEM_LOAD_UOPS_RETIRED.L3_HIT
> MEM_LOAD_UOPS_RETIRED.L1_MISS
> MEM_LOAD_UOPS_RETIRED.L2_MISS
> MEM_LOAD_UOPS_RETIRED.L3_MISS
> MEM_LOAD_UOPS_RETIRED.HIT_LFB
> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS
> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT
> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM
> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE
> MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM
>
> This facility was already enabled earlier with the original Haswell
> perf changes.
>
> However, these addresses were always reported as stores by perf, which is wrong,
> as they could be loads too. The hardware does not distinguish loads and stores
> for these instructions, so there's no (cheap) way for the profiler
> to find out.
>
> Change the type to PERF_MEM_OP_NA instead.
>
You could do better if you tagged the event during setup as load vs. store.
And then you could simply propagate the flag to the data source struct.
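A minimal standalone sketch of that tagging idea (all names here are
invented for illustration; this is not the kernel's code):

	#include <stdio.h>

	enum mem_op { OP_NA, OP_LOAD, OP_STORE };

	#define FLAG_PEBS_LD (1u << 0)	/* hypothetical "known load" tag */
	#define FLAG_PEBS_ST (1u << 1)	/* hypothetical "known store" tag */

	struct event {
		unsigned int flags;	/* tagged once, at event setup time */
	};

	static enum mem_op datala_op(const struct event *e)
	{
		if (e->flags & FLAG_PEBS_ST)
			return OP_STORE;
		if (e->flags & FLAG_PEBS_LD)
			return OP_LOAD;
		return OP_NA;		/* ambiguous events stay NA */
	}

	int main(void)
	{
		struct event all_stores   = { .flags = FLAG_PEBS_ST };
		struct event uops_retired = { .flags = 0 };

		printf("%d %d\n", datala_op(&all_stores), datala_op(&uops_retired));
		return 0;
	}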
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
> arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 64b4be9..13baa7c 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -114,7 +114,7 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
> u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
>
> dse.val = 0;
> - dse.mem_op = PERF_MEM_OP_STORE;
> + dse.mem_op = PERF_MEM_OP_NA;
> dse.mem_lvl = PERF_MEM_LVL_NA;
>
> /*
> --
> 1.9.3
>
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-14 18:04 ` Stephane Eranian
@ 2014-07-14 19:24 ` Andi Kleen
2014-07-14 22:08 ` Stephane Eranian
0 siblings, 1 reply; 17+ messages in thread
From: Andi Kleen @ 2014-07-14 19:24 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, Peter Zijlstra, LKML
> You could do better if you tagged the event during setup as load vs. store.
> And then you could simply propagate the flag to the data source struct.
This would require listing all PEBS events in the table again.
The whole point of the other patch was to get rid of that.
Besides it wouldn't work for a range of events (like UOPS_RETIRED.ALL)
-Andi
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-14 19:24 ` Andi Kleen
@ 2014-07-14 22:08 ` Stephane Eranian
2014-07-14 22:39 ` Andi Kleen
0 siblings, 1 reply; 17+ messages in thread
From: Stephane Eranian @ 2014-07-14 22:08 UTC (permalink / raw)
To: Andi Kleen; +Cc: Andi Kleen, Peter Zijlstra, LKML
Andi,
On Mon, Jul 14, 2014 at 9:24 PM, Andi Kleen <ak@linux.intel.com> wrote:
>> You could do better if you tagged the event during setup as load vs. store.
>> And then you could simply propagate the flag to the data source struct.
>
> This would require listing all PEBS events in the table again.
> The whole point of the other patch was to get rid of that.
>
> Besides it wouldn't work for a range of events (like UOPS_RETIRED.ALL)
>
I have a problem with this patch.
It makes 'perf mem -t store rec' record OP_NA for the store.
It was recording OP_STORE before.
I think we need to keep LD/ST info. This is useful for analysis
especially if we collect loads/stores simultaneously.
It was working before for the mem-loads and mem-stores events.
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-14 22:08 ` Stephane Eranian
@ 2014-07-14 22:39 ` Andi Kleen
2014-07-14 22:49 ` Stephane Eranian
0 siblings, 1 reply; 17+ messages in thread
From: Andi Kleen @ 2014-07-14 22:39 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, Peter Zijlstra, LKML
> I have a problem with this patch.
>
> It makes: perf mem -t store rec record OP_NA for the store.
> It was recording OP_STORE before.
>
> I think we need to keep LD/ST info. This is useful for analysis
> especially if we collect loads/stores simultaneously.
>
> Was working before for the mem-loads, mem-stores events.
Ok. Would it be enough if it only worked for "mem-stores" and not
all PEBS events?
Otherwise we have to go back to an even larger PEBS constraint
table for HSW than before.
-Andi
--
ak@linux.intel.com -- Speaking for myself only
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-14 22:39 ` Andi Kleen
@ 2014-07-14 22:49 ` Stephane Eranian
2014-07-14 22:50 ` Stephane Eranian
2014-07-19 0:49 ` Andi Kleen
0 siblings, 2 replies; 17+ messages in thread
From: Stephane Eranian @ 2014-07-14 22:49 UTC (permalink / raw)
To: Andi Kleen; +Cc: Andi Kleen, Peter Zijlstra, LKML
On Tue, Jul 15, 2014 at 12:39 AM, Andi Kleen <ak@linux.intel.com> wrote:
>> I have a problem with this patch.
>>
>> It makes: perf mem -t store rec record OP_NA for the store.
>> It was recording OP_STORE before.
>>
>> I think we need to keep LD/ST info. This is useful for analysis
>> especially if we collect loads/stores simultaneously.
>>
>> Was working before for the mem-loads, mem-stores events.
>
> Ok. Would it be enough if it only worked for "mem-stores" and not
> all PEBS events?
>
Ok, do that at a minimum.
> Otherwise have to go back to an even larger PEBS constraint
> table for HSW than before.
>
> -Andi
>
> --
> ak@linux.intel.com -- Speaking for myself only
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-14 22:49 ` Stephane Eranian
@ 2014-07-14 22:50 ` Stephane Eranian
2014-07-15 4:05 ` Andi Kleen
2014-07-19 0:49 ` Andi Kleen
1 sibling, 1 reply; 17+ messages in thread
From: Stephane Eranian @ 2014-07-14 22:50 UTC (permalink / raw)
To: Andi Kleen; +Cc: Andi Kleen, Peter Zijlstra, LKML
On Tue, Jul 15, 2014 at 12:49 AM, Stephane Eranian <eranian@google.com> wrote:
> On Tue, Jul 15, 2014 at 12:39 AM, Andi Kleen <ak@linux.intel.com> wrote:
>>> I have a problem with this patch.
>>>
>>> It makes: perf mem -t store rec record OP_NA for the store.
>>> It was recording OP_STORE before.
>>>
>>> I think we need to keep LD/ST info. This is useful for analysis
>>> especially if we collect loads/stores simultaneously.
>>>
>>> Was working before for the mem-loads, mem-stores events.
>>
>> Ok. Would it be enough if it only worked for "mem-stores" and not
>> all PEBS events?
>>
> Ok, do that at a minimum.
>
But if I recall, the PEBS store events were not that many to begin with.
>> Otherwise have to go back to an even larger PEBS constraint
>> table for HSW than before.
>>
>> -Andi
>>
>> --
>> ak@linux.intel.com -- Speaking for myself only
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-14 22:50 ` Stephane Eranian
@ 2014-07-15 4:05 ` Andi Kleen
2014-07-15 8:59 ` Peter Zijlstra
0 siblings, 1 reply; 17+ messages in thread
From: Andi Kleen @ 2014-07-15 4:05 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, Peter Zijlstra, LKML
On Tue, Jul 15, 2014 at 12:50:27AM +0200, Stephane Eranian wrote:
> On Tue, Jul 15, 2014 at 12:49 AM, Stephane Eranian <eranian@google.com> wrote:
> > On Tue, Jul 15, 2014 at 12:39 AM, Andi Kleen <ak@linux.intel.com> wrote:
> >>> I have a problem with this patch.
> >>>
> >>> It makes: perf mem -t store rec record OP_NA for the store.
> >>> It was recording OP_STORE before.
> >>>
> >>> I think we need to keep LD/ST info. This is useful for analysis
> >>> especially if we collect loads/stores simultaneously.
> >>>
> >>> Was working before for the mem-loads, mem-stores events.
> >>
> >> Ok. Would it be enough if it only worked for "mem-stores" and not
> >> all PEBS events?
> >>
> > Ok, do that at a minimum.
> >
> But if I recall the PEBS stores events were not that many to begin with.
Yes, there are only three store events:
MEM_UOPS_RETIRED.STLB_MISS_STORES
MEM_UOPS_RETIRED.SPLIT_STORES
MEM_UOPS_RETIRED.ALL_STORES
These can be added.
But most others are loads, so if you wanted loads too (besides mem-loads)
it would be nearly a full list.
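For illustration, store-only tagging might look like this in the HSW table,
where INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST is a hypothetical
uevent-matching variant of the DATALA macro; the specific entries have to
come before the catch-all, since the first match wins:

	struct event_constraint intel_hsw_pebs_event_constraints[] = {
		INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2),	/* INST_RETIRED.PRECDIST */
		INTEL_PLD_CONSTRAINT(0x01cd, 0xf),	/* MEM_TRANS_RETIRED.* */
		/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
		INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
		/* hypothetical: store-tagged DataLA for the three store events */
		INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* STLB_MISS_STORES */
		INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* SPLIT_STORES */
		INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* ALL_STORES */
		/* everything else: address still reported, op left as OP_NA */
		INTEL_ALL_EVENT_CONSTRAINT_DATALA(0, 0xf),
		EVENT_CONSTRAINT_END
	};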
-Andi
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-15 4:05 ` Andi Kleen
@ 2014-07-15 8:59 ` Peter Zijlstra
2014-07-15 18:19 ` Stephane Eranian
0 siblings, 1 reply; 17+ messages in thread
From: Peter Zijlstra @ 2014-07-15 8:59 UTC (permalink / raw)
To: Andi Kleen; +Cc: Stephane Eranian, Andi Kleen, LKML
On Mon, Jul 14, 2014 at 09:05:42PM -0700, Andi Kleen wrote:
> Yes, there are only three store events:
>
> MEM_UOPS_RETIRED.STLB_MISS_STORES
> MEM_UOPS_RETIRED.SPLIT_STORES
> MEM_UOPS_RETIRED.ALL_STORES
>
> These can be added.
>
> But most others are loads, so if you wanted loads too (besides mem-loads)
> it would be nearly a full list.
Of that list you had earlier:
MEM_UOPS_RETIRED.STLB_MISS_LOADS
MEM_UOPS_RETIRED.STLB_MISS_STORES
MEM_UOPS_RETIRED.LOCK_LOADS
MEM_UOPS_RETIRED.SPLIT_LOADS
MEM_UOPS_RETIRED.SPLIT_STORES
MEM_UOPS_RETIRED.ALL_LOADS
MEM_UOPS_RETIRED.ALL_STORES
There are only 4 loads and (as you already said) 3 stores.
That's 7 events total, that's not nearly a full list.
The other events:
UOPS_RETIRED.ALL
MEM_LOAD_UOPS_RETIRED.L1_HIT
MEM_LOAD_UOPS_RETIRED.L2_HIT
MEM_LOAD_UOPS_RETIRED.L3_HIT
MEM_LOAD_UOPS_RETIRED.L1_MISS
MEM_LOAD_UOPS_RETIRED.L2_MISS
MEM_LOAD_UOPS_RETIRED.L3_MISS
MEM_LOAD_UOPS_RETIRED.HIT_LFB
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE
MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM
are unclear on their type and should indeed be NA.
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-15 8:59 ` Peter Zijlstra
@ 2014-07-15 18:19 ` Stephane Eranian
2014-07-17 20:24 ` Stephane Eranian
0 siblings, 1 reply; 17+ messages in thread
From: Stephane Eranian @ 2014-07-15 18:19 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Andi Kleen, Andi Kleen, LKML
On Tue, Jul 15, 2014 at 10:59 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, Jul 14, 2014 at 09:05:42PM -0700, Andi Kleen wrote:
>
>> Yes, there are only three store events:
>>
>> MEM_UOPS_RETIRED.STLB_MISS_STORES
>> MEM_UOPS_RETIRED.SPLIT_STORES
>> MEM_UOPS_RETIRED.ALL_STORES
>>
>> These can be added.
>>
>> But most others are loads, so if you wanted loads too (besides mem-loads)
>> it would be nearly a full list.
>
> Of that list you had earlier:
>
> MEM_UOPS_RETIRED.STLB_MISS_LOADS
> MEM_UOPS_RETIRED.STLB_MISS_STORES
> MEM_UOPS_RETIRED.LOCK_LOADS
> MEM_UOPS_RETIRED.SPLIT_LOADS
> MEM_UOPS_RETIRED.SPLIT_STORES
> MEM_UOPS_RETIRED.ALL_LOADS
> MEM_UOPS_RETIRED.ALL_STORES
>
> There's only 4 loads and (as you already said) 3 stores.
>
> That's 7 events total, that's not nearly a full list.
>
> The other events:
>
> UOPS_RETIRED.ALL
> MEM_LOAD_UOPS_RETIRED.L1_HIT
> MEM_LOAD_UOPS_RETIRED.L2_HIT
> MEM_LOAD_UOPS_RETIRED.L3_HIT
> MEM_LOAD_UOPS_RETIRED.L1_MISS
> MEM_LOAD_UOPS_RETIRED.L2_MISS
> MEM_LOAD_UOPS_RETIRED.L3_MISS
> MEM_LOAD_UOPS_RETIRED.HIT_LFB
> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS
> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT
> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM
> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE
> MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM
>
Those are load uops.
> are unclear on their type and should indeed be NA.
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-15 18:19 ` Stephane Eranian
@ 2014-07-17 20:24 ` Stephane Eranian
0 siblings, 0 replies; 17+ messages in thread
From: Stephane Eranian @ 2014-07-17 20:24 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Andi Kleen, Andi Kleen, LKML
On Tue, Jul 15, 2014 at 8:19 PM, Stephane Eranian <eranian@google.com> wrote:
> On Tue, Jul 15, 2014 at 10:59 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>> On Mon, Jul 14, 2014 at 09:05:42PM -0700, Andi Kleen wrote:
>>
>>> Yes, there are only three store events:
>>>
>>> MEM_UOPS_RETIRED.STLB_MISS_STORES
>>> MEM_UOPS_RETIRED.SPLIT_STORES
>>> MEM_UOPS_RETIRED.ALL_STORES
>>>
>>> These can be added.
>>>
>>> But most others are loads, so if you wanted loads too (besides mem-loads)
>>> it would be nearly a full list.
>>
>> Of that list you had earlier:
>>
>> MEM_UOPS_RETIRED.STLB_MISS_LOADS
>> MEM_UOPS_RETIRED.STLB_MISS_STORES
>> MEM_UOPS_RETIRED.LOCK_LOADS
>> MEM_UOPS_RETIRED.SPLIT_LOADS
>> MEM_UOPS_RETIRED.SPLIT_STORES
>> MEM_UOPS_RETIRED.ALL_LOADS
>> MEM_UOPS_RETIRED.ALL_STORES
>>
>> There's only 4 loads and (as you already said) 3 stores.
>>
>> That's 7 events total, that's not nearly a full list.
>>
>> The other events:
>>
>> UOPS_RETIRED.ALL
>> MEM_LOAD_UOPS_RETIRED.L1_HIT
>> MEM_LOAD_UOPS_RETIRED.L2_HIT
>> MEM_LOAD_UOPS_RETIRED.L3_HIT
>> MEM_LOAD_UOPS_RETIRED.L1_MISS
>> MEM_LOAD_UOPS_RETIRED.L2_MISS
>> MEM_LOAD_UOPS_RETIRED.L3_MISS
>> MEM_LOAD_UOPS_RETIRED.HIT_LFB
>> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS
>> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT
>> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM
>> MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE
>> MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM
>>
> Those are load uops.
>
I suggest we add those back as loads. We cannot really lose
precision in the info returned.
>> are unclear on their type and should indeed be NA.
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-14 22:49 ` Stephane Eranian
2014-07-14 22:50 ` Stephane Eranian
@ 2014-07-19 0:49 ` Andi Kleen
2014-07-21 21:15 ` Stephane Eranian
1 sibling, 1 reply; 17+ messages in thread
From: Andi Kleen @ 2014-07-19 0:49 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Andi Kleen, Andi Kleen, Peter Zijlstra, LKML
On Tue, Jul 15, 2014 at 12:49:43AM +0200, Stephane Eranian wrote:
> On Tue, Jul 15, 2014 at 12:39 AM, Andi Kleen <ak@linux.intel.com> wrote:
> >> I have a problem with this patch.
> >>
> >> It makes: perf mem -t store rec record OP_NA for the store.
> >> It was recording OP_STORE before.
> >>
> >> I think we need to keep LD/ST info. This is useful for analysis
> >> especially if we collect loads/stores simultaneously.
> >>
> >> Was working before for the mem-loads, mem-stores events.
> >
> > Ok. Would it be enough if it only worked for "mem-stores" and not
> > all PEBS events?
> >
> Ok, do that at a minimum.
I fixed it now. However, it turned out that "perf mem report"
actually does not report mem_op, only mem_lvl.
You may want to fix that separately.
-Andi
* Re: [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-19 0:49 ` Andi Kleen
@ 2014-07-21 21:15 ` Stephane Eranian
0 siblings, 0 replies; 17+ messages in thread
From: Stephane Eranian @ 2014-07-21 21:15 UTC (permalink / raw)
To: Andi Kleen; +Cc: Andi Kleen, Peter Zijlstra, LKML
On Sat, Jul 19, 2014 at 2:49 AM, Andi Kleen <andi@firstfloor.org> wrote:
> On Tue, Jul 15, 2014 at 12:49:43AM +0200, Stephane Eranian wrote:
>> On Tue, Jul 15, 2014 at 12:39 AM, Andi Kleen <ak@linux.intel.com> wrote:
>> >> I have a problem with this patch.
>> >>
>> >> It makes: perf mem -t store rec record OP_NA for the store.
>> >> It was recording OP_STORE before.
>> >>
>> >> I think we need to keep LD/ST info. This is useful for analysis
>> >> especially if we collect loads/stores simultaneously.
>> >>
>> >> Was working before for the mem-loads, mem-stores events.
>> >
>> > Ok. Would it be enough if it only worked for "mem-stores" and not
>> > all PEBS events?
>> >
>> Ok, do that at a minimum.
>
> I fixed it now. However, it turned out that "perf mem report"
> actually does not report mem_op, only mem_lvl.
>
> You may want to fix that separately.
>
It is because it currently records loads or stores, but not both simultaneously.
Once I allow load/store in a single run, it will print the mem_op.
> -Andi
* Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
2014-07-12 0:01 [PATCH 1/2] perf, x86: Revamp PEBS event selection Andi Kleen
2014-07-12 0:01 ` [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store Andi Kleen
@ 2014-07-14 22:10 ` Stephane Eranian
1 sibling, 0 replies; 17+ messages in thread
From: Stephane Eranian @ 2014-07-14 22:10 UTC (permalink / raw)
To: Andi Kleen; +Cc: Peter Zijlstra, LKML, Andi Kleen
On Sat, Jul 12, 2014 at 2:01 AM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> The basic idea is that it does not make sense to list all PEBS
> events individually. The list is very long, sometimes outdated
> and the hardware doesn't need it. If an event does not support
> PEBS it will just not count, there is no security issue.
>
> This vastly simplifies the PEBS event selection. It also
> speeds up the scheduling because the scheduler doesn't
> have to walk as many constraints.
>
> Bugs fixed:
> - We do not allow setting forbidden flags with PEBS anymore
> (SDM 18.9.4), except for the special cycle event.
> This is done using a new constraint macro that also
> matches on the event flags.
> - We now allow DataLA on all Haswell events, not just
> a small subset. In general all PEBS events that tag memory
> accesses support DataLA on Haswell. Otherwise the reported
> address is just zero. This allows address profiling
> on vastly more events.
> - We did not allow all PEBS events on Haswell:
> We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
> MEM_LOAD_UOPS_L3_HIT_RETIRED.*)
>
> This includes the changes proposed by Stephane earlier and obsoletes
> his patchkit (except for some changes on pre Sandy Bridge/Silvermont
> CPUs)
>
> I only did Sandy Bridge and Silvermont and later so far, mostly because these
> are the parts I could directly confirm the hardware behavior with hardware
> architects. Also I do not believe the older CPUs have any
> missing events in their PEBS list, so there's no pressing
> need to change them.
>
> I did not implement the flag proposed by Peter to allow
> setting forbidden flags. If really needed this could
> be implemented on top of this patch.
>
> Cc: eranian@google.com
> v2: Fix broken store events on SNB/IVB (Stephane Eranian)
> v3: More fixes. Rename some arguments (Stephane Eranian)
> Update description.
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
Works now for me on SNB/HSW.
Reviewed-by: Stephane Eranian <eranian@google.com>
> ---
> arch/x86/include/asm/perf_event.h | 8 +++
> arch/x86/kernel/cpu/perf_event.h | 18 +++++--
> arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++++++------------------------
> 3 files changed, 39 insertions(+), 75 deletions(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 8249df4..8dfc9fd 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -51,6 +51,14 @@
> ARCH_PERFMON_EVENTSEL_EDGE | \
> ARCH_PERFMON_EVENTSEL_INV | \
> ARCH_PERFMON_EVENTSEL_CMASK)
> +#define X86_ALL_EVENT_FLAGS \
> + (ARCH_PERFMON_EVENTSEL_EDGE | \
> + ARCH_PERFMON_EVENTSEL_INV | \
> + ARCH_PERFMON_EVENTSEL_CMASK | \
> + ARCH_PERFMON_EVENTSEL_ANY | \
> + ARCH_PERFMON_EVENTSEL_PIN_CONTROL | \
> + HSW_IN_TX | \
> + HSW_IN_TX_CHECKPOINTED)
> #define AMD64_RAW_EVENT_MASK \
> (X86_RAW_EVENT_MASK | \
> AMD64_EVENTSEL_EVENT)
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index a22a34e9..8f32af0 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -262,16 +262,24 @@ struct cpu_hw_events {
> EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
> #define INTEL_PLD_CONSTRAINT(c, n) \
> - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
> HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
>
> #define INTEL_PST_CONSTRAINT(c, n) \
> - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
> HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
>
> -/* DataLA version of store sampling without extra enable bit. */
> -#define INTEL_PST_HSW_CONSTRAINT(c, n) \
> - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> +/* Event constraint, but match on all event flags too. */
> +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
> +
> +/* Check only flags, but allow all event/umask */
> +#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
> + EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
> +
> +/* Same as above, but enable DataLA */
> +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(code, n) \
> + __EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS, \
> HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
>
> /*
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 980970c..64b4be9 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
> };
>
> struct event_constraint intel_slm_pebs_event_constraints[] = {
> - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
> - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
> - INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
> - INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
> - INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
> - INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
> - INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
> - INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
> - INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
> - INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
> - INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
> - INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
> - INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
> - INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
> - INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
> - INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
> - INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
> - INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
> - INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
> - INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
> - INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
> - INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
> + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
> + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
> + /* Allow all events as PEBS with no flags */
> + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
> EVENT_CONSTRAINT_END
> };
>
> @@ -624,68 +606,34 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
>
> struct event_constraint intel_snb_pebs_event_constraints[] = {
> INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
> - INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
> - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
> - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
> INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
> INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
> - INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
> - INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
> + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
> + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
> + /* Allow all events as PEBS with no flags */
> + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
> EVENT_CONSTRAINT_END
> };
>
> struct event_constraint intel_ivb_pebs_event_constraints[] = {
> INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
> - INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
> - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
> - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
> INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
> INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
> - INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
> + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
> + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
> + /* Allow all events as PEBS with no flags */
> + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
> EVENT_CONSTRAINT_END
> };
>
> struct event_constraint intel_hsw_pebs_event_constraints[] = {
> INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
> - INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
> - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
> - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
> - INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
> - INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
> - INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
> - INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
> - /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
> - INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
> - /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
> - INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
> - INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
> - INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
> - /* MEM_UOPS_RETIRED.SPLIT_STORES */
> - INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
> - INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
> - INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
> - INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
> - INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
> - INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
> - /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
> - INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
> - /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
> - INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
> - /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
> - INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
> - /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
> - INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
> - INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
> - INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
> -
> + INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
> + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
> + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
> + /* Allow all events as PEBS with no flags */
> + /* We allow DATALA for all PEBS events, will be 0 if not supported */
> + INTEL_ALL_EVENT_CONSTRAINT_DATALA(0, 0xf),
> EVENT_CONSTRAINT_END
> };
>
> --
> 1.9.3
>
* [PATCH 1/2] perf, x86: Revamp PEBS event selection v2
@ 2014-07-07 22:37 Andi Kleen
2014-07-07 22:37 ` [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store Andi Kleen
0 siblings, 1 reply; 17+ messages in thread
From: Andi Kleen @ 2014-07-07 22:37 UTC (permalink / raw)
To: peterz; +Cc: linux-kernel, Andi Kleen, eranian
From: Andi Kleen <ak@linux.intel.com>
The basic idea is that it does not make sense to list all PEBS
events individually. The list is very long, sometimes outdated
and the hardware doesn't need it. If an event does not support
PEBS it will just not count; there is no security issue.
This vastly simplifies the PEBS event selection. It also
speeds up the scheduling because the scheduler doesn't
have to walk as many constraints.
Bugs fixed:
- We do not allow setting forbidden flags with PEBS anymore
(SDM 18.9.4), except for the special cycle event.
This is done using a new constraint macro that also
matches on the event flags.
- We now allow DataLA on all Haswell events, not just
a small subset. In general all PEBS events that tag memory
accesses support DataLA on Haswell. Otherwise the reported
address is just zero. This allows address profiling
on vastly more events.
- We did not allow all PEBS events on Haswell:
We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
MEM_LOAD_UOPS_L3_HIT_RETIRED.*)
This includes the changes proposed by Stephane earlier and obsoletes
his patchkit (except for some changes on pre-Sandy Bridge/Silvermont
CPUs).
I only did Sandy Bridge and Silvermont and later so far, mostly because these
are the parts whose hardware behavior I could directly confirm with hardware
architects. Also I do not believe the older CPUs have any
missing events in their PEBS list, so there's no pressing
need to change them.
I did not implement the flag proposed by Peter to allow
setting forbidden flags. If really needed this could
be implemented on top of this patch.
Cc: eranian@google.com
v2: Fix broken store events on SNB/IVB (Stephane Eranian)
Update description.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/include/asm/perf_event.h | 8 +++
arch/x86/kernel/cpu/perf_event.h | 18 +++++--
arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++++++------------------------
3 files changed, 39 insertions(+), 75 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8249df4..8dfc9fd 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -51,6 +51,14 @@
ARCH_PERFMON_EVENTSEL_EDGE | \
ARCH_PERFMON_EVENTSEL_INV | \
ARCH_PERFMON_EVENTSEL_CMASK)
+#define X86_ALL_EVENT_FLAGS \
+ (ARCH_PERFMON_EVENTSEL_EDGE | \
+ ARCH_PERFMON_EVENTSEL_INV | \
+ ARCH_PERFMON_EVENTSEL_CMASK | \
+ ARCH_PERFMON_EVENTSEL_ANY | \
+ ARCH_PERFMON_EVENTSEL_PIN_CONTROL | \
+ HSW_IN_TX | \
+ HSW_IN_TX_CHECKPOINTED)
#define AMD64_RAW_EVENT_MASK \
(X86_RAW_EVENT_MASK | \
AMD64_EVENTSEL_EVENT)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a22a34e9..70273e8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -262,16 +262,24 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
#define INTEL_PLD_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
#define INTEL_PST_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
-/* DataLA version of store sampling without extra enable bit. */
-#define INTEL_PST_HSW_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(flags, n) \
+ EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS)
+
+/* Same as above, but enable DataLA */
+#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \
+ __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970c..0e22ce6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
};
struct event_constraint intel_slm_pebs_event_constraints[] = {
- INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
- INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
- INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
- INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
- INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
- INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
- INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
- INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
- INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0xffff, 0x1),
EVENT_CONSTRAINT_END
};
@@ -624,68 +606,34 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
- INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0xffff, 0xf),
EVENT_CONSTRAINT_END
};
struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
- INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0xffff, 0xf),
EVENT_CONSTRAINT_END
};
struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
- INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
- INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
- INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
- /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
- /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
- INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
- /* MEM_UOPS_RETIRED.SPLIT_STORES */
- INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
- INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
- INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
- INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
- INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
- /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
- INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
- /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
- INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
- /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
- INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
- /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
- INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
- INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
-
+ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ /* We allow DATALA for all PEBS events, will be 0 if not supported */
+ INTEL_ALL_EVENT_CONSTRAINT_DATALA(0, 0xf),
EVENT_CONSTRAINT_END
};
--
1.9.3
* [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-07-07 22:37 [PATCH 1/2] perf, x86: Revamp PEBS event selection v2 Andi Kleen
@ 2014-07-07 22:37 ` Andi Kleen
0 siblings, 0 replies; 17+ messages in thread
From: Andi Kleen @ 2014-07-07 22:37 UTC (permalink / raw)
To: peterz; +Cc: linux-kernel, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
Haswell supports reporting the data address for a range
of PEBS events, including:
UOPS_RETIRED.ALL
MEM_UOPS_RETIRED.STLB_MISS_LOADS
MEM_UOPS_RETIRED.STLB_MISS_STORES
MEM_UOPS_RETIRED.LOCK_LOADS
MEM_UOPS_RETIRED.SPLIT_LOADS
MEM_UOPS_RETIRED.SPLIT_STORES
MEM_UOPS_RETIRED.ALL_LOADS
MEM_UOPS_RETIRED.ALL_STORES
MEM_LOAD_UOPS_RETIRED.L1_HIT
MEM_LOAD_UOPS_RETIRED.L2_HIT
MEM_LOAD_UOPS_RETIRED.L3_HIT
MEM_LOAD_UOPS_RETIRED.L1_MISS
MEM_LOAD_UOPS_RETIRED.L2_MISS
MEM_LOAD_UOPS_RETIRED.L3_MISS
MEM_LOAD_UOPS_RETIRED.HIT_LFB
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE
MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM
This facility was already enabled earlier with the original Haswell
perf changes.
However, these addresses were always reported as stores by perf, which is wrong,
as they could be loads too. The hardware does not distinguish loads and stores
for these instructions, so there's no (cheap) way for the profiler
to find out.
Change the type to PERF_MEM_OP_NA instead.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 0e22ce6..c5d83ab 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -114,7 +114,7 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
dse.val = 0;
- dse.mem_op = PERF_MEM_OP_STORE;
+ dse.mem_op = PERF_MEM_OP_NA;
dse.mem_lvl = PERF_MEM_LVL_NA;
/*
--
1.9.3
* [PATCH 1/2] perf, x86: Disallow setting undefined bits for PEBS events
@ 2014-03-13 21:22 Andi Kleen
2014-03-13 21:22 ` [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store Andi Kleen
0 siblings, 1 reply; 17+ messages in thread
From: Andi Kleen @ 2014-03-13 21:22 UTC (permalink / raw)
To: mingo; +Cc: linux-kernel, peterz, eranian, acme, namhyung, jolsa, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
The SDM forbids setting various event qualifiers with PEBS
events. The magic cycles:pp event uses them, but that has caused
problems in the past. We continue allowing them for cycles:pp,
but forbid them for all other events, to follow the SDM.
SDM Vol 3 18.8.4:
"PEBS events are only valid when the following fields of
IA32_PERFEVTSELx are all zero: AnyThread, Edge, Invert, Cmask."
One visible change from this is that the cycles:pp event
can now only be requested as "cycles:pp", not in raw
form. If that turns out to be a problem we can allow it again.
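As a standalone model of the new check (bit positions per the
IA32_PERFEVTSELx layout; this mirrors the test added below rather than
quoting it):

	#include <stdio.h>

	#define EVENTSEL_EDGE	(1ULL << 18)
	#define EVENTSEL_ANY	(1ULL << 21)
	#define EVENTSEL_INV	(1ULL << 23)
	#define EVENTSEL_CMASK	(0xffULL << 24)

	#define NOT_WITH_PEBS \
		(EVENTSEL_ANY | EVENTSEL_CMASK | EVENTSEL_EDGE | EVENTSEL_INV)

	int main(void)
	{
		unsigned long long plain     = 0x000001c2ULL; /* UOPS_RETIRED.ALL */
		unsigned long long cycles_pp = 0x108001c2ULL; /* inv=1, cmask=16 */

		/* 0: allowed as a raw precise event; 1: rejected with -EINVAL */
		printf("plain:     %d\n", !!(plain & NOT_WITH_PEBS));		/* 0 */
		printf("cycles:pp: %d\n", !!(cycles_pp & NOT_WITH_PEBS));	/* 1 */
		return 0;
	}

When the pebs_aliases hook reports "changed", the rewritten cycles:pp
config is exempted from this check, which is why only the raw form is
now refused.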
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/perf_event.h | 2 +-
arch/x86/kernel/cpu/perf_event_intel.c | 26 +++++++++++++++++++++++---
2 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 1cca3d8..cf1eda1 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -461,7 +461,7 @@ struct x86_pmu {
int pebs_record_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
- void (*pebs_aliases)(struct perf_event *event);
+ void (*pebs_aliases)(struct perf_event *event, bool *changed);
int max_pebs_events;
/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index bf0a64c..f42562e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1691,7 +1691,7 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
intel_put_shared_regs_event_constraints(cpuc, event);
}
-static void intel_pebs_aliases_core2(struct perf_event *event)
+static void intel_pebs_aliases_core2(struct perf_event *event, bool *changed)
{
if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
/*
@@ -1716,10 +1716,11 @@ static void intel_pebs_aliases_core2(struct perf_event *event)
alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
event->hw.config = alt_config;
+ *changed = true;
}
}
-static void intel_pebs_aliases_snb(struct perf_event *event)
+static void intel_pebs_aliases_snb(struct perf_event *event, bool *changed)
{
if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
/*
@@ -1744,18 +1745,26 @@ static void intel_pebs_aliases_snb(struct perf_event *event)
alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
event->hw.config = alt_config;
+ *changed = true;
}
}
+#define ARCH_PERFMON_NOT_WITH_PEBS \
+ (ARCH_PERFMON_EVENTSEL_ANY | \
+ ARCH_PERFMON_EVENTSEL_CMASK | \
+ ARCH_PERFMON_EVENTSEL_EDGE | \
+ ARCH_PERFMON_EVENTSEL_INV)
+
static int intel_pmu_hw_config(struct perf_event *event)
{
+ bool changed = false;
int ret = x86_pmu_hw_config(event);
if (ret)
return ret;
if (event->attr.precise_ip && x86_pmu.pebs_aliases)
- x86_pmu.pebs_aliases(event);
+ x86_pmu.pebs_aliases(event, &changed);
if (intel_pmu_needs_lbr_smpl(event)) {
ret = intel_pmu_setup_lbr_filter(event);
@@ -1766,6 +1775,17 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (event->attr.type != PERF_TYPE_RAW)
return 0;
+ /*
+ * SDM Vol 3 18.8.4:
+ * "PEBS events are only valid when the following fields of
+ * IA32_PERFEVTSELx are all zero: AnyThread, Edge, Invert, Cmask.
+ *
+ * We only make an exception for the magic pebs aliases.
+ */
+ if (event->attr.precise_ip && !changed &&
+ (event->attr.config & ARCH_PERFMON_NOT_WITH_PEBS))
+ return -EINVAL;
+
if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
return 0;
--
1.8.5.3
* [PATCH 2/2] perf, x86: Don't mark DataLA addresses as store
2014-03-13 21:22 [PATCH 1/2] perf, x86: Disallow setting undefined bits for PEBS events Andi Kleen
@ 2014-03-13 21:22 ` Andi Kleen
0 siblings, 0 replies; 17+ messages in thread
From: Andi Kleen @ 2014-03-13 21:22 UTC (permalink / raw)
To: mingo; +Cc: linux-kernel, peterz, eranian, acme, namhyung, jolsa, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
Haswell supports reporting the data address for a range
of events, including UOPS_RETIRED.ALL and some load
events. Currently these addresses are always marked
as stores, which is wrong, as they could be loads too.
Change it to NA instead.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 77bdb09..7a319cf 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -113,7 +113,7 @@ static u64 precise_store_data_hsw(u64 status)
union perf_mem_data_src dse;
dse.val = 0;
- dse.mem_op = PERF_MEM_OP_STORE;
+ dse.mem_op = PERF_MEM_OP_NA;
dse.mem_lvl = PERF_MEM_LVL_NA;
if (status & 1)
dse.mem_lvl = PERF_MEM_LVL_L1;
--
1.8.5.3