* [PATCH v2]: fix Haswell precise store data source encoding
@ 2014-05-15 15:56 Stephane Eranian
2014-05-15 19:56 ` Don Zickus
2014-05-19 12:55 ` [tip:perf/core] " tip-bot for Stephane Eranian
0 siblings, 2 replies; 6+ messages in thread
From: Stephane Eranian @ 2014-05-15 15:56 UTC (permalink / raw)
To: linux-kernel; +Cc: peterz, mingo, acme, jolsa, jmario, dzickus, ak
This patch fixes a bug in precise_store_data_hsw() whereby
it would set the data source memory level to the wrong value.
As per the SDM Vol 3b Table 18-41 (Layout of Data Linear
Address Information in PEBS Record), when status bit 0 is set
this is an L1 hit, otherwise this is an L1 miss.
This patch encodes the memory level according to the specification.
In V2, we added the filtering on the store events.
Only the following events produce L1 information:
* MEM_UOPS_RETIRED.STLB_MISS_STORES
* MEM_UOPS_RETIRED.LOCK_STORES
* MEM_UOPS_RETIRED.SPLIT_STORES
* MEM_UOPS_RETIRED.ALL_STORES
Signed-off-by: Stephane Eranian <eranian@google.com>
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ae96cfa..980970c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status)
return val;
}
-static u64 precise_store_data_hsw(u64 status)
+static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
{
union perf_mem_data_src dse;
+ u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
dse.val = 0;
dse.mem_op = PERF_MEM_OP_STORE;
dse.mem_lvl = PERF_MEM_LVL_NA;
+
+ /*
+ * L1 info only valid for following events:
+ *
+ * MEM_UOPS_RETIRED.STLB_MISS_STORES
+ * MEM_UOPS_RETIRED.LOCK_STORES
+ * MEM_UOPS_RETIRED.SPLIT_STORES
+ * MEM_UOPS_RETIRED.ALL_STORES
+ */
+ if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
+ return dse.mem_lvl;
+
if (status & 1)
- dse.mem_lvl = PERF_MEM_LVL_L1;
+ dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ else
+ dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+
/* Nothing else supported. Sorry. */
return dse.val;
}
@@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
data.data_src.val = load_latency_data(pebs->dse);
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
data.data_src.val =
- precise_store_data_hsw(pebs->dse);
+ precise_store_data_hsw(event, pebs->dse);
else
data.data_src.val = precise_store_data(pebs->dse);
}
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v2]: fix Haswell precise store data source encoding
2014-05-15 15:56 [PATCH v2]: fix Haswell precise store data source encoding Stephane Eranian
@ 2014-05-15 19:56 ` Don Zickus
2014-05-15 20:08 ` Stephane Eranian
2014-05-15 21:22 ` Andi Kleen
2014-05-19 12:55 ` [tip:perf/core] " tip-bot for Stephane Eranian
1 sibling, 2 replies; 6+ messages in thread
From: Don Zickus @ 2014-05-15 19:56 UTC (permalink / raw)
To: Stephane Eranian; +Cc: linux-kernel, peterz, mingo, acme, jolsa, jmario, ak
On Thu, May 15, 2014 at 05:56:44PM +0200, Stephane Eranian wrote:
>
> This patch fixes a bug in precise_store_data_hsw() whereby
> it would set the data source memory level to the wrong value.
>
> As per the SDM Vol 3b Table 18-41 (Layout of Data Linear
> Address Information in PEBS Record), when status bit 0 is set
> this is an L1 hit, otherwise this is an L1 miss.
>
> This patch encodes the memory level according to the specification.
>
> In V2, we added the filtering on the store events.
> Only the following events produce L1 information:
> * MEM_UOPS_RETIRED.STLB_MISS_STORES
> * MEM_UOPS_RETIRED.LOCK_STORES
> * MEM_UOPS_RETIRED.SPLIT_STORES
> * MEM_UOPS_RETIRED.ALL_STORES
This worked great on our Haswell-EX box. I was a little surprised to find
out it did until I realized on Ivy Bridge 'mem-store' was a 0x02cd but on
Haswell it is now a 0x82d0. Go generic event types! :-)
Looking at the SDM documentation it does say something about
'UOPS_RETIRED.ALL' supporting stores too but can't find that event. Is
that a typo, much like the 0x02 umask for stores on the D0 event is
missing from the documentation? Just wanted to make sure we are not
missing one more case.
Thanks for the quick patch Stephane!
Tested-and-Reviewed-by: Don Zickus <dzickus@redhat.com>
>
> Signed-off-by: Stephane Eranian <eranian@google.com>
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index ae96cfa..980970c 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status)
> return val;
> }
>
> -static u64 precise_store_data_hsw(u64 status)
> +static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
> {
> union perf_mem_data_src dse;
> + u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
>
> dse.val = 0;
> dse.mem_op = PERF_MEM_OP_STORE;
> dse.mem_lvl = PERF_MEM_LVL_NA;
> +
> + /*
> + * L1 info only valid for following events:
> + *
> + * MEM_UOPS_RETIRED.STLB_MISS_STORES
> + * MEM_UOPS_RETIRED.LOCK_STORES
> + * MEM_UOPS_RETIRED.SPLIT_STORES
> + * MEM_UOPS_RETIRED.ALL_STORES
> + */
> + if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
> + return dse.mem_lvl;
> +
> if (status & 1)
> - dse.mem_lvl = PERF_MEM_LVL_L1;
> + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
> + else
> + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
> +
> /* Nothing else supported. Sorry. */
> return dse.val;
> }
> @@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
> data.data_src.val = load_latency_data(pebs->dse);
> else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
> data.data_src.val =
> - precise_store_data_hsw(pebs->dse);
> + precise_store_data_hsw(event, pebs->dse);
> else
> data.data_src.val = precise_store_data(pebs->dse);
> }
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2]: fix Haswell precise store data source encoding
2014-05-15 19:56 ` Don Zickus
@ 2014-05-15 20:08 ` Stephane Eranian
2014-05-15 20:27 ` Don Zickus
2014-05-15 21:22 ` Andi Kleen
1 sibling, 1 reply; 6+ messages in thread
From: Stephane Eranian @ 2014-05-15 20:08 UTC (permalink / raw)
To: Don Zickus
Cc: LKML, Peter Zijlstra, mingo@elte.hu, Arnaldo Carvalho de Melo,
Jiri Olsa, Joe Mario, ak@linux.intel.com
On Thu, May 15, 2014 at 9:56 PM, Don Zickus <dzickus@redhat.com> wrote:
> On Thu, May 15, 2014 at 05:56:44PM +0200, Stephane Eranian wrote:
>>
>> This patch fixes a bug in precise_store_data_hsw() whereby
>> it would set the data source memory level to the wrong value.
>>
>> As per the SDM Vol 3b Table 18-41 (Layout of Data Linear
>> Address Information in PEBS Record), when status bit 0 is set
>> this is an L1 hit, otherwise this is an L1 miss.
>>
>> This patch encodes the memory level according to the specification.
>>
>> In V2, we added the filtering on the store events.
>> Only the following events produce L1 information:
>> * MEM_UOPS_RETIRED.STLB_MISS_STORES
>> * MEM_UOPS_RETIRED.LOCK_STORES
>> * MEM_UOPS_RETIRED.SPLIT_STORES
>> * MEM_UOPS_RETIRED.ALL_STORES
>
> This worked great on our Haswell-EX box. I was a little surprised to find
> out it did until I realized on Ivy Bridge 'mem-store' was a 0x02cd but on
> Haswell it is now a 0x82d0. Go generic event types! :-)
>
> Looking at the SDM documentation it does say something about
> 'UOPS_RETIRED.ALL' supporting stores too but can't find that event. Is
> that a typo, much like the 0x02 umask for stores on the D0 event is
> missing from the documentation? Just wanted to make sure we are not
> missing one more case.
>
But uops_retired.all does not generate the L1 info. That is why it is not
there in my patch.
> Thanks for the quick patch Stephane!
>
> Tested-and-Reviewed-by: Don Zickus <dzickus@redhat.com>
>
>>
>> Signed-off-by: Stephane Eranian <eranian@google.com>
>>
>> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
>> index ae96cfa..980970c 100644
>> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
>> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
>> @@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status)
>> return val;
>> }
>>
>> -static u64 precise_store_data_hsw(u64 status)
>> +static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
>> {
>> union perf_mem_data_src dse;
>> + u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
>>
>> dse.val = 0;
>> dse.mem_op = PERF_MEM_OP_STORE;
>> dse.mem_lvl = PERF_MEM_LVL_NA;
>> +
>> + /*
>> + * L1 info only valid for following events:
>> + *
>> + * MEM_UOPS_RETIRED.STLB_MISS_STORES
>> + * MEM_UOPS_RETIRED.LOCK_STORES
>> + * MEM_UOPS_RETIRED.SPLIT_STORES
>> + * MEM_UOPS_RETIRED.ALL_STORES
>> + */
>> + if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
>> + return dse.mem_lvl;
>> +
>> if (status & 1)
>> - dse.mem_lvl = PERF_MEM_LVL_L1;
>> + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
>> + else
>> + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
>> +
>> /* Nothing else supported. Sorry. */
>> return dse.val;
>> }
>> @@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
>> data.data_src.val = load_latency_data(pebs->dse);
>> else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
>> data.data_src.val =
>> - precise_store_data_hsw(pebs->dse);
>> + precise_store_data_hsw(event, pebs->dse);
>> else
>> data.data_src.val = precise_store_data(pebs->dse);
>> }
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2]: fix Haswell precise store data source encoding
2014-05-15 20:08 ` Stephane Eranian
@ 2014-05-15 20:27 ` Don Zickus
0 siblings, 0 replies; 6+ messages in thread
From: Don Zickus @ 2014-05-15 20:27 UTC (permalink / raw)
To: Stephane Eranian
Cc: LKML, Peter Zijlstra, mingo@elte.hu, Arnaldo Carvalho de Melo,
Jiri Olsa, Joe Mario, ak@linux.intel.com
On Thu, May 15, 2014 at 10:08:51PM +0200, Stephane Eranian wrote:
> On Thu, May 15, 2014 at 9:56 PM, Don Zickus <dzickus@redhat.com> wrote:
> > On Thu, May 15, 2014 at 05:56:44PM +0200, Stephane Eranian wrote:
> >>
> >> This patch fixes a bug in precise_store_data_hsw() whereby
> >> it would set the data source memory level to the wrong value.
> >>
> >> As per the SDM Vol 3b Table 18-41 (Layout of Data Linear
> >> Address Information in PEBS Record), when status bit 0 is set
> >> this is an L1 hit, otherwise this is an L1 miss.
> >>
> >> This patch encodes the memory level according to the specification.
> >>
> >> In V2, we added the filtering on the store events.
> >> Only the following events produce L1 information:
> >> * MEM_UOPS_RETIRED.STLB_MISS_STORES
> >> * MEM_UOPS_RETIRED.LOCK_STORES
> >> * MEM_UOPS_RETIRED.SPLIT_STORES
> >> * MEM_UOPS_RETIRED.ALL_STORES
> >
> > This worked great on our Haswell-EX box. I was a little surprised to find
> > out it did until I realized on Ivy Bridge 'mem-store' was a 0x02cd but on
> > Haswell it is now a 0x82d0. Go generic event types! :-)
> >
> > Looking at the SDM documentation it does say something about
> > 'UOPS_RETIRED.ALL' supporting stores too but can't find that event. Is
> > that a typo, much like the 0x02 umask for stores on the D0 event is
> > missing from the documentation? Just wanted to make sure we are not
> > missing one more case.
> >
> But uops_retired.all does not generate the L1 info. That is why it is not
> there in my patch.
Ok.
Cheers,
Don
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2]: fix Haswell precise store data source encoding
2014-05-15 19:56 ` Don Zickus
2014-05-15 20:08 ` Stephane Eranian
@ 2014-05-15 21:22 ` Andi Kleen
1 sibling, 0 replies; 6+ messages in thread
From: Andi Kleen @ 2014-05-15 21:22 UTC (permalink / raw)
To: Don Zickus
Cc: Stephane Eranian, linux-kernel, peterz, mingo, acme, jolsa,
jmario
> Looking at the SDM documentation it does say something about
> 'UOPS_RETIRED.ALL' supporting stores too but can't find that event. Is
> that a typo, much like the 0x02 umask for stores on the D0 event is
> missing from the documentation? Just wanted to make sure we are not
> missing one more case.
There are some problems in the HSW perf PEBS list.
I posted a patch for this, but so far it's stuck in the usual
perf patch limbo:
http://lkml.iu.edu/hypermail/linux/kernel/1404.2/01509.html
-Andi
^ permalink raw reply [flat|nested] 6+ messages in thread
* [tip:perf/core] fix Haswell precise store data source encoding
2014-05-15 15:56 [PATCH v2]: fix Haswell precise store data source encoding Stephane Eranian
2014-05-15 19:56 ` Don Zickus
@ 2014-05-19 12:55 ` tip-bot for Stephane Eranian
1 sibling, 0 replies; 6+ messages in thread
From: tip-bot for Stephane Eranian @ 2014-05-19 12:55 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, eranian, hpa, mingo, peterz, tglx, dzickus
Commit-ID: 722e76e60f2775c21b087ff12c5e678cf0ebcaaf
Gitweb: http://git.kernel.org/tip/722e76e60f2775c21b087ff12c5e678cf0ebcaaf
Author: Stephane Eranian <eranian@google.com>
AuthorDate: Thu, 15 May 2014 17:56:44 +0200
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitDate: Mon, 19 May 2014 21:52:59 +0900
fix Haswell precise store data source encoding
This patch fixes a bug in precise_store_data_hsw() whereby
it would set the data source memory level to the wrong value.
As per the SDM Vol 3b Table 18-41 (Layout of Data Linear
Address Information in PEBS Record), when status bit 0 is set
this is an L1 hit, otherwise this is an L1 miss.
This patch encodes the memory level according to the specification.
In V2, we added the filtering on the store events.
Only the following events produce L1 information:
* MEM_UOPS_RETIRED.STLB_MISS_STORES
* MEM_UOPS_RETIRED.LOCK_STORES
* MEM_UOPS_RETIRED.SPLIT_STORES
* MEM_UOPS_RETIRED.ALL_STORES
Cc: mingo@elte.hu
Cc: acme@ghostprotocols.net
Cc: jolsa@redhat.com
Cc: jmario@redhat.com
Cc: ak@linux.intel.com
Tested-and-Reviewed-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140515155644.GA3884@quad
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/kernel/cpu/perf_event_intel_ds.c | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ae96cfa..980970c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status)
return val;
}
-static u64 precise_store_data_hsw(u64 status)
+static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
{
union perf_mem_data_src dse;
+ u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
dse.val = 0;
dse.mem_op = PERF_MEM_OP_STORE;
dse.mem_lvl = PERF_MEM_LVL_NA;
+
+ /*
+ * L1 info only valid for following events:
+ *
+ * MEM_UOPS_RETIRED.STLB_MISS_STORES
+ * MEM_UOPS_RETIRED.LOCK_STORES
+ * MEM_UOPS_RETIRED.SPLIT_STORES
+ * MEM_UOPS_RETIRED.ALL_STORES
+ */
+ if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
+ return dse.mem_lvl;
+
if (status & 1)
- dse.mem_lvl = PERF_MEM_LVL_L1;
+ dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ else
+ dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+
/* Nothing else supported. Sorry. */
return dse.val;
}
@@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
data.data_src.val = load_latency_data(pebs->dse);
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
data.data_src.val =
- precise_store_data_hsw(pebs->dse);
+ precise_store_data_hsw(event, pebs->dse);
else
data.data_src.val = precise_store_data(pebs->dse);
}
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2014-05-19 12:56 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-15 15:56 [PATCH v2]: fix Haswell precise store data source encoding Stephane Eranian
2014-05-15 19:56 ` Don Zickus
2014-05-15 20:08 ` Stephane Eranian
2014-05-15 20:27 ` Don Zickus
2014-05-15 21:22 ` Andi Kleen
2014-05-19 12:55 ` [tip:perf/core] " tip-bot for Stephane Eranian
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox