From: Andi Kleen <andi@firstfloor.org>
To: linux-kernel@vger.kernel.org
Cc: x86@kernel.org, a.p.zijlstra@chello.nl, eranian@google.com,
acme@redhat.com, Andi Kleen <ak@linux.intel.com>
Subject: [PATCH 19/31] perf, x86: Support weight samples for PEBS
Date: Thu, 27 Sep 2012 21:31:24 -0700 [thread overview]
Message-ID: <1348806696-31170-20-git-send-email-andi@firstfloor.org> (raw)
In-Reply-To: <1348806696-31170-1-git-send-email-andi@firstfloor.org>
From: Andi Kleen <ak@linux.intel.com>
When a weighted sample is requested, first try to report the TSX abort cost
on Haswell. If that is not available report the memory latency. This
allows profiling both by abort cost and by memory latencies.
Memory latencies require enabling a different PEBS mode (LL).
When both address and weight are requested, address wins.
The LL mode only works for memory related PEBS events, so add a
separate event constraint table for those.
I only did this for Haswell for now, but it could be added
for several other Intel CPUs too by just adding the right
table for them.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/perf_event.h | 4 ++
arch/x86/kernel/cpu/perf_event_intel.c | 4 ++
arch/x86/kernel/cpu/perf_event_intel_ds.c | 47 +++++++++++++++++++++++++++-
3 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8550601..724a141 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -168,6 +168,7 @@ struct cpu_hw_events {
u64 perf_ctr_virt_mask;
void *kfree_on_online;
+ u8 *memory_latency_events;
};
#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
@@ -392,6 +393,7 @@ struct x86_pmu {
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
int max_pebs_events;
+ struct event_constraint *memory_lat_events;
/*
* Intel LBR
@@ -596,6 +598,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
extern struct event_constraint intel_hsw_pebs_event_constraints[];
+extern struct event_constraint intel_hsw_memory_latency_events[];
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_enable(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 83ced1a..2c4cbf3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1637,6 +1637,9 @@ static int hsw_hw_config(struct perf_event *event)
if (ret)
return ret;
+ /* PEBS cannot capture both */
+ if (event->attr.sample_type & PERF_SAMPLE_ADDR)
+ event->attr.sample_type &= ~PERF_SAMPLE_WEIGHT;
if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
return 0;
if (event->attr.intx)
@@ -2161,6 +2164,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.memory_lat_events = intel_hsw_memory_latency_events;
pr_cont("Haswell events, ");
break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 81fc14a..930bc65 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -442,6 +442,17 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+/* Subset of PEBS events supporting memory latency. Not used for scheduling */
+
+struct event_constraint intel_hsw_memory_latency_events[] = {
+ INTEL_EVENT_CONSTRAINT(0xcd, 0), /* MEM_TRANS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd0, 0), /* MEM_UOPS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd1, 0), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd2, 0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd3, 0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;
@@ -459,6 +470,21 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
return &emptyconstraint;
}
+static bool is_memory_lat_event(struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ if (x86_pmu.intel_cap.pebs_format < 1)
+ return false;
+ if (!x86_pmu.memory_lat_events)
+ return false;
+ for_each_event_constraint(c, x86_pmu.memory_lat_events) {
+ if ((event->hw.config & c->cmask) == c->code)
+ return true;
+ }
+ return false;
+}
+
void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -466,7 +492,12 @@ void intel_pmu_pebs_enable(struct perf_event *event)
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
- cpuc->pebs_enabled |= 1ULL << hwc->idx;
+ /* When weight is requested enable LL instead of normal PEBS */
+ if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
+ is_memory_lat_event(event))
+ cpuc->pebs_enabled |= 1ULL << (32 + hwc->idx);
+ else
+ cpuc->pebs_enabled |= 1ULL << hwc->idx;
}
void intel_pmu_pebs_disable(struct perf_event *event)
@@ -474,7 +505,11 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+ if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
+ is_memory_lat_event(event))
+ cpuc->pebs_enabled &= ~(1ULL << (32 + hwc->idx));
+ else
+ cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -627,6 +662,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
x86_pmu.intel_cap.pebs_format >= 2)
data.addr = ((struct pebs_record_v2 *)pebs)->nhm.dla;
+ if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
+ x86_pmu.intel_cap.pebs_format >= 2) {
+ data.weight = ((struct pebs_record_v2 *)pebs)->tsx_tuning &
+ 0xffffffff;
+ if (!data.weight)
+ data.weight = ((struct pebs_record_v2 *)pebs)->nhm.lat;
+ }
+
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
--
1.7.7.6
next prev parent reply other threads:[~2012-09-28 4:34 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-09-28 4:31 perf PMU support for Haswell Andi Kleen
2012-09-28 4:31 ` [PATCH 01/31] perf, x86: Add PEBSv2 record support Andi Kleen
2012-09-28 8:43 ` Peter Zijlstra
2012-09-28 8:54 ` Stephane Eranian
2012-09-28 9:28 ` Peter Zijlstra
2012-09-28 11:33 ` Stephane Eranian
2012-09-28 14:42 ` Andi Kleen
2012-09-28 4:31 ` [PATCH 02/31] perf, x86: Basic Haswell PMU support Andi Kleen
2012-09-28 9:05 ` Peter Zijlstra
2012-09-28 14:58 ` Andi Kleen
[not found] ` <CABPqkBQ90Crh+EpRQq0Y+xUvrj5vzrX_=SpJQyR4p8uFR_Hr=Q@mail.gmail.com>
2012-09-28 15:21 ` Peter Zijlstra
2012-09-28 15:23 ` Andi Kleen
2012-09-28 4:31 ` [PATCH 03/31] perf, x86: Basic Haswell PEBS support Andi Kleen
2012-09-28 8:50 ` Peter Zijlstra
2012-09-28 4:31 ` [PATCH 04/31] perf, core: Add generic intx/intx_checkpointed counter modifiers Andi Kleen
2012-09-28 9:02 ` Peter Zijlstra
2012-09-28 11:35 ` Stephane Eranian
2012-09-28 14:53 ` Andi Kleen
2012-09-28 15:19 ` Peter Zijlstra
2012-09-28 15:29 ` Andi Kleen
2012-09-28 15:36 ` Peter Zijlstra
2012-09-28 15:23 ` Peter Zijlstra
2012-09-28 15:37 ` Andi Kleen
2012-09-28 4:31 ` [PATCH 05/31] perf, tools: Add :c,:t event modifiers in perf tools Andi Kleen
2012-09-28 4:31 ` [PATCH 06/31] perf, tools: Add intx/intx_checkpoint to perf script and header printing Andi Kleen
2012-09-28 4:31 ` [PATCH 07/31] perf, x86: Implement the :t and :c qualifiers for Haswell Andi Kleen
2012-09-28 4:31 ` [PATCH 08/31] perf, x86: Report PEBS event in a raw format Andi Kleen
2012-09-28 8:54 ` Peter Zijlstra
2012-09-28 8:57 ` Stephane Eranian
2012-09-28 4:31 ` [PATCH 09/31] perf, kvm: Support :t and :c perf modifiers in KVM arch perfmon emulation Andi Kleen
2012-09-28 4:31 ` [PATCH 10/31] perf, x86: Support PERF_SAMPLE_ADDR on Haswell Andi Kleen
2012-09-28 4:31 ` [PATCH 11/31] perf, x86: Support Haswell v4 LBR format Andi Kleen
2012-09-28 4:31 ` [PATCH 12/31] perf, x86: Disable LBR recording for unknown LBR_FMT Andi Kleen
2012-09-28 4:31 ` [PATCH 13/31] perf, x86: Support LBR filtering by INTX/NOTX/ABORT Andi Kleen
2012-09-28 4:31 ` [PATCH 14/31] perf, tools: Add abort,notx,intx branch filter options to perf report -j Andi Kleen
2012-09-28 4:31 ` [PATCH 15/31] perf, tools: Support sorting by intx, abort branch flags Andi Kleen
2012-09-28 4:31 ` [PATCH 16/31] perf, x86: Support full width counting on Haswell Andi Kleen
2012-09-28 4:31 ` [PATCH 17/31] perf, x86: Avoid checkpointed counters causing excessive TSX aborts Andi Kleen
2012-09-28 4:31 ` [PATCH 18/31] perf, core: Add a concept of a weightened sample Andi Kleen
2012-09-28 9:06 ` Stephane Eranian
2012-09-28 14:57 ` Andi Kleen
2012-09-28 17:09 ` Stephane Eranian
2012-09-28 4:31 ` Andi Kleen [this message]
2012-09-28 4:31 ` [PATCH 20/31] perf, tools: Add support for weight Andi Kleen
2012-09-28 4:31 ` [PATCH 21/31] perf, tools: Handle XBEGIN like a jump Andi Kleen
2012-09-28 4:31 ` [PATCH 22/31] perf, core: Define generic hardware transaction events Andi Kleen
2012-09-28 9:33 ` Peter Zijlstra
2012-09-28 4:31 ` [PATCH 23/31] perf, tools: Add support for generic transaction events to perf userspace Andi Kleen
2012-09-28 4:31 ` [PATCH 24/31] perf, x86: Add the Haswell implementation of the generic transaction events Andi Kleen
2012-09-28 4:31 ` [PATCH 25/31] perf, tools: Add perf stat --transaction Andi Kleen
2012-09-28 4:31 ` [PATCH 26/31] perf, x86: Support for printing PMU state on spurious PMIs Andi Kleen
2012-09-28 9:36 ` Peter Zijlstra
2012-09-28 11:39 ` Stephane Eranian
2012-09-28 4:31 ` [PATCH 27/31] perf, core: Add generic transaction flags Andi Kleen
2012-09-28 4:31 ` [PATCH 28/31] perf, x86: Add Haswell specific transaction flag reporting Andi Kleen
2012-09-28 4:31 ` [PATCH 29/31] perf, tools: Add support for record transaction flags Andi Kleen
2012-09-28 4:31 ` [PATCH 30/31] perf, tools: Point --sort documentation to --help Andi Kleen
2012-09-28 4:31 ` [PATCH 31/31] perf, tools: Add browser support for transaction flags Andi Kleen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1348806696-31170-20-git-send-email-andi@firstfloor.org \
--to=andi@firstfloor.org \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@redhat.com \
--cc=ak@linux.intel.com \
--cc=eranian@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).