From: Andi Kleen <andi@firstfloor.org>
To: mingo@kernel.org
Cc: linux-kernel@vger.kernel.org, a.p.zijlstra@chello.nl,
akpm@linux-foundation.org, acme@redhat.com, eranian@google.com,
jolsa@redhat.com, namhyung@kernel.org,
Andi Kleen <ak@linux.intel.com>
Subject: [PATCH 05/18] perf, x86: Support weight samples for PEBS
Date: Fri, 25 Jan 2013 14:32:59 -0800 [thread overview]
Message-ID: <1359153192-13409-6-git-send-email-andi@firstfloor.org> (raw)
In-Reply-To: <1359153192-13409-1-git-send-email-andi@firstfloor.org>
From: Andi Kleen <ak@linux.intel.com>
When a weighted sample is requested, first try to report the TSX abort cost
on Haswell. If that is not available report the memory latency. This
allows profiling both by abort cost and by memory latencies.
Reporting memory latencies requires enabling a different PEBS mode (LL).
When both address and weight are requested, address wins.
The LL mode only works for memory related PEBS events, so add a
separate event constraint table for those.
I only did this for Haswell for now, but it could be added
for several other Intel CPUs too by just adding the right
table for them.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/perf_event.h | 4 ++
arch/x86/kernel/cpu/perf_event_intel.c | 4 ++
arch/x86/kernel/cpu/perf_event_intel_ds.c | 47 +++++++++++++++++++++++++++-
3 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ce2a863..d55e502 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -168,6 +168,7 @@ struct cpu_hw_events {
u64 perf_ctr_virt_mask;
void *kfree_on_online;
+ u8 *memory_latency_events;
};
#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
@@ -390,6 +391,7 @@ struct x86_pmu {
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
int max_pebs_events;
+ struct event_constraint *memory_lat_events;
/*
* Intel LBR
@@ -599,6 +601,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[];
extern struct event_constraint intel_hsw_pebs_event_constraints[];
+extern struct event_constraint intel_hsw_memory_latency_events[];
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_enable(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6899f57..d8acedd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1619,6 +1619,9 @@ static int hsw_hw_config(struct perf_event *event)
if (ret)
return ret;
+ /* PEBS cannot capture both */
+ if (event->attr.sample_type & PERF_SAMPLE_ADDR)
+ event->attr.sample_type &= ~PERF_SAMPLE_WEIGHT;
if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
return 0;
event->hw.config |= event->attr.config & (HSW_INTX|HSW_INTX_CHECKPOINTED);
@@ -2225,6 +2228,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.format_attrs = intel_hsw_formats_attr;
+ x86_pmu.memory_lat_events = intel_hsw_memory_latency_events;
pr_cont("Haswell events, ");
break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index aa0f5fa..3094caa 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -456,6 +456,17 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+/* Subset of PEBS events supporting memory latency. Not used for scheduling */
+
+struct event_constraint intel_hsw_memory_latency_events[] = {
+ INTEL_EVENT_CONSTRAINT(0xcd, 0), /* MEM_TRANS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd0, 0), /* MEM_UOPS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd1, 0), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd2, 0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd3, 0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;
@@ -473,6 +484,21 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
return &emptyconstraint;
}
+static bool is_memory_lat_event(struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ if (x86_pmu.intel_cap.pebs_format < 1)
+ return false;
+ if (!x86_pmu.memory_lat_events)
+ return false;
+ for_each_event_constraint(c, x86_pmu.memory_lat_events) {
+ if ((event->hw.config & c->cmask) == c->code)
+ return true;
+ }
+ return false;
+}
+
void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -480,7 +506,12 @@ void intel_pmu_pebs_enable(struct perf_event *event)
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
- cpuc->pebs_enabled |= 1ULL << hwc->idx;
+ /* When weight is requested enable LL instead of normal PEBS */
+ if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
+ is_memory_lat_event(event))
+ cpuc->pebs_enabled |= 1ULL << (32 + hwc->idx);
+ else
+ cpuc->pebs_enabled |= 1ULL << hwc->idx;
}
void intel_pmu_pebs_disable(struct perf_event *event)
@@ -488,7 +519,11 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+ if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
+ is_memory_lat_event(event))
+ cpuc->pebs_enabled &= ~(1ULL << (32 + hwc->idx));
+ else
+ cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -634,6 +669,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
x86_pmu.intel_cap.pebs_format >= 2)
data.addr = ((struct pebs_record_v2 *)pebs)->nhm.dla;
+ if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
+ x86_pmu.intel_cap.pebs_format >= 2) {
+ data.weight = ((struct pebs_record_v2 *)pebs)->tsx_tuning &
+ 0xffffffff;
+ if (!data.weight)
+ data.weight = ((struct pebs_record_v2 *)pebs)->nhm.lat;
+ }
+
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
--
1.7.7.6
next prev parent reply other threads:[~2013-01-25 22:36 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-01-25 22:32 perf PMU support for Haswell: Extended functionality v1 Andi Kleen
2013-01-25 22:32 ` [PATCH 01/18] perf, tools: Support sorting by intx, abort branch flags v2 Andi Kleen
2013-01-25 22:32 ` [PATCH 02/18] perf, kvm: Support the intx/intx_cp modifiers in KVM arch perfmon emulation v5 Andi Kleen
2013-01-27 15:24 ` Gleb Natapov
2013-01-25 22:32 ` [PATCH 03/18] perf, x86: Support PERF_SAMPLE_ADDR on Haswell Andi Kleen
2013-01-25 22:32 ` [PATCH 04/18] perf, core: Add a concept of a weightened sample v2 Andi Kleen
2013-01-25 22:32 ` Andi Kleen [this message]
2013-01-25 22:33 ` [PATCH 06/18] perf, tools: Add support for weight v8 Andi Kleen
2013-01-25 22:33 ` [PATCH 07/18] perf, core: Add generic transaction flags v3 Andi Kleen
2013-01-25 22:33 ` [PATCH 08/18] perf, x86: Add Haswell specific transaction flag reporting Andi Kleen
2013-01-25 22:33 ` [PATCH 09/18] perf, tools: Add support for record transaction flags v3 Andi Kleen
2013-01-25 22:33 ` [PATCH 10/18] perf, tools: Add browser support for transaction flags v6 Andi Kleen
2013-01-25 22:33 ` [PATCH 11/18] tools, perf: Add a precise event qualifier v2 Andi Kleen
2013-01-25 22:33 ` [PATCH 12/18] perf, x86: improve sysfs event mapping with event string Andi Kleen
2013-01-25 22:33 ` [PATCH 13/18] perf, x86: Support CPU specific sysfs events Andi Kleen
2013-01-25 22:33 ` [PATCH 14/18] perf, x86: Add Haswell TSX event aliases v2 Andi Kleen
2013-01-25 22:33 ` [PATCH 15/18] perf, tools: Add perf stat --transaction v3 Andi Kleen
2013-01-25 22:33 ` [PATCH 16/18] perf, x86: Add a Haswell precise instructions event v2 Andi Kleen
2013-01-25 22:33 ` [PATCH 17/18] perf, tools: Default to cpu// for events v5 Andi Kleen
2013-01-25 22:33 ` [PATCH 18/18] perf, tools: List kernel supplied event aliases in perf list v3 Andi Kleen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1359153192-13409-6-git-send-email-andi@firstfloor.org \
--to=andi@firstfloor.org \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@redhat.com \
--cc=ak@linux.intel.com \
--cc=akpm@linux-foundation.org \
--cc=eranian@google.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox