All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andi Kleen <andi@firstfloor.org>
To: linux-kernel@vger.kernel.org
Cc: acme@redhat.com, peterz@infradead.org, jolsa@redhat.com,
	eranian@google.com, mingo@kernel.org,
	Andi Kleen <ak@linux.intel.com>
Subject: [PATCH 15/33] perf, x86: Support weight samples for PEBS
Date: Fri, 26 Oct 2012 13:29:57 -0700	[thread overview]
Message-ID: <1351283415-13170-16-git-send-email-andi@firstfloor.org> (raw)
In-Reply-To: <1351283415-13170-1-git-send-email-andi@firstfloor.org>

From: Andi Kleen <ak@linux.intel.com>

When a weighted sample is requested, first try to report the TSX abort cost
on Haswell. If that is not available, report the memory latency instead.
This allows profiling both by abort cost and by memory latency.

Reporting memory latencies requires enabling a different PEBS mode (LL).
When both address and weight are requested, address wins and the weight
request is dropped.

The LL mode only works for memory-related PEBS events, so add a
separate event constraint table for those.

I only did this for Haswell for now, but it could be added
for several other Intel CPUs too by just adding the right
table for them.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.h          |    4 ++
 arch/x86/kernel/cpu/perf_event_intel.c    |    4 ++
 arch/x86/kernel/cpu/perf_event_intel_ds.c |   47 +++++++++++++++++++++++++++-
 3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 17cb08f..89394e1 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -168,6 +168,7 @@ struct cpu_hw_events {
 	u64				perf_ctr_virt_mask;
 
 	void				*kfree_on_online;
+	u8				*memory_latency_events;
 };
 
 #define __EVENT_CONSTRAINT(c, n, m, w, o) {\
@@ -388,6 +389,7 @@ struct x86_pmu {
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
 	int 		max_pebs_events;
+	struct event_constraint *memory_lat_events;
 
 	/*
 	 * Intel LBR
@@ -594,6 +596,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[];
 
 extern struct event_constraint intel_hsw_pebs_event_constraints[];
 
+extern struct event_constraint intel_hsw_memory_latency_events[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_enable(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index bbd00cc..3a7b962 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1644,6 +1644,9 @@ static int hsw_hw_config(struct perf_event *event)
 
 	if (ret)
 		return ret;
+	/* PEBS cannot capture both */
+	if (event->attr.sample_type & PERF_SAMPLE_ADDR)
+		event->attr.sample_type &= ~PERF_SAMPLE_WEIGHT;
 	if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
 		return 0;
 	event->hw.config |= event->attr.config & (HSW_INTX|HSW_INTX_CHECKPOINTED);
@@ -2220,6 +2223,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
 		x86_pmu.format_attrs = intel_hsw_formats_attr;
+		x86_pmu.memory_lat_events = intel_hsw_memory_latency_events;
 		pr_cont("Haswell events, ");
 		break;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index aa0f5fa..3094caa 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -456,6 +456,17 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+/* Subset of PEBS events supporting memory latency. Not used for scheduling */
+
+struct event_constraint intel_hsw_memory_latency_events[] = {
+	INTEL_EVENT_CONSTRAINT(0xcd, 0), /* MEM_TRANS_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0), /* MEM_UOPS_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd1, 0), /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd2, 0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd3, 0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
 	struct event_constraint *c;
@@ -473,6 +484,21 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
+static bool is_memory_lat_event(struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	if (x86_pmu.intel_cap.pebs_format < 1)
+		return false;
+	if (!x86_pmu.memory_lat_events)
+		return false;
+	for_each_event_constraint(c, x86_pmu.memory_lat_events) {
+		if ((event->hw.config & c->cmask) == c->code)
+			return true;
+	}
+	return false;
+}
+
 void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -480,7 +506,12 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
-	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+	/* When weight is requested enable LL instead of normal PEBS */
+	if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
+		is_memory_lat_event(event))
+		cpuc->pebs_enabled |= 1ULL << (32 + hwc->idx);
+	else
+		cpuc->pebs_enabled |= 1ULL << hwc->idx;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -488,7 +519,11 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 
-	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+	if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
+		is_memory_lat_event(event))
+		cpuc->pebs_enabled &= ~(1ULL << (32 + hwc->idx));
+	else
+		cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
@@ -634,6 +669,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		x86_pmu.intel_cap.pebs_format >= 2)
 		data.addr = ((struct pebs_record_v2 *)pebs)->nhm.dla;
 
+	if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
+	    x86_pmu.intel_cap.pebs_format >= 2) {
+		data.weight = ((struct pebs_record_v2 *)pebs)->tsx_tuning &
+				0xffffffff;
+		if (!data.weight)
+			data.weight = ((struct pebs_record_v2 *)pebs)->nhm.lat;
+	}
+
 	if (has_branch_stack(event))
 		data.br_stack = &cpuc->lbr_stack;
 
-- 
1.7.7.6


  parent reply	other threads:[~2012-10-26 20:34 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-26 20:29 perf PMU support for Haswell v4 Andi Kleen
2012-10-26 20:29 ` [PATCH 01/33] perf, x86: Add PEBSv2 record support Andi Kleen
2012-10-29 10:08   ` Namhyung Kim
2012-10-29 10:13     ` Andi Kleen
2012-10-29 10:23     ` Peter Zijlstra
2012-10-26 20:29 ` [PATCH 02/33] perf, x86: Basic Haswell PMU support v2 Andi Kleen
2012-10-26 20:29 ` [PATCH 03/33] perf, x86: Basic Haswell PEBS support v3 Andi Kleen
2012-10-26 20:29 ` [PATCH 04/33] perf, x86: Support the TSX intx/intx_cp qualifiers v2 Andi Kleen
2012-10-26 20:29 ` [PATCH 05/33] perf, kvm: Support the intx/intx_cp modifiers in KVM arch perfmon emulation v3 Andi Kleen
2012-10-30  9:25   ` Gleb Natapov
2012-10-26 20:29 ` [PATCH 06/33] perf, x86: Support PERF_SAMPLE_ADDR on Haswell Andi Kleen
2012-10-26 20:29 ` [PATCH 07/33] perf, x86: Support Haswell v4 LBR format Andi Kleen
2012-10-26 20:29 ` [PATCH 08/33] perf, x86: Disable LBR recording for unknown LBR_FMT Andi Kleen
2012-10-26 20:29 ` [PATCH 09/33] perf, x86: Support LBR filtering by INTX/NOTX/ABORT v2 Andi Kleen
2012-10-26 20:29 ` [PATCH 10/33] perf, tools: Add abort,notx,intx branch filter options to perf report -j v2 Andi Kleen
2012-10-29 10:19   ` Namhyung Kim
2012-10-26 20:29 ` [PATCH 11/33] perf, tools: Support sorting by intx, abort branch flags Andi Kleen
2012-10-26 20:29 ` [PATCH 12/33] perf, x86: Support full width counting Andi Kleen
2012-10-26 20:29 ` [PATCH 13/33] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3 Andi Kleen
2012-10-26 20:29 ` [PATCH 14/33] perf, core: Add a concept of a weightened sample Andi Kleen
2012-10-26 20:29 ` Andi Kleen [this message]
2012-10-26 20:29 ` [PATCH 16/33] perf, tools: Add support for weight v2 Andi Kleen
2012-10-29 10:44   ` Namhyung Kim
2012-10-29 11:02     ` Andi Kleen
2012-10-26 20:29 ` [PATCH 17/33] perf, tools: Handle XBEGIN like a jump Andi Kleen
2012-10-26 20:30 ` [PATCH 18/33] perf, x86: Support for printing PMU state on spurious PMIs v3 Andi Kleen
2012-10-26 20:30 ` [PATCH 19/33] perf, core: Add generic transaction flags Andi Kleen
2012-10-26 20:30 ` [PATCH 20/33] perf, x86: Add Haswell specific transaction flag reporting Andi Kleen
2012-10-26 20:30 ` [PATCH 21/33] perf, tools: Add support for record transaction flags Andi Kleen
2012-10-29 10:49   ` Namhyung Kim
2012-10-26 20:30 ` [PATCH 22/33] perf, tools: Point --sort documentation to --help Andi Kleen
2012-10-26 20:30 ` [PATCH 23/33] perf, tools: Add browser support for transaction flags Andi Kleen
2012-10-26 20:30 ` [PATCH 24/33] perf, tools: Move parse_events error printing to parse_events_options Andi Kleen
2012-10-27 19:08   ` Jiri Olsa
2012-10-30 11:58   ` [tip:perf/core] perf " tip-bot for Andi Kleen
2012-10-26 20:30 ` [PATCH 25/33] perf, tools: Support events with - in the name Andi Kleen
2012-10-27 19:32   ` Jiri Olsa
2012-10-26 20:30 ` [PATCH 26/33] perf, x86: Report the arch perfmon events in sysfs Andi Kleen
2012-10-26 20:30 ` [PATCH 27/33] tools, perf: Add a precise event qualifier Andi Kleen
2012-10-27 19:35   ` Jiri Olsa
2012-10-28 19:13     ` Andi Kleen
2012-10-28 19:24       ` Jiri Olsa
2012-10-28 20:06         ` Andi Kleen
2012-10-26 20:30 ` [PATCH 28/33] perf, x86: Add Haswell TSX event aliases Andi Kleen
2012-10-26 20:30 ` [PATCH 29/33] perf, tools: Add perf stat --transaction v2 Andi Kleen
2012-10-26 20:30 ` [PATCH 30/33] perf, x86: Add a Haswell precise instructions event Andi Kleen
2012-10-26 20:30 ` [PATCH 31/33] perf, tools: Support generic events as pmu event names v2 Andi Kleen
2012-10-27 19:42   ` Jiri Olsa
2012-10-28 19:12     ` Andi Kleen
2012-10-29  9:23       ` Peter Zijlstra
2012-10-26 20:30 ` [PATCH 32/33] perf, tools: Default to cpu// for events v2 Andi Kleen
2012-10-27 20:16   ` Jiri Olsa
2012-10-26 20:30 ` [PATCH 33/33] perf, tools: List kernel supplied event aliases in perf list v2 Andi Kleen
2012-10-27 20:20   ` Jiri Olsa
2012-10-28 19:05     ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1351283415-13170-16-git-send-email-andi@firstfloor.org \
    --to=andi@firstfloor.org \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.