public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: mingo@elte.hu, linux-kernel@vger.kernel.org
Cc: paulus@samba.org, eranian@google.com, robert.richter@amd.com,
	fweisbec@gmail.com, Arnaldo Carvalho de Melo <acme@infradead.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 14/14] perf, x86: Implement PERF_SAMPLE_BRANCH_STACK
Date: Thu, 04 Mar 2010 15:01:00 +0100	[thread overview]
Message-ID: <20100304140100.998642700@chello.nl> (raw)
In-Reply-To: 20100304140046.596569763@chello.nl

[-- Attachment #1: perf-sample-lbr.patch --]
[-- Type: text/plain, Size: 8845 bytes --]

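Add a PERF_SAMPLE_BRANCH_STACK sample type that emits the branch stack
read from the LBR (a u64 count followed by that many from/to/flags
entries) with each sample.

Not for merging until there's a sensible use case implemented in
tools/perf as well.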

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
---
 arch/x86/kernel/cpu/perf_event.c           |    3 +-
 arch/x86/kernel/cpu/perf_event_intel.c     |   10 ++++++-
 arch/x86/kernel/cpu/perf_event_intel_ds.c  |   20 ++++++---------
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |    4 ++-
 include/linux/perf_event.h                 |   15 ++++++++---
 kernel/perf_event.c                        |   38 ++++++++++++++++++++++-------
 6 files changed, 62 insertions(+), 28 deletions(-)

Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -125,8 +125,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_PERIOD			= 1U << 8,
 	PERF_SAMPLE_STREAM_ID			= 1U << 9,
 	PERF_SAMPLE_RAW				= 1U << 10,
+	PERF_SAMPLE_BRANCH_STACK		= 1U << 11,
 
-	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 12,		/* non-ABI */
 };
 
 /*
@@ -399,9 +400,13 @@ enum perf_event_type {
 	 *
 	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
 	 *
-	 *	{ u64			nr,
+	 *	{ u64			nr;
 	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
 	 *
+	 *	{ u64			nr;
+	 *	  { u64 from, to, flags;
+	 *	  }			lbr[nr];  } && PERF_SAMPLE_BRANCH_STACK
+	 *
 	 *	#
 	 *	# The RAW record below is opaque data wrt the ABI
 	 *	#
@@ -817,13 +822,15 @@ struct perf_sample_data {
 	u64				period;
 	struct perf_callchain_entry	*callchain;
 	struct perf_raw_record		*raw;
+	struct perf_branch_stack	*branches;
 };
 
 static inline
 void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
 {
-	data->addr = addr;
-	data->raw  = NULL;
+	data->addr     = addr;
+	data->raw      = NULL;
+	data->branches = NULL;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -3178,12 +3178,9 @@ void perf_output_sample(struct perf_outp
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		if (data->callchain) {
-			int size = 1;
+			int size = sizeof(u64);
 
-			if (data->callchain)
-				size += data->callchain->nr;
-
-			size *= sizeof(u64);
+			size += data->callchain->nr * sizeof(u64);
 
 			perf_output_copy(handle, data->callchain, size);
 		} else {
@@ -3192,6 +3189,20 @@ void perf_output_sample(struct perf_outp
 		}
 	}
 
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		if (data->branches) {
+			int size = sizeof(u64);
+
+			size += data->branches->nr *
+				sizeof(struct perf_branch_entry);
+
+			perf_output_copy(handle, data->branches, size);
+		} else {
+			u64 nr = 0;
+			perf_output_put(handle, nr);
+		}
+	}
+
 	if (sample_type & PERF_SAMPLE_RAW) {
 		if (data->raw) {
 			perf_output_put(handle, data->raw->size);
@@ -3274,14 +3285,25 @@ void perf_prepare_sample(struct perf_eve
 		header->size += perf_event_read_size(event);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		int size = 1;
+		int size = sizeof(u64);
 
 		data->callchain = perf_callchain(regs);
 
 		if (data->callchain)
-			size += data->callchain->nr;
+			size += data->callchain->nr * sizeof(u64);
+
+		header->size += size;
+	}
 
-		header->size += size * sizeof(u64);
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		int size = sizeof(u64);
+
+		if (data->branches) {
+			size += data->branches->nr *
+				sizeof(struct perf_branch_entry);
+		}
+
+		header->size += size;
 	}
 
 	if (sample_type & PERF_SAMPLE_RAW) {
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
@@ -548,6 +548,9 @@ static void intel_pmu_disable_event(stru
 
 	if (unlikely(event->attr.precise))
 		intel_pmu_pebs_disable(event);
+
+	if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+		intel_pmu_lbr_disable(event);
 }
 
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
@@ -602,6 +605,9 @@ static void intel_pmu_enable_event(struc
 	if (unlikely(event->attr.precise))
 		intel_pmu_pebs_enable(event);
 
+	if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+		intel_pmu_lbr_enable(event);
+
 	__x86_pmu_enable_event(hwc);
 }
 
@@ -677,13 +683,13 @@ again:
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
 
-	intel_pmu_lbr_read();
+	intel_pmu_lbr_read(&data);
 
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
 	 */
 	if (__test_and_clear_bit(62, (unsigned long *)&status))
-		x86_pmu.drain_pebs(regs);
+		x86_pmu.drain_pebs(&data, regs);
 
 	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[bit];
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_ds.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -448,13 +448,12 @@ static int intel_pmu_pebs_fixup_ip(struc
 static int intel_pmu_save_and_restart(struct perf_event *event);
 static void intel_pmu_disable_event(struct perf_event *event);
 
-static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_core(struct perf_sample_data *data, struct pt_regs *iregs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
 	struct pebs_record_core *at, *top;
-	struct perf_sample_data data;
 	struct perf_raw_record raw;
 	struct pt_regs regs;
 	int n;
@@ -475,8 +474,7 @@ static void intel_pmu_drain_pebs_core(st
 	if (!intel_pmu_save_and_restart(event))
 		goto out;
 
-	perf_sample_data_init(&data, 0);
-	data.period = event->hw.last_period;
+	data->period = event->hw.last_period;
 
 	n = top - at;
 
@@ -492,7 +490,7 @@ static void intel_pmu_drain_pebs_core(st
 	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 		raw.size = x86_pmu.pebs_record_size;
 		raw.data = at;
-		data.raw = &raw;
+		data->raw = &raw;
 	}
 
 	/*
@@ -515,19 +513,18 @@ static void intel_pmu_drain_pebs_core(st
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
-	if (perf_event_overflow(event, 1, &data, &regs))
+	if (perf_event_overflow(event, 1, data, &regs))
 		intel_pmu_disable_event(event);
 
 out:
 	intel_pmu_pebs_enable_all();
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_nhm(struct perf_sample_data *data, struct pt_regs *iregs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
 	struct pebs_record_nhm *at, *top;
-	struct perf_sample_data data;
 	struct perf_event *event = NULL;
 	struct perf_raw_record raw;
 	struct pt_regs regs;
@@ -575,13 +572,12 @@ static void intel_pmu_drain_pebs_nhm(str
 		if (!intel_pmu_save_and_restart(event))
 			continue;
 
-		perf_sample_data_init(&data, 0);
-		data.period = event->hw.last_period;
+		data->period = event->hw.last_period;
 
 		if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 			raw.size = x86_pmu.pebs_record_size;
 			raw.data = at;
-			data.raw = &raw;
+			data->raw = &raw;
 		}
 
 		/*
@@ -597,7 +593,7 @@ static void intel_pmu_drain_pebs_nhm(str
 		else
 			regs.flags &= ~PERF_EFLAGS_EXACT;
 
-		if (perf_event_overflow(event, 1, &data, &regs))
+		if (perf_event_overflow(event, 1, data, &regs))
 			intel_pmu_disable_event(event);
 	}
 out:
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -209,7 +209,8 @@ struct x86_pmu {
 	 */
 	int		bts, pebs;
 	int		pebs_record_size;
-	void		(*drain_pebs)(struct pt_regs *regs);
+	void		(*drain_pebs)(struct perf_sample_data *data,
+				      struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
 
 	/*
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -178,7 +178,7 @@ static void intel_pmu_lbr_read_64(struct
 	cpuc->lbr_stack.nr = i;
 }
 
-static void intel_pmu_lbr_read(void)
+static void intel_pmu_lbr_read(struct perf_sample_data *data)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -189,6 +189,8 @@ static void intel_pmu_lbr_read(void)
 		intel_pmu_lbr_read_32(cpuc);
 	else
 		intel_pmu_lbr_read_64(cpuc);
+
+	data->branches = &cpuc->lbr_stack;
 }
 
 static void intel_pmu_lbr_init_core(void)

-- 


      parent reply	other threads:[~2010-03-04 14:04 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-04 14:00 [PATCH 00/14] PEBS and LBR support Peter Zijlstra
2010-03-04 14:00 ` [PATCH 01/14] perf, x86: Remove superfluous arguments to x86_perf_event_set_period() Peter Zijlstra
2010-03-10 13:10   ` [tip:perf/urgent] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 02/14] perf, x86: Remove superfluous arguments to x86_perf_event_update() Peter Zijlstra
2010-03-10 13:11   ` [tip:perf/urgent] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 03/14] perf, x86: Change x86_pmu.{enable,disable} calling convention Peter Zijlstra
2010-03-10 13:11   ` [tip:perf/urgent] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 04/14] perf, x86: Use unlocked bitops Peter Zijlstra
2010-03-10 13:11   ` [tip:perf/urgent] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 05/14] perf: Generic perf_sample_data initialization Peter Zijlstra
2010-03-10 13:09   ` [tip:perf/urgent] perf: Provide generic " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 06/14] perf, x86: PEBS infrastructure Peter Zijlstra
2010-03-05  6:19   ` Paul Mackerras
2010-03-05  9:20     ` Peter Zijlstra
2010-03-05 19:11       ` Stephane Eranian
2010-03-05 19:39         ` Peter Zijlstra
2010-03-05 19:51           ` Stephane Eranian
2010-03-10 13:18   ` [tip:perf/pebs] perf, x86: Add " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 07/14] perf: Add attr->precise support to raw event parsing Peter Zijlstra
2010-03-10 13:18   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 08/14] perf, x86: Implement simple LBR support Peter Zijlstra
2010-03-10 13:19   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 09/14] perf, x86: use LBR for PEBS IP+1 fixup Peter Zijlstra
2010-03-04 16:21   ` Masami Hiramatsu
2010-03-04 17:54     ` Peter Zijlstra
2010-03-04 20:54       ` Masami Hiramatsu
2010-03-04 20:58         ` Masami Hiramatsu
2010-03-04 21:08         ` Peter Zijlstra
2010-03-10 13:19   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 10/14] perf-top: Show the percentage of successfull PEBS-fixups Peter Zijlstra
2010-03-10 13:19   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 11/14] perf, x86: Clean up IA32_PERF_CAPABILITIES usage Peter Zijlstra
2010-03-10 13:20   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 12/14] perf, x86: Expose the full PEBS record using PERF_SAMPLE_RAW Peter Zijlstra
2010-03-10 13:20   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 13/14] x86: Move MAX_INSN_SIZE into asm/insn.h Peter Zijlstra
2010-03-04 15:30   ` Masami Hiramatsu
2010-03-10 13:20   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:01 ` Peter Zijlstra [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100304140100.998642700@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=acme@infradead.org \
    --cc=eranian@google.com \
    --cc=fweisbec@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=paulus@samba.org \
    --cc=robert.richter@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.