All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: mingo@elte.hu, linux-kernel@vger.kernel.org
Cc: paulus@samba.org, eranian@google.com, robert.richter@amd.com,
	fweisbec@gmail.com, Arnaldo Carvalho de Melo <acme@infradead.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 14/14] perf, x86: Implement PERF_SAMPLE_BRANCH_STACK
Date: Thu, 04 Mar 2010 15:01:00 +0100	[thread overview]
Message-ID: <20100304140100.998642700@chello.nl> (raw)
In-Reply-To: 20100304140046.596569763@chello.nl

[-- Attachment #1: perf-sample-lbr.patch --]
[-- Type: text/plain, Size: 8845 bytes --]

Not for merging until there's a sensible use case implemented in
tools/perf as well.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
---
 arch/x86/kernel/cpu/perf_event.c           |    3 +-
 arch/x86/kernel/cpu/perf_event_intel.c     |   10 ++++++-
 arch/x86/kernel/cpu/perf_event_intel_ds.c  |   20 ++++++---------
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |    4 ++-
 include/linux/perf_event.h                 |   15 ++++++++---
 kernel/perf_event.c                        |   38 ++++++++++++++++++++++-------
 6 files changed, 62 insertions(+), 28 deletions(-)

Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -125,8 +125,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_PERIOD			= 1U << 8,
 	PERF_SAMPLE_STREAM_ID			= 1U << 9,
 	PERF_SAMPLE_RAW				= 1U << 10,
+	PERF_SAMPLE_BRANCH_STACK		= 1U << 11,
 
-	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 12,		/* non-ABI */
 };
 
 /*
@@ -399,9 +400,13 @@ enum perf_event_type {
 	 *
 	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
 	 *
-	 *	{ u64			nr,
+	 *	{ u64			nr;
 	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
 	 *
+	 * 	{ u64			nr;
+	 * 	  { u64 from, to, flags;
+	 * 	  }			lbr[nr];  } && PERF_SAMPLE_BRANCH_STACK
+	 *
 	 *	#
 	 *	# The RAW record below is opaque data wrt the ABI
 	 *	#
@@ -817,13 +822,15 @@ struct perf_sample_data {
 	u64				period;
 	struct perf_callchain_entry	*callchain;
 	struct perf_raw_record		*raw;
+	struct perf_branch_stack	*branches;
 };
 
 static inline
 void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
 {
-	data->addr = addr;
-	data->raw  = NULL;
+	data->addr     = addr;
+	data->raw      = NULL;
+	data->branches = NULL;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -3178,12 +3178,9 @@ void perf_output_sample(struct perf_outp
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		if (data->callchain) {
-			int size = 1;
+			int size = sizeof(u64);
 
-			if (data->callchain)
-				size += data->callchain->nr;
-
-			size *= sizeof(u64);
+			size += data->callchain->nr * sizeof(u64);
 
 			perf_output_copy(handle, data->callchain, size);
 		} else {
@@ -3192,6 +3189,20 @@ void perf_output_sample(struct perf_outp
 		}
 	}
 
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		if (data->branches) {
+			int size = sizeof(u64);
+
+			size += data->branches->nr *
+				sizeof(struct perf_branch_entry);
+
+			perf_output_copy(handle, data->branches, size);
+		} else {
+			u64 nr = 0;
+			perf_output_put(handle, nr);
+		}
+	}
+
 	if (sample_type & PERF_SAMPLE_RAW) {
 		if (data->raw) {
 			perf_output_put(handle, data->raw->size);
@@ -3274,14 +3285,25 @@ void perf_prepare_sample(struct perf_eve
 		header->size += perf_event_read_size(event);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		int size = 1;
+		int size = sizeof(u64);
 
 		data->callchain = perf_callchain(regs);
 
 		if (data->callchain)
-			size += data->callchain->nr;
+			size += data->callchain->nr * sizeof(u64);
+
+		header->size += size;
+	}
 
-		header->size += size * sizeof(u64);
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		int size = sizeof(u64);
+
+		if (data->branches) {
+			size += data->branches->nr *
+				sizeof(struct perf_branch_entry);
+		}
+
+		header->size += size;
 	}
 
 	if (sample_type & PERF_SAMPLE_RAW) {
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
@@ -548,6 +548,9 @@ static void intel_pmu_disable_event(stru
 
 	if (unlikely(event->attr.precise))
 		intel_pmu_pebs_disable(event);
+
+	if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+		intel_pmu_lbr_disable(event);
 }
 
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
@@ -602,6 +605,9 @@ static void intel_pmu_enable_event(struc
 	if (unlikely(event->attr.precise))
 		intel_pmu_pebs_enable(event);
 
+	if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+		intel_pmu_lbr_enable(event);
+
 	__x86_pmu_enable_event(hwc);
 }
 
@@ -677,13 +683,13 @@ again:
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
 
-	intel_pmu_lbr_read();
+	intel_pmu_lbr_read(&data);
 
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
 	 */
 	if (__test_and_clear_bit(62, (unsigned long *)&status))
-		x86_pmu.drain_pebs(regs);
+		x86_pmu.drain_pebs(&data, regs);
 
 	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[bit];
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_ds.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -448,13 +448,12 @@ static int intel_pmu_pebs_fixup_ip(struc
 static int intel_pmu_save_and_restart(struct perf_event *event);
 static void intel_pmu_disable_event(struct perf_event *event);
 
-static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_core(struct perf_sample_data *data, struct pt_regs *iregs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
 	struct pebs_record_core *at, *top;
-	struct perf_sample_data data;
 	struct perf_raw_record raw;
 	struct pt_regs regs;
 	int n;
@@ -475,8 +474,7 @@ static void intel_pmu_drain_pebs_core(st
 	if (!intel_pmu_save_and_restart(event))
 		goto out;
 
-	perf_sample_data_init(&data, 0);
-	data.period = event->hw.last_period;
+	data->period = event->hw.last_period;
 
 	n = top - at;
 
@@ -492,7 +490,7 @@ static void intel_pmu_drain_pebs_core(st
 	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 		raw.size = x86_pmu.pebs_record_size;
 		raw.data = at;
-		data.raw = &raw;
+		data->raw = &raw;
 	}
 
 	/*
@@ -515,19 +513,18 @@ static void intel_pmu_drain_pebs_core(st
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
-	if (perf_event_overflow(event, 1, &data, &regs))
+	if (perf_event_overflow(event, 1, data, &regs))
 		intel_pmu_disable_event(event);
 
 out:
 	intel_pmu_pebs_enable_all();
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_nhm(struct perf_sample_data *data, struct pt_regs *iregs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
 	struct pebs_record_nhm *at, *top;
-	struct perf_sample_data data;
 	struct perf_event *event = NULL;
 	struct perf_raw_record raw;
 	struct pt_regs regs;
@@ -575,13 +572,12 @@ static void intel_pmu_drain_pebs_nhm(str
 		if (!intel_pmu_save_and_restart(event))
 			continue;
 
-		perf_sample_data_init(&data, 0);
-		data.period = event->hw.last_period;
+		data->period = event->hw.last_period;
 
 		if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 			raw.size = x86_pmu.pebs_record_size;
 			raw.data = at;
-			data.raw = &raw;
+			data->raw = &raw;
 		}
 
 		/*
@@ -597,7 +593,7 @@ static void intel_pmu_drain_pebs_nhm(str
 		else
 			regs.flags &= ~PERF_EFLAGS_EXACT;
 
-		if (perf_event_overflow(event, 1, &data, &regs))
+		if (perf_event_overflow(event, 1, data, &regs))
 			intel_pmu_disable_event(event);
 	}
 out:
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -209,7 +209,8 @@ struct x86_pmu {
 	 */
 	int		bts, pebs;
 	int		pebs_record_size;
-	void		(*drain_pebs)(struct pt_regs *regs);
+	void		(*drain_pebs)(struct perf_sample_data *data,
+				      struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
 
 	/*
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -178,7 +178,7 @@ static void intel_pmu_lbr_read_64(struct
 	cpuc->lbr_stack.nr = i;
 }
 
-static void intel_pmu_lbr_read(void)
+static void intel_pmu_lbr_read(struct perf_sample_data *data)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -189,6 +189,8 @@ static void intel_pmu_lbr_read(void)
 		intel_pmu_lbr_read_32(cpuc);
 	else
 		intel_pmu_lbr_read_64(cpuc);
+
+	data->branches = &cpuc->lbr_stack;
 }
 
 static void intel_pmu_lbr_init_core(void)

-- 


      parent reply	other threads:[~2010-03-04 14:04 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-04 14:00 [PATCH 00/14] PEBS and LBR support Peter Zijlstra
2010-03-04 14:00 ` [PATCH 01/14] perf, x86: Remove superfluous arguments to x86_perf_event_set_period() Peter Zijlstra
2010-03-10 13:10   ` [tip:perf/urgent] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 02/14] perf, x86: Remove superfluous arguments to x86_perf_event_update() Peter Zijlstra
2010-03-10 13:11   ` [tip:perf/urgent] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 03/14] perf, x86: Change x86_pmu.{enable,disable} calling convention Peter Zijlstra
2010-03-10 13:11   ` [tip:perf/urgent] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 04/14] perf, x86: Use unlocked bitops Peter Zijlstra
2010-03-10 13:11   ` [tip:perf/urgent] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 05/14] perf: Generic perf_sample_data initialization Peter Zijlstra
2010-03-10 13:09   ` [tip:perf/urgent] perf: Provide generic " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 06/14] perf, x86: PEBS infrastructure Peter Zijlstra
2010-03-05  6:19   ` Paul Mackerras
2010-03-05  9:20     ` Peter Zijlstra
2010-03-05 19:11       ` Stephane Eranian
2010-03-05 19:39         ` Peter Zijlstra
2010-03-05 19:51           ` Stephane Eranian
2010-03-10 13:18   ` [tip:perf/pebs] perf, x86: Add " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 07/14] perf: Add attr->precise support to raw event parsing Peter Zijlstra
2010-03-10 13:18   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 08/14] perf, x86: Implement simple LBR support Peter Zijlstra
2010-03-10 13:19   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 09/14] perf, x86: use LBR for PEBS IP+1 fixup Peter Zijlstra
2010-03-04 16:21   ` Masami Hiramatsu
2010-03-04 17:54     ` Peter Zijlstra
2010-03-04 20:54       ` Masami Hiramatsu
2010-03-04 20:58         ` Masami Hiramatsu
2010-03-04 21:08         ` Peter Zijlstra
2010-03-10 13:19   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 10/14] perf-top: Show the percentage of successfull PEBS-fixups Peter Zijlstra
2010-03-10 13:19   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 11/14] perf, x86: Clean up IA32_PERF_CAPABILITIES usage Peter Zijlstra
2010-03-10 13:20   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 12/14] perf, x86: Expose the full PEBS record using PERF_SAMPLE_RAW Peter Zijlstra
2010-03-10 13:20   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:00 ` [PATCH 13/14] x86: Move MAX_INSN_SIZE into asm/insn.h Peter Zijlstra
2010-03-04 15:30   ` Masami Hiramatsu
2010-03-10 13:20   ` [tip:perf/pebs] " tip-bot for Peter Zijlstra
2010-03-04 14:01 ` Peter Zijlstra [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100304140100.998642700@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=acme@infradead.org \
    --cc=eranian@google.com \
    --cc=fweisbec@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=paulus@samba.org \
    --cc=robert.richter@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.