All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: mingo@elte.hu, linux-kernel@vger.kernel.org
Cc: paulus@samba.org, eranian@google.com, robert.richter@amd.com,
	fweisbec@gmail.com, Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC][PATCH 09/11] perf, x86: Implement PERF_SAMPLE_BRANCH_STACK
Date: Wed, 03 Mar 2010 17:39:45 +0100	[thread overview]
Message-ID: <20100303164306.526626387@chello.nl> (raw)
In-Reply-To: 20100303163936.906011640@chello.nl

[-- Attachment #1: perf-sample-lbr.patch --]
[-- Type: text/plain, Size: 9664 bytes --]


Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 arch/x86/kernel/cpu/perf_event.c           |   14 +++-------
 arch/x86/kernel/cpu/perf_event_intel.c     |   10 ++++++-
 arch/x86/kernel/cpu/perf_event_intel_ds.c  |   16 ++++--------
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |   20 ++++++++-------
 include/linux/perf_event.h                 |   27 +++++++++++++++++---
 kernel/perf_event.c                        |   38 ++++++++++++++++++++++-------
 6 files changed, 83 insertions(+), 42 deletions(-)

Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -126,8 +126,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_STREAM_ID			= 1U << 9,
 	PERF_SAMPLE_RAW				= 1U << 10,
 	PERF_SAMPLE_REGS			= 1U << 11,
+	PERF_SAMPLE_BRANCH_STACK		= 1U << 12,
 
-	PERF_SAMPLE_MAX = 1U << 12,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 13,		/* non-ABI */
 };
 
 /*
@@ -395,9 +396,14 @@ enum perf_event_type {
 	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
 	 * 	{ struct pt_regs	regs;	  } && PERF_SAMPLE_REGS
 	 *
-	 *	{ u64			nr,
+	 *	{ u64			nr;
 	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
 	 *
+	 * 	{ u64			nr;
+	 * 	  { u64 from, to, flags;
+	 * 	  }			lbr[nr];  } && PERF_SAMPLE_BRANCH_STACK
+	 *
+	 *
 	 *	#
 	 *	# The RAW record below is opaque data wrt the ABI
 	 *	#
@@ -469,6 +475,17 @@ struct perf_raw_record {
 	void				*data;
 };
 
+struct perf_branch_entry {
+	__u64				from;
+	__u64				to;
+	__u64				flags;
+};
+
+struct perf_branch_stack {
+	__u64				nr;
+	struct perf_branch_entry	entries[0];
+};
+
 struct task_struct;
 
 /**
@@ -803,13 +820,15 @@ struct perf_sample_data {
 	struct perf_callchain_entry	*callchain;
 	struct perf_raw_record		*raw;
 	struct pt_regs			*regs;
+	struct perf_branch_stack	*branches;
 };
 
 static inline
 void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
 {
-	data->addr = addr;
-	data->raw  = NULL;
+	data->addr     = addr;
+	data->raw      = NULL;
+	data->branches = NULL;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -3189,12 +3189,9 @@ void perf_output_sample(struct perf_outp
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		if (data->callchain) {
-			int size = 1;
+			int size = sizeof(u64);
 
-			if (data->callchain)
-				size += data->callchain->nr;
-
-			size *= sizeof(u64);
+			size += data->callchain->nr * sizeof(u64);
 
 			perf_output_copy(handle, data->callchain, size);
 		} else {
@@ -3203,6 +3200,20 @@ void perf_output_sample(struct perf_outp
 		}
 	}
 
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		if (data->branches) {
+			int size = sizeof(u64);
+
+			size += data->branches->nr *
+				sizeof(struct perf_branch_entry);
+
+			perf_output_copy(handle, data->branches, size);
+		} else {
+			u64 nr = 0;
+			perf_output_put(handle, nr);
+		}
+	}
+
 	if (sample_type & PERF_SAMPLE_RAW) {
 		if (data->raw) {
 			perf_output_put(handle, data->raw->size);
@@ -3291,14 +3302,25 @@ void perf_prepare_sample(struct perf_eve
 	}
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		int size = 1;
+		int size = sizeof(u64);
 
 		data->callchain = perf_callchain(regs);
 
 		if (data->callchain)
-			size += data->callchain->nr;
+			size += data->callchain->nr * sizeof(u64);
+
+		header->size += size;
+	}
 
-		header->size += size * sizeof(u64);
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		int size = sizeof(u64);
+
+		if (data->branches) {
+			size += data->branches->nr *
+				sizeof(struct perf_branch_entry);
+		}
+
+		header->size += size;
 	}
 
 	if (sample_type & PERF_SAMPLE_RAW) {
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -50,10 +50,6 @@ struct amd_nb {
 
 #define MAX_LBR_ENTRIES		16
 
-struct lbr_entry {
-	u64	from, to, flags;
-};
-
 struct cpu_hw_events {
 	/*
 	 * Generic x86 PMC bits
@@ -78,10 +74,10 @@ struct cpu_hw_events {
 	/*
 	 * Intel LBR bits
 	 */
-	int			lbr_users;
-	int			lbr_entries;
-	struct lbr_entry	lbr_stack[MAX_LBR_ENTRIES];
-	void			*lbr_context;
+	int				lbr_users;
+	void				*lbr_context;
+	struct perf_branch_stack	lbr_stack;
+	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
 
 	/*
 	 * AMD specific bits
@@ -166,7 +162,7 @@ struct x86_pmu {
 	 */
 	int		bts, pebs;
 	int		pebs_record_size;
-	void		(*drain_pebs)(void);
+	void		(*drain_pebs)(struct perf_sample_data *data);
 	struct event_constraint *pebs_constraints;
 
 	/*
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -138,11 +138,11 @@ static void intel_pmu_lbr_read_32(struct
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
 
-		cpuc->lbr_stack[i].from  = msr_lastbranch.from;
-		cpuc->lbr_stack[i].to    = msr_lastbranch.to;
-		cpuc->lbr_stack[i].flags = 0;
+		cpuc->lbr_entries[i].from  = msr_lastbranch.from;
+		cpuc->lbr_entries[i].to    = msr_lastbranch.to;
+		cpuc->lbr_entries[i].flags = 0;
 	}
-	cpuc->lbr_entries = i;
+	cpuc->lbr_stack.nr = i;
 }
 
 #define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
@@ -170,14 +170,14 @@ static void intel_pmu_lbr_read_64(struct
 			from = (u64)((((s64)from) << 1) >> 1);
 		}
 
-		cpuc->lbr_stack[i].from  = from;
-		cpuc->lbr_stack[i].to    = to;
-		cpuc->lbr_stack[i].flags = flags;
+		cpuc->lbr_entries[i].from  = from;
+		cpuc->lbr_entries[i].to    = to;
+		cpuc->lbr_entries[i].flags = flags;
 	}
-	cpuc->lbr_entries = i;
+	cpuc->lbr_stack.nr = i;
 }
 
-static void intel_pmu_lbr_read(void)
+static void intel_pmu_lbr_read(struct perf_sample_data *data)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -188,6 +188,8 @@ static void intel_pmu_lbr_read(void)
 		intel_pmu_lbr_read_32(cpuc);
 	else
 		intel_pmu_lbr_read_64(cpuc);
+
+	data->branches = &cpuc->lbr_stack;
 }
 
 static int intel_pmu_lbr_format(void)
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
@@ -548,6 +548,9 @@ static void intel_pmu_disable_event(stru
 
 	if (unlikely(event->attr.precise))
 		intel_pmu_pebs_disable(hwc);
+
+	if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+		intel_pmu_lbr_disable(event);
 }
 
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
@@ -602,6 +605,9 @@ static void intel_pmu_enable_event(struc
 	if (unlikely(event->attr.precise))
 		intel_pmu_pebs_enable(hwc);
 
+	if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+		intel_pmu_lbr_enable(event);
+
 	__x86_pmu_enable_event(hwc);
 }
 
@@ -677,13 +683,13 @@ again:
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
 
-	intel_pmu_lbr_read();
+	intel_pmu_lbr_read(&data);
 
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
 	 */
 	if (__test_and_clear_bit(62, (unsigned long *)&status))
-		x86_pmu.drain_pebs();
+		x86_pmu.drain_pebs(&data);
 
 	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[bit];
Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_ds.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -418,13 +418,12 @@ do {						\
 static int intel_pmu_save_and_restart(struct perf_event *event);
 static void intel_pmu_disable_event(struct perf_event *event);
 
-static void intel_pmu_drain_pebs_core(void)
+static void intel_pmu_drain_pebs_core(struct perf_sample_data *data)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
 	struct pebs_record_core *at, *top;
-	struct perf_sample_data data;
 	struct pt_regs regs;
 	int n;
 
@@ -444,8 +443,7 @@ static void intel_pmu_drain_pebs_core(vo
 	if (!intel_pmu_save_and_restart(event))
 		goto out;
 
-	perf_sample_data_init(&data, 0);
-	data.period = event->hw.last_period;
+	data->period = event->hw.last_period;
 
 	n = top - at;
 
@@ -460,19 +458,18 @@ static void intel_pmu_drain_pebs_core(vo
 
 	PEBS_TO_REGS(at, &regs);
 
-	if (perf_event_overflow(event, 1, &data, &regs))
+	if (perf_event_overflow(event, 1, data, &regs))
 		intel_pmu_disable_event(event);
 
 out:
 	intel_pmu_pebs_enable_all();
 }
 
-static void intel_pmu_drain_pebs_nhm(void)
+static void intel_pmu_drain_pebs_nhm(struct perf_sample_data *data)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
 	struct pebs_record_nhm *at, *top;
-	struct perf_sample_data data;
 	struct perf_event *event = NULL;
 	struct pt_regs regs;
 	int bit, n;
@@ -519,12 +516,11 @@ static void intel_pmu_drain_pebs_nhm(voi
 		if (!intel_pmu_save_and_restart(event))
 			continue;
 
-		perf_sample_data_init(&data, 0);
-		data.period = event->hw.last_period;
+		data->period = event->hw.last_period;
 
 		PEBS_TO_REGS(at, &regs);
 
-		if (perf_event_overflow(event, 1, &data, &regs))
+		if (perf_event_overflow(event, 1, data, &regs))
 			intel_pmu_disable_event(event);
 	}
 out:

-- 


  parent reply	other threads:[~2010-03-03 16:47 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-03 16:39 [RFC][PATCH 00/11] Another stab at PEBS and LBR support Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 01/11] perf, x86: Remove superfluous arguments to x86_perf_event_set_period() Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 02/11] perf, x86: Remove superfluous arguments to x86_perf_event_update() Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 03/11] perf, x86: Change x86_pmu.{enable,disable} calling convention Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 04/11] perf, x86: Use unlocked bitops Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 05/11] perf: Generic perf_sample_data initialization Peter Zijlstra
2010-03-03 16:49   ` David Miller
2010-03-03 21:14   ` Frederic Weisbecker
2010-03-05  8:44   ` Jean Pihet
2010-03-03 16:39 ` [RFC][PATCH 06/11] perf, x86: PEBS infrastructure Peter Zijlstra
2010-03-03 17:38   ` Robert Richter
2010-03-03 17:42     ` Peter Zijlstra
2010-03-04  8:50       ` Robert Richter
2010-03-03 16:39 ` [RFC][PATCH 07/11] perf: Provide PERF_SAMPLE_REGS Peter Zijlstra
2010-03-03 17:30   ` Stephane Eranian
2010-03-03 17:39     ` Peter Zijlstra
2010-03-03 17:49       ` Stephane Eranian
2010-03-03 17:55         ` David Miller
2010-03-03 18:18           ` Stephane Eranian
2010-03-03 19:18           ` Peter Zijlstra
2010-03-04  2:59           ` Ingo Molnar
2010-03-04 12:58             ` Arnaldo Carvalho de Melo
2010-03-03 22:02   ` Frederic Weisbecker
2010-03-04  8:58     ` Peter Zijlstra
2010-03-04 11:04       ` Ingo Molnar
2010-03-03 16:39 ` [RFC][PATCH 08/11] perf, x86: Implement simple LBR support Peter Zijlstra
2010-03-03 21:52   ` Stephane Eranian
2010-03-04  8:58     ` Peter Zijlstra
2010-03-03 21:57   ` Stephane Eranian
2010-03-04  8:58     ` Peter Zijlstra
2010-03-04 17:54       ` Stephane Eranian
2010-03-04 18:18         ` Peter Zijlstra
2010-03-04 20:23           ` Peter Zijlstra
2010-03-04 20:57             ` Stephane Eranian
2010-03-03 16:39 ` Peter Zijlstra [this message]
2010-03-03 21:08   ` [RFC][PATCH 09/11] perf, x86: Implement PERF_SAMPLE_BRANCH_STACK Frederic Weisbecker
2010-03-03 16:39 ` [RFC][PATCH 10/11] perf, x86: use LBR for PEBS IP+1 fixup Peter Zijlstra
2010-03-03 18:05   ` Masami Hiramatsu
2010-03-03 19:37     ` Peter Zijlstra
2010-03-03 21:11       ` Masami Hiramatsu
2010-03-03 21:50         ` Stephane Eranian
2010-03-04  8:57           ` Peter Zijlstra
2010-03-09  1:41             ` Stephane Eranian
2010-03-03 16:39 ` [RFC][PATCH 11/11] perf, x86: Clean up IA32_PERF_CAPABILITIES usage Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100303164306.526626387@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=eranian@google.com \
    --cc=fweisbec@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=paulus@samba.org \
    --cc=robert.richter@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.