public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf_events: add PERF_SAMPLE_BRANCH_STACK
@ 2010-04-07 12:45 Stephane Eranian
  2010-04-07 13:15 ` Peter Zijlstra
  0 siblings, 1 reply; 4+ messages in thread
From: Stephane Eranian @ 2010-04-07 12:45 UTC (permalink / raw)
  To: linux-kernel
  Cc: peterz, mingo, paulus, davem, fweisbec, robert.richter,
	perfmon2-devel, eranian, eranian

	This patch exposes the branch trace buffer to users for sampling.
	There are measurements where it is very useful to couple the
	instruction address with some path information, e.g., basic
	block profiling.

	On recent Intel processors, the branch stack is implemented using
	the LBR registers. LBR was already used to fix up PEBS. This
	patch still allows PEBS fixups with LBR and also exposes LBR
	to applications.

	There is a new PERF_SAMPLE_BRANCH_STACK sample type. It creates
	a sample in the buffer which has the following layout:

	   { u64 nr;
	     { u64 from, to, flags } lbr[nr]; } && PERF_SAMPLE_BRANCH_STACK

	Refer to include/linux/perf_event.h to figure out the layout ordering
	information.

	LBR is configured by default to record ALL taken branches.  On some
	processors, it is possible to filter the type of branches. This will
	be supported in a subsequent patch.

	On other processors, the sample type is allowed but will generate a
	sample where nr=0 as is the case with other sampling types.

	Signed-off-by: Stephane Eranian <eranian@google.com>

--
 arch/x86/kernel/cpu/perf_event_intel.c    |   13 +++++++++++++
 arch/x86/kernel/cpu/perf_event_intel_ds.c |    5 +++++
 include/linux/perf_event.h                |    8 +++++++-
 kernel/perf_event.c                       |   25 +++++++++++++++++++++++++
 4 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f168b40..6b8aa7d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -562,8 +562,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
 
 	x86_pmu_disable_event(event);
 
+	/*
+	 * PEBS implies LBR
+	 */
 	if (unlikely(event->attr.precise))
 		intel_pmu_pebs_disable(event);
+	else if (unlikely(event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
+		intel_pmu_lbr_disable(event);
 }
 
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
@@ -614,8 +619,13 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		return;
 	}
 
+	/*
+	 * PEBS implies LBR
+	 */
 	if (unlikely(event->attr.precise))
 		intel_pmu_pebs_enable(event);
+	else if (unlikely(event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
+		intel_pmu_lbr_enable(event);
 
 	__x86_pmu_enable_event(hwc);
 }
@@ -710,6 +720,9 @@ again:
 
 		data.period = event->hw.last_period;
 
+		if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+			data.br_stack = &cpuc->lbr_stack;
+
 		if (perf_event_overflow(event, 1, &data, regs))
 			x86_pmu_stop(event);
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 2fea362..ed62d35 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -505,6 +505,8 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 		data.raw = &raw;
 	}
 
+	if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+		data.br_stack = &cpuc->lbr_stack;
 	/*
 	 * We use the interrupt regs as a base because the PEBS record
 	 * does not contain a full regs set, specifically it seems to
@@ -591,6 +593,9 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			data.raw = &raw;
 		}
 
+		if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+			data.br_stack = &cpuc->lbr_stack;
+
 		/*
 		 * See the comment in intel_pmu_drain_pebs_core()
 		 */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6e96cc8..3a8288f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -125,8 +125,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_PERIOD			= 1U << 8,
 	PERF_SAMPLE_STREAM_ID			= 1U << 9,
 	PERF_SAMPLE_RAW				= 1U << 10,
+	PERF_SAMPLE_BRANCH_STACK		= 1U << 11,
 
-	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 12,		/* non-ABI */
 };
 
 /*
@@ -415,6 +416,9 @@ enum perf_event_type {
 	 *
 	 *	{ u32			size;
 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
+	 *
+	 *	{ u64 nr;
+	 *	  { u64 from, to, flags } lbr[nr]; } && PERF_SAMPLE_BRANCH_STACK
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
@@ -819,6 +823,7 @@ struct perf_sample_data {
 	u64				period;
 	struct perf_callchain_entry	*callchain;
 	struct perf_raw_record		*raw;
+	struct perf_branch_stack	*br_stack;
 };
 
 static inline
@@ -826,6 +831,7 @@ void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
 {
 	data->addr = addr;
 	data->raw  = NULL;
+	data->br_stack = NULL;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 63fbce1..8143e77 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3211,6 +3211,22 @@ void perf_output_sample(struct perf_output_handle *handle,
 			perf_output_put(handle, raw);
 		}
 	}
+
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		if (data->br_stack) {
+			size_t size;
+
+			size = data->br_stack->nr
+			     * sizeof(struct perf_branch_entry);
+
+			perf_output_put(handle, data->br_stack->nr);
+			perf_output_copy(handle, data->br_stack->entries, size);
+		} else {
+			u64 nr = 0;
+			perf_output_put(handle, nr);
+		}
+	}
+
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
@@ -3298,6 +3314,15 @@ void perf_prepare_sample(struct perf_event_header *header,
 		WARN_ON_ONCE(size & (sizeof(u64)-1));
 		header->size += size;
 	}
+
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		int size = sizeof(u64);
+		if (data->br_stack) {
+			size += data->br_stack->nr
+			      * sizeof(struct perf_branch_entry);
+		}
+		header->size += size;
+	}
 }
 
 static void perf_event_output(struct perf_event *event, int nmi,

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf_events: add PERF_SAMPLE_BRANCH_STACK
  2010-04-07 12:45 [PATCH] perf_events: add PERF_SAMPLE_BRANCH_STACK Stephane Eranian
@ 2010-04-07 13:15 ` Peter Zijlstra
  2010-04-07 16:48   ` Stephane Eranian
  0 siblings, 1 reply; 4+ messages in thread
From: Peter Zijlstra @ 2010-04-07 13:15 UTC (permalink / raw)
  To: eranian
  Cc: linux-kernel, mingo, paulus, davem, fweisbec, robert.richter,
	perfmon2-devel, eranian

On Wed, 2010-04-07 at 14:45 +0200, Stephane Eranian wrote:
>         LBR is configured by default to record ALL taken branches.  On some
>         processors, it is possible to filter the type of branches. This will
>         be supported in a subsequent patch.
> 
>         On other processors, the sample type is allowed but will generate a
>         sample where nr=0 as is the case with other sampling types.

Right, so I already posted a patch like that:
  http://lkml.org/lkml/2010/3/4/160

and the reason it's not merged is because there is no perf use-case for
it. Ingo wants to avoid merging ABI bits for which there is no userspace
around. We already have a few such things and we find that it's too easy
to regress on those parts.

So if you want this, please also implement a useful use-case in perf.


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf_events: add PERF_SAMPLE_BRANCH_STACK
  2010-04-07 13:15 ` Peter Zijlstra
@ 2010-04-07 16:48   ` Stephane Eranian
  2010-04-07 16:54     ` Peter Zijlstra
  0 siblings, 1 reply; 4+ messages in thread
From: Stephane Eranian @ 2010-04-07 16:48 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, mingo, paulus, davem, fweisbec, robert.richter,
	perfmon2-devel, eranian

On Wed, Apr 7, 2010 at 3:15 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, 2010-04-07 at 14:45 +0200, Stephane Eranian wrote:
>>         LBR is configured by default to record ALL taken branches.  On some
>>         processors, it is possible to filter the type of branches. This will
>>         be supported in a subsequent patch.
>>
>>         On other processors, the sample type is allowed but will generate a
>>         sample where nr=0 as is the case with other sampling types.
>
> Right, so I already posted a patch like that:
>  http://lkml.org/lkml/2010/3/4/160
>
> and the reason it's not merged is because there is no perf use-case for
> it. Ingo wants to avoid merging ABI bits for which there is no userspace
> around. We already have a few such things and we find that it's too easy
> to regress on those parts.
>
Then, why didn't you extend perf to leverage your patch?

I think that forcing all features to be included in perf is not a very
attractive approach. It can't be the only approach. There are many usage
models of PMU data. You want to encourage the development of as
many tools and libraries as possible. It helps with validation too. There are
bugs in your implementation which are not exposed simply because perf
does not need the features. But that does not mean those features are
not useful.

To encourage developers, you need to build simple examples of how
each feature can be used. You don't necessarily need a fully featured
tool. This is what I am doing with libpfm4. People can learn from the
examples and build their own custom tools and libraries.

If I post a patch to enable LBR sampling, it is because I have a user-level
test program to validate it and demonstrate what you can get out of it.

LBR data typically has lots of post-processing. It is best suited for offline
processing. You could use perf to collect the data and dump a binary
output file. I can take a look at that.

I also assume that the same reason is holding up my randomization
patch. Yet I think this is an important feature.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf_events: add PERF_SAMPLE_BRANCH_STACK
  2010-04-07 16:48   ` Stephane Eranian
@ 2010-04-07 16:54     ` Peter Zijlstra
  0 siblings, 0 replies; 4+ messages in thread
From: Peter Zijlstra @ 2010-04-07 16:54 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, mingo, paulus, davem, fweisbec, robert.richter,
	perfmon2-devel, eranian

On Wed, 2010-04-07 at 18:48 +0200, Stephane Eranian wrote:
> Then, why didn't you extend perf to leverage your patch?
> 
Because I couldn't come up with a sensible use case.


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2010-04-07 16:54 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-07 12:45 [PATCH] perf_events: add PERF_SAMPLE_BRANCH_STACK Stephane Eranian
2010-04-07 13:15 ` Peter Zijlstra
2010-04-07 16:48   ` Stephane Eranian
2010-04-07 16:54     ` Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox