public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf: Add branch stack support to perf script
@ 2012-03-15 20:08 Arun Sharma
  2012-03-16  1:28 ` Arun Sharma
  0 siblings, 1 reply; 2+ messages in thread
From: Arun Sharma @ 2012-03-15 20:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: Arun Sharma, Stephane Eranian, peterz, acme, ravitillo, vweaver1,
	khandual, dsahern, Ingo Molnar, Namhyung Kim

This makes it easier to write tools that consume the
branch stack data.

Sample usage:

perf record -jany_call,u -o perf.data.anycallu -- ./foo
perf script -i perf.data.anycallu -fip,sym

The samples seem to have the sense of call/return
inverted. If the original callgraph was a -> b -> c
I get samples like:

from	to
c	b
b	a

To restore the normal sense, I'm printing them as:

to => from

Signed-off-by: Arun Sharma <asharma@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: acme@redhat.com
Cc: ravitillo@lbl.gov
Cc: vweaver1@eecs.utk.edu
Cc: khandual@linux.vnet.ibm.com
Cc: dsahern@gmail.com
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung.kim@lge.com>
---
 tools/perf/builtin-script.c |   56 ++++++++++++++++++++++++++++++++++++++----
 1 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index d4ce733..99ad1d5 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -318,6 +318,11 @@ static bool is_bts_event(struct perf_event_attr *attr)
 		(attr->sample_period == 1));
 }
 
+static bool is_branch_stack_event(struct perf_event_attr *attr)
+{
+	return (attr->sample_type & PERF_SAMPLE_BRANCH_STACK);
+}
+
 static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
 {
 	if ((attr->type == PERF_TYPE_SOFTWARE) &&
@@ -326,31 +331,32 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
 	     (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)))
 		return true;
 
-	if (is_bts_event(attr))
+	if (is_bts_event(attr) || is_branch_stack_event(attr))
 		return true;
 
 	return false;
 }
 
-static void print_sample_addr(union perf_event *event,
+static void __print_sample_addr(union perf_event *event,
 			  struct perf_sample *sample,
 			  struct machine *machine,
 			  struct thread *thread,
-			  struct perf_event_attr *attr)
+			  struct perf_event_attr *attr,
+			  u64 addr)
 {
 	struct addr_location al;
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	printf("%16" PRIx64, sample->addr);
+	printf("%16" PRIx64, addr);
 
 	if (!sample_addr_correlates_sym(attr))
 		return;
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      sample->addr, &al);
+			      addr, &al);
 	if (!al.map)
 		thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE,
-				      sample->addr, &al);
+				      addr, &al);
 
 	al.cpu = sample->cpu;
 	al.sym = NULL;
@@ -373,6 +379,17 @@ static void print_sample_addr(union perf_event *event,
 	}
 }
 
+static void print_sample_addr(union perf_event *event,
+			  struct perf_sample *sample,
+			  struct machine *machine,
+			  struct thread *thread,
+			  struct perf_event_attr *attr)
+			  
+{
+	return __print_sample_addr(event, sample, machine, thread, attr,
+				   sample->addr);
+}
+
 static void print_sample_bts(union perf_event *event,
 			     struct perf_sample *sample,
 			     struct perf_evsel *evsel,
@@ -401,6 +418,28 @@ static void print_sample_bts(union perf_event *event,
 	printf("\n");
 }
 
+static void print_sample_branch_stack(union perf_event *event,
+			     struct perf_sample *sample,
+			     struct perf_evsel *evsel,
+			     struct machine *machine,
+			     struct thread *thread)
+{
+	uint64_t i;
+	struct perf_event_attr *attr = &evsel->attr;
+
+	printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
+
+	for (i = 0; i < sample->branch_stack->nr; i++) {
+		printf("\t..... %2"PRIu64": ", i); 
+		__print_sample_addr(event, sample, machine, thread, attr,
+			            sample->branch_stack->entries[i].to);
+		printf(" => ");
+		__print_sample_addr(event, sample, machine, thread, attr,
+			            sample->branch_stack->entries[i].from);
+		printf("\n");
+	}
+}
+
 static void process_event(union perf_event *event __unused,
 			  struct perf_sample *sample,
 			  struct perf_evsel *evsel,
@@ -417,6 +456,11 @@ static void process_event(union perf_event *event __unused,
 	if (is_bts_event(attr)) {
 		print_sample_bts(event, sample, evsel, machine, thread);
 		return;
+	} 
+
+	if (is_branch_stack_event(attr)) {
+		print_sample_branch_stack(event, sample, evsel, machine, thread);
+		return;
 	}
 
 	if (PRINT_FIELD(TRACE))
-- 
1.7.8.4


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] perf: Add branch stack support to perf script
  2012-03-15 20:08 [PATCH] perf: Add branch stack support to perf script Arun Sharma
@ 2012-03-16  1:28 ` Arun Sharma
  0 siblings, 0 replies; 2+ messages in thread
From: Arun Sharma @ 2012-03-16  1:28 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: linux-kernel, peterz, acme, ravitillo, vweaver1, khandual,
	dsahern, Ingo Molnar, Namhyung Kim

On 3/15/12 1:08 PM, Arun Sharma wrote:

> The samples seem to have the sense of call/return
> inverted. If the original callgraph was a ->  b ->  c
> I get samples like:
>
> from	to
> c	b
> b	a
>
> To restore the normal sense, I'm printing them as:
>
> to =>  from

I debugged this some more and something seems to be wrong with the
way the kernel maps PERF_SAMPLE_BRANCH_ANY_CALL to MSR_LBR_SELECT.

perf record -aj any_call,u -F 1 --   sleep 300 &

msr[0x1c8] = 0x1d

perf record -aj any_ret,u -F 1 --   sleep 300 &

msr[0x1c8] = 0x1ad

Stephane: does this give a clue about what may be wrong? It doesn't 
match the kernel code:

static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
...
[PERF_SAMPLE_BRANCH_ANY_CALL] =
          LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
};

It'd be nice to have *_lbr_sel_map[] in the same order as
enum perf_branch_sample_type. Right now, the call and return entries are
reversed, which shouldn't matter in theory, since each initializer has an
explicit index.

I also got a kernel hang running the two perf record lines above 
simultaneously (without the -F 1).

  -Arun

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2012-03-16  1:28 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-03-15 20:08 [PATCH] perf: Add branch stack support to perf script Arun Sharma
2012-03-16  1:28 ` Arun Sharma

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox