* [PATCH] perf: Add branch stack support to perf script
@ 2012-03-15 20:08 Arun Sharma
2012-03-16 1:28 ` Arun Sharma
0 siblings, 1 reply; 2+ messages in thread
From: Arun Sharma @ 2012-03-15 20:08 UTC (permalink / raw)
To: linux-kernel
Cc: Arun Sharma, Stephane Eranian, peterz, acme, ravitillo, vweaver1,
khandual, dsahern, Ingo Molnar, Namhyung Kim
This makes it easier to write tools that consume the
branch stack data.
Sample usage:
perf record -jany_call,u -o perf.data.anycallu -- ./foo
perf script -i perf.data.anycallu -fip,sym
The samples seem to have the sense of call/return
inverted. If the original callgraph was a -> b -> c
I get samples like:
from to
c b
b a
To restore the normal sense, I'm printing them as:
to => from
Signed-off-by: Arun Sharma <asharma@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: acme@redhat.com
Cc: ravitillo@lbl.gov
Cc: vweaver1@eecs.utk.edu
Cc: khandual@linux.vnet.ibm.com
Cc: dsahern@gmail.com
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung.kim@lge.com>
---
tools/perf/builtin-script.c | 56 ++++++++++++++++++++++++++++++++++++++----
1 files changed, 50 insertions(+), 6 deletions(-)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index d4ce733..99ad1d5 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -318,6 +318,11 @@ static bool is_bts_event(struct perf_event_attr *attr)
(attr->sample_period == 1));
}
+static bool is_branch_stack_event(struct perf_event_attr *attr)
+{
+ return (attr->sample_type & PERF_SAMPLE_BRANCH_STACK);
+}
+
static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
{
if ((attr->type == PERF_TYPE_SOFTWARE) &&
@@ -326,31 +331,32 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
(attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)))
return true;
- if (is_bts_event(attr))
+ if (is_bts_event(attr) || is_branch_stack_event(attr))
return true;
return false;
}
-static void print_sample_addr(union perf_event *event,
+static void __print_sample_addr(union perf_event *event,
struct perf_sample *sample,
struct machine *machine,
struct thread *thread,
- struct perf_event_attr *attr)
+ struct perf_event_attr *attr,
+ u64 addr)
{
struct addr_location al;
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- printf("%16" PRIx64, sample->addr);
+ printf("%16" PRIx64, addr);
if (!sample_addr_correlates_sym(attr))
return;
thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
- sample->addr, &al);
+ addr, &al);
if (!al.map)
thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE,
- sample->addr, &al);
+ addr, &al);
al.cpu = sample->cpu;
al.sym = NULL;
@@ -373,6 +379,17 @@ static void print_sample_addr(union perf_event *event,
}
}
+static void print_sample_addr(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine,
+ struct thread *thread,
+ struct perf_event_attr *attr)
+
+{
+ return __print_sample_addr(event, sample, machine, thread, attr,
+ sample->addr);
+}
+
static void print_sample_bts(union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
@@ -401,6 +418,28 @@ static void print_sample_bts(union perf_event *event,
printf("\n");
}
+static void print_sample_branch_stack(union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine,
+ struct thread *thread)
+{
+ uint64_t i;
+ struct perf_event_attr *attr = &evsel->attr;
+
+ printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
+
+ for (i = 0; i < sample->branch_stack->nr; i++) {
+ printf("\t..... %2"PRIu64": ", i);
+ __print_sample_addr(event, sample, machine, thread, attr,
+ sample->branch_stack->entries[i].to);
+ printf(" => ");
+ __print_sample_addr(event, sample, machine, thread, attr,
+ sample->branch_stack->entries[i].from);
+ printf("\n");
+ }
+}
+
static void process_event(union perf_event *event __unused,
struct perf_sample *sample,
struct perf_evsel *evsel,
@@ -417,6 +456,11 @@ static void process_event(union perf_event *event __unused,
if (is_bts_event(attr)) {
print_sample_bts(event, sample, evsel, machine, thread);
return;
+ }
+
+ if (is_branch_stack_event(attr)) {
+ print_sample_branch_stack(event, sample, evsel, machine, thread);
+ return;
}
if (PRINT_FIELD(TRACE))
--
1.7.8.4
^ permalink raw reply related [flat|nested] 2+ messages in thread* Re: [PATCH] perf: Add branch stack support to perf script
2012-03-15 20:08 [PATCH] perf: Add branch stack support to perf script Arun Sharma
@ 2012-03-16 1:28 ` Arun Sharma
0 siblings, 0 replies; 2+ messages in thread
From: Arun Sharma @ 2012-03-16 1:28 UTC (permalink / raw)
To: Stephane Eranian
Cc: linux-kernel, peterz, acme, ravitillo, vweaver1, khandual,
dsahern, Ingo Molnar, Namhyung Kim
On 3/15/12 1:08 PM, Arun Sharma wrote:
> The samples seem to have the sense of call/return
> inverted. If the original callgraph was a -> b -> c
> I get samples like:
>
> from to
> c b
> b a
>
> To restore the normal sense, I'm printing them as:
>
> to => from
I debugged this some more and something seems to be wrong with the
way the kernel maps PERF_SAMPLE_BRANCH_ANY_CALL to MSR_LBR_SELECT.
perf record -aj any_call,u -F 1 -- sleep 300 &
msr[0x1c8] = 0x1d
perf record -aj any_ret,u -F 1 -- sleep 300 &
msr[0x1c8] = 0x1ad
Stephane: does this give a clue about what may be wrong? It doesn't
match the kernel code:
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
...
[PERF_SAMPLE_BRANCH_ANY_CALL] =
LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
};
It'd be nice to have *_lbr_sel_map[] in the same order as
enum perf_branch_sample_type. Right now, the call and return entries are
reversed, which shouldn't matter in theory, since the initializer has an
explicit index.
I also got a kernel hang running the two perf record lines above
simultaneously (without the -F 1).
-Arun
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2012-03-16 1:28 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-03-15 20:08 [PATCH] perf: Add branch stack support to perf script Arun Sharma
2012-03-16 1:28 ` Arun Sharma
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox