* [PATCH 1/2] Support perf script -F brstack,dso and brstacksym,dso
@ 2017-06-19 16:38 Mark Santaniello
2017-06-19 16:38 ` [PATCH 2/2] Support perf script -F brstackoff,dso Mark Santaniello
2017-06-20 9:04 ` [tip:perf/core] perf script: Support -F brstack,dso and brstacksym,dso tip-bot for Mark Santaniello
0 siblings, 2 replies; 6+ messages in thread
From: Mark Santaniello @ 2017-06-19 16:38 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Alexander Shishkin
Cc: linux-kernel, Mark Santaniello
Perf script can report the dso for "addr" and "ip" fields.
This adds the same support for the "brstack" and "brstacksym" fields.
This can be helpful for AutoFDO: we can ignore LBR entries unless the
source and target addresses are both in the target module we are about to
build.
I built a small test akin to "while(1) { do_nothing(); }" where the
do_nothing function is loaded from a dso:
~/src/burncpu$ cat burncpu.cpp
#include <dlfcn.h>
int main() {
void* handle = dlopen("./dso.so", RTLD_LAZY);
if (!handle) return -1;
typedef void (*fp)();
fp do_nothing = (fp) dlsym(handle, "do_nothing");
while(1) {
do_nothing();
}
}
~/src/burncpu$ cat dso.cpp
extern "C" void do_nothing() {}
~/src/burncpu$ cat build.sh
#!/bin/bash
g++ -shared dso.cpp -o dso.so
g++ burncpu.cpp -o burncpu -ldl
I sampled the execution with perf record -b. Using the new perf script
functionality, I can easily find cases where there was a transition from one
dso to another:
$ ./perf record -a -b -- sleep 5
[ perf record: Woken up 55 times to write data ]
[ perf record: Captured and wrote 18.815 MB perf.data (43593 samples) ]
$ ./perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' \
> | grep burncpu | grep dso.so | head -n 1
0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0
$ ./perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' \
> | grep burncpu | grep dso.so | head -n 1
do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0
Signed-off-by: Mark Santaniello <marksan@fb.com>
---
tools/perf/builtin-script.c | 61 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 49 insertions(+), 12 deletions(-)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 4761b0d..6a7033b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -298,10 +298,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
"selected.\n");
return -EINVAL;
}
- if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
- pr_err("Display of DSO requested but neither sample IP nor "
- "sample address\nis selected. Hence, no addresses to convert "
- "to DSO.\n");
+ if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
+ !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) {
+ pr_err("Display of DSO requested but none of sample IP, sample address, "
+ "brstack\nor brstacksym are selected. Hence, no addresses to "
+ "convert to DSO.\n");
return -EINVAL;
}
if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
@@ -514,18 +515,43 @@ mispred_str(struct branch_entry *br)
return br->flags.predicted ? 'P' : 'M';
}
-static void print_sample_brstack(struct perf_sample *sample)
+static void print_sample_brstack(struct perf_sample *sample,
+ struct thread *thread,
+ struct perf_event_attr *attr)
{
struct branch_stack *br = sample->branch_stack;
- u64 i;
+ struct addr_location alf, alt;
+ u64 i, from, to;
if (!(br && br->nr))
return;
for (i = 0; i < br->nr; i++) {
- printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ",
- br->entries[i].from,
- br->entries[i].to,
+ from = br->entries[i].from;
+ to = br->entries[i].to;
+
+ if (PRINT_FIELD(DSO)) {
+ memset(&alf, 0, sizeof(alf));
+ memset(&alt, 0, sizeof(alt));
+ thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
+ thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+ }
+
+ printf("0x%"PRIx64, from);
+ if (PRINT_FIELD(DSO)) {
+ printf("(");
+ map__fprintf_dsoname(alf.map, stdout);
+ printf(")");
+ }
+
+ printf("/0x%"PRIx64, to);
+ if (PRINT_FIELD(DSO)) {
+ printf("(");
+ map__fprintf_dsoname(alt.map, stdout);
+ printf(")");
+ }
+
+ printf("/%c/%c/%c/%d ",
mispred_str( br->entries + i),
br->entries[i].flags.in_tx? 'X' : '-',
br->entries[i].flags.abort? 'A' : '-',
@@ -534,7 +560,8 @@ static void print_sample_brstack(struct perf_sample *sample)
}
static void print_sample_brstacksym(struct perf_sample *sample,
- struct thread *thread)
+ struct thread *thread,
+ struct perf_event_attr *attr)
{
struct branch_stack *br = sample->branch_stack;
struct addr_location alf, alt;
@@ -559,8 +586,18 @@ static void print_sample_brstacksym(struct perf_sample *sample,
alt.sym = map__find_symbol(alt.map, alt.addr);
symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
+ if (PRINT_FIELD(DSO)) {
+ printf("(");
+ map__fprintf_dsoname(alf.map, stdout);
+ printf(")");
+ }
putchar('/');
symbol__fprintf_symname_offs(alt.sym, &alt, stdout);
+ if (PRINT_FIELD(DSO)) {
+ printf("(");
+ map__fprintf_dsoname(alt.map, stdout);
+ printf(")");
+ }
printf("/%c/%c/%c/%d ",
mispred_str( br->entries + i),
br->entries[i].flags.in_tx? 'X' : '-',
@@ -1187,9 +1224,9 @@ static void process_event(struct perf_script *script,
print_sample_iregs(sample, attr);
if (PRINT_FIELD(BRSTACK))
- print_sample_brstack(sample);
+ print_sample_brstack(sample, thread, attr);
else if (PRINT_FIELD(BRSTACKSYM))
- print_sample_brstacksym(sample, thread);
+ print_sample_brstacksym(sample, thread, attr);
if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
print_sample_bpf_output(sample);
--
2.9.3
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 2/2] Support perf script -F brstackoff,dso 2017-06-19 16:38 [PATCH 1/2] Support perf script -F brstack,dso and brstacksym,dso Mark Santaniello @ 2017-06-19 16:38 ` Mark Santaniello 2017-06-19 18:12 ` Arnaldo Carvalho de Melo 2017-06-20 9:04 ` [tip:perf/core] perf script: Support " tip-bot for Mark Santaniello 2017-06-20 9:04 ` [tip:perf/core] perf script: Support -F brstack,dso and brstacksym,dso tip-bot for Mark Santaniello 1 sibling, 2 replies; 6+ messages in thread From: Mark Santaniello @ 2017-06-19 16:38 UTC (permalink / raw) To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo, Alexander Shishkin Cc: linux-kernel, Mark Santaniello The idea here is to make AutoFDO easier in cloud environment with ASLR. It's easiest to show how this is useful by example. I built a small test akin to "while(1) { do_nothing(); }" where the do_nothing function is loaded from a dso: ~/src/burncpu$ cat burncpu.cpp #include <dlfcn.h> int main() { void* handle = dlopen("./dso.so", RTLD_LAZY); if (!handle) return -1; typedef void (*fp)(); fp do_nothing = (fp) dlsym(handle, "do_nothing"); while(1) { do_nothing(); } } ~/src/burncpu$ cat dso.cpp extern "C" void do_nothing() {} ~/src/burncpu$ cat build.sh #!/bin/bash g++ -shared dso.cpp -o dso.so g++ burncpu.cpp -o burncpu -ldl I sampled the execution of this program with perf record -b. Using the existing "brstack,dso", we get absolute addresses that are affected by ASLR, and could be different on different hosts. The address does not uniquely identify a branch/target in the binary: $ ./perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' \ > | grep burncpu | grep dso.so | head -n 1 0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0 Using the existing "brstacksym,dso" is a little better, because the symbol plus offset and dso name *does* uniquely identify a branch/target in the binary. 
Ultimately, however, AutoFDO wants a simple offset into the binary, so we'd have to undo all the work perf did to symbolize in the first place: $ ./perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' \ > | grep burncpu | grep dso.so | head -n 1 do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0 With the new "brstackoff,dso" we get what we need: a simple offset into a specific dso/binary that uniquely identifies a branch/target: $ ./perf script -F brstackoff,dso | sed 's/\/0 /\/0\n/g' \ > | grep burncpu | grep dso.so | head -n 1 0x6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0 Signed-off-by: Mark Santaniello <marksan@fb.com> --- tools/perf/builtin-script.c | 56 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6a7033b..1effc64 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -85,6 +85,7 @@ enum perf_output_field { PERF_OUTPUT_INSN = 1U << 21, PERF_OUTPUT_INSNLEN = 1U << 22, PERF_OUTPUT_BRSTACKINSN = 1U << 23, + PERF_OUTPUT_BRSTACKOFF = 1U << 24, }; struct output_option { @@ -115,6 +116,7 @@ struct output_option { {.str = "insn", .field = PERF_OUTPUT_INSN}, {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, + {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, }; /* default set to maintain compatibility with current format */ @@ -299,10 +301,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, return -EINVAL; } if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) && - !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) { - pr_err("Display of DSO requested but none of sample IP, sample address, " - "brstack\nor brstacksym are selected. 
Hence, no addresses to " - "convert to DSO.\n"); + !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) { + pr_err("Display of DSO requested but no address to convert. Select\n" + "sample IP, sample address, brstack, brstacksym, or brstackoff.\n"); return -EINVAL; } if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) { @@ -606,6 +607,51 @@ static void print_sample_brstacksym(struct perf_sample *sample, } } +static void print_sample_brstackoff(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr) +{ + struct branch_stack *br = sample->branch_stack; + struct addr_location alf, alt; + u64 i, from, to; + + if (!(br && br->nr)) + return; + + for (i = 0; i < br->nr; i++) { + + memset(&alf, 0, sizeof(alf)); + memset(&alt, 0, sizeof(alt)); + from = br->entries[i].from; + to = br->entries[i].to; + + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); + if (alf.map && !alf.map->dso->adjust_symbols) + from = map__map_ip(alf.map, from); + + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); + if (alt.map && !alt.map->dso->adjust_symbols) + to = map__map_ip(alt.map, to); + + printf("0x%"PRIx64, from); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } + printf("/0x%"PRIx64, to); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } + printf("/%c/%c/%c/%d ", + mispred_str(br->entries + i), + br->entries[i].flags.in_tx ? 'X' : '-', + br->entries[i].flags.abort ? 
'A' : '-', + br->entries[i].flags.cycles); + } +} #define MAXBB 16384UL static int grab_bb(u8 *buffer, u64 start, u64 end, @@ -1227,6 +1273,8 @@ static void process_event(struct perf_script *script, print_sample_brstack(sample, thread, attr); else if (PRINT_FIELD(BRSTACKSYM)) print_sample_brstacksym(sample, thread, attr); + else if (PRINT_FIELD(BRSTACKOFF)) + print_sample_brstackoff(sample, thread, attr); if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); -- 2.9.3 ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH 2/2] Support perf script -F brstackoff,dso 2017-06-19 16:38 ` [PATCH 2/2] Support perf script -F brstackoff,dso Mark Santaniello @ 2017-06-19 18:12 ` Arnaldo Carvalho de Melo 2017-06-20 15:10 ` Mark Santaniello 2017-06-20 9:04 ` [tip:perf/core] perf script: Support " tip-bot for Mark Santaniello 1 sibling, 1 reply; 6+ messages in thread From: Arnaldo Carvalho de Melo @ 2017-06-19 18:12 UTC (permalink / raw) To: Mark Santaniello Cc: Peter Zijlstra, Ingo Molnar, Alexander Shishkin, linux-kernel Em Mon, Jun 19, 2017 at 09:38:25AM -0700, Mark Santaniello escreveu: > With the new "brstackoff,dso" we get what we need: a simple offset into a > specific dso/binary that uniquely identifies a branch/target: You forgot to update tools/perf/Documentation/perf-script.txt about 'brstackoff', I added a line using the above explanation, please check later and improve/expand it. Thanks, - Arnaldo ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 2/2] Support perf script -F brstackoff,dso 2017-06-19 18:12 ` Arnaldo Carvalho de Melo @ 2017-06-20 15:10 ` Mark Santaniello 0 siblings, 0 replies; 6+ messages in thread From: Mark Santaniello @ 2017-06-20 15:10 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: Peter Zijlstra, Ingo Molnar, Alexander Shishkin, linux-kernel@vger.kernel.org On 6/19/17, 11:12 AM, "Arnaldo Carvalho de Melo" <acme@kernel.org> wrote: Em Mon, Jun 19, 2017 at 09:38:25AM -0700, Mark Santaniello escreveu: > With the new "brstackoff,dso" we get what we need: a simple offset into a > specific dso/binary that uniquely identifies a branch/target: You forgot to update tools/perf/Documentation/perf-script.txt about 'brstackoff', I added a line using the above explanation, please check later and improve/expand it. Thanks, - Arnaldo Arnaldo, Thanks very much. Will do. -Mark ^ permalink raw reply [flat|nested] 6+ messages in thread
* [tip:perf/core] perf script: Support -F brstackoff,dso 2017-06-19 16:38 ` [PATCH 2/2] Support perf script -F brstackoff,dso Mark Santaniello 2017-06-19 18:12 ` Arnaldo Carvalho de Melo @ 2017-06-20 9:04 ` tip-bot for Mark Santaniello 1 sibling, 0 replies; 6+ messages in thread From: tip-bot for Mark Santaniello @ 2017-06-20 9:04 UTC (permalink / raw) To: linux-tip-commits Cc: alexander.shishkin, linux-kernel, tglx, hpa, mingo, marksan, acme, peterz Commit-ID: 106dacd86f042968e0bb974490fcb9cd017cd03a Gitweb: http://git.kernel.org/tip/106dacd86f042968e0bb974490fcb9cd017cd03a Author: Mark Santaniello <marksan@fb.com> AuthorDate: Mon, 19 Jun 2017 09:38:25 -0700 Committer: Arnaldo Carvalho de Melo <acme@redhat.com> CommitDate: Mon, 19 Jun 2017 22:05:46 -0300 perf script: Support -F brstackoff,dso The idea here is to make AutoFDO easier in cloud environment with ASLR. It's easiest to show how this is useful by example. I built a small test akin to "while(1) { do_nothing(); }" where the do_nothing function is loaded from a dso: $ cat burncpu.cpp #include <dlfcn.h> int main() { void* handle = dlopen("./dso.so", RTLD_LAZY); if (!handle) return -1; typedef void (*fp)(); fp do_nothing = (fp) dlsym(handle, "do_nothing"); while(1) { do_nothing(); } } $ cat dso.cpp extern "C" void do_nothing() {} $ cat build.sh #!/bin/bash g++ -shared dso.cpp -o dso.so g++ burncpu.cpp -o burncpu -ldl I sampled the execution of this program with perf record -b. Using the existing "brstack,dso", we get absolute addresses that are affected by ASLR, and could be different on different hosts. The address does not uniquely identify a branch/target in the binary: $ perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1 0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0 Using the existing "brstacksym,dso" is a little better, because the symbol plus offset and dso name *does* uniquely identify a branch/target in the binary. 
Ultimately, however, AutoFDO wants a simple offset into the binary, so we'd have to undo all the work perf did to symbolize in the first place: $ perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1 do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0 With the new "brstackoff,dso" we get what we need: a simple offset into a specific dso/binary that uniquely identifies a branch/target: $ perf script -F brstackoff,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1 0x6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0 Signed-off-by: Mark Santaniello <marksan@fb.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/20170619163825.2012979-2-marksan@fb.com [ Updated documentation about 'brstackoff' using text from above ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-script.txt | 4 ++- tools/perf/builtin-script.c | 56 +++++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 3eca8c0..e2468ed 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,7 +116,7 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, + srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -211,6 +211,8 @@ OPTIONS is printed. 
This is the full execution path leading to the sample. This is only supported when the sample was recorded with perf record -b or -j any. + The brstackoff field will print an offset into a specific dso/binary. + -k:: --vmlinux=<file>:: vmlinux pathname diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3c21089..db5261c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -85,6 +85,7 @@ enum perf_output_field { PERF_OUTPUT_INSN = 1U << 21, PERF_OUTPUT_INSNLEN = 1U << 22, PERF_OUTPUT_BRSTACKINSN = 1U << 23, + PERF_OUTPUT_BRSTACKOFF = 1U << 24, }; struct output_option { @@ -115,6 +116,7 @@ struct output_option { {.str = "insn", .field = PERF_OUTPUT_INSN}, {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, + {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, }; /* default set to maintain compatibility with current format */ @@ -299,10 +301,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, return -EINVAL; } if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) && - !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) { - pr_err("Display of DSO requested but none of sample IP, sample address, " - "brstack\nor brstacksym are selected. Hence, no addresses to " - "convert to DSO.\n"); + !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) { + pr_err("Display of DSO requested but no address to convert. 
Select\n" + "sample IP, sample address, brstack, brstacksym, or brstackoff.\n"); return -EINVAL; } if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) { @@ -606,6 +607,51 @@ static void print_sample_brstacksym(struct perf_sample *sample, } } +static void print_sample_brstackoff(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr) +{ + struct branch_stack *br = sample->branch_stack; + struct addr_location alf, alt; + u64 i, from, to; + + if (!(br && br->nr)) + return; + + for (i = 0; i < br->nr; i++) { + + memset(&alf, 0, sizeof(alf)); + memset(&alt, 0, sizeof(alt)); + from = br->entries[i].from; + to = br->entries[i].to; + + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); + if (alf.map && !alf.map->dso->adjust_symbols) + from = map__map_ip(alf.map, from); + + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); + if (alt.map && !alt.map->dso->adjust_symbols) + to = map__map_ip(alt.map, to); + + printf("0x%"PRIx64, from); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } + printf("/0x%"PRIx64, to); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } + printf("/%c/%c/%c/%d ", + mispred_str(br->entries + i), + br->entries[i].flags.in_tx ? 'X' : '-', + br->entries[i].flags.abort ? 'A' : '-', + br->entries[i].flags.cycles); + } +} #define MAXBB 16384UL static int grab_bb(u8 *buffer, u64 start, u64 end, @@ -1227,6 +1273,8 @@ static void process_event(struct perf_script *script, print_sample_brstack(sample, thread, attr); else if (PRINT_FIELD(BRSTACKSYM)) print_sample_brstacksym(sample, thread, attr); + else if (PRINT_FIELD(BRSTACKOFF)) + print_sample_brstackoff(sample, thread, attr); if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); ^ permalink raw reply related [flat|nested] 6+ messages in thread
* [tip:perf/core] perf script: Support -F brstack,dso and brstacksym,dso 2017-06-19 16:38 [PATCH 1/2] Support perf script -F brstack,dso and brstacksym,dso Mark Santaniello 2017-06-19 16:38 ` [PATCH 2/2] Support perf script -F brstackoff,dso Mark Santaniello @ 2017-06-20 9:04 ` tip-bot for Mark Santaniello 1 sibling, 0 replies; 6+ messages in thread From: tip-bot for Mark Santaniello @ 2017-06-20 9:04 UTC (permalink / raw) To: linux-tip-commits Cc: peterz, mingo, tglx, acme, linux-kernel, alexander.shishkin, marksan, hpa Commit-ID: 55b9b50811ca459e4688543b688b7b2b85ec5ea8 Gitweb: http://git.kernel.org/tip/55b9b50811ca459e4688543b688b7b2b85ec5ea8 Author: Mark Santaniello <marksan@fb.com> AuthorDate: Mon, 19 Jun 2017 09:38:24 -0700 Committer: Arnaldo Carvalho de Melo <acme@redhat.com> CommitDate: Mon, 19 Jun 2017 22:05:40 -0300 perf script: Support -F brstack,dso and brstacksym,dso Perf script can report the dso for "addr" and "ip" fields. This adds the same support for the "brstack" and "brstacksym" fields. This can be helpful for AutoFDO: we can ignore LBR entries unless the source and target address are both in the target module we are about to build. I built a small test akin to "while(1) { do_nothing(); }" where the do_nothing function is loaded from a dso: $ cat burncpu.cpp #include <dlfcn.h> int main() { void* handle = dlopen("./dso.so", RTLD_LAZY); if (!handle) return -1; typedef void (*fp)(); fp do_nothing = (fp) dlsym(handle, "do_nothing"); while(1) { do_nothing(); } } $ cat dso.cpp extern "C" void do_nothing() {} $ cat build.sh #!/bin/bash g++ -shared dso.cpp -o dso.so g++ burncpu.cpp -o burncpu -ldl I sampled the execution with perf record -b. 
Using the new perf script functionality I can easily find cases where there was a transition from one dso to another: $ perf record -a -b -- sleep 5 [ perf record: Woken up 55 times to write data ] [ perf record: Captured and wrote 18.815 MB perf.data (43593 samples) ] $ perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1 0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0 $ perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1 do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0 Signed-off-by: Mark Santaniello <marksan@fb.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/20170619163825.2012979-1-marksan@fb.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-script.c | 61 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index afa84de..3c21089 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -298,10 +298,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "selected.\n"); return -EINVAL; } - if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { - pr_err("Display of DSO requested but neither sample IP nor " - "sample address\nis selected. Hence, no addresses to convert " - "to DSO.\n"); + if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) && + !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) { + pr_err("Display of DSO requested but none of sample IP, sample address, " + "brstack\nor brstacksym are selected. 
Hence, no addresses to " + "convert to DSO.\n"); return -EINVAL; } if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) { @@ -514,18 +515,43 @@ mispred_str(struct branch_entry *br) return br->flags.predicted ? 'P' : 'M'; } -static void print_sample_brstack(struct perf_sample *sample) +static void print_sample_brstack(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr) { struct branch_stack *br = sample->branch_stack; - u64 i; + struct addr_location alf, alt; + u64 i, from, to; if (!(br && br->nr)) return; for (i = 0; i < br->nr; i++) { - printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ", - br->entries[i].from, - br->entries[i].to, + from = br->entries[i].from; + to = br->entries[i].to; + + if (PRINT_FIELD(DSO)) { + memset(&alf, 0, sizeof(alf)); + memset(&alt, 0, sizeof(alt)); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); + } + + printf("0x%"PRIx64, from); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } + + printf("/0x%"PRIx64, to); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } + + printf("/%c/%c/%c/%d ", mispred_str( br->entries + i), br->entries[i].flags.in_tx? 'X' : '-', br->entries[i].flags.abort? 
'A' : '-', @@ -534,7 +560,8 @@ static void print_sample_brstack(struct perf_sample *sample) } static void print_sample_brstacksym(struct perf_sample *sample, - struct thread *thread) + struct thread *thread, + struct perf_event_attr *attr) { struct branch_stack *br = sample->branch_stack; struct addr_location alf, alt; @@ -559,8 +586,18 @@ static void print_sample_brstacksym(struct perf_sample *sample, alt.sym = map__find_symbol(alt.map, alt.addr); symbol__fprintf_symname_offs(alf.sym, &alf, stdout); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } putchar('/'); symbol__fprintf_symname_offs(alt.sym, &alt, stdout); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } printf("/%c/%c/%c/%d ", mispred_str( br->entries + i), br->entries[i].flags.in_tx? 'X' : '-', @@ -1187,9 +1224,9 @@ static void process_event(struct perf_script *script, print_sample_iregs(sample, attr); if (PRINT_FIELD(BRSTACK)) - print_sample_brstack(sample); + print_sample_brstack(sample, thread, attr); else if (PRINT_FIELD(BRSTACKSYM)) - print_sample_brstacksym(sample, thread); + print_sample_brstacksym(sample, thread, attr); if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); ^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2017-06-20 15:11 UTC | newest] Thread overview: 6+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2017-06-19 16:38 [PATCH 1/2] Support perf script -F brstack,dso and brstacksym,dso Mark Santaniello 2017-06-19 16:38 ` [PATCH 2/2] Support perf script -F brstackoff,dso Mark Santaniello 2017-06-19 18:12 ` Arnaldo Carvalho de Melo 2017-06-20 15:10 ` Mark Santaniello 2017-06-20 9:04 ` [tip:perf/core] perf script: Support " tip-bot for Mark Santaniello 2017-06-20 9:04 ` [tip:perf/core] perf script: Support -F brstack,dso and brstacksym,dso tip-bot for Mark Santaniello
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox