From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751868AbdFSQi4 (ORCPT ); Mon, 19 Jun 2017 12:38:56 -0400 Received: from mx0b-00082601.pphosted.com ([67.231.153.30]:56456 "EHLO mx0a-00082601.pphosted.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1751175AbdFSQiz (ORCPT ); Mon, 19 Jun 2017 12:38:55 -0400 Smtp-Origin-Hostprefix: devbig From: Mark Santaniello Smtp-Origin-Hostname: devbig619.prn2.facebook.com To: Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Alexander Shishkin CC: , Mark Santaniello Smtp-Origin-Cluster: prn2c22 Subject: [PATCH 1/2] Support perf script -F brstack,dso and brstacksym,dso Date: Mon, 19 Jun 2017 09:38:24 -0700 Message-ID: <20170619163825.2012979-1-marksan@fb.com> X-Mailer: git-send-email 2.9.3 X-FB-Internal: Safe MIME-Version: 1.0 Content-Type: text/plain X-Proofpoint-Spam-Reason: safe X-FB-Internal: Safe X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10432:,, definitions=2017-06-19_09:,, signatures=0 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Perf script can report the dso for "addr" and "ip" fields. This adds the same support for the "brstack" and "brstacksym" fields. This can be helpful for AutoFDO: we can ignore LBR entries unless the source and target address are both in the target module we are about to build. I built a small test akin to "while(1) { do_nothing(); }" where the do_nothing function is loaded from a dso: ~/src/burncpu$ cat burncpu.cpp #include int main() { void* handle = dlopen("./dso.so", RTLD_LAZY); if (!handle) return -1; typedef void (*fp)(); fp do_nothing = (fp) dlsym(handle, "do_nothing"); while(1) { do_nothing(); } } ~/src/burncpu$ cat dso.cpp extern "C" void do_nothing() {} ~/src/burncpu$ cat build.sh #!/bin/bash g++ -shared dso.cpp -o dso.so g++ burncpu.cpp -o burncpu -ldl I sampled the execution with perf record -b. Using the new perf script functionality I can easily find cases where there was a transition from one dso to another: $ ./perf record -a -b -- sleep 5 [ perf record: Woken up 55 times to write data ] [ perf record: Captured and wrote 18.815 MB perf.data (43593 samples) ] $ ./perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' \ > | grep burncpu | grep dso.so | head -n 1 0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0 $ ./perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' \ > | grep burncpu | grep dso.so | head -n 1 do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0 Signed-off-by: Mark Santaniello --- tools/perf/builtin-script.c | 61 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 4761b0d..6a7033b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -298,10 +298,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "selected.\n"); return -EINVAL; } - if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { - pr_err("Display of DSO requested but neither sample IP nor " - "sample address\nis selected. Hence, no addresses to convert " - "to DSO.\n"); + if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) && + !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) { + pr_err("Display of DSO requested but none of sample IP, sample address, " + "brstack\nor brstacksym are selected. Hence, no addresses to " + "convert to DSO.\n"); return -EINVAL; } if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) { @@ -514,18 +515,43 @@ mispred_str(struct branch_entry *br) return br->flags.predicted ? 'P' : 'M'; } -static void print_sample_brstack(struct perf_sample *sample) +static void print_sample_brstack(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr) { struct branch_stack *br = sample->branch_stack; - u64 i; + struct addr_location alf, alt; + u64 i, from, to; if (!(br && br->nr)) return; for (i = 0; i < br->nr; i++) { - printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ", - br->entries[i].from, - br->entries[i].to, + from = br->entries[i].from; + to = br->entries[i].to; + + if (PRINT_FIELD(DSO)) { + memset(&alf, 0, sizeof(alf)); + memset(&alt, 0, sizeof(alt)); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); + } + + printf("0x%"PRIx64, from); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } + + printf("/0x%"PRIx64, to); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } + + printf("/%c/%c/%c/%d ", mispred_str( br->entries + i), br->entries[i].flags.in_tx? 'X' : '-', br->entries[i].flags.abort? 'A' : '-', @@ -534,7 +560,8 @@ static void print_sample_brstack(struct perf_sample *sample) } static void print_sample_brstacksym(struct perf_sample *sample, - struct thread *thread) + struct thread *thread, + struct perf_event_attr *attr) { struct branch_stack *br = sample->branch_stack; struct addr_location alf, alt; @@ -559,8 +586,18 @@ static void print_sample_brstacksym(struct perf_sample *sample, alt.sym = map__find_symbol(alt.map, alt.addr); symbol__fprintf_symname_offs(alf.sym, &alf, stdout); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } putchar('/'); symbol__fprintf_symname_offs(alt.sym, &alt, stdout); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } printf("/%c/%c/%c/%d ", mispred_str( br->entries + i), br->entries[i].flags.in_tx? 'X' : '-', @@ -1187,9 +1224,9 @@ static void process_event(struct perf_script *script, print_sample_iregs(sample, attr); if (PRINT_FIELD(BRSTACK)) - print_sample_brstack(sample); + print_sample_brstack(sample, thread, attr); else if (PRINT_FIELD(BRSTACKSYM)) - print_sample_brstacksym(sample, thread); + print_sample_brstacksym(sample, thread, attr); if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); -- 2.9.3