From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Bram Stolk <b.stolk@gmail.com>
Cc: linux-perf-users@vger.kernel.org
Subject: Re: Adding a recording delay interferes with symbols/call-stack
Date: Tue, 31 Oct 2017 16:14:11 -0300 [thread overview]
Message-ID: <20171031191411.GB3531@kernel.org> (raw)
In-Reply-To: <CABYXEktUSaLrT4Ajp17D4i93o1Sia+Jd5vpSAydEPNOw6nTFiA@mail.gmail.com>
Em Tue, Oct 31, 2017 at 10:10:28AM -0700, Bram Stolk escreveu:
> Thanks for trying to reproduce, Arnaldo.
>
> But for me, it's not the kernel symbols that are missing: they are all
> there, syscalls and stuff.
> Just the app's symbols itself are missing.
>
> What happens if you try it on a binary you built locally, that spends
> cycles in userspace?
So, trying with 'perf trace' to do a system wide strace like session for the
'open' syscall, that will use DWARF callchains and go from userspace all the
way to raw_syscalls:sys_enter/sys_exit, to check various setups to try to
narrow down what may be broken:
[root@jouet ~]# perf trace --max-stack 32 -e open
518.496 ( 0.049 ms): libvirtd/1171 open(filename: /proc/cpuinfo, mode: IRUGO|IWUGO ) = 23
__open_nocancel (inlined)
__GI__IO_file_open (inlined)
__GI__IO_file_fopen (inlined)
__fopen_internal (/usr/lib64/libc-2.25.so)
virHostCPUGetInfo (/usr/lib64/libvirt.so.0.3002.1)
virCapabilitiesGetNodeInfo (/usr/lib64/libvirt.so.0.3002.1)
virNodeGetInfo (/usr/lib64/libvirt.so.0.3002.1)
_init (/usr/sbin/libvirtd)
virNetServerProgramDispatch (/usr/lib64/libvirt.so.0.3002.1)
[0xffffaa7a2a4a8737] (/usr/sbin/libvirtd)
[0xffff80b476e99f30] (/usr/lib64/libvirt.so.0.3002.1)
[0xffff80b476e992a7] (/usr/lib64/libvirt.so.0.3002.1)
start_thread (/usr/lib64/libpthread-2.25.so)
__GI___clone (inlined)
518.817 ( 0.022 ms): libvirtd/1171 open(filename: /sys/devices/system/cpu/present, mode: IFSOCK|ISGID|IXUSR|IRGRP|0x720b0000) = 24
[0xffff80b47aad198d] (/usr/lib64/libpthread-2.25.so)
virFileReadAll (/usr/lib64/libvirt.so.0.3002.1)
virFileReadValueBitmap (/usr/lib64/libvirt.so.0.3002.1)
virSysfsGetValueBitmap (/usr/lib64/libvirt.so.0.3002.1)
virHostCPUGetPresentBitmap (/usr/lib64/libvirt.so.0.3002.1)
virHostCPUGetInfoPopulateLinux (/usr/lib64/libvirt.so.0.3002.1)
virHostCPUGetInfo (/usr/lib64/libvirt.so.0.3002.1)
virCapabilitiesGetNodeInfo (/usr/lib64/libvirt.so.0.3002.1)
virNodeGetInfo (/usr/lib64/libvirt.so.0.3002.1)
_init (/usr/sbin/libvirtd)
virNetServerProgramDispatch (/usr/lib64/libvirt.so.0.3002.1)
[0xffffaa7a2a4a8737] (/usr/sbin/libvirtd)
[0xffff80b476e99f30] (/usr/lib64/libvirt.so.0.3002.1)
[0xffff80b476e992a7] (/usr/lib64/libvirt.so.0.3002.1)
start_thread (/usr/lib64/libpthread-2.25.so)
__GI___clone (inlined)
--------------------------
Seems to work, now using a CPU hog and sampling the most precise CPU "cycles" hardware counter:
# time perf record --call-graph=dwarf stress --cpu 1 --timeout 10
stress: info: [4690] dispatching hogs: 1 cpu, 0 io, 0 vm, 0 hdd
stress: info: [4690] successful run completed in 10s
[ perf record: Woken up 1288 times to write data ]
[ perf record: Captured and wrote 321.915 MB perf.data (40001 samples) ]
real 0m10.174s
user 0m10.065s
sys 0m0.244s
#
# perf evlist -v
cycles:ppp: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|DATA_SRC, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, mmap2: 1, comm_exec: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192
#
# perf report --stdio --dsos stress
# To display the perf.data header info, please use --header/--header-only options.
#
# dso: stress
#
# Total Lost Samples: 0
#
# Samples: 40K of event 'cycles:ppp'
# Event count (approx.): 31368767617
#
# Children Self Command Symbol
# ........ ........ ....... ......................
#
88.38% 14.59% stress [.] hogcpu
|
|--73.78%--hogcpu
| |
| --73.73%--rand
| |
| --73.71%--__random
| |
| --40.58%--__random_r
|
|--13.00%--_start
| __libc_start_main
| main
| hogcpu
|
--1.59%--0xbb7
hogcpu
86.78% 0.00% stress [.] _start
|
---_start
__libc_start_main
main
hogcpu
|
--73.73%--rand
|
--73.71%--__random
|
--40.58%--__random_r
86.78% 0.00% stress [.] __libc_start_main
|
---__libc_start_main
main
hogcpu
|
--73.73%--rand
|
--73.71%--__random
|
--40.58%--__random_r
86.78% 0.00% stress [.] main
|
---main
hogcpu
|
--73.73%--rand
|
--73.71%--__random
|
--40.58%--__random_r
0.01% 0.01% stress [k] 0x000055ae24920bc9
0.00% 0.00% stress [k] 0x00007fba0ad15790
0.00% 0.00% stress [k] 0x00007fba0ad157e6
0.00% 0.00% stress [k] 0x00007fba0ad15770
0.00% 0.00% stress [k] 0x000055ae24920bcd
0.00% 0.00% stress [k] 0x00007fba0ad157a5
0.00% 0.00% stress [k] 0x00007fba0ad15c7d
0.00% 0.00% stress [k] 0x00007fba0ad15808
#
# (Tip: System-wide collection from all CPUs: perf record -a)
#
Also with 'perf script':
<SNIP>
stress 4691 1422.153895: 576462 cycles:ppp:
3b946 __random_r (/usr/lib64/libc-2.25.so)
3b7d1 __random (/usr/lib64/libc-2.25.so)
3bc78 rand (/usr/lib64/libc-2.25.so)
2bbc hogcpu (/usr/bin/stress)
160c main (/usr/bin/stress)
20509 __libc_start_main (/usr/lib64/libc-2.25.so)
2ac9 _start (/usr/bin/stress)
stress 4691 1422.154121: 582967 cycles:ppp:
3b946 __random_r (/usr/lib64/libc-2.25.so)
3b7d1 __random (/usr/lib64/libc-2.25.so)
3bc78 rand (/usr/lib64/libc-2.25.so)
2bbc hogcpu (/usr/bin/stress)
160c main (/usr/bin/stress)
20509 __libc_start_main (/usr/lib64/libc-2.25.so)
2ac9 _start (/usr/bin/stress)
stress 4691 1422.154349: 590924 cycles:ppp:
3b93c __random_r (/usr/lib64/libc-2.25.so)
3b7d1 __random (/usr/lib64/libc-2.25.so)
3bc78 rand (/usr/lib64/libc-2.25.so)
2bbc hogcpu (/usr/bin/stress)
160c main (/usr/bin/stress)
20509 __libc_start_main (/usr/lib64/libc-2.25.so)
2ac9 _start (/usr/bin/stress)
<SNIP>
> Here is my output on my clang-built binary:
I'll try as well with a clang built binary:
[acme@jouet linux]$ make CC=clang O=/tmp/build/perf -C tools/perf install-bin
I.e. will profile a perf binary built with clang:
$ readelf -wi ~/bin/perf | grep DW_AT_producer | head -1
<c> DW_AT_producer : (indirect string, offset: 0x0): clang version 4.0.0 (http://llvm.org/git/clang.git f5be8ba13adc4ba1011a7ccd60c844bd60427c1c) (http://llvm.org/git/llvm.git efca1a37676f4cd276d947658cf90b0fb625abfd)
$
# perf record -F 2000 --call-graph=dwarf perf top
[ perf record: Woken up 20 times to write data ]
[ perf record: Captured and wrote 5.771 MB perf.data (694 samples) ]
# perf report --dsos=perf --no-children --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
# dso: perf
#
# Total Lost Samples: 0
#
# Samples: 694 of event 'cycles:ppp'
# Event count (approx.): 889028447
#
# Overhead Command Symbol
# ........ ....... ......................................
#
4.77% perf [.] rb_next
|
---rb_next
|
|--3.15%--map_groups__fixup_overlappings
| thread__insert_map
| machine__process_mmap2_event
| machine__process_event
| perf_event__process
| perf_event__synthesize_mmap_events
| 0xffffffffffffffff
|
--1.12%--map__next
map_groups__find_by_name
map_groups__set_modules_path_dir
map_groups__set_modules_path_dir
0xffffffffffffffff
2.69% perf [.] map_groups__fixup_overlappings
|
---map_groups__fixup_overlappings
thread__insert_map
machine__process_mmap2_event
machine__process_event
perf_event__process
perf_event__synthesize_mmap_events
0xffffffffffffffff
1.36% perf [.] rb_erase
|
---rb_erase
symbols__delete
dso__delete
dso__put
|
--1.18%--map__put
__map__zput (inlined)
hist_entry__delete
hists_evsel__exit
perf_evsel__exit
perf_evsel__delete
perf_evlist__purge (inlined)
perf_evlist__delete
cmd_top
0xffffffffffffffff
Also seems to work.
Can you try building the latest codebase, that I'm putting at:
[acme@jouet linux]$ rm -f perf-4.14.0-rc6.tar.xz
[acme@jouet linux]$ make help | grep perf
perf-tar-src-pkg - Build perf-4.14.0-rc6.tar source tarball
perf-targz-src-pkg - Build perf-4.14.0-rc6.tar.gz source tarball
perf-tarbz2-src-pkg - Build perf-4.14.0-rc6.tar.bz2 source tarball
perf-tarxz-src-pkg - Build perf-4.14.0-rc6.tar.xz source tarball
[acme@jouet linux]$ make perf-tarxz-src-pkg
TAR
PERF_VERSION = 4.14.rc6.gadb243
[acme@jouet linux]$ ls -lart perf-4*
-rw-rw-r--. 1 acme acme 1252444 Oct 31 16:06 perf-4.14.0-rc6.tar.xz
[acme@jouet linux]$
http://vger.kernel.org/~acme/perf/perf-4.14.0-rc6.tar.xz
Download it and try building it:
$ mkdir /tmp/perf
$ cd /tmp/perf
$ wget http://vger.kernel.org/~acme/perf/perf-4.14.0-rc6.tar.xz
$ tar xf perf-4.14.0-rc6.tar.xz
$ cd perf-4.14.0-rc6
$ make -C tools/perf install-bin
Then make sure your ~/bin/ directory is in the path and try your workloads to
see if there was some fix that addresses the problems you're experiencing.
- Arnaldo
next prev parent reply other threads:[~2017-10-31 19:14 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-10-30 20:25 Adding a recording delay interferes with symbols/call-stack Bram Stolk
2017-10-31 14:25 ` Arnaldo Carvalho de Melo
2017-10-31 17:10 ` Bram Stolk
2017-10-31 19:14 ` Arnaldo Carvalho de Melo [this message]
2017-11-01 4:36 ` Bram Stolk
2017-11-02 6:20 ` Namhyung Kim
2017-11-02 7:01 ` Bram Stolk
2017-11-02 7:07 ` Namhyung Kim
2017-11-02 7:10 ` Bram Stolk
2017-11-03 18:41 ` Arnaldo Carvalho de Melo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171031191411.GB3531@kernel.org \
--to=acme@kernel.org \
--cc=b.stolk@gmail.com \
--cc=linux-perf-users@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.