* [PATCH 8/8] Kbuild: add inline-account tool to find inline bloat
[not found] <1400276595-6965-1-git-send-email-andi@firstfloor.org>
@ 2014-05-16 21:43 ` Andi Kleen
2014-05-17 8:31 ` Sam Ravnborg
0 siblings, 1 reply; 4+ messages in thread
From: Andi Kleen @ 2014-05-16 21:43 UTC (permalink / raw)
To: linux-kernel; +Cc: akpm, Andi Kleen, linux-kbuild, mmarek
From: Andi Kleen <ak@linux.intel.com>
Add a tool to hunt for inline bloat. It uses objdump -S to account
inlines.
Example output:
Total code bytes seen 10463206
Code bytes by functions:
Function Total Avg Num
kmalloc 37132 (0.00%) 11 3310
ixgbe_read_reg 35440 (0.00%) 24 1444
spin_lock 28975 (0.00%) 11 2575
constant_test_bit 26387 (0.00%) 5 4642
arch_spin_unlock 24986 (0.00%) 7 3364
spin_unlock_irqrestore 24928 (0.00%) 11 2258
readl 24584 (0.00%) 4 5344
writel 23199 (0.00%) 6 3643
perf_fetch_caller_regs 22436 (0.00%) 27 821
get_current 22076 (0.00%) 9 2288
_radeon_msleep 19680 (0.00%) 55 353
INIT_LIST_HEAD 19410 (0.00%) 11 1747
list_del 19270 (0.00%) 16 1176
__ew32_prepare 19080 (0.00%) 25 740
__list_add 17830 (0.00%) 12 1406
Cc: linux-kbuild@vger.kernel.org
Cc: mmarek@suse.cz
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
scripts/inline-account.py | 164 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 164 insertions(+)
create mode 100755 scripts/inline-account.py
diff --git a/scripts/inline-account.py b/scripts/inline-account.py
new file mode 100755
index 0000000..2dfbf7c
--- /dev/null
+++ b/scripts/inline-account.py
@@ -0,0 +1,164 @@
+#!/usr/bin/python
+# account code bytes per source code / functions from objdump -Sl output
+# useful to find inline bloat
+# Author: Andi Kleen
+import os, sys, re, argparse, multiprocessing
+from collections import Counter
+
def _positive_int(text):
    """argparse ``type=`` callback: parse *text* as an integer that is >= 1.

    Raises argparse.ArgumentTypeError for values below 1 so that argparse
    reports a normal usage error instead of the script failing later.
    """
    value = int(text)
    if value < 1:
        raise argparse.ArgumentTypeError("must be at least 1, got %d" % value)
    return value


# Command line definition.  The description is what --help prints.
p = argparse.ArgumentParser(
    description="""
Account code bytes per source code / functions from objdump.
Useful to find inline bloat.

The line numbers are the beginning of a block, so the actual code can be later.
Line numbers can also be a little off due to objdump bugs
also some misaccounting can happen due to inexact gcc debug information.
The number output for functions may account a single large function multiple
times. program/object files need to be built with -g.

This is somewhat slow due to objdump -S being slow. It helps to have
plenty of cores.""")
p.add_argument('--min-bytes', type=int, help='minimum bytes to report', default=100)
# The thread count is used as a divisor when the symbol table is split into
# chunks, so zero or negative values are rejected while parsing.
p.add_argument('--threads', '-t', type=_positive_int,
               default=multiprocessing.cpu_count(),
               help='Number of objdump processes to run')
p.add_argument('file', help='object file/program as input')
args = p.parse_args()
+
def get_syms(fn, lines=None):
    """Return the addresses of the text symbols of *fn* plus one end address.

    fn:    object file / program to inspect; also used in the error message.
    lines: optional iterable of ``nm --print-size`` output lines.  When it is
           None (the default) nm is run on *fn*.

    Returns a list holding the start address of every ``T``/``t`` symbol, in
    the order nm printed them, followed by the highest ``start + size`` seen,
    i.e. the address at which the accounted code ends.

    Exits the program (SystemExit) when no sized text symbol is found.
    """
    if lines is None:
        lines = _nm_lines(fn)

    starts = []
    end = None
    for entry in lines:
        fields = entry.split()
        # Sized symbols look like "<addr> <size> <type> <name>"; undefined
        # symbols ("U name") have fewer fields and are skipped.
        if len(fields) <= 2 or fields[2].upper() != "T":
            continue
        try:
            addr = int(fields[0], 16)
            size = int(fields[1], 16)
        except ValueError:
            # not an "<addr> <size>" pair after all
            continue
        starts.append(addr)
        # nm sorts by name, so the last symbol printed is not necessarily the
        # one that ends last; track the maximum explicitly.
        if end is None or addr + size > end:
            end = addr + size

    # Compare against None: address 0 is a perfectly valid symbol address.
    if end is None:
        sys.exit(fn + " has no symbols")
    starts.append(end)
    return starts


def _nm_lines(fn):
    """Yield the lines printed by ``nm --print-size`` for *fn*."""
    import subprocess

    # An argument list (no shell) keeps unusual file names from being
    # interpreted by the shell.
    try:
        proc = subprocess.Popen(["nm", "--print-size", fn],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError as err:
        sys.exit("cannot run nm: %s" % err)
    try:
        for line in proc.stdout:
            yield line
    finally:
        proc.stdout.close()
        proc.wait()
+
class Account:
    """Byte counters gathered from one slice of objdump output.

    funcbytes:   Counter, function name -> code bytes attributed to it
    linebytes:   Counter, (file, line) -> code bytes attributed to it
    funccount:   Counter, function name -> number of separate code runs seen
    nolinebytes: bytes seen after a function marker but before any line marker
    nofuncbytes: bytes seen before any function marker
    total:       bytes attributed to both a function and a source line
    """

    def __init__(self):
        self.funcbytes = Counter()
        self.linebytes = Counter()
        self.funccount = Counter()
        self.nolinebytes = 0
        self.nofuncbytes = 0
        self.total = 0


def add_account(a, b):
    """Add every counter of *b* into *a* and return *a* (a reduce() step)."""
    a.funcbytes += b.funcbytes
    a.linebytes += b.linebytes
    a.funccount += b.funccount
    # These two must come from b; adding a's own value would merely double it.
    a.nolinebytes += b.nolinebytes
    a.nofuncbytes += b.nofuncbytes
    a.total += b.total
    return a


#  250: e8 00 00 00 00 callq 255 <proc_skip_spaces+0x5>
_CODE_RE = re.compile(r'\s*([0-9a-fA-F]+):\s+(.*)')
# sysctl_init():
_FUNC_RE = re.compile(r'([a-zA-Z_][a-zA-Z0-9_]*)\(\):$')
# /sysctl.c:1666, optionally followed by " (discriminator N)"
_LINE_RE = re.compile(r'^([^:]+):(\d+)(?: \(discriminator \d+\))?$')
# one group of the hex dump: a whole number of bytes
_HEX_RE = re.compile(r'(?:[0-9a-fA-F]{2})+$')


def _count_code_bytes(rest):
    """Count the instruction bytes at the start of *rest* (text after "addr:")."""
    # objdump puts a tab between the hex dump and the mnemonic.  Looking only
    # at the text before that tab keeps operands such as "255 " and mnemonics
    # such as "add " from being counted as bytes.
    nbytes = 0
    for token in rest.split('\t', 1)[0].split():
        if not _HEX_RE.match(token):
            break
        # two hex digits per byte; also covers dumps grouped into words
        nbytes += len(token) // 2
    return nbytes


def _objdump_lines(path, start, stop):
    """Yield the ``objdump -Sl`` output lines for [start, stop) of *path*."""
    import subprocess

    cmd = ["objdump", "-Sl", path,
           "--start-address=%#x" % start, "--stop-address=%#x" % stop]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for line in proc.stdout:
            yield line
    finally:
        proc.stdout.close()
        proc.wait()


# Don't call sys.exit() in here: this function runs inside multiprocessing
# workers and exiting there causes deadlocks.
def account_range(r, lines=None):
    """Account the code bytes of one address range.

    r:     (start, stop) address pair; the range is disassembled from the
           file named by the global ``args.file``.
    lines: optional iterable of ``objdump -Sl`` output lines.  When given,
           objdump is not run and *r* is not used.

    Returns an Account with the bytes attributed to the most recently seen
    "name():" function marker and "file:line" marker.
    """
    if lines is None:
        lines = _objdump_lines(args.file, r[0], r[1])

    a = Account()
    srcfile = None
    line = None
    func = None
    codefunc = None     # function that the current run of code belongs to

    for l in lines:
        m = _CODE_RE.match(l)
        if m:
            nbytes = _count_code_bytes(m.group(2))
            if not func:
                a.nofuncbytes += nbytes
                continue
            if not line:
                a.nolinebytes += nbytes
                continue
            a.total += nbytes
            a.funcbytes[func] += nbytes
            a.linebytes[(srcfile, line)] += nbytes
            codefunc = func
            continue

        m = _FUNC_RE.match(l)
        if m:
            # A different function name ends the current run of code.
            if codefunc and m.group(1) != codefunc:
                a.funccount[codefunc] += 1
                codefunc = None
            func = m.group(1)
            continue

        m = _LINE_RE.match(l)
        if m:
            srcfile, line = m.group(1), int(m.group(2))
            continue

    # close the run that was still open at the end of the range
    if codefunc:
        a.funccount[codefunc] += 1
    return a
+
# objdump -S is slow, so we parallelize

# reduce() is a builtin only on Python 2; functools provides it on 2.6+ and 3.
from functools import reduce

# Split the symbol table into chunks for parallelization.
# We split on function boundaries to avoid mis-accounting.
# Assumes functions have roughly similar length.
# set() drops aliased symbols that share one address; they would otherwise
# produce empty (start == stop) ranges.
syms = sorted(set(get_syms(args.file)))
nfuncs = len(syms) - 1
if nfuncs < 1:
    sys.exit(args.file + " has no code to account")
nthreads = max(args.threads, 1)
# At least one function per chunk: a step of 0 would make range() fail when
# there are more threads than functions.
chunk = max(nfuncs // nthreads, 1)
boundaries = [syms[x] for x in range(0, nfuncs, chunk)] + [syms[-1]]
ranges = list(zip(boundaries, boundaries[1:]))
assert ranges[0][0] == syms[0]
assert ranges[-1][1] == syms[-1]

# map-reduce
if nthreads == 1:
    al = [account_range(r) for r in ranges]
else:
    pool = multiprocessing.Pool(nthreads)
    try:
        al = pool.map(account_range, ranges)
    finally:
        # all results are collected by now (or the map failed); stop the workers
        pool.terminate()
        pool.join()
a = reduce(add_account, al)
+
def sort_map(m):
    """Return the keys of mapping *m* ordered by descending value."""
    return sorted(m.keys(), key=lambda x: m[x], reverse=True)


def _percent(part, whole):
    """Return *part* as a percentage of *whole*; 0.0 when *whole* is zero."""
    return 100.0 * part / whole if whole else 0.0


# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Total code bytes seen %d" % a.total)
#print("Bytes with no function %d (%.2f%%)" % (a.nofuncbytes, _percent(a.nofuncbytes, a.total)))
#print("Bytes with no lines %d (%.2f%%)" % (a.nolinebytes, _percent(a.nolinebytes, a.total)))

print("\nCode bytes by functions:")
print("%-50s %-5s %-5s %-5s %-5s" % ("Function", "Total", "", "Avg", "Num"))
for j in sort_map(a.funcbytes):
    # keys are sorted by size, so everything after the first small one is small
    if a.funcbytes[j] < args.min_bytes:
        break
    print("%-50s %-5d (%.2f%%) %-5d %-5d" % (
        j,
        a.funcbytes[j],
        _percent(a.funcbytes[j], a.total),
        a.funcbytes[j] // a.funccount[j],
        a.funccount[j]))

# Drop small entries.  Iterate over a snapshot of the keys because the
# Counter is modified inside the loop.
for j in list(a.linebytes):
    if a.linebytes[j] < args.min_bytes:
        del a.linebytes[j]

# os.path.commonprefix fails with >50k entries
# just use the first 10
prefix = os.path.commonprefix([k[0] for k in list(a.linebytes)[:10]])
# commonprefix compares character by character; cut back to the last "/" so
# that a path component is never split in the middle.
prefix = prefix[:prefix.rfind("/") + 1]

print("\nCode bytes by nearby source line blocks:")
print("prefix %s" % prefix)

print("%-50s %-5s" % ("Line", "Total"))
for j in sort_map(a.linebytes):
    name = j[0]
    # The prefix came from a sample of the files only, and it must only ever
    # be removed from the front of a path.
    if prefix and name.startswith(prefix):
        name = name[len(prefix):]
    print("%-50s %-5d (%.2f%%)" % (
        "%s:%d" % (name, j[1]),
        a.linebytes[j],
        _percent(a.linebytes[j], a.total)))
--
1.9.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 8/8] Kbuild: add inline-account tool to find inline bloat
2014-05-16 21:43 ` [PATCH 8/8] Kbuild: add inline-account tool to find inline bloat Andi Kleen
@ 2014-05-17 8:31 ` Sam Ravnborg
2014-05-17 9:36 ` Sam Ravnborg
0 siblings, 1 reply; 4+ messages in thread
From: Sam Ravnborg @ 2014-05-17 8:31 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel, akpm, Andi Kleen, linux-kbuild, mmarek
Hi Andi.
On Fri, May 16, 2014 at 02:43:15PM -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Add a tool to hunt for inline bloat. It uses objdump -S to account
> inlines.
I tried this on my sparc32 build - but it failed with:
objdump: can't disassemble for architecture UNKNOWN!
It looks simple to add CROSS_COMPILE support but I did not do so.
My python skills are non-existing.
Sam
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 8/8] Kbuild: add inline-account tool to find inline bloat
2014-05-17 8:31 ` Sam Ravnborg
@ 2014-05-17 9:36 ` Sam Ravnborg
2014-05-17 16:51 ` Andi Kleen
0 siblings, 1 reply; 4+ messages in thread
From: Sam Ravnborg @ 2014-05-17 9:36 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel, akpm, Andi Kleen, linux-kbuild, mmarek
On Sat, May 17, 2014 at 10:31:44AM +0200, Sam Ravnborg wrote:
> Hi Andi.
>
> On Fri, May 16, 2014 at 02:43:15PM -0700, Andi Kleen wrote:
> > From: Andi Kleen <ak@linux.intel.com>
> >
> > Add a tool to hunt for inline bloat. It uses objdump -S to account
> > inlines.
> I tried this on my sparc32 build - but it failed with:
> objdump: can't disassemble for architecture UNKNOWN!
>
> It looks simple to add CROSS_COMPILE support but I did not do so.
> My python skills are non-existing.
Patched the calls to nm and objdump - but it gave no output
when I ran the script.
nm --print-size shows following output:
00002910 00000024 r CSWTCH.946
00002bd4 00000024 r CSWTCH.951
U PDE_DATA
U ROOT_DEV
000000fc 00000014 T SyS_accept
00002c98 000001a8 T SyS_accept4
00000fc4 0000008c T SyS_bind
00000eb4 00000094 T SyS_connect
00000d98 00000094 T SyS_getpeername
00000e2c 00000088 T SyS_getsockname
00000c6c 00000090 T SyS_getsockopt
00000f48 0000007c T SyS_listen
00000128 00000018 T SyS_recv
0000142c 000000f0 T SyS_recvfrom
00001920 000000d0 T SyS_recvmmsg
00001238 00000010 T SyS_recvmsg
00000110 00000018 T SyS_send
00001d38 00000010 T SyS_sendmmsg
00001db0 00000010 T SyS_sendmsg
000015f4 000000dc T SyS_sendto
00000cfc 0000009c T SyS_setsockopt
00000c0c 00000060 T SyS_shutdown
00003020 000000b4 T SyS_socket
000007ec 000001f8 T SyS_socketcall
00002e40 000001e0 T SyS_socketpair
000b776c 00000098 t T.1063
000b762c 000000d0 t T.1064
objdump -Sl shows following output:
000000d4 <sock_mmap>:
sock_mmap():
d4: 9d e3 bf a0 save %sp, -96, %sp
d8: c2 06 20 78 ld [ %i0 + 0x78 ], %g1
dc: 94 10 00 19 mov %i1, %o2
e0: 92 10 00 01 mov %g1, %o1
e4: c2 00 60 18 ld [ %g1 + 0x18 ], %g1
e8: c2 00 60 40 ld [ %g1 + 0x40 ], %g1
ec: 9f c0 40 00 call %g1
f0: 90 10 00 18 mov %i0, %o0
f4: 81 c7 e0 08 ret
f8: 91 e8 00 08 restore %g0, %o0, %o0
000000fc <SyS_accept>:
sys_accept():
fc: 96 10 20 00 clr %o3
100: 82 13 c0 00 mov %o7, %g1
104: 40 00 00 00 call 104 <SyS_accept+0x8>
108: 9e 10 40 00 mov %g1, %o7
10c: 01 00 00 00 nop
00000110 <SyS_send>:
SyS_send():
110: 98 10 20 00 clr %o4 ! 0 <sock_from_file-0x4c>
114: 9a 10 20 00 clr %o5
118: 82 13 c0 00 mov %o7, %g1
11c: 40 00 00 00 call 11c <SyS_send+0xc>
120: 9e 10 40 00 mov %g1, %o7
124: 01 00 00 00 nop
00000128 <SyS_recv>:
sys_recv():
128: 98 10 20 00 clr %o4 ! 0 <sock_from_file-0x4c>
12c: 9a 10 20 00 clr %o5
130: 82 13 c0 00 mov %o7, %g1
134: 40 00 00 00 call 134 <SyS_recv+0xc>
138: 9e 10 40 00 mov %g1, %o7
13c: 01 00 00 00 nop
Sam
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 8/8] Kbuild: add inline-account tool to find inline bloat
2014-05-17 9:36 ` Sam Ravnborg
@ 2014-05-17 16:51 ` Andi Kleen
0 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2014-05-17 16:51 UTC (permalink / raw)
To: Sam Ravnborg
Cc: Andi Kleen, linux-kernel, akpm, Andi Kleen, linux-kbuild, mmarek
> Patched the calls to nm and objdump - but it gave no output
> when I ran the script.
You have to compile with debug info on.
-Andi
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2014-05-17 16:51 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <1400276595-6965-1-git-send-email-andi@firstfloor.org>
2014-05-16 21:43 ` [PATCH 8/8] Kbuild: add inline-account tool to find inline bloat Andi Kleen
2014-05-17 8:31 ` Sam Ravnborg
2014-05-17 9:36 ` Sam Ravnborg
2014-05-17 16:51 ` Andi Kleen
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.