* [PATCH v2] contrib/plugins/uftrace_symbols.py: generate debug files to map symbols to source
@ 2025-10-15 23:28 Pierrick Bouvier
2025-10-15 23:30 ` Pierrick Bouvier
2025-10-16 5:45 ` Philippe Mathieu-Daudé
0 siblings, 2 replies; 3+ messages in thread
From: Pierrick Bouvier @ 2025-10-15 23:28 UTC (permalink / raw)
To: qemu-devel
Cc: Pierrick Bouvier, Alex Bennée, Mahmoud Mandour,
Manos Pitsidianakis, Alexandre Iooss, Phil Mathieu-Daudé
Enhance uftrace_symbols.py to generate .dbg files containing the
source location of every symbol present in the .sym file.
This allows using `uftrace {replay,dump} --srcline` to show the origin of
functions, connecting the trace to the original source code.
It was first implemented with the pyelftools DWARF parser, which was far
too slow (taking minutes) to get locations for every symbol in the Linux
kernel. Thus, we use `addr2line` instead, which runs in seconds.
In addition, there were some bugs in the latest pyelftools release, which
required running the master version, which is not installable with pip.
Thus, since we now require binutils (addr2line), we can drop the
pyelftools-based implementation and simply rely on `nm` to get symbol
information, which is faster and better.
Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
---
contrib/plugins/uftrace_symbols.py | 116 +++++++++++++++++++----------
1 file changed, 76 insertions(+), 40 deletions(-)
diff --git a/contrib/plugins/uftrace_symbols.py b/contrib/plugins/uftrace_symbols.py
index b49e03203c8..45fb79c7a58 100755
--- a/contrib/plugins/uftrace_symbols.py
+++ b/contrib/plugins/uftrace_symbols.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
-# Create symbols and mapping files for uftrace.
+# Create symbols, debug and mapping files for uftrace.
#
# Copyright 2025 Linaro Ltd
# Author: Pierrick Bouvier <pierrick.bouvier@linaro.org>
@@ -9,44 +9,71 @@
# SPDX-License-Identifier: GPL-2.0-or-later
import argparse
-import elftools # pip install pyelftools
import os
+import subprocess
-from elftools.elf.elffile import ELFFile
-from elftools.elf.sections import SymbolTableSection
+class Symbol:
+ def __init__(self, name, addr, size):
+ self.name = name
+ # clamp addr to 48 bits, like uftrace entries
+ self.addr = addr & 0xffffffffffff
+ self.full_addr = addr
+ self.size = size
-def elf_func_symbols(elf):
- symbol_tables = [(idx, s) for idx, s in enumerate(elf.iter_sections())
- if isinstance(s, SymbolTableSection)]
- symbols = []
- for _, section in symbol_tables:
- for _, symbol in enumerate(section.iter_symbols()):
- if symbol_size(symbol) == 0:
- continue
- type = symbol['st_info']['type']
- if type == 'STT_FUNC' or type == 'STT_NOTYPE':
- symbols.append(symbol)
- symbols.sort(key = lambda x: symbol_addr(x))
+ def set_loc(self, file, line):
+ self.file = file
+ self.line = line
+
+def get_symbols(elf_file):
+ symbols=[]
+ try:
+ out = subprocess.check_output(['nm', '--print-size', elf_file],
+ stderr=subprocess.STDOUT,
+ text=True)
+ except subprocess.CalledProcessError as e:
+ print(e.output)
+ raise
+ out = out.strip().split('\n')
+ for line in out:
+ info = line.split(' ')
+ if len(info) == 3:
+ # missing size information
+ continue
+ addr, size, type, name = info
+ # add only symbols from .text section
+ if type.lower() != 't':
+ continue
+ addr = int(addr, 16)
+ size = int(size, 16)
+ symbols.append(Symbol(name, addr, size))
+ symbols.sort(key = lambda x: x.addr)
return symbols
-def symbol_size(symbol):
- return symbol['st_size']
-
-def symbol_addr(symbol):
- addr = symbol['st_value']
- # clamp addr to 48 bits, like uftrace entries
- return addr & 0xffffffffffff
-
-def symbol_name(symbol):
- return symbol.name
+def find_symbols_locations(elf_file, symbols):
+ addresses = '\n'.join([hex(x.full_addr) for x in symbols])
+ try:
+ out = subprocess.check_output(['addr2line', '--exe', elf_file],
+ stderr=subprocess.STDOUT,
+ input=addresses, text=True)
+ except subprocess.CalledProcessError as e:
+ print(e.output)
+ raise
+ out = out.strip().split('\n')
+ assert len(out) == len(symbols)
+ for i in range(len(symbols)):
+ s = symbols[i]
+ file, line = out[i].split(':')
+ # addr2line may return 'line (discriminator [0-9]+)' sometimes,
+ # remove this to keep only line number.
+ line = line.split(' ')[0]
+ s.set_loc(file, line)
class BinaryFile:
def __init__(self, path, map_offset):
self.fullpath = os.path.realpath(path)
self.map_offset = map_offset
- with open(path, 'rb') as f:
- self.elf = ELFFile(f)
- self.symbols = elf_func_symbols(self.elf)
+ self.symbols = get_symbols(self.fullpath)
+ find_symbols_locations(self.fullpath, self.symbols)
def path(self):
return self.fullpath
@@ -56,24 +83,31 @@ def addr_start(self):
def addr_end(self):
last_sym = self.symbols[-1]
- return symbol_addr(last_sym) + symbol_size(last_sym) + self.map_offset
+ return last_sym.addr + last_sym.size + self.map_offset
def generate_symbol_file(self, prefix_symbols):
binary_name = os.path.basename(self.fullpath)
- sym_file_path = f'./uftrace.data/{binary_name}.sym'
+ sym_file_path = os.path.join('uftrace.data', f'{binary_name}.sym')
print(f'{sym_file_path} ({len(self.symbols)} symbols)')
with open(sym_file_path, 'w') as sym_file:
# print hexadecimal addresses on 48 bits
addrx = "0>12x"
for s in self.symbols:
- addr = symbol_addr(s)
+ addr = s.addr
addr = f'{addr:{addrx}}'
- size = f'{symbol_size(s):{addrx}}'
- name = symbol_name(s)
+ size = f'{s.size:{addrx}}'
if prefix_symbols:
- name = f'{binary_name}:{name}'
+ name = f'{binary_name}:{s.name}'
print(addr, size, 'T', name, file=sym_file)
+ def generate_debug_file(self):
+ binary_name = os.path.basename(self.fullpath)
+ dbg_file_path = os.path.join('uftrace.data', f'{binary_name}.dbg')
+ with open(dbg_file_path, 'w') as dbg_file:
+ for s in self.symbols:
+ print(f'F: {hex(s.addr)} {s.name}', file=dbg_file)
+ print(f'L: {s.line} {s.file}', file=dbg_file)
+
def parse_parameter(p):
s = p.split(":")
path = s[0]
@@ -84,7 +118,7 @@ def parse_parameter(p):
offset = s[1]
if not offset.startswith('0x'):
err = f'offset "{offset}" is not an hexadecimal constant. '
- err += 'It should starts with "0x".'
+ err += 'It should start with "0x".'
raise ValueError(err)
offset = int(offset, 16)
return path, offset
@@ -97,7 +131,7 @@ def is_from_user_mode(map_file_path):
return False
def generate_map(binaries):
- map_file_path = './uftrace.data/sid-0.map'
+ map_file_path = os.path.join('uftrace.data', 'sid-0.map')
if is_from_user_mode(map_file_path):
print(f'do not overwrite {map_file_path} generated from qemu-user')
@@ -124,7 +158,8 @@ def generate_map(binaries):
def main():
parser = argparse.ArgumentParser(description=
- 'generate symbol files for uftrace')
+ 'generate symbol files for uftrace. '
+ 'Require binutils (nm and addr2line).')
parser.add_argument('elf_file', nargs='+',
help='path to an ELF file. '
'Use /path/to/file:0xdeadbeef to add a mapping offset.')
@@ -133,8 +168,8 @@ def main():
action=argparse.BooleanOptionalAction)
args = parser.parse_args()
- if not os.path.exists('./uftrace.data'):
- os.mkdir('./uftrace.data')
+ if not os.path.exists('uftrace.data'):
+ os.mkdir('uftrace.data')
binaries = []
for file in args.elf_file:
@@ -145,6 +180,7 @@ def main():
for b in binaries:
b.generate_symbol_file(args.prefix_symbols)
+ b.generate_debug_file()
generate_map(binaries)
--
2.47.3
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v2] contrib/plugins/uftrace_symbols.py: generate debug files to map symbols to source
2025-10-15 23:28 [PATCH v2] contrib/plugins/uftrace_symbols.py: generate debug files to map symbols to source Pierrick Bouvier
@ 2025-10-15 23:30 ` Pierrick Bouvier
2025-10-16 5:45 ` Philippe Mathieu-Daudé
1 sibling, 0 replies; 3+ messages in thread
From: Pierrick Bouvier @ 2025-10-15 23:30 UTC (permalink / raw)
To: qemu-devel
Cc: Alex Bennée, Mahmoud Mandour, Manos Pitsidianakis,
Alexandre Iooss, Phil Mathieu-Daudé
On 10/15/25 4:28 PM, Pierrick Bouvier wrote:
> Enhance uftrace_symbols.py to generate .dbg files, containing
> source location for every symbol present in .sym file.
> It allows to use `uftrace {replay,dump} --srcline` and show origin of
> functions, connecting trace to original source code.
>
> It was first implemented with pyelftools DWARF parser, which was way
> too slow (~minutes) to get locations for every symbol in the linux
> kernel. Thus, we use `addr2line` instead, which runs in seconds.
>
> As well, there were some bugs with latest pyelftools release,
> requiring to run master version, which is not installable with pip.
> Thus, since we now require binutils (addr2line), we can ditch pyelftools
> based implementation and simply rely on `nm` to get symbols information,
> which is faster and better.
>
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> ---
> contrib/plugins/uftrace_symbols.py | 116 +++++++++++++++++++----------
> 1 file changed, 76 insertions(+), 40 deletions(-)
v2
--
- Cosmetic changes in commit description
- use os.path.join to build paths
Forgot to apply the:
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v2] contrib/plugins/uftrace_symbols.py: generate debug files to map symbols to source
2025-10-15 23:28 [PATCH v2] contrib/plugins/uftrace_symbols.py: generate debug files to map symbols to source Pierrick Bouvier
2025-10-15 23:30 ` Pierrick Bouvier
@ 2025-10-16 5:45 ` Philippe Mathieu-Daudé
1 sibling, 0 replies; 3+ messages in thread
From: Philippe Mathieu-Daudé @ 2025-10-16 5:45 UTC (permalink / raw)
To: Pierrick Bouvier, qemu-devel
Cc: Alex Bennée, Mahmoud Mandour, Manos Pitsidianakis,
Alexandre Iooss
On 16/10/25 01:28, Pierrick Bouvier wrote:
> Enhance uftrace_symbols.py to generate .dbg files, containing
> source location for every symbol present in .sym file.
> It allows to use `uftrace {replay,dump} --srcline` and show origin of
> functions, connecting trace to original source code.
>
> It was first implemented with pyelftools DWARF parser, which was way
> too slow (~minutes) to get locations for every symbol in the linux
> kernel. Thus, we use `addr2line` instead, which runs in seconds.
>
> As well, there were some bugs with latest pyelftools release,
> requiring to run master version, which is not installable with pip.
> Thus, since we now require binutils (addr2line), we can ditch pyelftools
> based implementation and simply rely on `nm` to get symbols information,
> which is faster and better.
>
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> ---
> contrib/plugins/uftrace_symbols.py | 116 +++++++++++++++++++----------
> 1 file changed, 76 insertions(+), 40 deletions(-)
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-10-16 5:46 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-15 23:28 [PATCH v2] contrib/plugins/uftrace_symbols.py: generate debug files to map symbols to source Pierrick Bouvier
2025-10-15 23:30 ` Pierrick Bouvier
2025-10-16 5:45 ` Philippe Mathieu-Daudé
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).