From: Petr Pavlu <petr.pavlu@suse.com>
To: Sami Tolvanen <samitolvanen@google.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>,
Luis Chamberlain <mcgrof@kernel.org>,
Miguel Ojeda <ojeda@kernel.org>,
Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
Matthew Maurer <mmaurer@google.com>,
Alex Gaynor <alex.gaynor@gmail.com>,
Wedson Almeida Filho <wedsonaf@gmail.com>,
Gary Guo <gary@garyguo.net>, Petr Pavlu <petr.pavlu@suse.com>,
Neal Gompa <neal@gompa.dev>, Hector Martin <marcan@marcan.st>,
Janne Grunau <j@jannau.net>, Asahi Linux <asahi@lists.linux.dev>,
linux-kbuild@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-modules@vger.kernel.org, rust-for-linux@vger.kernel.org
Subject: Re: [PATCH v2 03/19] gendwarfksyms: Add address matching
Date: Tue, 27 Aug 2024 14:40:30 +0200 [thread overview]
Message-ID: <d63ddefe-a6f6-4a5b-9330-11438fca8f9f@suse.com> (raw)
In-Reply-To: <20240815173903.4172139-24-samitolvanen@google.com>
On 8/15/24 19:39, Sami Tolvanen wrote:
> The compiler may choose not to emit type information in DWARF for all
> aliases, but it's possible for each alias to be exported separately.
> To ensure we find type information for the aliases as well, read
> {section, address} tuples from the symbol table and match symbols also
> by address.
>
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---
> scripts/gendwarfksyms/gendwarfksyms.c | 2 +
> scripts/gendwarfksyms/gendwarfksyms.h | 7 ++
> scripts/gendwarfksyms/symbols.c | 161 +++++++++++++++++++++++++-
> 3 files changed, 165 insertions(+), 5 deletions(-)
>
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> index d209b237766b..e2f8ee5a4bf3 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.c
> +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> @@ -118,6 +118,8 @@ int main(int argc, const char **argv)
> return -1;
> }
>
> + check(symbol_read_symtab(fd));
> +
> dwfl = dwfl_begin(&callbacks);
> if (!dwfl) {
> error("dwfl_begin failed for '%s': %s", object_files[n],
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> index 03f3e408a839..cb9106dfddb9 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.h
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -61,6 +61,10 @@ extern bool debug;
> /*
> * symbols.c
> */
> +struct symbol_addr {
> + uint32_t section;
> + Elf64_Addr address;
> +};
>
> static inline u32 name_hash(const char *name)
> {
> @@ -69,10 +73,13 @@ static inline u32 name_hash(const char *name)
>
> struct symbol {
> const char *name;
> + struct symbol_addr addr;
> + struct hlist_node addr_hash;
> struct hlist_node name_hash;
> };
>
> extern int symbol_read_exports(FILE *file);
> +extern int symbol_read_symtab(int fd);
> extern struct symbol *symbol_get(const char *name);
>
> /*
> diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
> index 673ad9cf9e77..f96acb941196 100644
> --- a/scripts/gendwarfksyms/symbols.c
> +++ b/scripts/gendwarfksyms/symbols.c
> @@ -6,11 +6,43 @@
> #include "gendwarfksyms.h"
>
> #define SYMBOL_HASH_BITS 15
> +
> +/* struct symbol_addr -> struct symbol */
> +static DEFINE_HASHTABLE(symbol_addrs, SYMBOL_HASH_BITS);
> +/* name -> struct symbol */
> static DEFINE_HASHTABLE(symbol_names, SYMBOL_HASH_BITS);
>
> +static inline u32 symbol_addr_hash(const struct symbol_addr *addr)
> +{
> + return jhash(addr, sizeof(struct symbol_addr), 0);
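I would be careful here and avoid including the padding between
symbol_addr.section and symbol_addr.address in the hash calculation:
padding bytes have unspecified values unless the whole struct is
explicitly zeroed, so hashing them can make equal addresses hash
differently.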
> +}
> +
> typedef int (*symbol_callback_t)(struct symbol *, void *arg);
>
> -static int for_each(const char *name, symbol_callback_t func, void *data)
> +static int __for_each_addr(struct symbol *sym, symbol_callback_t func,
> + void *data)
> +{
> + struct hlist_node *tmp;
> + struct symbol *match = NULL;
> + int processed = 0;
> +
> + hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash,
> + symbol_addr_hash(&sym->addr)) {
> + if (match == sym)
> + continue; /* Already processed */
> +
> + if (match->addr.section == sym->addr.section &&
> + match->addr.address == sym->addr.address) {
> + check(func(match, data));
> + ++processed;
> + }
> + }
> +
> + return processed;
> +}
> +
> +static int for_each(const char *name, bool name_only, symbol_callback_t func,
> + void *data)
> {
> struct hlist_node *tmp;
> struct symbol *match;
> @@ -23,9 +55,13 @@ static int for_each(const char *name, symbol_callback_t func, void *data)
> if (strcmp(match->name, name))
> continue;
>
> + /* Call func for the match, and all address matches */
> if (func)
> check(func(match, data));
>
> + if (!name_only && match->addr.section != SHN_UNDEF)
> + return checkp(__for_each_addr(match, func, data)) + 1;
> +
> return 1;
> }
>
> @@ -34,7 +70,7 @@ static int for_each(const char *name, symbol_callback_t func, void *data)
>
> static bool is_exported(const char *name)
> {
> - return checkp(for_each(name, NULL, NULL)) > 0;
> + return checkp(for_each(name, true, NULL, NULL)) > 0;
> }
>
> int symbol_read_exports(FILE *file)
> @@ -57,13 +93,14 @@ int symbol_read_exports(FILE *file)
> if (is_exported(name))
> continue; /* Ignore duplicates */
>
> - sym = malloc(sizeof(struct symbol));
> + sym = calloc(1, sizeof(struct symbol));
> if (!sym) {
> - error("malloc failed");
> + error("calloc failed");
> return -1;
> }
>
> sym->name = name;
> + sym->addr.section = SHN_UNDEF;
> name = NULL;
>
> hash_add(symbol_names, &sym->name_hash, name_hash(sym->name));
> @@ -91,6 +128,120 @@ struct symbol *symbol_get(const char *name)
> {
> struct symbol *sym = NULL;
>
> - for_each(name, get_symbol, &sym);
> + for_each(name, false, get_symbol, &sym);
> return sym;
> }
> +
> +typedef int (*elf_symbol_callback_t)(const char *name, GElf_Sym *sym,
> + Elf32_Word xndx, void *arg);
> +
> +static int elf_for_each_symbol(int fd, elf_symbol_callback_t func, void *arg)
> +{
> + size_t sym_size;
> + GElf_Shdr shdr_mem;
> + GElf_Shdr *shdr;
> + Elf_Data *xndx_data = NULL;
> + Elf_Scn *scn;
> + Elf *elf;
> +
> + if (elf_version(EV_CURRENT) != EV_CURRENT) {
> + error("elf_version failed: %s", elf_errmsg(-1));
> + return -1;
> + }
> +
> + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
> + if (!elf) {
> + error("elf_begin failed: %s", elf_errmsg(-1));
> + return -1;
> + }
> +
> + sym_size = gelf_getclass(elf) == ELFCLASS32 ? sizeof(Elf32_Sym) :
> + sizeof(Elf64_Sym);
> +
> + scn = elf_nextscn(elf, NULL);
> +
> + while (scn) {
> + shdr = gelf_getshdr(scn, &shdr_mem);
> +
> + if (shdr && shdr->sh_type == SHT_SYMTAB_SHNDX) {
> + xndx_data = elf_getdata(scn, NULL);
> + break;
> + }
> +
> + scn = elf_nextscn(elf, scn);
> + }
> +
> + scn = elf_nextscn(elf, NULL);
> +
> + while (scn) {
> + shdr = gelf_getshdr(scn, &shdr_mem);
> +
> + if (shdr && shdr->sh_type == SHT_SYMTAB) {
> + Elf_Data *data = elf_getdata(scn, NULL);
> + unsigned int nsyms = data->d_size / sym_size;
I think strictly speaking this should be:
size_t nsyms = shdr->sh_size / shdr->sh_entsize;
... and the code could check that shdr->sh_entsize is the same as what
gelf_fsize(elf, ELF_T_SYM, 1, EV_CURRENT) returns.
> + unsigned int n;
> +
> + for (n = 0; n < nsyms; ++n) {
The first symbol in the symbol table is always undefined, so the loop
can start from 1.
Alternatively, since elf_for_each_symbol() ends up being used in the
entire series only with process_symbol(), which skips symbols with
local binding, the function could be renamed to
elf_for_each_global_symbol() and start the loop from shdr->sh_info.
> + const char *name = NULL;
> + Elf32_Word xndx = 0;
> + GElf_Sym sym_mem;
> + GElf_Sym *sym;
> +
> + sym = gelf_getsymshndx(data, xndx_data, n,
> + &sym_mem, &xndx);
> +
> + if (sym->st_shndx != SHN_XINDEX)
> + xndx = sym->st_shndx;
> +
> + name = elf_strptr(elf, shdr->sh_link,
> + sym->st_name);
> +
> + /* Skip empty symbol names */
> + if (name && *name &&
> + checkp(func(name, sym, xndx, arg)) > 0)
> + break;
> + }
> + }
> +
> + scn = elf_nextscn(elf, scn);
> + }
> +
> + return check(elf_end(elf));
> +}
> +
> +static int set_symbol_addr(struct symbol *sym, void *arg)
> +{
> + struct symbol_addr *addr = arg;
> +
> + if (sym->addr.section == SHN_UNDEF) {
> + sym->addr.section = addr->section;
> + sym->addr.address = addr->address;
> + hash_add(symbol_addrs, &sym->addr_hash,
> + symbol_addr_hash(&sym->addr));
> +
> + debug("%s -> { %u, %lx }", sym->name, sym->addr.section,
> + sym->addr.address);
> + } else {
> + warn("multiple addresses for symbol %s?", sym->name);
> + }
> +
> + return 0;
> +}
> +
> +static int process_symbol(const char *name, GElf_Sym *sym, Elf32_Word xndx,
> + void *arg)
> +{
> + struct symbol_addr addr = { .section = xndx, .address = sym->st_value };
> +
> + /* Set addresses for exported symbols */
> + if (GELF_ST_BIND(sym->st_info) != STB_LOCAL &&
> + addr.section != SHN_UNDEF)
> + checkp(for_each(name, true, set_symbol_addr, &addr));
> +
> + return 0;
> +}
> +
> +int symbol_read_symtab(int fd)
> +{
> + return elf_for_each_symbol(fd, process_symbol, NULL);
> +}
--
Thanks,
Petr
next prev parent reply other threads:[~2024-08-27 12:40 UTC|newest]
Thread overview: 105+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-15 17:39 [PATCH v2 00/19] Implement DWARF modversions Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 01/19] tools: Add gendwarfksyms Sami Tolvanen
2024-08-16 7:14 ` Greg Kroah-Hartman
2024-08-27 16:44 ` Sami Tolvanen
2024-08-26 17:41 ` Petr Pavlu
2024-08-26 18:47 ` Sami Tolvanen
2024-08-28 12:31 ` Petr Pavlu
2024-08-28 21:28 ` Sami Tolvanen
2024-08-28 17:45 ` Masahiro Yamada
2024-08-28 21:32 ` Sami Tolvanen
2024-09-05 2:29 ` Masahiro Yamada
2024-09-05 20:52 ` Sami Tolvanen
2024-09-10 9:43 ` Masahiro Yamada
2024-09-10 21:09 ` Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 02/19] gendwarfksyms: Add symbol list handling Sami Tolvanen
2024-08-27 9:16 ` Petr Pavlu
2024-08-27 18:47 ` Sami Tolvanen
2024-08-28 12:35 ` Petr Pavlu
2024-08-28 23:09 ` Sami Tolvanen
2024-09-02 9:52 ` Petr Pavlu
2024-08-28 18:16 ` Masahiro Yamada
2024-08-28 21:50 ` Sami Tolvanen
2024-09-01 10:59 ` Masahiro Yamada
2024-09-04 20:51 ` Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 03/19] gendwarfksyms: Add address matching Sami Tolvanen
2024-08-27 12:40 ` Petr Pavlu [this message]
2024-08-27 21:28 ` Sami Tolvanen
2024-08-28 18:22 ` Masahiro Yamada
2024-08-28 21:56 ` Sami Tolvanen
2024-09-01 11:10 ` Masahiro Yamada
2024-09-04 20:48 ` Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 04/19] gendwarfksyms: Add support for type pointers Sami Tolvanen
2024-08-28 6:50 ` Masahiro Yamada
2024-08-28 7:15 ` Masahiro Yamada
2024-08-28 21:58 ` Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 05/19] gendwarfksyms: Expand base_type Sami Tolvanen
2024-08-28 12:46 ` Petr Pavlu
2024-08-28 22:19 ` Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 06/19] gendwarfksyms: Add a cache for processed DIEs Sami Tolvanen
2024-08-28 18:15 ` Masahiro Yamada
2024-08-28 22:27 ` Sami Tolvanen
2024-09-02 10:05 ` Petr Pavlu
2024-09-05 17:19 ` Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 07/19] gendwarfksyms: Expand type modifiers and typedefs Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 08/19] gendwarfksyms: Expand subroutine_type Sami Tolvanen
2024-09-03 15:11 ` Petr Pavlu
2024-09-05 17:22 ` Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 09/19] gendwarfksyms: Expand array_type Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 10/19] gendwarfksyms: Expand structure types Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 11/19] gendwarfksyms: Limit structure expansion Sami Tolvanen
2024-09-03 15:15 ` Petr Pavlu
2024-09-05 18:15 ` Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 12/19] gendwarfksyms: Add die_map debugging Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 13/19] gendwarfksyms: Add symtypes output Sami Tolvanen
2024-09-10 14:58 ` Petr Pavlu
2024-09-10 21:15 ` Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 14/19] gendwarfksyms: Add symbol versioning Sami Tolvanen
2024-09-11 10:08 ` Petr Pavlu
2024-09-11 16:03 ` Sami Tolvanen
2024-09-12 10:28 ` Petr Pavlu
2024-08-15 17:39 ` [PATCH v2 15/19] gendwarfksyms: Add support for declaration-only data structures Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 16/19] gendwarfksyms: Add support for reserved structure fields Sami Tolvanen
2024-08-16 7:20 ` Greg Kroah-Hartman
2024-08-16 15:50 ` Sami Tolvanen
2024-08-17 7:41 ` Greg Kroah-Hartman
2024-08-17 13:19 ` Benno Lossin
2024-08-19 18:25 ` Greg Kroah-Hartman
2024-08-19 21:46 ` Benno Lossin
2024-08-19 19:38 ` Sami Tolvanen
2024-08-19 22:16 ` Benno Lossin
2024-08-20 18:47 ` Sami Tolvanen
2024-08-20 20:03 ` Matthew Maurer
2024-08-21 11:31 ` Benno Lossin
2024-08-21 23:01 ` Sami Tolvanen
2024-08-21 23:29 ` Greg Kroah-Hartman
2024-08-22 5:55 ` Benno Lossin
2024-08-22 7:29 ` Greg Kroah-Hartman
2024-08-22 12:00 ` Benno Lossin
2024-08-22 23:53 ` Greg Kroah-Hartman
2024-08-23 19:17 ` Sami Tolvanen
2024-08-24 13:29 ` Benno Lossin
2024-08-24 13:27 ` Benno Lossin
2024-08-30 9:34 ` Miroslav Benes
2024-08-31 0:05 ` Sami Tolvanen
2024-09-11 11:43 ` Petr Pavlu
2024-09-12 16:06 ` Sami Tolvanen
2024-09-12 18:08 ` Benno Lossin
2024-09-12 20:58 ` Sami Tolvanen
2024-09-12 21:58 ` Benno Lossin
2024-09-12 22:37 ` Sami Tolvanen
2024-09-13 8:00 ` Benno Lossin
2024-08-15 17:39 ` [PATCH v2 17/19] export: Add __gendwarfksyms_ptr_ references to exported symbols Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 18/19] x86/asm-prototypes: Include <asm/ptrace.h> Sami Tolvanen
2024-09-01 10:50 ` Masahiro Yamada
2024-09-04 20:47 ` Sami Tolvanen
2024-08-15 17:39 ` [PATCH v2 19/19] kbuild: Add gendwarfksyms as an alternative to genksyms Sami Tolvanen
2024-08-15 20:13 ` [PATCH v2 00/19] Implement DWARF modversions Sedat Dilek
2024-08-15 20:47 ` Sami Tolvanen
2024-08-21 0:12 ` Sedat Dilek
2024-08-16 7:15 ` Greg Kroah-Hartman
2024-08-22 16:43 ` Jonathan Corbet
2024-08-22 17:57 ` Sami Tolvanen
2024-08-28 7:04 ` Masahiro Yamada
2024-08-28 22:53 ` Sami Tolvanen
2024-09-02 9:57 ` Petr Pavlu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d63ddefe-a6f6-4a5b-9330-11438fca8f9f@suse.com \
--to=petr.pavlu@suse.com \
--cc=alex.gaynor@gmail.com \
--cc=asahi@lists.linux.dev \
--cc=gary@garyguo.net \
--cc=gregkh@linuxfoundation.org \
--cc=j@jannau.net \
--cc=linux-kbuild@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-modules@vger.kernel.org \
--cc=marcan@marcan.st \
--cc=masahiroy@kernel.org \
--cc=mcgrof@kernel.org \
--cc=mmaurer@google.com \
--cc=neal@gompa.dev \
--cc=ojeda@kernel.org \
--cc=rust-for-linux@vger.kernel.org \
--cc=samitolvanen@google.com \
--cc=wedsonaf@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox