public inbox for linux-kbuild@vger.kernel.org
 help / color / mirror / Atom feed
From: Petr Pavlu <petr.pavlu@suse.com>
To: Sami Tolvanen <samitolvanen@google.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>,
	Luis Chamberlain <mcgrof@kernel.org>,
	Miguel Ojeda <ojeda@kernel.org>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Matthew Maurer <mmaurer@google.com>,
	Alex Gaynor <alex.gaynor@gmail.com>,
	Wedson Almeida Filho <wedsonaf@gmail.com>,
	Gary Guo <gary@garyguo.net>, Petr Pavlu <petr.pavlu@suse.com>,
	Neal Gompa <neal@gompa.dev>, Hector Martin <marcan@marcan.st>,
	Janne Grunau <j@jannau.net>, Miroslav Benes <mbenes@suse.cz>,
	Asahi Linux <asahi@lists.linux.dev>,
	linux-kbuild@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-modules@vger.kernel.org, rust-for-linux@vger.kernel.org
Subject: Re: [PATCH v3 04/20] gendwarfksyms: Add address matching
Date: Tue, 1 Oct 2024 16:06:37 +0200	[thread overview]
Message-ID: <429b7310-3724-48a2-a8ac-e686c6945024@suse.com> (raw)
In-Reply-To: <20240923181846.549877-26-samitolvanen@google.com>

On 9/23/24 20:18, Sami Tolvanen wrote:
> The compiler may choose not to emit type information in DWARF for all
> aliases, but it's possible for each alias to be exported separately.
> To ensure we find type information for the aliases as well, read
> {section, address} tuples from the symbol table and match symbols also
> by address.
> 
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---
>  scripts/gendwarfksyms/gendwarfksyms.c |   2 +
>  scripts/gendwarfksyms/gendwarfksyms.h |  13 +++
>  scripts/gendwarfksyms/symbols.c       | 153 +++++++++++++++++++++++++-
>  3 files changed, 165 insertions(+), 3 deletions(-)
> 
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> index 096a334fa5b3..5032ec487626 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.c
> +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> @@ -105,6 +105,8 @@ int main(int argc, char **argv)
>  			return -1;
>  		}
>  
> +		symbol_read_symtab(fd);
> +
>  		dwfl = dwfl_begin(&callbacks);
>  		if (!dwfl) {
>  			error("dwfl_begin failed for '%s': %s", argv[n],
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> index 1a10d18f178e..a058647e2361 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.h
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -66,14 +66,27 @@ extern int dump_dies;
>   * symbols.c
>   */
>  
> +static inline unsigned int addr_hash(uintptr_t addr)
> +{
> +	return hash_ptr((const void *)addr);
> +}
> +
> +struct symbol_addr {
> +	uint32_t section;
> +	Elf64_Addr address;
> +};
> +
>  struct symbol {
>  	const char *name;
> +	struct symbol_addr addr;
> +	struct hlist_node addr_hash;
>  	struct hlist_node name_hash;
>  };
>  
>  typedef void (*symbol_callback_t)(struct symbol *, void *arg);
>  
>  void symbol_read_exports(FILE *file);
> +void symbol_read_symtab(int fd);
>  struct symbol *symbol_get(const char *name);
>  
>  /*
> diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
> index 1809be93d18c..d84b46675dd1 100644
> --- a/scripts/gendwarfksyms/symbols.c
> +++ b/scripts/gendwarfksyms/symbols.c
> @@ -6,9 +6,41 @@
>  #include "gendwarfksyms.h"
>  
>  #define SYMBOL_HASH_BITS 15
> +
> +/* struct symbol_addr -> struct symbol */
> +static HASHTABLE_DEFINE(symbol_addrs, 1 << SYMBOL_HASH_BITS);
> +/* name -> struct symbol */
>  static HASHTABLE_DEFINE(symbol_names, 1 << SYMBOL_HASH_BITS);
>  
> -static int for_each(const char *name, symbol_callback_t func, void *data)
> +static inline unsigned int symbol_addr_hash(const struct symbol_addr *addr)
> +{
> +	return hash_32(addr->section ^ addr_hash(addr->address));
> +}
> +
> +static int __for_each_addr(struct symbol *sym, symbol_callback_t func,
> +			   void *data)
> +{
> +	struct hlist_node *tmp;
> +	struct symbol *match = NULL;
> +	int processed = 0;
> +
> +	hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash,
> +				    symbol_addr_hash(&sym->addr)) {
> +		if (match == sym)
> +			continue; /* Already processed */
> +
> +		if (match->addr.section == sym->addr.section &&
> +		    match->addr.address == sym->addr.address) {
> +			func(match, data);
> +			++processed;
> +		}
> +	}
> +
> +	return processed;
> +}
> +
> +static int for_each(const char *name, bool name_only, symbol_callback_t func,
> +		    void *data)
>  {
>  	struct hlist_node *tmp;
>  	struct symbol *match;
> @@ -21,9 +53,13 @@ static int for_each(const char *name, symbol_callback_t func, void *data)
>  		if (strcmp(match->name, name))
>  			continue;
>  
> +		/* Call func for the match, and all address matches */
>  		if (func)
>  			func(match, data);
>  
> +		if (!name_only && match->addr.section != SHN_UNDEF)
> +			return checkp(__for_each_addr(match, func, data)) + 1;
> +
>  		return 1;
>  	}
>  
> @@ -32,7 +68,7 @@ static int for_each(const char *name, symbol_callback_t func, void *data)
>  
>  static bool is_exported(const char *name)
>  {
> -	return checkp(for_each(name, NULL, NULL)) > 0;
> +	return checkp(for_each(name, true, NULL, NULL)) > 0;
>  }
>  
>  void symbol_read_exports(FILE *file)
> @@ -55,6 +91,7 @@ void symbol_read_exports(FILE *file)
>  
>  		sym = xcalloc(1, sizeof(struct symbol));
>  		sym->name = name;
> +		sym->addr.section = SHN_UNDEF;
>  
>  		hash_add(symbol_names, &sym->name_hash, hash_str(sym->name));
>  		++nsym;
> @@ -77,6 +114,116 @@ struct symbol *symbol_get(const char *name)
>  {
>  	struct symbol *sym = NULL;
>  
> -	for_each(name, get_symbol, &sym);
> +	for_each(name, false, get_symbol, &sym);
>  	return sym;
>  }

What is the reason that the for_each() call in symbol_get() is invoked
with name_only=false?

> +
> +typedef void (*elf_symbol_callback_t)(const char *name, GElf_Sym *sym,
> +				      Elf32_Word xndx, void *arg);
> +
> +static void elf_for_each_global(int fd, elf_symbol_callback_t func, void *arg)
> +{
> +	size_t sym_size;
> +	GElf_Shdr shdr_mem;
> +	GElf_Shdr *shdr;
> +	Elf_Data *xndx_data = NULL;
> +	Elf_Scn *scn;
> +	Elf *elf;
> +
> +	if (elf_version(EV_CURRENT) != EV_CURRENT)
> +		error("elf_version failed: %s", elf_errmsg(-1));
> +
> +	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
> +	if (!elf)
> +		error("elf_begin failed: %s", elf_errmsg(-1));
> +
> +	scn = elf_nextscn(elf, NULL);
> +
> +	while (scn) {
> +		shdr = gelf_getshdr(scn, &shdr_mem);
> +
> +		if (shdr && shdr->sh_type == SHT_SYMTAB_SHNDX) {
> +			xndx_data = elf_getdata(scn, NULL);
> +			break;
> +		}
> +
> +		scn = elf_nextscn(elf, scn);
> +	}
> +
> +	sym_size = gelf_fsize(elf, ELF_T_SYM, 1, EV_CURRENT);
> +	scn = elf_nextscn(elf, NULL);
> +
> +	while (scn) {
> +		shdr = gelf_getshdr(scn, &shdr_mem);
> +
> +		if (shdr && shdr->sh_type == SHT_SYMTAB) {
> +			Elf_Data *data = elf_getdata(scn, NULL);
> +			unsigned int nsyms;
> +			unsigned int n;
> +
> +			if (shdr->sh_entsize != sym_size)
> +				error("expected sh_entsize (%lu) to be %zu",
> +				      shdr->sh_entsize, sym_size);
> +
> +			nsyms = shdr->sh_size / shdr->sh_entsize;
> +
> +			for (n = 1; n < nsyms; ++n) {
> +				const char *name = NULL;
> +				Elf32_Word xndx = 0;
> +				GElf_Sym sym_mem;
> +				GElf_Sym *sym;
> +
> +				sym = gelf_getsymshndx(data, xndx_data, n,
> +						       &sym_mem, &xndx);

Please check for sym == NULL here. gelf_getsymshndx() returns NULL on
failure, e.g. when the file is malformed and .symtab_shndx is truncated,
and sym is dereferenced immediately below.

> +
> +				if (GELF_ST_BIND(sym->st_info) == STB_LOCAL)
> +					continue;
> +
> +				if (sym->st_shndx != SHN_XINDEX)
> +					xndx = sym->st_shndx;
> +
> +				name = elf_strptr(elf, shdr->sh_link,
> +						  sym->st_name);
> +
> +				/* Skip empty symbol names */
> +				if (name && *name)
> +					func(name, sym, xndx, arg);
> +			}
> +		}
> +
> +		scn = elf_nextscn(elf, scn);
> +	}
> +
> +	check(elf_end(elf));
> +}
> +
> +static void set_symbol_addr(struct symbol *sym, void *arg)
> +{
> +	struct symbol_addr *addr = arg;
> +
> +	if (sym->addr.section == SHN_UNDEF) {
> +		sym->addr = *addr;
> +		hash_add(symbol_addrs, &sym->addr_hash,
> +			 symbol_addr_hash(&sym->addr));
> +
> +		debug("%s -> { %u, %lx }", sym->name, sym->addr.section,
> +		      sym->addr.address);
> +	} else {
> +		warn("multiple addresses for symbol %s?", sym->name);
> +	}
> +}
> +
> +static void elf_set_symbol_addr(const char *name, GElf_Sym *sym,
> +				Elf32_Word xndx, void *arg)
> +{
> +	struct symbol_addr addr = { .section = xndx, .address = sym->st_value };
> +
> +	/* Set addresses for exported symbols */
> +	if (addr.section != SHN_UNDEF)
> +		for_each(name, true, set_symbol_addr, &addr);
> +}
> +
> +void symbol_read_symtab(int fd)
> +{
> +	elf_for_each_global(fd, elf_set_symbol_addr, NULL);
> +}

-- 
Thanks,
Petr

  reply	other threads:[~2024-10-01 14:06 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-23 18:18 [PATCH v3 00/20] Implement DWARF modversions Sami Tolvanen
2024-09-23 18:18 ` [PATCH v3 01/20] scripts: import more list macros Sami Tolvanen
2024-10-06 17:36   ` Masahiro Yamada
2024-09-23 18:18 ` [PATCH v3 02/20] scripts: move genksyms crc32 implementation to a common include Sami Tolvanen
2024-09-23 18:18 ` [PATCH v3 03/20] tools: Add gendwarfksyms Sami Tolvanen
2024-10-01 14:04   ` Petr Pavlu
2024-10-01 19:59     ` Sami Tolvanen
2024-09-23 18:18 ` [PATCH v3 04/20] gendwarfksyms: Add address matching Sami Tolvanen
2024-10-01 14:06   ` Petr Pavlu [this message]
2024-10-01 20:06     ` Sami Tolvanen
2024-09-23 18:18 ` [PATCH v3 05/20] gendwarfksyms: Expand base_type Sami Tolvanen
2024-10-01 14:08   ` Petr Pavlu
2024-09-23 18:18 ` [PATCH v3 06/20] gendwarfksyms: Add a cache for processed DIEs Sami Tolvanen
2024-10-01 14:10   ` Petr Pavlu
2024-10-01 20:18     ` Sami Tolvanen
2024-09-23 18:18 ` [PATCH v3 07/20] gendwarfksyms: Expand type modifiers and typedefs Sami Tolvanen
2024-10-01 14:11   ` Petr Pavlu
2024-09-23 18:18 ` [PATCH v3 08/20] gendwarfksyms: Expand subroutine_type Sami Tolvanen
2024-10-01 14:12   ` Petr Pavlu
2024-09-23 18:18 ` [PATCH v3 09/20] gendwarfksyms: Expand array_type Sami Tolvanen
2024-10-01 14:13   ` Petr Pavlu
2024-09-23 18:18 ` [PATCH v3 10/20] gendwarfksyms: Expand structure types Sami Tolvanen
2024-10-01 14:16   ` Petr Pavlu
2024-10-01 21:20     ` Sami Tolvanen
2024-09-23 18:18 ` [PATCH v3 11/20] gendwarfksyms: Limit structure expansion Sami Tolvanen
2024-09-23 18:18 ` [PATCH v3 12/20] gendwarfksyms: Add die_map debugging Sami Tolvanen
2024-09-23 18:18 ` [PATCH v3 13/20] gendwarfksyms: Add symtypes output Sami Tolvanen
2024-09-23 18:19 ` [PATCH v3 14/20] gendwarfksyms: Add symbol versioning Sami Tolvanen
2024-09-23 18:19 ` [PATCH v3 15/20] gendwarfksyms: Add support for kABI rules Sami Tolvanen
2024-09-23 18:19 ` [PATCH v3 16/20] gendwarfksyms: Add support for reserved and ignored fields Sami Tolvanen
2024-09-23 18:19 ` [PATCH v3 17/20] gendwarfksyms: Add support for symbol type pointers Sami Tolvanen
2024-09-23 18:19 ` [PATCH v3 18/20] export: Add __gendwarfksyms_ptr_ references to exported symbols Sami Tolvanen
2024-09-23 18:19 ` [PATCH v3 19/20] kbuild: Add gendwarfksyms as an alternative to genksyms Sami Tolvanen
2024-09-23 18:19 ` [PATCH v3 20/20] Documentation/kbuild: Add DWARF module versioning Sami Tolvanen
2024-09-28 21:46 ` [PATCH v3 00/20] Implement DWARF modversions Neal Gompa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=429b7310-3724-48a2-a8ac-e686c6945024@suse.com \
    --to=petr.pavlu@suse.com \
    --cc=alex.gaynor@gmail.com \
    --cc=asahi@lists.linux.dev \
    --cc=gary@garyguo.net \
    --cc=gregkh@linuxfoundation.org \
    --cc=j@jannau.net \
    --cc=linux-kbuild@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-modules@vger.kernel.org \
    --cc=marcan@marcan.st \
    --cc=masahiroy@kernel.org \
    --cc=mbenes@suse.cz \
    --cc=mcgrof@kernel.org \
    --cc=mmaurer@google.com \
    --cc=neal@gompa.dev \
    --cc=ojeda@kernel.org \
    --cc=rust-for-linux@vger.kernel.org \
    --cc=samitolvanen@google.com \
    --cc=wedsonaf@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.