linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Adrian Hunter <adrian.hunter@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>,
	Jiri Olsa <jolsa@redhat.com>, Ian Rogers <irogers@google.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org
Subject: Re: [PATCH 9/9] perf symbols: Get symbols for .plt.got for x86-64
Date: Mon, 30 Jan 2023 15:26:01 -0800	[thread overview]
Message-ID: <Y9hSCR6WvpI4b5Cm@google.com> (raw)
In-Reply-To: <20230127170222.9895-10-adrian.hunter@intel.com>

On Fri, Jan 27, 2023 at 07:02:22PM +0200, Adrian Hunter wrote:
> For x86_64, determine a symbol for .plt.got entries. That requires
> computing the target offset and finding that in .rela.dyn, which in
> turn means .rela.dyn needs to be sorted by offset.
> 
> Example:
> 
>   In this example, the GNU C Library is using .plt.got for malloc and
>   free.
> 
>   Before:
> 
>     $ gcc --version
>     gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0
>     Copyright (C) 2021 Free Software Foundation, Inc.
>     This is free software; see the source for copying conditions.  There is NO
>     warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
>     $ perf record -e intel_pt//u uname
>     Linux
>     [ perf record: Woken up 1 times to write data ]
>     [ perf record: Captured and wrote 0.027 MB perf.data ]
>     $ perf script --itrace=be --ns -F-event,+addr,-period,-comm,-tid,-cpu > /tmp/cmp1.txt
> 
>   After:
> 
>     $ perf script --itrace=be --ns -F-event,+addr,-period,-comm,-tid,-cpu > /tmp/cmp2.txt
>     $ diff /tmp/cmp1.txt /tmp/cmp2.txt | head -12
>     15509,15510c15509,15510
>     < 27046.755390907:      7f0b2943e3ab _nl_normalize_codeset+0x5b (/usr/lib/x86_64-linux-gnu/libc.so.6) =>     7f0b29428380 offset_0x28380@plt+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6)
>     < 27046.755390907:      7f0b29428384 offset_0x28380@plt+0x4 (/usr/lib/x86_64-linux-gnu/libc.so.6) =>     7f0b294a5120 malloc+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6)
>     ---
>     > 27046.755390907:      7f0b2943e3ab _nl_normalize_codeset+0x5b (/usr/lib/x86_64-linux-gnu/libc.so.6) =>     7f0b29428380 malloc@plt+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6)
>     > 27046.755390907:      7f0b29428384 malloc@plt+0x4 (/usr/lib/x86_64-linux-gnu/libc.so.6) =>     7f0b294a5120 malloc+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6)
>     15821,15822c15821,15822
>     < 27046.755394865:      7f0b2943850c _nl_load_locale_from_archive+0x5bc (/usr/lib/x86_64-linux-gnu/libc.so.6) =>     7f0b29428370 offset_0x28370@plt+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6)
>     < 27046.755394865:      7f0b29428374 offset_0x28370@plt+0x4 (/usr/lib/x86_64-linux-gnu/libc.so.6) =>     7f0b294a5460 cfree@GLIBC_2.2.5+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6)
>     ---
>     > 27046.755394865:      7f0b2943850c _nl_load_locale_from_archive+0x5bc (/usr/lib/x86_64-linux-gnu/libc.so.6) =>     7f0b29428370 free@plt+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6)
>     > 27046.755394865:      7f0b29428374 free@plt+0x4 (/usr/lib/x86_64-linux-gnu/libc.so.6) =>     7f0b294a5460 cfree@GLIBC_2.2.5+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6)
> 
> Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
> ---
>  tools/perf/util/symbol-elf.c | 158 ++++++++++++++++++++++++++++++++++-
>  1 file changed, 154 insertions(+), 4 deletions(-)
> 
> diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
> index 254116d40e59..4fc8e7fc10f4 100644
> --- a/tools/perf/util/symbol-elf.c
> +++ b/tools/perf/util/symbol-elf.c
> @@ -466,28 +466,178 @@ static bool machine_is_x86(GElf_Half e_machine)
>  	return e_machine == EM_386 || e_machine == EM_X86_64;
>  }
>  
> +struct rela_dyn {
> +	GElf_Addr	offset;
> +	u32		sym_idx;
> +};
> +
> +struct rela_dyn_info {
> +	struct dso	*dso;
> +	Elf_Data	*plt_got_data;
> +	u32		nr_entries;
> +	struct rela_dyn	*sorted;
> +	Elf_Data	*dynsym_data;
> +	Elf_Data	*dynstr_data;
> +	Elf_Data	*rela_dyn_data;
> +};
> +
> +static void exit_rela_dyn(struct rela_dyn_info *di)
> +{
> +	free(di->sorted);
> +}
> +
> +static int cmp_offset(const void *a, const void *b)
> +{
> +	const struct rela_dyn *va = a;
> +	const struct rela_dyn *vb = b;
> +
> +	return va->offset < vb->offset ? -1 : (va->offset > vb->offset ? 1 : 0);
> +}
> +
> +static int sort_rela_dyn(struct rela_dyn_info *di)
> +{
> +	u32 i, n;
> +
> +	di->sorted = calloc(di->nr_entries, sizeof(di->sorted[0]));
> +	if (!di->sorted)
> +		return -1;
> +
> +	/* Get data for sorting: the offset and symbol index */
> +	for (i = 0, n = 0; i < di->nr_entries; i++) {
> +		GElf_Rela rela;
> +		u32 sym_idx;
> +
> +		gelf_getrela(di->rela_dyn_data, i, &rela);
> +		sym_idx = GELF_R_SYM(rela.r_info);
> +		if (sym_idx) {
> +			di->sorted[n].sym_idx = sym_idx;
> +			di->sorted[n].offset = rela.r_offset;
> +			n += 1;
> +		}
> +	}
> +
> +	/* Sort by offset */
> +	di->nr_entries = n;
> +	qsort(di->sorted, n, sizeof(di->sorted[0]), cmp_offset);
> +
> +	return 0;
> +}
> +
> +static void get_rela_dyn_info(Elf *elf, GElf_Ehdr *ehdr, struct rela_dyn_info *di, Elf_Scn *scn)
> +{
> +	GElf_Shdr rela_dyn_shdr;
> +	GElf_Shdr shdr;
> +
> +	di->plt_got_data = elf_getdata(scn, NULL);
> +
> +	scn = elf_section_by_name(elf, ehdr, &rela_dyn_shdr, ".rela.dyn", NULL);
> +	if (!scn || !rela_dyn_shdr.sh_link || !rela_dyn_shdr.sh_entsize)
> +		return;
> +
> +	di->nr_entries = rela_dyn_shdr.sh_size / rela_dyn_shdr.sh_entsize;
> +	di->rela_dyn_data = elf_getdata(scn, NULL);
> +
> +	scn = elf_getscn(elf, rela_dyn_shdr.sh_link);
> +	if (!scn || !gelf_getshdr(scn, &shdr) || !shdr.sh_link)
> +		return;
> +
> +	di->dynsym_data = elf_getdata(scn, NULL);
> +	di->dynstr_data = elf_getdata(elf_getscn(elf, shdr.sh_link), NULL);
> +
> +	if (!di->plt_got_data || !di->dynstr_data || !di->dynsym_data || !di->rela_dyn_data)
> +		return;
> +
> +	/* Sort into offset order */
> +	sort_rela_dyn(di);
> +}
> +
> +/* Get instruction displacement from a plt entry for x86_64 */
> +static u32 get_x86_64_plt_disp(const u8 *p)
> +{
> +	u8 endbr64[] = {0xf3, 0x0f, 0x1e, 0xfa};
> +	int n = 0;
> +
> +	/* Skip endbr64 */
> +	if (!memcmp(p, endbr64, sizeof(endbr64)))
> +		n += sizeof(endbr64);
> +	/* Skip bnd prefix */
> +	if (p[n] == 0xf2)
> +		n += 1;
> +	/* jmp with 4-byte displacement */
> +	if (p[n] == 0xff && p[n + 1] == 0x25) {
> +		n += 2;
> +		/* Also add offset from start of entry to end of instruction */
> +		return n + 4 + le32toh(*(const u32 *)(p + n));
> +	}
> +	return 0;
> +}
> +
> +static bool get_plt_got_name(GElf_Shdr *shdr, size_t i,
> +			     struct rela_dyn_info *di,
> +			     char *buf, size_t buf_sz)
> +{
> +	void *p = di->plt_got_data->d_buf + i;
> +	u32 disp = get_x86_64_plt_disp(p);
> +	struct rela_dyn vi, *vr;
> +	const char *sym_name;
> +	char *demangled;
> +	GElf_Sym sym;
> +
> +	if (!di->sorted || !disp)
> +		return false;
> +
> +	/* Compute target offset of the .plt.got entry */
> +	vi.offset = shdr->sh_offset + di->plt_got_data->d_off + i + disp;
> +
> +	/* Find that offset in .rela.dyn (sorted by offset) */
> +	vr = bsearch(&vi, di->sorted, di->nr_entries, sizeof(di->sorted[0]), cmp_offset);
> +	if (!vr)
> +		return false;
> +
> +	/* Get the associated symbol */
> +	gelf_getsym(di->dynsym_data, vr->sym_idx, &sym);
> +	sym_name = elf_sym__name(&sym, di->dynstr_data);
> +	demangled = demangle_sym(di->dso, 0, sym_name);
> +	if (demangled != NULL)
> +		sym_name = demangled;
> +
> +	snprintf(buf, buf_sz, "%s@plt", sym_name);
> +
> +	free(demangled);
> +
> +	return *sym_name;
> +}
> +
>  static int dso__synthesize_plt_got_symbols(struct dso *dso, Elf *elf,
>  					   GElf_Ehdr *ehdr,
>  					   char *buf, size_t buf_sz)
>  {
> +	struct rela_dyn_info di = { .dso = dso };
>  	struct symbol *sym;
>  	GElf_Shdr shdr;
>  	Elf_Scn *scn;
> +	int err = -1;
>  	size_t i;
>  
>  	scn = elf_section_by_name(elf, ehdr, &shdr, ".plt.got", NULL);
>  	if (!scn || !shdr.sh_entsize)
>  		return 0;
>  
> +	if (ehdr->e_machine == EM_X86_64)
> +		get_rela_dyn_info(elf, ehdr, &di, scn);

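What about EM_386?  Now I'm seeing segfaults on 32-bit test programs
that have a .plt.got section: get_plt_got_name() dereferences
di->plt_got_data before it checks di->sorted, and plt_got_data is only
set by get_rela_dyn_info(), which is only called for EM_X86_64.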

Thanks,
Namhyung


> +
>  	for (i = 0; i < shdr.sh_size; i += shdr.sh_entsize) {
> -		snprintf(buf, buf_sz, "offset_%#zx@plt", shdr.sh_offset + i);
> +		if (!get_plt_got_name(&shdr, i, &di, buf, buf_sz))
> +			snprintf(buf, buf_sz, "offset_%#zx@plt", shdr.sh_offset + i);
>  		sym = symbol__new(shdr.sh_offset + i, shdr.sh_entsize, STB_GLOBAL, STT_FUNC, buf);
>  		if (!sym)
> -			return -1;
> +			goto out;
>  		symbols__insert(&dso->symbols, sym);
>  	}
> -
> -	return 0;
> +	err = 0;
> +out:
> +	exit_rela_dyn(&di);
> +	return err;
>  }
>  
>  /*
> -- 
> 2.34.1
> 
> 

  reply	other threads:[~2023-01-30 23:26 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-27 17:02 [PATCH 0/9] perf symbols: Improve dso__synthesize_plt_symbols() for x86 Adrian Hunter
2023-01-27 17:02 ` [PATCH 1/9] perf symbols: Correct plt entry sizes " Adrian Hunter
2023-01-27 17:02 ` [PATCH 2/9] perf symbols: Add support for x86 .plt.sec Adrian Hunter
2023-01-30 17:34   ` Namhyung Kim
2023-01-30 18:35     ` Adrian Hunter
2023-01-30 22:22       ` Namhyung Kim
2023-01-31 10:14         ` Adrian Hunter
2023-01-27 17:02 ` [PATCH 3/9] perf symbols: Sort plt relocations for x86 Adrian Hunter
2023-01-27 17:02 ` [PATCH 4/9] perf symbols: Record whether a symbol is an alias for an IFUNC symbol Adrian Hunter
2023-01-27 17:02 ` [PATCH 5/9] perf symbols: Add support for IFUNC symbols for x86_64 Adrian Hunter
2023-01-27 17:02 ` [PATCH 6/9] perf symbols: Allow for .plt without header Adrian Hunter
2023-01-27 17:02 ` [PATCH 7/9] perf symbols: Allow for static executables with .plt Adrian Hunter
2023-01-27 17:02 ` [PATCH 8/9] perf symbols: Start adding support for .plt.got for x86 Adrian Hunter
2023-01-27 17:02 ` [PATCH 9/9] perf symbols: Get symbols for .plt.got for x86-64 Adrian Hunter
2023-01-30 23:26   ` Namhyung Kim [this message]
2023-01-31 10:17     ` Adrian Hunter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y9hSCR6WvpI4b5Cm@google.com \
    --to=namhyung@kernel.org \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=irogers@google.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).