public inbox for dwarves@vger.kernel.org
 help / color / mirror / Atom feed
From: Eduard Zingerman <eddyz87@gmail.com>
To: dwarves@vger.kernel.org, arnaldo.melo@gmail.com
Cc: bpf@vger.kernel.org, kernel-team@fb.com, ast@kernel.org,
	daniel@iogearbox.net, andrii@kernel.org, yonghong.song@linux.dev,
	Eduard Zingerman <eddyz87@gmail.com>,
	Alan Maguire <alan.maguire@oracle.com>, Daniel Xu <dxu@dxuuu.xyz>,
	Jiri Olsa <olsajiri@gmail.com>,
	Kumar Kartikeya Dwivedi <memxor@gmail.com>,
	Vadim Fedorenko <vadfed@meta.com>,
	Vadim Fedorenko <vadim.fedorenko@linux.dev>
Subject: [PATCH dwarves v2 1/1] btf_encoder: handle .BTF_ids section endianness
Date: Fri, 22 Nov 2024 13:44:31 -0800	[thread overview]
Message-ID: <20241122214431.292196-2-eddyz87@gmail.com> (raw)
In-Reply-To: <20241122214431.292196-1-eddyz87@gmail.com>

btf_encoder__tag_kfuncs() reads .BTF_ids section to identify a set of
kfuncs present in the ELF file being processed.
This section consists of:
- arrays of uint32_t elements;
- arrays of records with the following structure:
  struct btf_id_and_flag {
      uint32_t id;
      uint32_t flags;
  };

When the endianness of a binary processed by pahole differs from the host
system's endianness, these fields require byte-swapping before use.
Currently, this byte-swapping does not occur, resulting in kfuncs not
being marked with declaration tags.

This commit resolves the issue by introducing an endianness conversion
step for the .BTF_ids section data before the main processing stage.
Since the ELF file is opened in O_RDONLY mode, gelf_xlatetom()
cannot be used for endianness conversion.
Instead, a new type is introduced:

  struct local_elf_data {
	void *d_buf;
	size_t d_size;
	int64_t d_off;
	bool owns_buf;
  };

This structure is populated from the Elf_Data object representing
the .BTF_ids section. When byte-swapping is required, a local copy
of d_buf is created.

Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Daniel Xu <dxu@dxuuu.xyz>
Cc: Jiri Olsa <olsajiri@gmail.com>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Vadim Fedorenko <vadfed@meta.com>
Fixes: 72e88f29c6f7 ("pahole: Inject kfunc decl tags into BTF")
Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 btf_encoder.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 59 insertions(+), 6 deletions(-)

diff --git a/btf_encoder.c b/btf_encoder.c
index e1adddf..06d4a61 100644
--- a/btf_encoder.c
+++ b/btf_encoder.c
@@ -33,6 +33,7 @@
 #include <stdint.h>
 #include <search.h> /* for tsearch(), tfind() and tdestroy() */
 #include <pthread.h>
+#include <byteswap.h>
 
 #define BTF_IDS_SECTION		".BTF_ids"
 #define BTF_ID_FUNC_PFX		"__BTF_ID__func__"
@@ -145,6 +146,14 @@ struct btf_kfunc_set_range {
 	uint64_t end;
 };
 
+/* Like Elf_Data, but when there is a need to change the data read from ELF */
+struct local_elf_data {
+	void *d_buf;	/* section bytes: Elf_Data's own d_buf, or a malloc'ed copy of it */
+	size_t d_size;	/* size of d_buf in bytes, copied from Elf_Data */
+	int64_t d_off;	/* copied from Elf_Data's d_off */
+	bool owns_buf;	/* true when d_buf was allocated locally and must be free()'d */
+};
+
 static LIST_HEAD(encoders);
 static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER;
 
@@ -1681,7 +1690,8 @@ out:
 }
 
 /* Returns if `sym` points to a kfunc set */
-static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr)
+static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, struct local_elf_data *idlist,
+			    size_t idlist_addr)
 {
 	void *ptr = idlist->d_buf;
 	struct btf_id_set8 *set;
@@ -1847,13 +1857,52 @@ static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *
 	return 0;
 }
 
+/* Populate 'dst' from 'src'. When byte order of 'elf' differs from the host, byte-swap
+ * the u32 words in a malloc'ed copy (ELF is opened read-only, src->d_buf can't be changed
+ * in place) and set dst->owns_buf so the caller frees it. Returns 0, -EINVAL or -ENOMEM.
+ */
+static int convert_idlist_endianness(Elf *elf, Elf_Data *src, struct local_elf_data *dst)
+{
+	char *elf_ident;
+	uint32_t *tmp;
+	int byteorder;
+	size_t i;
+
+	dst->d_size = src->d_size;
+	dst->d_off = src->d_off;
+	elf_ident = elf_getident(elf, NULL);
+	if (elf_ident == NULL) {
+		fprintf(stderr, "Cannot get ELF identification from header\n");
+		return -EINVAL;
+	}
+	byteorder = elf_ident[EI_DATA];
+	if ((BYTE_ORDER == LITTLE_ENDIAN && byteorder == ELFDATA2LSB)
+	    || (BYTE_ORDER == BIG_ENDIAN && byteorder == ELFDATA2MSB)) {
+		dst->d_buf = src->d_buf;
+		dst->owns_buf = false;
+		return 0;
+	}
+	tmp = malloc(src->d_size);
+	if (tmp == NULL) {
+		fprintf(stderr, "Cannot allocate %zu bytes of memory\n", src->d_size);
+		return -ENOMEM;
+	}
+	memcpy(tmp, src->d_buf, src->d_size);
+	dst->d_buf = tmp;
+	dst->owns_buf = true;
+	/* .BTF_ids sections consist of u32 objects; trailing bytes (if any) stay as copied */
+	for (i = 0; i < dst->d_size / sizeof(uint32_t); i++)
+		tmp[i] = bswap_32(tmp[i]);
+	return 0;
+}
+
 static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
 {
 	const char *filename = encoder->source_filename;
 	struct gobuffer btf_kfunc_ranges = {};
+	struct local_elf_data idlist = {};
 	struct gobuffer btf_funcs = {};
 	Elf_Data *symbols = NULL;
-	Elf_Data *idlist = NULL;
 	Elf_Scn *symscn = NULL;
 	int symbols_shndx = -1;
 	size_t idlist_addr = 0;
@@ -1918,7 +1967,9 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
 		} else if (!strcmp(secname, BTF_IDS_SECTION)) {
 			idlist_shndx = i;
 			idlist_addr = shdr.sh_addr;
-			idlist = data;
+			err = convert_idlist_endianness(elf, data, &idlist);
+			if (err < 0)
+				goto out;
 		}
 	}
 
@@ -1960,7 +2011,7 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
 			continue;
 
 		name = elf_strptr(elf, strtabidx, sym.st_name);
-		if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr))
+		if (!is_sym_kfunc_set(&sym, name, &idlist, idlist_addr))
 			continue;
 
 		range.start = sym.st_value;
@@ -2003,13 +2054,13 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
 			if (ranges[j].start <= addr && addr < ranges[j].end) {
 				found = true;
 				off = addr - idlist_addr;
-				if (off < 0 || off + sizeof(*pair) > idlist->d_size) {
+				if (off < 0 || off + sizeof(*pair) > idlist.d_size) {
 					fprintf(stderr, "%s: kfunc '%s' offset outside section '%s'\n",
 						__func__, func, BTF_IDS_SECTION);
 					free(func);
 					goto out;
 				}
-				pair = idlist->d_buf + off;
+				pair = idlist.d_buf + off;
 				break;
 			}
 		}
@@ -2031,6 +2082,8 @@ static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
 out:
 	__gobuffer__delete(&btf_funcs);
 	__gobuffer__delete(&btf_kfunc_ranges);
+	if (idlist.owns_buf)
+		free(idlist.d_buf);
 	if (elf)
 		elf_end(elf);
 	if (fd != -1)
-- 
2.47.0


  reply	other threads:[~2024-11-22 21:44 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-22 21:44 [PATCH dwarves v2 0/1] btf_encoder: handle .BTF_ids section endianness Eduard Zingerman
2024-11-22 21:44 ` Eduard Zingerman [this message]
2024-11-23 13:24   ` [PATCH dwarves v2 1/1] " Jiri Olsa
2024-11-26 16:25     ` Arnaldo Carvalho de Melo
2024-11-26 17:32       ` Eduard Zingerman
2024-11-26 19:02         ` Arnaldo Carvalho de Melo
2024-11-27  0:35           ` Eduard Zingerman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241122214431.292196-2-eddyz87@gmail.com \
    --to=eddyz87@gmail.com \
    --cc=alan.maguire@oracle.com \
    --cc=andrii@kernel.org \
    --cc=arnaldo.melo@gmail.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=dwarves@vger.kernel.org \
    --cc=dxu@dxuuu.xyz \
    --cc=kernel-team@fb.com \
    --cc=memxor@gmail.com \
    --cc=olsajiri@gmail.com \
    --cc=vadfed@meta.com \
    --cc=vadim.fedorenko@linux.dev \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox