All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jiri Olsa <olsajiri@gmail.com>
To: Daniel Xu <dxu@dxuuu.xyz>
Cc: acme@kernel.org, quentin@isovalent.com,
	andrii.nakryiko@gmail.com, ast@kernel.org, daniel@iogearbox.net,
	bpf@vger.kernel.org
Subject: Re: [PATCH dwarves v3] pahole: Inject kfunc decl tags into BTF
Date: Wed, 31 Jan 2024 11:17:23 +0100	[thread overview]
Message-ID: <ZboeMyIvGChjaBLY@krava> (raw)
In-Reply-To: <0f25134ec999e368478c4ca993b3b729c2a03383.1706491733.git.dxu@dxuuu.xyz>

On Sun, Jan 28, 2024 at 06:30:19PM -0700, Daniel Xu wrote:
> This commit teaches pahole to parse symbols in .BTF_ids section in
> vmlinux and discover exported kfuncs. Pahole then takes the list of
> kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> 
> Example of encoding:
> 
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l
>         121
> 
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337
>         [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static
>         [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1
> 
> This enables downstream users and tools to dynamically discover which
> kfuncs are available on a system by parsing vmlinux or module BTF, both
> available in /sys/kernel/btf.
> 
> Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> 
> ---
> Changes from v2:
> * More reliably detect kfunc membership in set8 by tracking set addr ranges
> * Rename some variables/functions to be more clear about kfunc vs func
> 
> Changes from v1:
> * Fix resource leaks
> * Fix callee -> caller typo
> * Rename btf_decl_tag from kfunc -> bpf_kfunc
> * Only grab btf_id_set funcs tagged kfunc
> * Presort btf func list
> 
>  btf_encoder.c | 347 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 347 insertions(+)
> 
> diff --git a/btf_encoder.c b/btf_encoder.c
> index fd04008..4f742b1 100644
> --- a/btf_encoder.c
> +++ b/btf_encoder.c
> @@ -34,6 +34,11 @@
>  #include <pthread.h>
>  
>  #define BTF_ENCODER_MAX_PROTO	512
> +#define BTF_IDS_SECTION		".BTF_ids"
> +#define BTF_ID_FUNC_PFX		"__BTF_ID__func__"
> +#define BTF_ID_SET8_PFX		"__BTF_ID__set8__"
> +#define BTF_SET8_KFUNCS		(1 << 0)
> +#define BTF_KFUNC_TYPE_TAG	"bpf_kfunc"
>  
>  /* state used to do later encoding of saved functions */
>  struct btf_encoder_state {
> @@ -79,6 +84,7 @@ struct btf_encoder {
>  			  gen_floats,
>  			  is_rel;
>  	uint32_t	  array_index_id;
> +	struct gobuffer   btf_funcs;

why does this need to be stored in encoder?

>  	struct {
>  		struct var_info vars[MAX_PERCPU_VAR_CNT];
>  		int		var_cnt;
> @@ -94,6 +100,17 @@ struct btf_encoder {
>  	} functions;
>  };
>  

SNIP

> +/* Returns if `sym` points to a kfunc set */
> +static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr)
> +{
> +	int *ptr = idlist->d_buf;
> +	int idx, flags;
> +	bool is_set8;
> +
> +	/* kfuncs are only found in BTF_SET8's */
> +	is_set8 = !strncmp(name, BTF_ID_SET8_PFX, sizeof(BTF_ID_SET8_PFX) - 1);
> +	if (!is_set8)
> +		return false;
> +
> +	idx = sym->st_value - idlist_addr;
> +	if (idx >= idlist->d_size) {
> +		fprintf(stderr, "%s: symbol '%s' out of bounds\n", __func__, name);
> +		return false;
> +	}
> +
> +	/* Check the set8 flags to see if it was marked as kfunc */
> +	idx = idx / sizeof(int);
> +	flags = ptr[idx + 1];
> +	return flags & BTF_SET8_KFUNCS;

I wonder it'd be easier to read/follow if we bring struct btf_id_set8
declaration in here and use it to get the flags field

> +}
> +
> +/*
> + * Parse BTF_ID symbol and return the func name.
> + *
> + * Returns:
> + *	Caller-owned string containing func name if successful.
> + *	NULL if !func or on error.
> + */
> +

SNIP

> +	/* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */
> +	if (symbols_shndx == -1 || idlist_shndx == -1) {
> +		err = 0;
> +		goto out;
> +	}
> +
> +	if (!gelf_getshdr(symscn, &shdr)) {
> +		elf_error("Failed to get ELF symbol table header");
> +		goto out;
> +	}
> +	nr_syms = shdr.sh_size / shdr.sh_entsize;
> +
> +	err = btf_encoder__collect_btf_funcs(encoder);
> +	if (err) {
> +		fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__);
> +		goto out;
> +	}
> +
> +	/* First collect all kfunc set ranges.
> +	 *
> +	 * Note we choose not to sort these ranges and accept a linear
> +	 * search when doing lookups. Reasoning is that the number of
> +	 * sets is ~O(100) and not worth the additional code to optimize.
> +	 */

I think we could event add gobuffer interface/support to sort and search
quickly the data (we use it in other place), but that can be done as follow
up when it will become a problem as you pointed out

> +	for (i = 0; i < nr_syms; i++) {
> +		struct btf_kfunc_set_range range = {};
> +		const char *name;
> +		GElf_Sym sym;
> +
> +		if (!gelf_getsym(symbols, i, &sym)) {
> +			elf_error("Failed to get ELF symbol(%d)", i);
> +			goto out;
> +		}
> +
> +		if (sym.st_shndx != idlist_shndx)
> +			continue;
> +
> +		name = elf_strptr(elf, strtabidx, sym.st_name);
> +		if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr))
> +			continue;
> +
> +		range.start = sym.st_value;
> +		range.end = sym.st_value + sym.st_size;
> +		gobuffer__add(&btf_kfunc_ranges, &range, sizeof(range));
> +	}
> +
> +	/* Now inject BTF with kfunc decl tag for detected kfuncs */
> +	for (i = 0; i < nr_syms; i++) {
> +		const struct btf_kfunc_set_range *ranges;
> +		unsigned int ranges_cnt;
> +		char *func, *name;
> +		GElf_Sym sym;
> +		bool found;
> +		int err;
> +		int j;
> +
> +		if (!gelf_getsym(symbols, i, &sym)) {
> +			elf_error("Failed to get ELF symbol(%d)", i);
> +			goto out;
> +		}
> +
> +		if (sym.st_shndx != idlist_shndx)
> +			continue;
> +
> +		name = elf_strptr(elf, strtabidx, sym.st_name);
> +		func = get_func_name(name);
> +		if (!func)
> +			continue;
> +
> +		/* Check if function belongs to a kfunc set */
> +		ranges = gobuffer__entries(&btf_kfunc_ranges);
> +		ranges_cnt = gobuffer__nr_entries(&btf_kfunc_ranges);
> +		found = false;
> +		for (j = 0; j < ranges_cnt; j++) {
> +			size_t addr = sym.st_value;

missing newline after declaration

> +			if (ranges[j].start <= addr && addr < ranges[j].end) {
> +				found = true;
> +				break;
> +			}
> +		}
> +		if (!found)

leaking func

jirka

> +			continue;
> +
> +		err = btf_encoder__tag_kfunc(encoder, func);
> +		if (err) {
> +			fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func);
> +			free(func);
> +			goto out;
> +		}
> +		free(func);
> +	}
> +
> +	err = 0;
> +out:
> +	__gobuffer__delete(&btf_kfunc_ranges);
> +	if (elf)
> +		elf_end(elf);
> +	if (fd != -1)
> +		close(fd);
> +	return err;
> +}
> +
>  int btf_encoder__encode(struct btf_encoder *encoder)
>  {
>  	int err;
> @@ -1366,6 +1704,14 @@ int btf_encoder__encode(struct btf_encoder *encoder)
>  	if (btf__type_cnt(encoder->btf) == 1)
>  		return 0;
>  
> +	/* Note vmlinux may already contain btf_decl_tag's for kfuncs. So
> +	 * take care to call this before btf_dedup().
> +	 */
> +	if (btf_encoder__tag_kfuncs(encoder)) {
> +		fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__);
> +		return -1;
> +	}
> +
>  	if (btf__dedup(encoder->btf, NULL)) {
>  		fprintf(stderr, "%s: btf__dedup failed!\n", __func__);
>  		return -1;
> @@ -1712,6 +2058,7 @@ void btf_encoder__delete(struct btf_encoder *encoder)
>  
>  	btf_encoders__delete(encoder);
>  	__gobuffer__delete(&encoder->percpu_secinfo);
> +	__gobuffer__delete(&encoder->btf_funcs);
>  	zfree(&encoder->filename);
>  	btf__free(encoder->btf);
>  	encoder->btf = NULL;
> -- 
> 2.42.1
> 

  parent reply	other threads:[~2024-01-31 10:17 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-29  1:30 [PATCH dwarves v3] pahole: Inject kfunc decl tags into BTF Daniel Xu
2024-01-29 13:05 ` Alan Maguire
2024-01-29 14:24   ` Arnaldo Carvalho de Melo
2024-01-29 16:15     ` Daniel Xu
2024-01-29 18:12     ` Alan Maguire
2024-01-31 10:17 ` Jiri Olsa [this message]
2024-02-04 18:08   ` Daniel Xu
2024-02-06 12:50     ` Jiri Olsa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZboeMyIvGChjaBLY@krava \
    --to=olsajiri@gmail.com \
    --cc=acme@kernel.org \
    --cc=andrii.nakryiko@gmail.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=dxu@dxuuu.xyz \
    --cc=quentin@isovalent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.