linux-trace-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
@ 2025-10-13 13:15 pengdonglin
  2025-10-13 23:40 ` Andrii Nakryiko
  0 siblings, 1 reply; 14+ messages in thread
From: pengdonglin @ 2025-10-13 13:15 UTC (permalink / raw)
  To: andrii
  Cc: linux-kernel, linux-trace-kernel, bpf, pengdonglin,
	Eduard Zingerman, Alexei Starovoitov, Andrii Nakryiko, Song Liu,
	Masami Hiramatsu, Steven Rostedt, pengdonglin

From: pengdonglin <pengdonglin@xiaomi.com>

Currently, when the funcgraph-args feature is in use, the
btf_find_by_name_kind function is invoked quite frequently. However,
this function only supports linear search. When the number of btf_type
entries to search through is large, such as in the vmlinux BTF which
contains over 80,000 named btf_types, it consumes a significant amount
of time.

This patch optimizes the btf_find_by_name_kind lookup by sorting BTF
types according to their names and kinds. Additionally, it modifies
the search direction. Now, it first searches the BTF and then its base.

It should be noted that this change incurs some additional memory and
boot-time overhead. Therefore, the option is disabled by default.

Here is a test case:

 # echo 1 > options/funcgraph-args
 # echo function_graph > current_tracer

Before:
 # time cat trace | wc -l
 124176

 real    0m16.154s
 user    0m0.000s
 sys     0m15.962s

After:
 # time cat trace | wc -l
 124176

 real    0m0.948s
 user    0m0.000s
 sys     0m0.973s

An improvement of more than 20 times can be observed.

Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Song Liu <song@kernel.org>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: pengdonglin <pengdonglin@xiaomi.com>
Signed-off-by: pengdonglin <dolinux.peng@gmail.com>
---
 include/linux/btf.h |   1 +
 kernel/bpf/Kconfig  |  13 ++++
 kernel/bpf/btf.c    | 160 +++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 165 insertions(+), 9 deletions(-)

diff --git a/include/linux/btf.h b/include/linux/btf.h
index f06976ffb63f..ddc53a7ac7cd 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -220,6 +220,7 @@ bool btf_is_module(const struct btf *btf);
 bool btf_is_vmlinux(const struct btf *btf);
 struct module *btf_try_get_module(const struct btf *btf);
 u32 btf_nr_types(const struct btf *btf);
+u32 btf_type_cnt(const struct btf *btf);
 struct btf *btf_base_btf(const struct btf *btf);
 bool btf_type_is_i32(const struct btf_type *t);
 bool btf_type_is_i64(const struct btf_type *t);
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index eb3de35734f0..01d9d766c1dc 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -101,4 +101,17 @@ config BPF_LSM
 
 	  If you are unsure how to answer this question, answer N.
 
+config BPF_SORT_BTF_BY_NAME_KIND
+	bool "Sort BTF type by name and kind"
+	depends on DEBUG_INFO_BTF
+	default n
+	help
+	  Sort BTF types by name and kind to enable binary search, improving
+	  the performance of btf_find_by_name_kind. Currently applies to
+	  vmlinux and kernel module BTFs. Note that this option introduces
+	  extra memory and boot-time overhead.
+
+	  For instance, a BTF file with 80,000 named btf_types consumes
+	  approximately 312 KB of additional memory.
+
 endmenu # "BPF subsystem"
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 0de8fc8a0e0b..aed7349e30b8 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -250,6 +250,11 @@ struct btf_struct_ops_tab {
 	struct bpf_struct_ops_desc ops[];
 };
 
+struct btf_sorted_ids {
+	u32 cnt;
+	u32 ids[];
+};
+
 struct btf {
 	void *data;
 	struct btf_type **types;
@@ -268,6 +273,9 @@ struct btf {
 	struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab;
 	struct btf_struct_metas *struct_meta_tab;
 	struct btf_struct_ops_tab *struct_ops_tab;
+#ifdef CONFIG_BPF_SORT_BTF_BY_NAME_KIND
+	struct btf_sorted_ids *sorted_ids;
+#endif
 
 	/* split BTF support */
 	struct btf *base_btf;
@@ -470,6 +478,9 @@ static int btf_resolve(struct btf_verifier_env *env,
 static int btf_func_check(struct btf_verifier_env *env,
 			  const struct btf_type *t);
 
+static int cmp_name_kind(const char *sa, u8 ka,
+			 const char *sb, u8 kb);
+
 static bool btf_type_is_modifier(const struct btf_type *t)
 {
 	/* Some of them is not strictly a C modifier
@@ -544,22 +555,59 @@ u32 btf_nr_types(const struct btf *btf)
 	return total;
 }
 
+u32 btf_type_cnt(const struct btf *btf)
+{
+	return btf->start_id + btf->nr_types;
+}
+
 s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
 {
 	const struct btf_type *t;
+	struct btf_sorted_ids *sorted_ids = NULL;
 	const char *tname;
 	u32 i, total;
 
-	total = btf_nr_types(btf);
-	for (i = 1; i < total; i++) {
-		t = btf_type_by_id(btf, i);
-		if (BTF_INFO_KIND(t->info) != kind)
-			continue;
+	do {
+#ifdef CONFIG_BPF_SORT_BTF_BY_NAME_KIND
+		sorted_ids = btf->sorted_ids;
+#endif
+		if (sorted_ids) {
+			/* binary search */
+			u32 start, end, mid;
+			u32 *ids = sorted_ids->ids;
+			int ret;
+
+			start = 0;
+			end = sorted_ids->cnt - 1;
+			while (start <= end) {
+				mid = start + (end - start) / 2;
+				t = btf_type_by_id(btf, ids[mid]);
+				tname = btf_name_by_offset(btf, t->name_off);
+				ret = cmp_name_kind(tname, BTF_INFO_KIND(t->info),
+						    name, kind);
+				if (ret < 0)
+					start = mid + 1;
+				else if (ret > 0)
+					end = mid - 1;
+				else
+					return ids[mid];
+			}
+		} else {
+			/* linear search */
+			total = btf_type_cnt(btf);
+			for (i = btf->start_id; i < total; i++) {
+				t = btf_type_by_id(btf, i);
+				if (BTF_INFO_KIND(t->info) != kind)
+					continue;
+
+				tname = btf_name_by_offset(btf, t->name_off);
+				if (!strcmp(tname, name))
+					return i;
+			}
+		}
 
-		tname = btf_name_by_offset(btf, t->name_off);
-		if (!strcmp(tname, name))
-			return i;
-	}
+		btf = btf->base_btf;
+	} while (btf);
 
 	return -ENOENT;
 }
@@ -1737,12 +1785,29 @@ static void btf_free_struct_ops_tab(struct btf *btf)
 	btf->struct_ops_tab = NULL;
 }
 
+#ifdef CONFIG_BPF_SORT_BTF_BY_NAME_KIND
+static void btf_free_sorted_ids(struct btf *btf)
+{
+	struct btf_sorted_ids *sorted_ids = btf->sorted_ids;
+
+	if (!sorted_ids)
+		return;
+
+	kvfree(sorted_ids);
+	btf->sorted_ids = NULL;
+}
+#else
+static void btf_free_sorted_ids(struct btf *btf)
+{}
+#endif
+
 static void btf_free(struct btf *btf)
 {
 	btf_free_struct_meta_tab(btf);
 	btf_free_dtor_kfunc_tab(btf);
 	btf_free_kfunc_set_tab(btf);
 	btf_free_struct_ops_tab(btf);
+	btf_free_sorted_ids(btf);
 	kvfree(btf->types);
 	kvfree(btf->resolved_sizes);
 	kvfree(btf->resolved_ids);
@@ -6189,6 +6254,81 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty
 	return kctx_type_id;
 }
 
+#ifdef CONFIG_BPF_SORT_BTF_BY_NAME_KIND
+static int cmp_name_kind(const char *sa, u8 ka, const char *sb, u8 kb)
+{
+	return strcmp(sa, sb) ?: (ka - kb);
+}
+
+static int btf_compare_name_kind(const void *a, const void *b, const void *priv)
+{
+	const struct btf *btf = priv;
+	const struct btf_type *ba, *bb;
+	u32 ia = *(const u32 *)a;
+	u32 ib = *(const u32 *)b;
+
+	ba = btf_type_by_id(btf, ia);
+	bb = btf_type_by_id(btf, ib);
+
+	return cmp_name_kind(btf_name_by_offset(btf, ba->name_off),
+			     BTF_INFO_KIND(ba->info),
+			     btf_name_by_offset(btf, bb->name_off),
+			     BTF_INFO_KIND(bb->info));
+}
+
+static void btf_sort_by_name_kind(struct btf *btf)
+{
+	const struct btf_type *t;
+	struct btf_sorted_ids *sorted_ids;
+	const char *name;
+	u32 *ids;
+	u32 total, cnt = 0;
+	u32 i, j = 0;
+
+	total = btf_type_cnt(btf);
+	for (i = btf->start_id; i < total; i++) {
+		t = btf_type_by_id(btf, i);
+		name = btf_name_by_offset(btf, t->name_off);
+		if (str_is_empty(name))
+			continue;
+		cnt++;
+	}
+
+	/* Use linear search when the number is below the threshold */
+	if (cnt < 8)
+		return;
+
+	sorted_ids = kvmalloc(struct_size(sorted_ids, ids, cnt), GFP_KERNEL);
+	if (!sorted_ids) {
+		pr_warn("Failed to allocate memory for sorted_ids\n");
+		return;
+	}
+
+	ids = sorted_ids->ids;
+	for (i = btf->start_id; i < total; i++) {
+		t = btf_type_by_id(btf, i);
+		name = btf_name_by_offset(btf, t->name_off);
+		if (str_is_empty(name))
+			continue;
+		ids[j++] = i;
+	}
+
+	sort_r(ids, cnt, sizeof(ids[0]), btf_compare_name_kind, NULL, btf);
+
+	sorted_ids->cnt = cnt;
+	btf->sorted_ids = sorted_ids;
+}
+#else
+static int cmp_name_kind(const char *sa, u8 ka, const char *sb, u8 kb)
+{
+	return 0;
+}
+
+static void btf_sort_by_name_kind(struct btf *btf)
+{
+}
+#endif
+
 BTF_ID_LIST_SINGLE(bpf_ctx_convert_btf_id, struct, bpf_ctx_convert)
 
 static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name,
@@ -6230,6 +6370,7 @@ static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name
 	if (err)
 		goto errout;
 
+	btf_sort_by_name_kind(btf);
 	refcount_set(&btf->refcnt, 1);
 
 	return btf;
@@ -6362,6 +6503,7 @@ static struct btf *btf_parse_module(const char *module_name, const void *data,
 		base_btf = vmlinux_btf;
 	}
 
+	btf_sort_by_name_kind(btf);
 	btf_verifier_env_free(env);
 	refcount_set(&btf->refcnt, 1);
 	return btf;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-13 13:15 [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup pengdonglin
@ 2025-10-13 23:40 ` Andrii Nakryiko
  2025-10-13 23:53   ` Alexei Starovoitov
  2025-10-14  1:54   ` Donglin Peng
  0 siblings, 2 replies; 14+ messages in thread
From: Andrii Nakryiko @ 2025-10-13 23:40 UTC (permalink / raw)
  To: pengdonglin
  Cc: andrii, linux-kernel, linux-trace-kernel, bpf, Eduard Zingerman,
	Alexei Starovoitov, Song Liu, Masami Hiramatsu, Steven Rostedt,
	pengdonglin

On Mon, Oct 13, 2025 at 6:16 AM pengdonglin <dolinux.peng@gmail.com> wrote:
>
> From: pengdonglin <pengdonglin@xiaomi.com>
>
> Currently, when the funcgraph-args feature is in use, the
> btf_find_by_name_kind function is invoked quite frequently. However,
> this function only supports linear search. When the number of btf_type
> entries to search through is large, such as in the vmlinux BTF which
> contains over 80,000 named btf_types, it consumes a significant amount
> of time.
>
> This patch optimizes the btf_find_by_name_kind lookup by sorting BTF
> types according to their names and kinds. Additionally, it modifies
> the search direction. Now, it first searches the BTF and then its base.

Well, the latter is a meaningful change outside of sorting. Split it
out and justify separately?

>
> It should be noted that this change incurs some additional memory and
> boot-time overhead. Therefore, the option is disabled by default.
>
> Here is a test case:
>
>  # echo 1 > options/funcgraph-args
>  # echo function_graph > current_tracer
>
> Before:
>  # time cat trace | wc -l
>  124176
>
>  real    0m16.154s
>  user    0m0.000s
>  sys     0m15.962s
>
> After:
>  # time cat trace | wc -l
>  124176
>
>  real    0m0.948s
>  user    0m0.000s
>  sys     0m0.973s
>
> An improvement of more than 20 times can be observed.
>
> Cc: Eduard Zingerman <eddyz87@gmail.com>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
> Cc: Song Liu <song@kernel.org>
> Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Signed-off-by: pengdonglin <pengdonglin@xiaomi.com>
> Signed-off-by: pengdonglin <dolinux.peng@gmail.com>
> ---
>  include/linux/btf.h |   1 +
>  kernel/bpf/Kconfig  |  13 ++++
>  kernel/bpf/btf.c    | 160 +++++++++++++++++++++++++++++++++++++++++---
>  3 files changed, 165 insertions(+), 9 deletions(-)
>

Just a few observations (if we decide to do the sorting of BTF by name
in the kernel):

- given we always know kind we are searching for, I'd sort by kind,
then by name, it probably will be a touch faster because we'll be
quickly skipping lots of elements clustered by kind we don't care
about;

- instead of having BPF_SORT_BTF_BY_NAME_KIND, we should probably just
have a lazy sorting approach, and maybe employ a bit more
sophisticated heuristic. E.g., not by number of BTF types (or at least
not just by that), but by the total number of entries we had to skip
to find something. For small BTFs we might not reach this budget ever.
For vmlinux BTF we are almost definitely hitting it on
first-second-third search. Once the condition is hit, allocate
sorted_ids index, sort, remember. On subsequent searches use the
index.

WDYT?

[...]

> +static void btf_sort_by_name_kind(struct btf *btf)
> +{
> +       const struct btf_type *t;
> +       struct btf_sorted_ids *sorted_ids;
> +       const char *name;
> +       u32 *ids;
> +       u32 total, cnt = 0;
> +       u32 i, j = 0;
> +
> +       total = btf_type_cnt(btf);
> +       for (i = btf->start_id; i < total; i++) {
> +               t = btf_type_by_id(btf, i);
> +               name = btf_name_by_offset(btf, t->name_off);
> +               if (str_is_empty(name))
> +                       continue;
> +               cnt++;
> +       }
> +
> +       /* Use linear search when the number is below the threshold */
> +       if (cnt < 8)

kind of a random threshold, at least give it a name

> +               return;
> +
> +       sorted_ids = kvmalloc(struct_size(sorted_ids, ids, cnt), GFP_KERNEL);
> +       if (!sorted_ids) {
> +               pr_warn("Failed to allocate memory for sorted_ids\n");
> +               return;
> +       }

[...]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-13 23:40 ` Andrii Nakryiko
@ 2025-10-13 23:53   ` Alexei Starovoitov
  2025-10-14  0:15     ` Andrii Nakryiko
  2025-10-14  1:54   ` Donglin Peng
  1 sibling, 1 reply; 14+ messages in thread
From: Alexei Starovoitov @ 2025-10-13 23:53 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: pengdonglin, Andrii Nakryiko, LKML, linux-trace-kernel, bpf,
	Eduard Zingerman, Alexei Starovoitov, Song Liu, Masami Hiramatsu,
	Steven Rostedt, pengdonglin

On Mon, Oct 13, 2025 at 4:40 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> Just a few observations (if we decide to do the sorting of BTF by name
> in the kernel):

iirc we discussed it in the past and decided to do sorting in pahole
and let the kernel verify whether it's sorted or not.
Then no extra memory is needed.
Or was that idea discarded for some reason?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-13 23:53   ` Alexei Starovoitov
@ 2025-10-14  0:15     ` Andrii Nakryiko
  2025-10-14  0:22       ` Alexei Starovoitov
  0 siblings, 1 reply; 14+ messages in thread
From: Andrii Nakryiko @ 2025-10-14  0:15 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: pengdonglin, Andrii Nakryiko, LKML, linux-trace-kernel, bpf,
	Eduard Zingerman, Alexei Starovoitov, Song Liu, Masami Hiramatsu,
	Steven Rostedt, pengdonglin

On Mon, Oct 13, 2025 at 4:53 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Mon, Oct 13, 2025 at 4:40 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > Just a few observations (if we decide to do the sorting of BTF by name
> > in the kernel):
>
> iirc we discussed it in the past and decided to do sorting in pahole
> and let the kernel verify whether it's sorted or not.
> Then no extra memory is needed.
> Or was that idea discarded for some reason?

Don't really remember at this point, tbh. Pre-sorting should work
(though I'd argue that then we should only sort by name to make this
sorting universally useful, doing linear search over kinds is fast,
IMO). Pre-sorting won't work for program BTFs, don't know how
important that is. This indexing on demand approach would be
universal. ¯\_(ツ)_/¯

Overall, paying 300KB for sorted index for vmlinux BTF for cases where
we repeatedly need this seems ok to me, tbh.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-14  0:15     ` Andrii Nakryiko
@ 2025-10-14  0:22       ` Alexei Starovoitov
  2025-10-14  1:54         ` Donglin Peng
  0 siblings, 1 reply; 14+ messages in thread
From: Alexei Starovoitov @ 2025-10-14  0:22 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: pengdonglin, Andrii Nakryiko, LKML, linux-trace-kernel, bpf,
	Eduard Zingerman, Alexei Starovoitov, Song Liu, Masami Hiramatsu,
	Steven Rostedt, pengdonglin

On Mon, Oct 13, 2025 at 5:15 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Oct 13, 2025 at 4:53 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Mon, Oct 13, 2025 at 4:40 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > Just a few observations (if we decide to do the sorting of BTF by name
> > > in the kernel):
> >
> > iirc we discussed it in the past and decided to do sorting in pahole
> > and let the kernel verify whether it's sorted or not.
> > Then no extra memory is needed.
> > Or was that idea discarded for some reason?
>
> Don't really remember at this point, tbh. Pre-sorting should work
> (though I'd argue that then we should only sort by name to make this
> sorting universally useful, doing linear search over kinds is fast,
> IMO). Pre-sorting won't work for program BTFs, don't know how
> important that is. This indexing on demand approach would be
> universal. ¯\_(ツ)_/¯
>
> Overall, paying 300KB for sorted index for vmlinux BTF for cases where
> we repeatedly need this seems ok to me, tbh.

If pahole sorting works I don't see why consuming even 300k is ok.
kallsyms are sorted during the build too.

In the other thread we discuss adding LOCSEC for ~6M. That thing should
be pahole-sorted too.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-13 23:40 ` Andrii Nakryiko
  2025-10-13 23:53   ` Alexei Starovoitov
@ 2025-10-14  1:54   ` Donglin Peng
  1 sibling, 0 replies; 14+ messages in thread
From: Donglin Peng @ 2025-10-14  1:54 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: andrii, linux-kernel, linux-trace-kernel, bpf, Eduard Zingerman,
	Alexei Starovoitov, Song Liu, Masami Hiramatsu, Steven Rostedt,
	pengdonglin

On Tue, Oct 14, 2025 at 7:40 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Oct 13, 2025 at 6:16 AM pengdonglin <dolinux.peng@gmail.com> wrote:
> >
> > From: pengdonglin <pengdonglin@xiaomi.com>
> >
> > Currently, when the funcgraph-args feature is in use, the
> > btf_find_by_name_kind function is invoked quite frequently. However,
> > this function only supports linear search. When the number of btf_type
> > entries to search through is large, such as in the vmlinux BTF which
> > contains over 80,000 named btf_types, it consumes a significant amount
> > of time.
> >
> > This patch optimizes the btf_find_by_name_kind lookup by sorting BTF
> > types according to their names and kinds. Additionally, it modifies
> > the search direction. Now, it first searches the BTF and then its base.
>
> Well, the latter is a meaningful change outside of sorting. Split it
> out and justify separately?

Thanks, I will split it out in v2.

>
> >
> > It should be noted that this change incurs some additional memory and
> > boot-time overhead. Therefore, the option is disabled by default.
> >
> > Here is a test case:
> >
> >  # echo 1 > options/funcgraph-args
> >  # echo function_graph > current_tracer
> >
> > Before:
> >  # time cat trace | wc -l
> >  124176
> >
> >  real    0m16.154s
> >  user    0m0.000s
> >  sys     0m15.962s
> >
> > After:
> >  # time cat trace | wc -l
> >  124176
> >
> >  real    0m0.948s
> >  user    0m0.000s
> >  sys     0m0.973s
> >
> > An improvement of more than 20 times can be observed.
> >
> > Cc: Eduard Zingerman <eddyz87@gmail.com>
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
> > Cc: Song Liu <song@kernel.org>
> > Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > Cc: Steven Rostedt <rostedt@goodmis.org>
> > Signed-off-by: pengdonglin <pengdonglin@xiaomi.com>
> > Signed-off-by: pengdonglin <dolinux.peng@gmail.com>
> > ---
> >  include/linux/btf.h |   1 +
> >  kernel/bpf/Kconfig  |  13 ++++
> >  kernel/bpf/btf.c    | 160 +++++++++++++++++++++++++++++++++++++++++---
> >  3 files changed, 165 insertions(+), 9 deletions(-)
> >
>
> Just a few observations (if we decide to do the sorting of BTF by name
> in the kernel):
>
> - given we always know kind we are searching for, I'd sort by kind,
> then by name, it probably will be a touch faster because we'll be
> quickly skipping lots of elements clustered by kind we don't care
> about;

Good catch, thanks.

>
> - instead of having BPF_SORT_BTF_BY_NAME_KIND, we should probably just
> have a lazy sorting approach, and maybe employ a bit more
> sophisticated heuristic. E.g., not by number of BTF types (or at least
> not just by that), but by the total number of entries we had to skip
> to find something. For small BTFs we might not reach this budget ever.
> For vmlinux BTF we are almost definitely hitting it on
> first-second-third search. Once the condition is hit, allocate
> sorted_ids index, sort, remember. On subsequent searches use the
> index.

Thanks, I appreciate the suggestion and will include it in v2.
However, due to the
memory overhead, I believe a BPF_SORT_BTF_BY_NAME_KIND option might
be necessary.

>
> WDYT?
>
> [...]
>
> > +static void btf_sort_by_name_kind(struct btf *btf)
> > +{
> > +       const struct btf_type *t;
> > +       struct btf_sorted_ids *sorted_ids;
> > +       const char *name;
> > +       u32 *ids;
> > +       u32 total, cnt = 0;
> > +       u32 i, j = 0;
> > +
> > +       total = btf_type_cnt(btf);
> > +       for (i = btf->start_id; i < total; i++) {
> > +               t = btf_type_by_id(btf, i);
> > +               name = btf_name_by_offset(btf, t->name_off);
> > +               if (str_is_empty(name))
> > +                       continue;
> > +               cnt++;
> > +       }
> > +
> > +       /* Use linear search when the number is below the threshold */
> > +       if (cnt < 8)
>
> kind of a random threshold, at least give it a name

Thanks, I will fix it in v2.

>
> > +               return;
> > +
> > +       sorted_ids = kvmalloc(struct_size(sorted_ids, ids, cnt), GFP_KERNEL);
> > +       if (!sorted_ids) {
> > +               pr_warn("Failed to allocate memory for sorted_ids\n");
> > +               return;
> > +       }
>
> [...]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-14  0:22       ` Alexei Starovoitov
@ 2025-10-14  1:54         ` Donglin Peng
  2025-10-14  2:48           ` Alexei Starovoitov
  0 siblings, 1 reply; 14+ messages in thread
From: Donglin Peng @ 2025-10-14  1:54 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Andrii Nakryiko, Andrii Nakryiko, LKML, linux-trace-kernel, bpf,
	Eduard Zingerman, Alexei Starovoitov, Song Liu, Masami Hiramatsu,
	Steven Rostedt, pengdonglin

On Tue, Oct 14, 2025 at 8:22 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Mon, Oct 13, 2025 at 5:15 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Mon, Oct 13, 2025 at 4:53 PM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> > >
> > > On Mon, Oct 13, 2025 at 4:40 PM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > Just a few observations (if we decide to do the sorting of BTF by name
> > > > in the kernel):
> > >
> > > iirc we discussed it in the past and decided to do sorting in pahole
> > > and let the kernel verify whether it's sorted or not.
> > > Then no extra memory is needed.
> > > Or was that idea discarded for some reason?
> >
> > Don't really remember at this point, tbh. Pre-sorting should work
> > (though I'd argue that then we should only sort by name to make this
> > sorting universally useful, doing linear search over kinds is fast,
> > IMO). Pre-sorting won't work for program BTFs, don't know how
> > important that is. This indexing on demand approach would be
> > universal. ¯\_(ツ)_/¯
> >
> > Overall, paying 300KB for sorted index for vmlinux BTF for cases where
> > we repeatedly need this seems ok to me, tbh.
>
> If pahole sorting works I don't see why consuming even 300k is ok.
> kallsyms are sorted during the build too.

Thanks. We did discuss pre-sorting in pahole in the threads:

https://lore.kernel.org/all/CAADnVQLMHUNE95eBXdy6=+gHoFHRsihmQ75GZvGy-hSuHoaT5A@mail.gmail.com/
https://lore.kernel.org/all/CAEf4BzaXHrjoEWmEcvK62bqKuT3de__+juvGctR3=e8avRWpMQ@mail.gmail.com/

However, since that approach depends on newer pahole features and
btf_find_by_name_kind is already being called quite frequently, I suggest
we first implement sorting within the kernel, and subsequently add pre-sorting
support in pahole.

>
> In the other thread we discuss adding LOCSEC for ~6M. That thing should
> be pahole-sorted too.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-14  1:54         ` Donglin Peng
@ 2025-10-14  2:48           ` Alexei Starovoitov
  2025-10-14  4:53             ` Donglin Peng
  0 siblings, 1 reply; 14+ messages in thread
From: Alexei Starovoitov @ 2025-10-14  2:48 UTC (permalink / raw)
  To: Donglin Peng
  Cc: Andrii Nakryiko, Andrii Nakryiko, LKML, linux-trace-kernel, bpf,
	Eduard Zingerman, Alexei Starovoitov, Song Liu, Masami Hiramatsu,
	Steven Rostedt, pengdonglin

On Mon, Oct 13, 2025 at 6:54 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
>
> On Tue, Oct 14, 2025 at 8:22 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Mon, Oct 13, 2025 at 5:15 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Mon, Oct 13, 2025 at 4:53 PM Alexei Starovoitov
> > > <alexei.starovoitov@gmail.com> wrote:
> > > >
> > > > On Mon, Oct 13, 2025 at 4:40 PM Andrii Nakryiko
> > > > <andrii.nakryiko@gmail.com> wrote:
> > > > >
> > > > > Just a few observations (if we decide to do the sorting of BTF by name
> > > > > in the kernel):
> > > >
> > > > iirc we discussed it in the past and decided to do sorting in pahole
> > > > and let the kernel verify whether it's sorted or not.
> > > > Then no extra memory is needed.
> > > > Or was that idea discarded for some reason?
> > >
> > > Don't really remember at this point, tbh. Pre-sorting should work
> > > (though I'd argue that then we should only sort by name to make this
> > > sorting universally useful, doing linear search over kinds is fast,
> > > IMO). Pre-sorting won't work for program BTFs, don't know how
> > > important that is. This indexing on demand approach would be
> > > universal. ¯\_(ツ)_/¯
> > >
> > > Overall, paying 300KB for sorted index for vmlinux BTF for cases where
> > > we repeatedly need this seems ok to me, tbh.
> >
> > If pahole sorting works I don't see why consuming even 300k is ok.
> > kallsyms are sorted during the build too.
>
> Thanks. We did discuss pre-sorting in pahole in the threads:
>
> https://lore.kernel.org/all/CAADnVQLMHUNE95eBXdy6=+gHoFHRsihmQ75GZvGy-hSuHoaT5A@mail.gmail.com/
> https://lore.kernel.org/all/CAEf4BzaXHrjoEWmEcvK62bqKuT3de__+juvGctR3=e8avRWpMQ@mail.gmail.com/
>
> However, since that approach depends on newer pahole features and
> btf_find_by_name_kind is already being called quite frequently, I suggest
> we first implement sorting within the kernel, and subsequently add pre-sorting
> support in pahole.

and then what? Remove it from the kernel when pahole is newer?
I'd rather not do this churn in the first place.

Since you revived that thread from 2024 and did not
follow up with pahole changes since then, I don't believe that
you will do them if we land kernel changes first.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-14  2:48           ` Alexei Starovoitov
@ 2025-10-14  4:53             ` Donglin Peng
  2025-10-14  8:05               ` Alan Maguire
  2025-10-15  1:54               ` Alexei Starovoitov
  0 siblings, 2 replies; 14+ messages in thread
From: Donglin Peng @ 2025-10-14  4:53 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Andrii Nakryiko, Andrii Nakryiko, LKML, linux-trace-kernel, bpf,
	Eduard Zingerman, Alexei Starovoitov, Song Liu, Masami Hiramatsu,
	Steven Rostedt, pengdonglin

On Tue, Oct 14, 2025 at 10:48 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Mon, Oct 13, 2025 at 6:54 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
> >
> > On Tue, Oct 14, 2025 at 8:22 AM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> > >
> > > On Mon, Oct 13, 2025 at 5:15 PM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > On Mon, Oct 13, 2025 at 4:53 PM Alexei Starovoitov
> > > > <alexei.starovoitov@gmail.com> wrote:
> > > > >
> > > > > On Mon, Oct 13, 2025 at 4:40 PM Andrii Nakryiko
> > > > > <andrii.nakryiko@gmail.com> wrote:
> > > > > >
> > > > > > Just a few observations (if we decide to do the sorting of BTF by name
> > > > > > in the kernel):
> > > > >
> > > > > iirc we discussed it in the past and decided to do sorting in pahole
> > > > > and let the kernel verify whether it's sorted or not.
> > > > > Then no extra memory is needed.
> > > > > Or was that idea discarded for some reason?
> > > >
> > > > Don't really remember at this point, tbh. Pre-sorting should work
> > > > (though I'd argue that then we should only sort by name to make this
> > > > sorting universally useful, doing linear search over kinds is fast,
> > > > IMO). Pre-sorting won't work for program BTFs, don't know how
> > > > important that is. This indexing on demand approach would be
> > > > universal. ¯\_(ツ)_/¯
> > > >
> > > > Overall, paying 300KB for sorted index for vmlinux BTF for cases where
> > > > we repeatedly need this seems ok to me, tbh.
> > >
> > > If pahole sorting works I don't see why consuming even 300k is ok.
> > > kallsyms are sorted during the build too.
> >
> > Thanks. We did discuss pre-sorting in pahole in the threads:
> >
> > https://lore.kernel.org/all/CAADnVQLMHUNE95eBXdy6=+gHoFHRsihmQ75GZvGy-hSuHoaT5A@mail.gmail.com/
> > https://lore.kernel.org/all/CAEf4BzaXHrjoEWmEcvK62bqKuT3de__+juvGctR3=e8avRWpMQ@mail.gmail.com/
> >
> > However, since that approach depends on newer pahole features and
> > btf_find_by_name_kind is already being called quite frequently, I suggest
> > we first implement sorting within the kernel, and subsequently add pre-sorting
> > support in pahole.
>
> and then what? Remove it from the kernel when pahole is newer?
> I'd rather not do this churn in the first place.

Apologies for the formatting issues in my previous email—sending this again
 for clarity.

Thank you for your feedback. Your concerns are completely valid.

I’d like to suggest a dual-mechanism approach:
1. If BTF is generated by a newer pahole (with pre-sorting support), the
    kernel would use the pre-sorted data directly.
2. For BTF from older pahole versions, the kernel would handle sorting
    at load time or later.

This would provide performance benefits immediately while preserving
 backward compatibility. The kernel-side sorting would remain intact
moving forward, avoiding future churn.

>
> Since you revived that thread from 2024 and did not
> follow up with pahole changes since then, I don't believe that
> you will do them if we land kernel changes first.

Regarding the pahole changes: this is now my highest priority. I’ve
already incorporated it into my development plan and will begin
working on the patches shortly.

What do you think about this approach? Would this be acceptable?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-14  4:53             ` Donglin Peng
@ 2025-10-14  8:05               ` Alan Maguire
  2025-10-15  1:12                 ` Donglin Peng
  2025-10-15  1:54               ` Alexei Starovoitov
  1 sibling, 1 reply; 14+ messages in thread
From: Alan Maguire @ 2025-10-14  8:05 UTC (permalink / raw)
  To: Donglin Peng, Alexei Starovoitov
  Cc: Andrii Nakryiko, Andrii Nakryiko, LKML, linux-trace-kernel, bpf,
	Eduard Zingerman, Alexei Starovoitov, Song Liu, Masami Hiramatsu,
	Steven Rostedt, pengdonglin

On 14/10/2025 05:53, Donglin Peng wrote:
> On Tue, Oct 14, 2025 at 10:48 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
>>
>> On Mon, Oct 13, 2025 at 6:54 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
>>>
>>> On Tue, Oct 14, 2025 at 8:22 AM Alexei Starovoitov
>>> <alexei.starovoitov@gmail.com> wrote:
>>>>
>>>> On Mon, Oct 13, 2025 at 5:15 PM Andrii Nakryiko
>>>> <andrii.nakryiko@gmail.com> wrote:
>>>>>
>>>>> On Mon, Oct 13, 2025 at 4:53 PM Alexei Starovoitov
>>>>> <alexei.starovoitov@gmail.com> wrote:
>>>>>>
>>>>>> On Mon, Oct 13, 2025 at 4:40 PM Andrii Nakryiko
>>>>>> <andrii.nakryiko@gmail.com> wrote:
>>>>>>>
>>>>>>> Just a few observations (if we decide to do the sorting of BTF by name
>>>>>>> in the kernel):
>>>>>>
>>>>>> iirc we discussed it in the past and decided to do sorting in pahole
>>>>>> and let the kernel verify whether it's sorted or not.
>>>>>> Then no extra memory is needed.
>>>>>> Or was that idea discarded for some reason?
>>>>>
>>>>> Don't really remember at this point, tbh. Pre-sorting should work
>>>>> (though I'd argue that then we should only sort by name to make this
>>>>> sorting universally useful, doing linear search over kinds is fast,
>>>>> IMO). Pre-sorting won't work for program BTFs, don't know how
>>>>> important that is. This indexing on demand approach would be
>>>>> universal. ¯\_(ツ)_/¯
>>>>>
>>>>> Overall, paying 300KB for sorted index for vmlinux BTF for cases where
>>>>> we repeatedly need this seems ok to me, tbh.
>>>>
>>>> If pahole sorting works I don't see why consuming even 300k is ok.
>>>> kallsyms are sorted during the build too.
>>>
>>> Thanks. We did discuss pre-sorting in pahole in the threads:
>>>
>>> https://lore.kernel.org/all/CAADnVQLMHUNE95eBXdy6=+gHoFHRsihmQ75GZvGy-hSuHoaT5A@mail.gmail.com/
>>> https://lore.kernel.org/all/CAEf4BzaXHrjoEWmEcvK62bqKuT3de__+juvGctR3=e8avRWpMQ@mail.gmail.com/
>>>
>>> However, since that approach depends on newer pahole features and
>>> btf_find_by_name_kind is already being called quite frequently, I suggest
>>> we first implement sorting within the kernel, and subsequently add pre-sorting
>>> support in pahole.
>>
>> and then what? Remove it from the kernel when pahole is newer?
>> I'd rather not do this churn in the first place.
> 
> Apologies for the formatting issues in my previous email—sending this again
>  for clarity.
> 
> Thank you for your feedback. Your concerns are completely valid.
> 
> I’d like to suggest a dual-mechanism approach:
> 1. If BTF is generated by a newer pahole (with pre-sorting support), the
>     kernel would use the pre-sorted data directly.
> 2. For BTF from older pahole versions, the kernel would handle sorting
>     at load time or later.
> 
> This would provide performance benefits immediately while preserving
>  backward compatibility. The kernel-side sorting would remain intact
> moving forward, avoiding future churn.
>

If you're taking the approach of doing both - which is best I think -
I'd suggest it might be helpful to look at the bpf_relocate.c code; it's
shared between libbpf and kernel, so you could potentially add shared
code to do sorting in libbpf (which pahole would use) and the kernel
would use too; this would help ensure the behaviour is identical.

Maybe for pahole/libbpf sorting could be done via a new BTF dedup()
option, since dedup is the time we finalize the BTF representation?

Alan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-14  8:05               ` Alan Maguire
@ 2025-10-15  1:12                 ` Donglin Peng
  0 siblings, 0 replies; 14+ messages in thread
From: Donglin Peng @ 2025-10-15  1:12 UTC (permalink / raw)
  To: Alan Maguire
  Cc: Alexei Starovoitov, Andrii Nakryiko, Andrii Nakryiko, LKML,
	linux-trace-kernel, bpf, Eduard Zingerman, Alexei Starovoitov,
	Song Liu, Masami Hiramatsu, Steven Rostedt, pengdonglin

On Tue, Oct 14, 2025 at 4:06 PM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> On 14/10/2025 05:53, Donglin Peng wrote:
> > On Tue, Oct 14, 2025 at 10:48 AM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> >>
> >> On Mon, Oct 13, 2025 at 6:54 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
> >>>
> >>> On Tue, Oct 14, 2025 at 8:22 AM Alexei Starovoitov
> >>> <alexei.starovoitov@gmail.com> wrote:
> >>>>
> >>>> On Mon, Oct 13, 2025 at 5:15 PM Andrii Nakryiko
> >>>> <andrii.nakryiko@gmail.com> wrote:
> >>>>>
> >>>>> On Mon, Oct 13, 2025 at 4:53 PM Alexei Starovoitov
> >>>>> <alexei.starovoitov@gmail.com> wrote:
> >>>>>>
> >>>>>> On Mon, Oct 13, 2025 at 4:40 PM Andrii Nakryiko
> >>>>>> <andrii.nakryiko@gmail.com> wrote:
> >>>>>>>
> >>>>>>> Just a few observations (if we decide to do the sorting of BTF by name
> >>>>>>> in the kernel):
> >>>>>>
> >>>>>> iirc we discussed it in the past and decided to do sorting in pahole
> >>>>>> and let the kernel verify whether it's sorted or not.
> >>>>>> Then no extra memory is needed.
> >>>>>> Or was that idea discarded for some reason?
> >>>>>
> >>>>> Don't really remember at this point, tbh. Pre-sorting should work
> >>>>> (though I'd argue that then we should only sort by name to make this
> >>>>> sorting universally useful, doing linear search over kinds is fast,
> >>>>> IMO). Pre-sorting won't work for program BTFs, don't know how
> >>>>> important that is. This indexing on demand approach would be
> >>>>> universal. ¯\_(ツ)_/¯
> >>>>>
> >>>>> Overall, paying 300KB for sorted index for vmlinux BTF for cases where
> >>>>> we repeatedly need this seems ok to me, tbh.
> >>>>
> >>>> If pahole sorting works I don't see why consuming even 300k is ok.
> >>>> kallsyms are sorted during the build too.
> >>>
> >>> Thanks. We did discuss pre-sorting in pahole in the threads:
> >>>
> >>> https://lore.kernel.org/all/CAADnVQLMHUNE95eBXdy6=+gHoFHRsihmQ75GZvGy-hSuHoaT5A@mail.gmail.com/
> >>> https://lore.kernel.org/all/CAEf4BzaXHrjoEWmEcvK62bqKuT3de__+juvGctR3=e8avRWpMQ@mail.gmail.com/
> >>>
> >>> However, since that approach depends on newer pahole features and
> >>> btf_find_by_name_kind is already being called quite frequently, I suggest
> >>> we first implement sorting within the kernel, and subsequently add pre-sorting
> >>> support in pahole.
> >>
> >> and then what? Remove it from the kernel when pahole is newer?
> >> I'd rather not do this churn in the first place.
> >
> > Apologies for the formatting issues in my previous email—sending this again
> >  for clarity.
> >
> > Thank you for your feedback. Your concerns are completely valid.
> >
> > I’d like to suggest a dual-mechanism approach:
> > 1. If BTF is generated by a newer pahole (with pre-sorting support), the
> >     kernel would use the pre-sorted data directly.
> > 2. For BTF from older pahole versions, the kernel would handle sorting
> >     at load time or later.
> >
> > This would provide performance benefits immediately while preserving
> >  backward compatibility. The kernel-side sorting would remain intact
> > moving forward, avoiding future churn.
> >
>
> If you're taking the approach of doing both - which is best I think -
> I'd suggest it might be helpful to look at the bpf_relocate.c code; it's
> shared between libbpf and kernel, so you could potentially add shared
> code to do sorting in libbpf (which pahole would use) and the kernel
> would use too; this would help ensure the behaviour is identical.
>
> Maybe for pahole/libbpf sorting could be done via a new BTF dedup()
> option, since dedup is the time we finalize the BTF representation?

Thanks for the suggestion. I'll look into that.
>
> Alan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-14  4:53             ` Donglin Peng
  2025-10-14  8:05               ` Alan Maguire
@ 2025-10-15  1:54               ` Alexei Starovoitov
  2025-10-15  3:43                 ` Donglin Peng
  1 sibling, 1 reply; 14+ messages in thread
From: Alexei Starovoitov @ 2025-10-15  1:54 UTC (permalink / raw)
  To: Donglin Peng
  Cc: Andrii Nakryiko, Andrii Nakryiko, LKML, linux-trace-kernel, bpf,
	Eduard Zingerman, Alexei Starovoitov, Song Liu, Masami Hiramatsu,
	Steven Rostedt, pengdonglin

On Mon, Oct 13, 2025 at 9:53 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
>
> I’d like to suggest a dual-mechanism approach:
> 1. If BTF is generated by a newer pahole (with pre-sorting support), the
>     kernel would use the pre-sorted data directly.
> 2. For BTF from older pahole versions, the kernel would handle sorting
>     at load time or later.

The problem with 2 is extra memory consumption for narrow
use case. The "time cat trace" example shows that search
is in critical path, but I suspect ftrace can do it differently.
I don't know why it's doing the search so much.
Everyelse in bpf we don't call it that often.
So optimizing the search is nice, but not at the expense
of so much extra memory.
Hence I don't think 2 is worth doing.

> Regarding the pahole changes: this is now my highest priority. I’ve
> already incorporated it into my development plan and will begin
> working on the patches shortly.

let's land pahole changes first.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-15  1:54               ` Alexei Starovoitov
@ 2025-10-15  3:43                 ` Donglin Peng
  2025-10-15  9:15                   ` Alan Maguire
  0 siblings, 1 reply; 14+ messages in thread
From: Donglin Peng @ 2025-10-15  3:43 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Andrii Nakryiko, Andrii Nakryiko, LKML, linux-trace-kernel, bpf,
	Eduard Zingerman, Alexei Starovoitov, Song Liu, Masami Hiramatsu,
	Steven Rostedt, pengdonglin

On Wed, Oct 15, 2025 at 9:54 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Mon, Oct 13, 2025 at 9:53 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
> >
> > I’d like to suggest a dual-mechanism approach:
> > 1. If BTF is generated by a newer pahole (with pre-sorting support), the
> >     kernel would use the pre-sorted data directly.
> > 2. For BTF from older pahole versions, the kernel would handle sorting
> >     at load time or later.
>
> The problem with 2 is extra memory consumption for narrow
> use case. The "time cat trace" example shows that search
> is in critical path, but I suspect ftrace can do it differently.
> I don't know why it's doing the search so much.

Thanks. The reason is that ftrace supports outputting parameters of traced
functions through funcgraph-args, like this:

 0)                    |  vfs_write(file=0xffff888102b17380,
buf=0x7ffd1e9faaf7, count=0x1, pos=0xffffc90006f83ef0) {
 0)                    |    rw_verify_area(read_write=1,
file=0xffff888102b17380, ppos=0xffffc90006f83ef0, count=0x1) {
 0)                    |
security_file_permission(file=0xffff888102b17380, mask=2) {
 0)                    |
selinux_file_permission(file=0xffff888102b17380, mask=2) {
 0)   0.111 us    |          avc_policy_seqno();
 0)   0.380 us    |        }
 0)   0.585 us    |      }
 0)   0.782 us    |    }

which requires obtaining function parameter names and types from BTF.
However, there is currently no direct mapping from function addresses to
btf_type index information. Therefore, it first obtains the function name from
the function address, and then searches the BTF file by the function name
to get the corresponding btf_type.

> Everyelse in bpf we don't call it that often.
> So optimizing the search is nice, but not at the expense
> of so much extra memory.
> Hence I don't think 2 is worth doing.

Thanks, I agree.

>
> > Regarding the pahole changes: this is now my highest priority. I’ve
> > already incorporated it into my development plan and will begin
> > working on the patches shortly.
>
> let's land pahole changes first.

Understood, thanks

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup
  2025-10-15  3:43                 ` Donglin Peng
@ 2025-10-15  9:15                   ` Alan Maguire
  0 siblings, 0 replies; 14+ messages in thread
From: Alan Maguire @ 2025-10-15  9:15 UTC (permalink / raw)
  To: Donglin Peng, Alexei Starovoitov
  Cc: Andrii Nakryiko, Andrii Nakryiko, LKML, linux-trace-kernel, bpf,
	Eduard Zingerman, Alexei Starovoitov, Song Liu, Masami Hiramatsu,
	Steven Rostedt, pengdonglin

On 15/10/2025 04:43, Donglin Peng wrote:
> On Wed, Oct 15, 2025 at 9:54 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
>>
>> On Mon, Oct 13, 2025 at 9:53 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
>>>
>>> I’d like to suggest a dual-mechanism approach:
>>> 1. If BTF is generated by a newer pahole (with pre-sorting support), the
>>>     kernel would use the pre-sorted data directly.
>>> 2. For BTF from older pahole versions, the kernel would handle sorting
>>>     at load time or later.
>>
>> The problem with 2 is extra memory consumption for narrow
>> use case. The "time cat trace" example shows that search
>> is in critical path, but I suspect ftrace can do it differently.
>> I don't know why it's doing the search so much.
> 
> Thanks. The reason is that ftrace supports outputting parameters of traced
> functions through funcgraph-args, like this:
> 
>  0)                    |  vfs_write(file=0xffff888102b17380,
> buf=0x7ffd1e9faaf7, count=0x1, pos=0xffffc90006f83ef0) {
>  0)                    |    rw_verify_area(read_write=1,
> file=0xffff888102b17380, ppos=0xffffc90006f83ef0, count=0x1) {
>  0)                    |
> security_file_permission(file=0xffff888102b17380, mask=2) {
>  0)                    |
> selinux_file_permission(file=0xffff888102b17380, mask=2) {
>  0)   0.111 us    |          avc_policy_seqno();
>  0)   0.380 us    |        }
>  0)   0.585 us    |      }
>  0)   0.782 us    |    }
> 
> which requires obtaining function parameter names and types from BTF.
> However, there is currently no direct mapping from function addresses to
> btf_type index information. Therefore, it first obtains the function name from
> the function address, and then searches the BTF file by the function name
> to get the corresponding btf_type.

The problem here is we have a lookup every time we collect function
args, right? Binary search of sorted function names will make that
better but it will still be slow if it has to happen every time we dump
function args. Would it make sense then perhaps to have a more tailored
solution like a cache of BTF type ids for functions that could be mapped
directly from kallsyms symbols? Mentioned this before [1] but maybe we
could figure something out now?

For example, we have to look up kallsym name for the address via
lookup_symbol_name(); it uses get_symbol_pos() internally to find the
index within the kallsyms_offsets array. If we had a similar array for
kallsyms_btf_ids we could use the same index to populate it with
function BTF ids, we could later do O(1) lookup. We would just need a
kallsyms lookup that returned the index, or indeed a new API which
returned the name and the BTF id (if we added such an index to kallsyms
code directly). We could even just populate the entries on first use and
then it would function as a cache. We would need a module+btf_id in the
index, so 64 bits per entry to support both module and kernel BTF. Seems
possible though?

[1]
https://lore.kernel.org/linux-trace-kernel/8455bc79-a684-476d-88bd-9f7ff9ffa637@oracle.com/

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2025-10-15  9:15 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-13 13:15 [RFC PATCH v1] btf: Sort BTF types by name and kind to optimize btf_find_by_name_kind lookup pengdonglin
2025-10-13 23:40 ` Andrii Nakryiko
2025-10-13 23:53   ` Alexei Starovoitov
2025-10-14  0:15     ` Andrii Nakryiko
2025-10-14  0:22       ` Alexei Starovoitov
2025-10-14  1:54         ` Donglin Peng
2025-10-14  2:48           ` Alexei Starovoitov
2025-10-14  4:53             ` Donglin Peng
2025-10-14  8:05               ` Alan Maguire
2025-10-15  1:12                 ` Donglin Peng
2025-10-15  1:54               ` Alexei Starovoitov
2025-10-15  3:43                 ` Donglin Peng
2025-10-15  9:15                   ` Alan Maguire
2025-10-14  1:54   ` Donglin Peng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).