* [PATCH bpf-next v4 1/4] fprobe: use rhltable for fprobe_ip_table
From: Menglong Dong @ 2025-08-15 6:47 UTC (permalink / raw)
To: mhiramat, olsajiri
Cc: rostedt, mathieu.desnoyers, hca, revest, linux-kernel,
linux-trace-kernel, bpf
Currently, every kernel function hooked by an fprobe is added to the hash
table "fprobe_ip_table". Its key is the function address, and its value is
a "struct fprobe_hlist_node".
The hash table has a fixed bucket count, FPROBE_IP_TABLE_SIZE, which is 256.
This means the lookup overhead grows linearly once the number of hooked
functions exceeds 256, and it becomes huge when we try to hook all the
kernel functions.
Therefore, replace the hash table with an rhltable to reduce the overhead.
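For reference, here is a minimal sketch of the rhltable usage pattern this
change relies on; the demo_* names are illustrative only, not the fprobe
code itself:

#include <linux/rhashtable.h>

/* Illustrative value type keyed by a kernel address. */
struct demo_node {
        struct rhlist_head hlist;       /* rhltable linkage */
        unsigned long addr;             /* lookup key */
};

static struct rhltable demo_table;

/* With key_len set and no custom hashfn, the key bytes are hashed with jhash. */
static const struct rhashtable_params demo_params = {
        .head_offset = offsetof(struct demo_node, hlist),
        .key_offset  = offsetof(struct demo_node, addr),
        .key_len     = sizeof_field(struct demo_node, addr),
        .automatic_shrinking = true,
};

static int __init demo_init(void)
{
        return rhltable_init(&demo_table, &demo_params);
}

static int demo_add(struct demo_node *node)
{
        /* Unlike a plain rhashtable, an rhltable chains entries that share a key. */
        return rhltable_insert(&demo_table, &node->hlist, demo_params);
}

static void demo_show(unsigned long addr)
{
        struct rhlist_head *head, *pos;
        struct demo_node *node;

        /* Lookup and traversal must run in an RCU read-side section. */
        rcu_read_lock();
        head = rhltable_lookup(&demo_table, &addr, demo_params);
        rhl_for_each_entry_rcu(node, pos, head, hlist)
                pr_info("hit at %lx\n", node->addr);
        rcu_read_unlock();
}
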
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
v4:
- replace rhashtable_walk_enter with rhltable_walk_enter
v3:
- some formatting cleanups
- handle the error returned from rhltable_insert in
insert_fprobe_node
---
include/linux/fprobe.h | 3 +-
kernel/trace/fprobe.c | 155 +++++++++++++++++++++++------------------
2 files changed, 91 insertions(+), 67 deletions(-)
diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 7964db96e41a..0a3bcd1718f3 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -7,6 +7,7 @@
#include <linux/ftrace.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
+#include <linux/rhashtable.h>
#include <linux/slab.h>
struct fprobe;
@@ -26,7 +27,7 @@ typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
* @fp: The fprobe which owns this.
*/
struct fprobe_hlist_node {
- struct hlist_node hlist;
+ struct rhlist_head hlist;
unsigned long addr;
struct fprobe *fp;
};
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index c8034dfc1070..7aa21361227e 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -10,6 +10,7 @@
#include <linux/kprobes.h>
#include <linux/list.h>
#include <linux/mutex.h>
+#include <linux/rhashtable.h>
#include <linux/slab.h>
#include <linux/sort.h>
@@ -41,47 +42,46 @@
* - RCU hlist traversal under disabling preempt
*/
static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
-static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE];
+static struct rhltable fprobe_ip_table;
static DEFINE_MUTEX(fprobe_mutex);
-/*
- * Find first fprobe in the hlist. It will be iterated twice in the entry
- * probe, once for correcting the total required size, the second time is
- * calling back the user handlers.
- * Thus the hlist in the fprobe_table must be sorted and new probe needs to
- * be added *before* the first fprobe.
- */
-static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip)
+static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed)
{
- struct fprobe_hlist_node *node;
- struct hlist_head *head;
+ return hash_ptr(*(unsigned long **)data, 32);
+}
- head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
- hlist_for_each_entry_rcu(node, head, hlist,
- lockdep_is_held(&fprobe_mutex)) {
- if (node->addr == ip)
- return node;
- }
- return NULL;
+static int fprobe_node_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ unsigned long key = *(unsigned long *)arg->key;
+ const struct fprobe_hlist_node *n = ptr;
+
+ return n->addr != key;
}
-NOKPROBE_SYMBOL(find_first_fprobe_node);
-/* Node insertion and deletion requires the fprobe_mutex */
-static void insert_fprobe_node(struct fprobe_hlist_node *node)
+static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed)
{
- unsigned long ip = node->addr;
- struct fprobe_hlist_node *next;
- struct hlist_head *head;
+ const struct fprobe_hlist_node *n = data;
+
+ return hash_ptr((void *)n->addr, 32);
+}
+
+static const struct rhashtable_params fprobe_rht_params = {
+ .head_offset = offsetof(struct fprobe_hlist_node, hlist),
+ .key_offset = offsetof(struct fprobe_hlist_node, addr),
+ .key_len = sizeof_field(struct fprobe_hlist_node, addr),
+ .hashfn = fprobe_node_hashfn,
+ .obj_hashfn = fprobe_node_obj_hashfn,
+ .obj_cmpfn = fprobe_node_cmp,
+ .automatic_shrinking = true,
+};
+/* Node insertion and deletion requires the fprobe_mutex */
+static int insert_fprobe_node(struct fprobe_hlist_node *node)
+{
lockdep_assert_held(&fprobe_mutex);
- next = find_first_fprobe_node(ip);
- if (next) {
- hlist_add_before_rcu(&node->hlist, &next->hlist);
- return;
- }
- head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
- hlist_add_head_rcu(&node->hlist, head);
+ return rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params);
}
/* Return true if there are synonims */
@@ -92,9 +92,11 @@ static bool delete_fprobe_node(struct fprobe_hlist_node *node)
/* Avoid double deleting */
if (READ_ONCE(node->fp) != NULL) {
WRITE_ONCE(node->fp, NULL);
- hlist_del_rcu(&node->hlist);
+ rhltable_remove(&fprobe_ip_table, &node->hlist,
+ fprobe_rht_params);
}
- return !!find_first_fprobe_node(node->addr);
+ return !!rhltable_lookup(&fprobe_ip_table, &node->addr,
+ fprobe_rht_params);
}
/* Check existence of the fprobe */
@@ -249,9 +251,10 @@ static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent
static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
struct ftrace_regs *fregs)
{
- struct fprobe_hlist_node *node, *first;
unsigned long *fgraph_data = NULL;
unsigned long func = trace->func;
+ struct fprobe_hlist_node *node;
+ struct rhlist_head *head, *pos;
unsigned long ret_ip;
int reserved_words;
struct fprobe *fp;
@@ -260,14 +263,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
if (WARN_ON_ONCE(!fregs))
return 0;
- first = node = find_first_fprobe_node(func);
- if (unlikely(!first))
- return 0;
-
+ rcu_read_lock();
+ head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params);
reserved_words = 0;
- hlist_for_each_entry_from_rcu(node, hlist) {
+ rhl_for_each_entry_rcu(node, pos, head, hlist) {
if (node->addr != func)
- break;
+ continue;
fp = READ_ONCE(node->fp);
if (!fp || !fp->exit_handler)
continue;
@@ -278,17 +279,19 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
reserved_words +=
FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
}
- node = first;
+ rcu_read_unlock();
if (reserved_words) {
fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
if (unlikely(!fgraph_data)) {
- hlist_for_each_entry_from_rcu(node, hlist) {
+ rcu_read_lock();
+ rhl_for_each_entry_rcu(node, pos, head, hlist) {
if (node->addr != func)
- break;
+ continue;
fp = READ_ONCE(node->fp);
if (fp && !fprobe_disabled(fp))
fp->nmissed++;
}
+ rcu_read_unlock();
return 0;
}
}
@@ -299,12 +302,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
*/
ret_ip = ftrace_regs_get_return_address(fregs);
used = 0;
- hlist_for_each_entry_from_rcu(node, hlist) {
+ rhl_for_each_entry_rcu(node, pos, head, hlist) {
int data_size;
void *data;
if (node->addr != func)
- break;
+ continue;
fp = READ_ONCE(node->fp);
if (!fp || fprobe_disabled(fp))
continue;
@@ -448,25 +451,21 @@ static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long ad
return 0;
}
-static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *head,
- struct fprobe_addr_list *alist)
+static void fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node,
+ struct fprobe_addr_list *alist)
{
- struct fprobe_hlist_node *node;
int ret = 0;
- hlist_for_each_entry_rcu(node, head, hlist,
- lockdep_is_held(&fprobe_mutex)) {
- if (!within_module(node->addr, mod))
- continue;
- if (delete_fprobe_node(node))
- continue;
- /*
- * If failed to update alist, just continue to update hlist.
- * Therefore, at list user handler will not hit anymore.
- */
- if (!ret)
- ret = fprobe_addr_list_add(alist, node->addr);
- }
+ if (!within_module(node->addr, mod))
+ return;
+ if (delete_fprobe_node(node))
+ return;
+ /*
+ * If failed to update alist, just continue to update hlist.
+ * Therefore, at list user handler will not hit anymore.
+ */
+ if (!ret)
+ ret = fprobe_addr_list_add(alist, node->addr);
}
/* Handle module unloading to manage fprobe_ip_table. */
@@ -474,8 +473,9 @@ static int fprobe_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
{
struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
+ struct fprobe_hlist_node *node;
+ struct rhashtable_iter iter;
struct module *mod = data;
- int i;
if (val != MODULE_STATE_GOING)
return NOTIFY_DONE;
@@ -486,8 +486,16 @@ static int fprobe_module_callback(struct notifier_block *nb,
return NOTIFY_DONE;
mutex_lock(&fprobe_mutex);
- for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++)
- fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist);
+ rhltable_walk_enter(&fprobe_ip_table, &iter);
+ do {
+ rhashtable_walk_start(&iter);
+
+ while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node))
+ fprobe_remove_node_in_module(mod, node, &alist);
+
+ rhashtable_walk_stop(&iter);
+ } while (node == ERR_PTR(-EAGAIN));
+ rhashtable_walk_exit(&iter);
if (alist.index < alist.size && alist.index > 0)
ftrace_set_filter_ips(&fprobe_graph_ops.ops,
@@ -727,8 +735,16 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
ret = fprobe_graph_add_ips(addrs, num);
if (!ret) {
add_fprobe_hash(fp);
- for (i = 0; i < hlist_array->size; i++)
- insert_fprobe_node(&hlist_array->array[i]);
+ for (i = 0; i < hlist_array->size; i++) {
+ ret = insert_fprobe_node(&hlist_array->array[i]);
+ if (ret)
+ break;
+ }
+ /* fallback on insert error */
+ if (ret) {
+ for (i--; i >= 0; i--)
+ delete_fprobe_node(&hlist_array->array[i]);
+ }
}
mutex_unlock(&fprobe_mutex);
@@ -824,3 +840,10 @@ int unregister_fprobe(struct fprobe *fp)
return ret;
}
EXPORT_SYMBOL_GPL(unregister_fprobe);
+
+static int __init fprobe_initcall(void)
+{
+ rhltable_init(&fprobe_ip_table, &fprobe_rht_params);
+ return 0;
+}
+late_initcall(fprobe_initcall);
--
2.50.1
* Re: [PATCH bpf-next v4 1/4] fprobe: use rhltable for fprobe_ip_table
From: Masami Hiramatsu @ 2025-08-16 14:50 UTC (permalink / raw)
To: Menglong Dong
Cc: olsajiri, rostedt, mathieu.desnoyers, hca, revest, linux-kernel,
linux-trace-kernel, bpf
Hi Menglong,
Sorry, one more thing.
> @@ -260,14 +263,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
> if (WARN_ON_ONCE(!fregs))
> return 0;
>
> - first = node = find_first_fprobe_node(func);
> - if (unlikely(!first))
> - return 0;
> -
> + rcu_read_lock();
Actually, we don't need these rcu_read_lock() in this function, because
the caller function_graph_enter_regs() uses ftrace_test_recursion_trylock()
which disables preemption. Thus we don't need to do this again here.
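For context, the caller-side guard looks roughly like the sketch below
(demo_caller is illustrative, not the actual fgraph code):

#include <linux/trace_recursion.h>

static int demo_caller(unsigned long ip, unsigned long parent_ip)
{
        int bit;

        /*
         * The recursion guard also disables preemption, and a
         * preemption-disabled region is a valid RCU read-side section
         * for the rhltable walk in fprobe_entry(), so no extra
         * rcu_read_lock() is needed there.
         */
        bit = ftrace_test_recursion_trylock(ip, parent_ip);
        if (bit < 0)
                return 0;

        /* ... fprobe_entry() and the other handlers run here ... */

        ftrace_test_recursion_unlock(bit);
        return 1;
}
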
> + head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params);
> reserved_words = 0;
> - hlist_for_each_entry_from_rcu(node, hlist) {
> + rhl_for_each_entry_rcu(node, pos, head, hlist) {
> if (node->addr != func)
> - break;
> + continue;
> fp = READ_ONCE(node->fp);
> if (!fp || !fp->exit_handler)
> continue;
> @@ -278,17 +279,19 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
> reserved_words +=
> FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
> }
> - node = first;
> + rcu_read_unlock();
> if (reserved_words) {
> fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
> if (unlikely(!fgraph_data)) {
> - hlist_for_each_entry_from_rcu(node, hlist) {
> + rcu_read_lock();
Ditto.
> + rhl_for_each_entry_rcu(node, pos, head, hlist) {
> if (node->addr != func)
> - break;
> + continue;
> fp = READ_ONCE(node->fp);
> if (fp && !fprobe_disabled(fp))
> fp->nmissed++;
> }
> + rcu_read_unlock();
> return 0;
> }
> }
Thank you,
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
* Re: [PATCH bpf-next v4 1/4] fprobe: use rhltable for fprobe_ip_table
From: Menglong Dong @ 2025-08-17 1:43 UTC (permalink / raw)
To: Masami Hiramatsu
Cc: olsajiri, rostedt, mathieu.desnoyers, hca, revest, linux-kernel,
linux-trace-kernel, bpf
On Sat, Aug 16, 2025 at 10:50 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> Hi Menglong,
>
> Sorry, one more thing.
>
> > @@ -260,14 +263,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
> > if (WARN_ON_ONCE(!fregs))
> > return 0;
> >
> > - first = node = find_first_fprobe_node(func);
> > - if (unlikely(!first))
> > - return 0;
> > -
> > + rcu_read_lock();
>
> Actually, we don't need these rcu_read_lock() in this function, because
> the caller function_graph_enter_regs() uses ftrace_test_recursion_trylock()
> which disables preemption. Thus we don't need to do this again here.
Yeah, I understand it now. I wondered about this part
for a long time, as I didn't see any RCU lock in the
fprobe_entry(). Thanks for the explanation ;)
I'll send a V5 now.
>
> > + head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params);
> > reserved_words = 0;
> > - hlist_for_each_entry_from_rcu(node, hlist) {
> > + rhl_for_each_entry_rcu(node, pos, head, hlist) {
> > if (node->addr != func)
> > - break;
> > + continue;
> > fp = READ_ONCE(node->fp);
> > if (!fp || !fp->exit_handler)
> > continue;
> > @@ -278,17 +279,19 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
> > reserved_words +=
> > FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
> > }
> > - node = first;
> > + rcu_read_unlock();
> > if (reserved_words) {
> > fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
> > if (unlikely(!fgraph_data)) {
> > - hlist_for_each_entry_from_rcu(node, hlist) {
> > + rcu_read_lock();
>
> Ditto.
>
> > + rhl_for_each_entry_rcu(node, pos, head, hlist) {
> > if (node->addr != func)
> > - break;
> > + continue;
> > fp = READ_ONCE(node->fp);
> > if (fp && !fprobe_disabled(fp))
> > fp->nmissed++;
> > }
> > + rcu_read_unlock();
> > return 0;
> > }
> > }
>
> Thank you,
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
* [PATCH bpf-next v4 2/4] selftests/bpf: move get_ksyms and get_addrs to trace_helpers.c
From: Menglong Dong @ 2025-08-15 6:47 UTC (permalink / raw)
To: mhiramat, olsajiri
Cc: rostedt, mathieu.desnoyers, hca, revest, linux-kernel,
linux-trace-kernel, bpf
Sometimes we need to get all the kernel functions that can be traced, so
move get_syms() and get_addrs() from kprobe_multi_test.c to
trace_helpers.c and rename them to bpf_get_ksyms() and bpf_get_addrs().
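As a usage sketch (hypothetical caller, error handling trimmed), the moved
helpers can be used from a selftest roughly like this:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include "trace_helpers.h"

static void dump_traceable(void)
{
        char **syms = NULL;
        unsigned long *addrs = NULL;
        size_t sym_cnt = 0, addr_cnt = 0;

        /* All traceable kernel symbols; kernel = true filters out module entries. */
        if (!bpf_get_ksyms(&syms, &sym_cnt, true))
                printf("found %zu traceable ksyms\n", sym_cnt);

        /*
         * The same set as addresses, read from
         * available_filter_functions_addrs; returns -ENOENT when that
         * file is not available.
         */
        if (!bpf_get_addrs(&addrs, &addr_cnt, true))
                printf("found %zu traceable addresses\n", addr_cnt);

        free(syms);
        free(addrs);
}
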
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
.../bpf/prog_tests/kprobe_multi_test.c | 220 +-----------------
tools/testing/selftests/bpf/trace_helpers.c | 214 +++++++++++++++++
tools/testing/selftests/bpf/trace_helpers.h | 3 +
3 files changed, 220 insertions(+), 217 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index e19ef509ebf8..171706e78da8 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -422,220 +422,6 @@ static void test_unique_match(void)
kprobe_multi__destroy(skel);
}
-static size_t symbol_hash(long key, void *ctx __maybe_unused)
-{
- return str_hash((const char *) key);
-}
-
-static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
-{
- return strcmp((const char *) key1, (const char *) key2) == 0;
-}
-
-static bool is_invalid_entry(char *buf, bool kernel)
-{
- if (kernel && strchr(buf, '['))
- return true;
- if (!kernel && !strchr(buf, '['))
- return true;
- return false;
-}
-
-static bool skip_entry(char *name)
-{
- /*
- * We attach to almost all kernel functions and some of them
- * will cause 'suspicious RCU usage' when fprobe is attached
- * to them. Filter out the current culprits - arch_cpu_idle
- * default_idle and rcu_* functions.
- */
- if (!strcmp(name, "arch_cpu_idle"))
- return true;
- if (!strcmp(name, "default_idle"))
- return true;
- if (!strncmp(name, "rcu_", 4))
- return true;
- if (!strcmp(name, "bpf_dispatcher_xdp_func"))
- return true;
- if (!strncmp(name, "__ftrace_invalid_address__",
- sizeof("__ftrace_invalid_address__") - 1))
- return true;
- return false;
-}
-
-/* Do comparision by ignoring '.llvm.<hash>' suffixes. */
-static int compare_name(const char *name1, const char *name2)
-{
- const char *res1, *res2;
- int len1, len2;
-
- res1 = strstr(name1, ".llvm.");
- res2 = strstr(name2, ".llvm.");
- len1 = res1 ? res1 - name1 : strlen(name1);
- len2 = res2 ? res2 - name2 : strlen(name2);
-
- if (len1 == len2)
- return strncmp(name1, name2, len1);
- if (len1 < len2)
- return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
- return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
-}
-
-static int load_kallsyms_compare(const void *p1, const void *p2)
-{
- return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
-}
-
-static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
-{
- return compare_name(p1, p2->name);
-}
-
-static int get_syms(char ***symsp, size_t *cntp, bool kernel)
-{
- size_t cap = 0, cnt = 0;
- char *name = NULL, *ksym_name, **syms = NULL;
- struct hashmap *map;
- struct ksyms *ksyms;
- struct ksym *ks;
- char buf[256];
- FILE *f;
- int err = 0;
-
- ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
- if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local"))
- return -EINVAL;
-
- /*
- * The available_filter_functions contains many duplicates,
- * but other than that all symbols are usable in kprobe multi
- * interface.
- * Filtering out duplicates by using hashmap__add, which won't
- * add existing entry.
- */
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
-
- if (!f)
- return -EINVAL;
-
- map = hashmap__new(symbol_hash, symbol_equal, NULL);
- if (IS_ERR(map)) {
- err = libbpf_get_error(map);
- goto error;
- }
-
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
- continue;
- if (skip_entry(name))
- continue;
-
- ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
- if (!ks) {
- err = -EINVAL;
- goto error;
- }
-
- ksym_name = ks->name;
- err = hashmap__add(map, ksym_name, 0);
- if (err == -EEXIST) {
- err = 0;
- continue;
- }
- if (err)
- goto error;
-
- err = libbpf_ensure_mem((void **) &syms, &cap,
- sizeof(*syms), cnt + 1);
- if (err)
- goto error;
-
- syms[cnt++] = ksym_name;
- }
-
- *symsp = syms;
- *cntp = cnt;
-
-error:
- free(name);
- fclose(f);
- hashmap__free(map);
- if (err)
- free(syms);
- return err;
-}
-
-static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
-{
- unsigned long *addr, *addrs, *tmp_addrs;
- int err = 0, max_cnt, inc_cnt;
- char *name = NULL;
- size_t cnt = 0;
- char buf[256];
- FILE *f;
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
-
- if (!f)
- return -ENOENT;
-
- /* In my local setup, the number of entries is 50k+ so Let us initially
- * allocate space to hold 64k entries. If 64k is not enough, incrementally
- * increase 1k each time.
- */
- max_cnt = 65536;
- inc_cnt = 1024;
- addrs = malloc(max_cnt * sizeof(long));
- if (addrs == NULL) {
- err = -ENOMEM;
- goto error;
- }
-
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
- continue;
- if (skip_entry(name))
- continue;
-
- if (cnt == max_cnt) {
- max_cnt += inc_cnt;
- tmp_addrs = realloc(addrs, max_cnt);
- if (!tmp_addrs) {
- err = -ENOMEM;
- goto error;
- }
- addrs = tmp_addrs;
- }
-
- addrs[cnt++] = (unsigned long)addr;
- }
-
- *addrsp = addrs;
- *cntp = cnt;
-
-error:
- free(name);
- fclose(f);
- if (err)
- free(addrs);
- return err;
-}
-
static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts)
{
long attach_start_ns, attach_end_ns;
@@ -670,7 +456,7 @@ static void test_kprobe_multi_bench_attach(bool kernel)
char **syms = NULL;
size_t cnt = 0;
- if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
+ if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms"))
return;
skel = kprobe_multi_empty__open_and_load();
@@ -696,13 +482,13 @@ static void test_kprobe_multi_bench_attach_addr(bool kernel)
size_t cnt = 0;
int err;
- err = get_addrs(&addrs, &cnt, kernel);
+ err = bpf_get_addrs(&addrs, &cnt, kernel);
if (err == -ENOENT) {
test__skip();
return;
}
- if (!ASSERT_OK(err, "get_addrs"))
+ if (!ASSERT_OK(err, "bpf_get_addrs"))
return;
skel = kprobe_multi_empty__open_and_load();
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 81943c6254e6..d24baf244d1f 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -17,6 +17,7 @@
#include <linux/limits.h>
#include <libelf.h>
#include <gelf.h>
+#include "bpf/hashmap.h"
#include "bpf/libbpf_internal.h"
#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
@@ -519,3 +520,216 @@ void read_trace_pipe(void)
{
read_trace_pipe_iter(trace_pipe_cb, NULL, 0);
}
+
+static size_t symbol_hash(long key, void *ctx __maybe_unused)
+{
+ return str_hash((const char *) key);
+}
+
+static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return strcmp((const char *) key1, (const char *) key2) == 0;
+}
+
+static bool is_invalid_entry(char *buf, bool kernel)
+{
+ if (kernel && strchr(buf, '['))
+ return true;
+ if (!kernel && !strchr(buf, '['))
+ return true;
+ return false;
+}
+
+static bool skip_entry(char *name)
+{
+ /*
+ * We attach to almost all kernel functions and some of them
+ * will cause 'suspicious RCU usage' when fprobe is attached
+ * to them. Filter out the current culprits - arch_cpu_idle
+ * default_idle and rcu_* functions.
+ */
+ if (!strcmp(name, "arch_cpu_idle"))
+ return true;
+ if (!strcmp(name, "default_idle"))
+ return true;
+ if (!strncmp(name, "rcu_", 4))
+ return true;
+ if (!strcmp(name, "bpf_dispatcher_xdp_func"))
+ return true;
+ if (!strncmp(name, "__ftrace_invalid_address__",
+ sizeof("__ftrace_invalid_address__") - 1))
+ return true;
+ return false;
+}
+
+/* Do comparison by ignoring '.llvm.<hash>' suffixes. */
+static int compare_name(const char *name1, const char *name2)
+{
+ const char *res1, *res2;
+ int len1, len2;
+
+ res1 = strstr(name1, ".llvm.");
+ res2 = strstr(name2, ".llvm.");
+ len1 = res1 ? res1 - name1 : strlen(name1);
+ len2 = res2 ? res2 - name2 : strlen(name2);
+
+ if (len1 == len2)
+ return strncmp(name1, name2, len1);
+ if (len1 < len2)
+ return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
+ return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
+}
+
+static int load_kallsyms_compare(const void *p1, const void *p2)
+{
+ return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
+}
+
+static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
+{
+ return compare_name(p1, p2->name);
+}
+
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel)
+{
+ size_t cap = 0, cnt = 0;
+ char *name = NULL, *ksym_name, **syms = NULL;
+ struct hashmap *map;
+ struct ksyms *ksyms;
+ struct ksym *ks;
+ char buf[256];
+ FILE *f;
+ int err = 0;
+
+ ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
+ if (!ksyms)
+ return -EINVAL;
+
+ /*
+ * The available_filter_functions contains many duplicates,
+ * but other than that all symbols are usable to trace.
+ * Filtering out duplicates by using hashmap__add, which won't
+ * add existing entry.
+ */
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+
+ if (!f)
+ return -EINVAL;
+
+ map = hashmap__new(symbol_hash, symbol_equal, NULL);
+ if (IS_ERR(map)) {
+ err = libbpf_get_error(map);
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
+ if (!ks) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ ksym_name = ks->name;
+ err = hashmap__add(map, ksym_name, 0);
+ if (err == -EEXIST) {
+ err = 0;
+ continue;
+ }
+ if (err)
+ goto error;
+
+ err = libbpf_ensure_mem((void **) &syms, &cap,
+ sizeof(*syms), cnt + 1);
+ if (err)
+ goto error;
+
+ syms[cnt++] = ksym_name;
+ }
+
+ *symsp = syms;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ hashmap__free(map);
+ if (err)
+ free(syms);
+ return err;
+}
+
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
+{
+ unsigned long *addr, *addrs, *tmp_addrs;
+ int err = 0, max_cnt, inc_cnt;
+ char *name = NULL;
+ size_t cnt = 0;
+ char buf[256];
+ FILE *f;
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
+
+ if (!f)
+ return -ENOENT;
+
+ /* In my local setup, the number of entries is 50k+ so Let us initially
+ * allocate space to hold 64k entries. If 64k is not enough, incrementally
+ * increase 1k each time.
+ */
+ max_cnt = 65536;
+ inc_cnt = 1024;
+ addrs = malloc(max_cnt * sizeof(long));
+ if (addrs == NULL) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ if (cnt == max_cnt) {
+ max_cnt += inc_cnt;
+ tmp_addrs = realloc(addrs, max_cnt);
+ if (!tmp_addrs) {
+ err = -ENOMEM;
+ goto error;
+ }
+ addrs = tmp_addrs;
+ }
+
+ addrs[cnt++] = (unsigned long)addr;
+ }
+
+ *addrsp = addrs;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ if (err)
+ free(addrs);
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 2ce873c9f9aa..9437bdd4afa5 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -41,4 +41,7 @@ ssize_t get_rel_offset(uintptr_t addr);
int read_build_id(const char *path, char *build_id, size_t size);
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel);
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel);
+
#endif
--
2.50.1
* [PATCH bpf-next v4 3/4] selftests/bpf: skip recursive functions for kprobe_multi
From: Menglong Dong @ 2025-08-15 6:47 UTC (permalink / raw)
To: mhiramat, olsajiri
Cc: rostedt, mathieu.desnoyers, hca, revest, linux-kernel,
linux-trace-kernel, bpf
Some functions are entered recursively from the kprobe_multi handling path
and distort the benchmark results, so just skip them.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
tools/testing/selftests/bpf/trace_helpers.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index d24baf244d1f..9da9da51b132 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -559,6 +559,22 @@ static bool skip_entry(char *name)
if (!strncmp(name, "__ftrace_invalid_address__",
sizeof("__ftrace_invalid_address__") - 1))
return true;
+
+ if (!strcmp(name, "migrate_disable"))
+ return true;
+ if (!strcmp(name, "migrate_enable"))
+ return true;
+ if (!strcmp(name, "rcu_read_unlock_strict"))
+ return true;
+ if (!strcmp(name, "preempt_count_add"))
+ return true;
+ if (!strcmp(name, "preempt_count_sub"))
+ return true;
+ if (!strcmp(name, "__rcu_read_lock"))
+ return true;
+ if (!strcmp(name, "__rcu_read_unlock"))
+ return true;
+
return false;
}
--
2.50.1
* [PATCH bpf-next v4 4/4] selftests/bpf: add benchmark testing for kprobe-multi-all
From: Menglong Dong @ 2025-08-15 6:47 UTC (permalink / raw)
To: mhiramat, olsajiri
Cc: rostedt, mathieu.desnoyers, hca, revest, linux-kernel,
linux-trace-kernel, bpf
For now, the kprobe-multi benchmark hooks only a single function during
testing. Add the "kprobe-multi-all" benchmark, which hooks all kernel
functions during the run, and add "kretprobe-multi-all" as well.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
v3:
- add testcase kretprobe-multi-all
- attach an empty kprobe-multi/kretprobe-multi prog to all the kernel
functions except bpf_get_numa_node_id to make the bench results more
accurate
---
tools/testing/selftests/bpf/bench.c | 4 ++
.../selftests/bpf/benchs/bench_trigger.c | 54 +++++++++++++++++++
.../selftests/bpf/benchs/run_bench_trigger.sh | 4 +-
.../selftests/bpf/progs/trigger_bench.c | 12 +++++
tools/testing/selftests/bpf/trace_helpers.c | 3 ++
5 files changed, 75 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index ddd73d06a1eb..29dbf937818a 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -510,6 +510,8 @@ extern const struct bench bench_trig_kretprobe;
extern const struct bench bench_trig_kprobe_multi;
extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_kprobe_multi_all;
+extern const struct bench bench_trig_kretprobe_multi_all;
extern const struct bench bench_trig_fexit;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_tp;
@@ -578,6 +580,8 @@ static const struct bench *benchs[] = {
&bench_trig_kprobe_multi,
&bench_trig_kretprobe_multi,
&bench_trig_fentry,
+ &bench_trig_kprobe_multi_all,
+ &bench_trig_kretprobe_multi_all,
&bench_trig_fexit,
&bench_trig_fmodret,
&bench_trig_tp,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 82327657846e..c6634a64a7c0 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -226,6 +226,58 @@ static void trigger_fentry_setup(void)
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
+static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ char **syms = NULL;
+ size_t cnt = 0;
+
+ if (bpf_get_ksyms(&syms, &cnt, true)) {
+ printf("failed to get ksyms\n");
+ exit(1);
+ }
+
+ printf("found %zu ksyms\n", cnt);
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+ opts.retprobe = kretprobe;
+ /* attach empty to all the kernel functions except bpf_get_numa_node_id. */
+ if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
+ printf("failed to attach bpf_program__attach_kprobe_multi_opts to all\n");
+ exit(1);
+ }
+}
+
+static void trigger_kprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, false);
+ attach_bpf(prog);
+}
+
+static void trigger_kretprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kretprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kretprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, true);
+ attach_bpf(prog);
+}
+
static void trigger_fexit_setup(void)
{
setup_ctx();
@@ -512,6 +564,8 @@ BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
+BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");
+BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index a690f5a68b6b..f7573708a0c3 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -6,8 +6,8 @@ def_tests=( \
usermode-count kernel-count syscall-count \
fentry fexit fmodret \
rawtp tp \
- kprobe kprobe-multi \
- kretprobe kretprobe-multi \
+ kprobe kprobe-multi kprobe-multi-all \
+ kretprobe kretprobe-multi kretprobe-multi-all \
)
tests=("$@")
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 044a6d78923e..3d5f30c29ae3 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -97,6 +97,12 @@ int bench_trigger_kprobe_multi(void *ctx)
return 0;
}
+SEC("?kprobe.multi/bpf_get_numa_node_id")
+int bench_kprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
SEC("?kretprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kretprobe_multi(void *ctx)
{
@@ -104,6 +110,12 @@ int bench_trigger_kretprobe_multi(void *ctx)
return 0;
}
+SEC("?kretprobe.multi/bpf_get_numa_node_id")
+int bench_kretprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
SEC("?fentry/bpf_get_numa_node_id")
int bench_trigger_fentry(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 9da9da51b132..78cf1aab09d8 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -575,6 +575,9 @@ static bool skip_entry(char *name)
if (!strcmp(name, "__rcu_read_unlock"))
return true;
+ if (!strcmp(name, "bpf_get_numa_node_id"))
+ return true;
+
return false;
}
--
2.50.1