linux-trace-kernel.vger.kernel.org archive mirror
* [PATCH v2 0/2] tracing: fprobe: optimization for entry only case
@ 2025-10-10  3:38 Menglong Dong
  2025-10-10  3:38 ` [PATCH v2 1/2] " Menglong Dong
  2025-10-10  3:38 ` [PATCH v2 2/2] lib/test_fprobe: add testcase for mixed fprobe Menglong Dong
  0 siblings, 2 replies; 7+ messages in thread
From: Menglong Dong @ 2025-10-10  3:38 UTC (permalink / raw)
  To: mhiramat
  Cc: rostedt, mathieu.desnoyers, jiang.biao, linux-kernel,
	linux-trace-kernel, bpf

The first patch optimizes fprobe for the entry-only case.
The second patch adds a testcase for mixed fprobes.

This series is based on the "for-next" branch of the "linux-trace" tree.

Changes since V1:
* merge the rename of fprobe_entry into the first patch
* add documentation for fprobe_fgraph_entry as Masami suggested
* use ftrace_test_recursion_trylock() in fprobe_ftrace_entry()
* add a testcase for mixed fprobes

Menglong Dong (2):
  tracing: fprobe: optimization for entry only case
  lib/test_fprobe: add testcase for mixed fprobe

 kernel/trace/fprobe.c   | 104 +++++++++++++++++++++++++++++++++++-----
 lib/tests/test_fprobe.c |  99 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 191 insertions(+), 12 deletions(-)

-- 
2.51.0



* [PATCH v2 1/2] tracing: fprobe: optimization for entry only case
  2025-10-10  3:38 [PATCH v2 0/2] tracing: fprobe: optimization for entry only case Menglong Dong
@ 2025-10-10  3:38 ` Menglong Dong
  2025-10-12  4:07   ` Masami Hiramatsu
  2025-10-14 14:51   ` Masami Hiramatsu
  2025-10-10  3:38 ` [PATCH v2 2/2] lib/test_fprobe: add testcase for mixed fprobe Menglong Dong
  1 sibling, 2 replies; 7+ messages in thread
From: Menglong Dong @ 2025-10-10  3:38 UTC (permalink / raw)
  To: mhiramat
  Cc: rostedt, mathieu.desnoyers, jiang.biao, linux-kernel,
	linux-trace-kernel, bpf

For now, fgraph is used for the fprobe even if we need to trace the entry
only. However, the performance of ftrace is better than fgraph, and we
can use ftrace_ops for this case.

The performance of kprobe-multi then increases from 54M/s to 69M/s. Before this
commit:

  $ ./benchs/run_bench_trigger.sh kprobe-multi
  kprobe-multi   :   54.663 ± 0.493M/s

After this commit:

  $ ./benchs/run_bench_trigger.sh kprobe-multi
  kprobe-multi   :   69.447 ± 0.143M/s

Mitigations are disabled during the benchmark testing above.
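
For reference, the "entry only" case means an fprobe that sets only an
entry_handler, for example (a minimal sketch using the existing fprobe
API; the handler name and the probed symbol are just placeholders):

  static int my_entry(struct fprobe *fp, unsigned long ip,
                      unsigned long ret_ip, struct ftrace_regs *fregs,
                      void *data)
  {
          /* entry-only work goes here */
          return 0;
  }

  static struct fprobe my_fp = {
          .entry_handler = my_entry,
          /* no .exit_handler, so the ftrace_ops path is taken */
  };

  /* register_fprobe(&my_fp, "vfs_read", NULL); */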

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
v2:
- add documentation for fprobe_fgraph_entry as Masami suggested
- merge the rename of fprobe_entry into this patch
- use ftrace_test_recursion_trylock() in fprobe_ftrace_entry()
---
 kernel/trace/fprobe.c | 104 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 93 insertions(+), 11 deletions(-)

diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 99d83c08b9e2..bb02d6d09d6a 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -254,8 +254,9 @@ static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent
 	return ret;
 }
 
-static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
-			struct ftrace_regs *fregs)
+/* fgraph_ops callback, this processes fprobes which have exit_handler. */
+static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
+			       struct ftrace_regs *fregs)
 {
 	unsigned long *fgraph_data = NULL;
 	unsigned long func = trace->func;
@@ -292,7 +293,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
 				if (node->addr != func)
 					continue;
 				fp = READ_ONCE(node->fp);
-				if (fp && !fprobe_disabled(fp))
+				if (fp && !fprobe_disabled(fp) && fp->exit_handler)
 					fp->nmissed++;
 			}
 			return 0;
@@ -312,11 +313,11 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
 		if (node->addr != func)
 			continue;
 		fp = READ_ONCE(node->fp);
-		if (!fp || fprobe_disabled(fp))
+		if (unlikely(!fp || fprobe_disabled(fp) || !fp->exit_handler))
 			continue;
 
 		data_size = fp->entry_data_size;
-		if (data_size && fp->exit_handler)
+		if (data_size)
 			data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
 		else
 			data = NULL;
@@ -327,7 +328,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
 			ret = __fprobe_handler(func, ret_ip, fp, fregs, data);
 
 		/* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */
-		if (!ret && fp->exit_handler) {
+		if (!ret) {
 			int size_words = SIZE_IN_LONG(data_size);
 
 			if (write_fprobe_header(&fgraph_data[used], fp, size_words))
@@ -340,7 +341,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
 	/* If any exit_handler is set, data must be used. */
 	return used != 0;
 }
-NOKPROBE_SYMBOL(fprobe_entry);
+NOKPROBE_SYMBOL(fprobe_fgraph_entry);
 
 static void fprobe_return(struct ftrace_graph_ret *trace,
 			  struct fgraph_ops *gops,
@@ -379,11 +380,82 @@ static void fprobe_return(struct ftrace_graph_ret *trace,
 NOKPROBE_SYMBOL(fprobe_return);
 
 static struct fgraph_ops fprobe_graph_ops = {
-	.entryfunc	= fprobe_entry,
+	.entryfunc	= fprobe_fgraph_entry,
 	.retfunc	= fprobe_return,
 };
 static int fprobe_graph_active;
 
+/* ftrace_ops callback, this processes fprobes which have only entry_handler. */
+static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
+	struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+	struct fprobe_hlist_node *node;
+	struct rhlist_head *head, *pos;
+	struct fprobe *fp;
+	int bit;
+
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
+	if (bit < 0)
+		return;
+
+	rcu_read_lock();
+	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
+
+	rhl_for_each_entry_rcu(node, pos, head, hlist) {
+		if (node->addr != ip)
+			break;
+		fp = READ_ONCE(node->fp);
+		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
+			continue;
+
+		if (fprobe_shared_with_kprobes(fp))
+			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
+		else
+			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
+	}
+	rcu_read_unlock();
+	ftrace_test_recursion_unlock(bit);
+}
+NOKPROBE_SYMBOL(fprobe_ftrace_entry);
+
+static struct ftrace_ops fprobe_ftrace_ops = {
+	.func	= fprobe_ftrace_entry,
+	.flags	= FTRACE_OPS_FL_SAVE_REGS,
+};
+static int fprobe_ftrace_active;
+
+static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
+{
+	int ret;
+
+	lockdep_assert_held(&fprobe_mutex);
+
+	ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
+	if (ret)
+		return ret;
+
+	if (!fprobe_ftrace_active) {
+		ret = register_ftrace_function(&fprobe_ftrace_ops);
+		if (ret) {
+			ftrace_free_filter(&fprobe_ftrace_ops);
+			return ret;
+		}
+	}
+	fprobe_ftrace_active++;
+	return 0;
+}
+
+static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
+{
+	lockdep_assert_held(&fprobe_mutex);
+
+	fprobe_ftrace_active--;
+	if (!fprobe_ftrace_active)
+		unregister_ftrace_function(&fprobe_ftrace_ops);
+	if (num)
+		ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
+}
+
 /* Add @addrs to the ftrace filter and register fgraph if needed. */
 static int fprobe_graph_add_ips(unsigned long *addrs, int num)
 {
@@ -498,9 +570,12 @@ static int fprobe_module_callback(struct notifier_block *nb,
 	} while (node == ERR_PTR(-EAGAIN));
 	rhashtable_walk_exit(&iter);
 
-	if (alist.index > 0)
+	if (alist.index > 0) {
 		ftrace_set_filter_ips(&fprobe_graph_ops.ops,
 				      alist.addrs, alist.index, 1, 0);
+		ftrace_set_filter_ips(&fprobe_ftrace_ops,
+				      alist.addrs, alist.index, 1, 0);
+	}
 	mutex_unlock(&fprobe_mutex);
 
 	kfree(alist.addrs);
@@ -733,7 +808,11 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
 	mutex_lock(&fprobe_mutex);
 
 	hlist_array = fp->hlist_array;
-	ret = fprobe_graph_add_ips(addrs, num);
+	if (fp->exit_handler)
+		ret = fprobe_graph_add_ips(addrs, num);
+	else
+		ret = fprobe_ftrace_add_ips(addrs, num);
+
 	if (!ret) {
 		add_fprobe_hash(fp);
 		for (i = 0; i < hlist_array->size; i++) {
@@ -829,7 +908,10 @@ int unregister_fprobe(struct fprobe *fp)
 	}
 	del_fprobe_hash(fp);
 
-	fprobe_graph_remove_ips(addrs, count);
+	if (fp->exit_handler)
+		fprobe_graph_remove_ips(addrs, count);
+	else
+		fprobe_ftrace_remove_ips(addrs, count);
 
 	kfree_rcu(hlist_array, rcu);
 	fp->hlist_array = NULL;
-- 
2.51.0



* [PATCH v2 2/2] lib/test_fprobe: add testcase for mixed fprobe
  2025-10-10  3:38 [PATCH v2 0/2] tracing: fprobe: optimization for entry only case Menglong Dong
  2025-10-10  3:38 ` [PATCH v2 1/2] " Menglong Dong
@ 2025-10-10  3:38 ` Menglong Dong
  1 sibling, 0 replies; 7+ messages in thread
From: Menglong Dong @ 2025-10-10  3:38 UTC (permalink / raw)
  To: mhiramat
  Cc: rostedt, mathieu.desnoyers, jiang.biao, linux-kernel,
	linux-trace-kernel, bpf

Add a testcase for fprobe which hooks the same target with two fprobes:
one entry-only and one entry+exit. The two fprobes are registered in
different orders.

fgraph and ftrace are now both used for fprobes, and this testcase covers
the mixed situation.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
 lib/tests/test_fprobe.c | 99 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 98 insertions(+), 1 deletion(-)

diff --git a/lib/tests/test_fprobe.c b/lib/tests/test_fprobe.c
index cf92111b5c79..108c7aa33cb4 100644
--- a/lib/tests/test_fprobe.c
+++ b/lib/tests/test_fprobe.c
@@ -12,7 +12,8 @@
 
 static struct kunit *current_test;
 
-static u32 rand1, entry_val, exit_val;
+static u32 rand1, entry_only_val, entry_val, exit_val;
+static u32 entry_only_count, entry_count, exit_count;
 
 /* Use indirect calls to avoid inlining the target functions */
 static u32 (*target)(u32 value);
@@ -190,6 +191,101 @@ static void test_fprobe_skip(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, 0, unregister_fprobe(&fp));
 }
 
+/* Handler for fprobe entry only case */
+static notrace int entry_only_handler(struct fprobe *fp, unsigned long ip,
+				      unsigned long ret_ip,
+				      struct ftrace_regs *fregs, void *data)
+{
+	KUNIT_EXPECT_FALSE(current_test, preemptible());
+	KUNIT_EXPECT_EQ(current_test, ip, target_ip);
+
+	entry_only_count++;
+	entry_only_val = (rand1 / div_factor);
+
+	return 0;
+}
+
+static notrace int fprobe_entry_multi_handler(struct fprobe *fp, unsigned long ip,
+					      unsigned long ret_ip,
+					      struct ftrace_regs *fregs,
+					      void *data)
+{
+	KUNIT_EXPECT_FALSE(current_test, preemptible());
+	KUNIT_EXPECT_EQ(current_test, ip, target_ip);
+
+	entry_count++;
+	entry_val = (rand1 / div_factor);
+
+	return 0;
+}
+
+static notrace void fprobe_exit_multi_handler(struct fprobe *fp, unsigned long ip,
+					      unsigned long ret_ip,
+					      struct ftrace_regs *fregs,
+					      void *data)
+{
+	unsigned long ret = ftrace_regs_get_return_value(fregs);
+
+	KUNIT_EXPECT_FALSE(current_test, preemptible());
+	KUNIT_EXPECT_EQ(current_test, ip, target_ip);
+	KUNIT_EXPECT_EQ(current_test, ret, (rand1 / div_factor));
+
+	exit_count++;
+	exit_val = ret;
+}
+
+static void check_fprobe_multi(struct kunit *test)
+{
+	entry_only_count = entry_count = exit_count = 0;
+	entry_only_val = entry_val = exit_val = 0;
+
+	target(rand1);
+
+	/* Verify all handlers were called */
+	KUNIT_EXPECT_EQ(test, 1, entry_only_count);
+	KUNIT_EXPECT_EQ(test, 1, entry_count);
+	KUNIT_EXPECT_EQ(test, 1, exit_count);
+
+	/* Verify values are correct */
+	KUNIT_EXPECT_EQ(test, (rand1 / div_factor), entry_only_val);
+	KUNIT_EXPECT_EQ(test, (rand1 / div_factor), entry_val);
+	KUNIT_EXPECT_EQ(test, (rand1 / div_factor), exit_val);
+}
+
+/* Test multiple fprobes hooking the same target function */
+static void test_fprobe_multi(struct kunit *test)
+{
+	struct fprobe fp1 = {
+		.entry_handler = fprobe_entry_multi_handler,
+		.exit_handler = fprobe_exit_multi_handler,
+	};
+	struct fprobe fp2 = {
+		.entry_handler = entry_only_handler,
+	};
+
+	current_test = test;
+
+	/* Test Case 1: Register in order 1 -> 2 */
+	KUNIT_EXPECT_EQ(test, 0, register_fprobe(&fp1, "fprobe_selftest_target", NULL));
+	KUNIT_EXPECT_EQ(test, 0, register_fprobe(&fp2, "fprobe_selftest_target", NULL));
+
+	check_fprobe_multi(test);
+
+	/* Unregister all */
+	KUNIT_EXPECT_EQ(test, 0, unregister_fprobe(&fp1));
+	KUNIT_EXPECT_EQ(test, 0, unregister_fprobe(&fp2));
+
+	/* Test Case 2: Register in order 2 -> 1 */
+	KUNIT_EXPECT_EQ(test, 0, register_fprobe(&fp2, "fprobe_selftest_target", NULL));
+	KUNIT_EXPECT_EQ(test, 0, register_fprobe(&fp1, "fprobe_selftest_target", NULL));
+
+	check_fprobe_multi(test);
+
+	/* Unregister all */
+	KUNIT_EXPECT_EQ(test, 0, unregister_fprobe(&fp1));
+	KUNIT_EXPECT_EQ(test, 0, unregister_fprobe(&fp2));
+}
+
 static unsigned long get_ftrace_location(void *func)
 {
 	unsigned long size, addr = (unsigned long)func;
@@ -217,6 +313,7 @@ static struct kunit_case fprobe_testcases[] = {
 	KUNIT_CASE(test_fprobe_syms),
 	KUNIT_CASE(test_fprobe_data),
 	KUNIT_CASE(test_fprobe_skip),
+	KUNIT_CASE(test_fprobe_multi),
 	{}
 };
 
-- 
2.51.0



* Re: [PATCH v2 1/2] tracing: fprobe: optimization for entry only case
  2025-10-10  3:38 ` [PATCH v2 1/2] " Menglong Dong
@ 2025-10-12  4:07   ` Masami Hiramatsu
  2025-10-13  1:20     ` Menglong Dong
  2025-10-14 14:51   ` Masami Hiramatsu
  1 sibling, 1 reply; 7+ messages in thread
From: Masami Hiramatsu @ 2025-10-12  4:07 UTC (permalink / raw)
  To: Menglong Dong
  Cc: rostedt, mathieu.desnoyers, jiang.biao, linux-kernel,
	linux-trace-kernel, bpf

Hi Menglong,

On Fri, 10 Oct 2025 11:38:46 +0800
Menglong Dong <menglong8.dong@gmail.com> wrote:

> For now, fgraph is used for the fprobe even if we need to trace the entry
> only. However, the performance of ftrace is better than fgraph, and we
> can use ftrace_ops for this case.
> 
> The performance of kprobe-multi then increases from 54M/s to 69M/s. Before this
> commit:
> 
>   $ ./benchs/run_bench_trigger.sh kprobe-multi
>   kprobe-multi   :   54.663 ± 0.493M/s
> 
> After this commit:
> 
>   $ ./benchs/run_bench_trigger.sh kprobe-multi
>   kprobe-multi   :   69.447 ± 0.143M/s
> 
> Mitigations are disabled during the benchmark testing above.
> 

Thanks for updating!

This looks good to me. Just a nit comment below;

[...]
> @@ -379,11 +380,82 @@ static void fprobe_return(struct ftrace_graph_ret *trace,
>  NOKPROBE_SYMBOL(fprobe_return);
>  
>  static struct fgraph_ops fprobe_graph_ops = {
> -	.entryfunc	= fprobe_entry,
> +	.entryfunc	= fprobe_fgraph_entry,
>  	.retfunc	= fprobe_return,
>  };
>  static int fprobe_graph_active;
>  
> +/* ftrace_ops callback, this processes fprobes which have only entry_handler. */
> +static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
> +	struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +{
> +	struct fprobe_hlist_node *node;
> +	struct rhlist_head *head, *pos;
> +	struct fprobe *fp;
> +	int bit;
> +
> +	bit = ftrace_test_recursion_trylock(ip, parent_ip);
> +	if (bit < 0)
> +		return;
> +

nit: We'd better explain why we need this here too:

	/*
	 * ftrace_test_recursion_trylock() disables preemption, but
	 * rhltable_lookup() checks whether rcu_read_lock is held.
	 * So we take rcu_read_lock() here.
	 */

> +	rcu_read_lock();
> +	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
> +
> +	rhl_for_each_entry_rcu(node, pos, head, hlist) {
> +		if (node->addr != ip)
> +			break;
> +		fp = READ_ONCE(node->fp);
> +		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
> +			continue;
> +
> +		if (fprobe_shared_with_kprobes(fp))
> +			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
> +		else
> +			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
> +	}
> +	rcu_read_unlock();
> +	ftrace_test_recursion_unlock(bit);
> +}
> +NOKPROBE_SYMBOL(fprobe_ftrace_entry);

Thank you,

> +
> +static struct ftrace_ops fprobe_ftrace_ops = {
> +	.func	= fprobe_ftrace_entry,
> +	.flags	= FTRACE_OPS_FL_SAVE_REGS,
> +};
> +static int fprobe_ftrace_active;
> +
> +static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
> +{
> +	int ret;
> +
> +	lockdep_assert_held(&fprobe_mutex);
> +
> +	ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
> +	if (ret)
> +		return ret;
> +
> +	if (!fprobe_ftrace_active) {
> +		ret = register_ftrace_function(&fprobe_ftrace_ops);
> +		if (ret) {
> +			ftrace_free_filter(&fprobe_ftrace_ops);
> +			return ret;
> +		}
> +	}
> +	fprobe_ftrace_active++;
> +	return 0;
> +}
> +
> +static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
> +{
> +	lockdep_assert_held(&fprobe_mutex);
> +
> +	fprobe_ftrace_active--;
> +	if (!fprobe_ftrace_active)
> +		unregister_ftrace_function(&fprobe_ftrace_ops);
> +	if (num)
> +		ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
> +}
> +
>  /* Add @addrs to the ftrace filter and register fgraph if needed. */
>  static int fprobe_graph_add_ips(unsigned long *addrs, int num)
>  {
> @@ -498,9 +570,12 @@ static int fprobe_module_callback(struct notifier_block *nb,
>  	} while (node == ERR_PTR(-EAGAIN));
>  	rhashtable_walk_exit(&iter);
>  
> -	if (alist.index > 0)
> +	if (alist.index > 0) {
>  		ftrace_set_filter_ips(&fprobe_graph_ops.ops,
>  				      alist.addrs, alist.index, 1, 0);
> +		ftrace_set_filter_ips(&fprobe_ftrace_ops,
> +				      alist.addrs, alist.index, 1, 0);
> +	}
>  	mutex_unlock(&fprobe_mutex);
>  
>  	kfree(alist.addrs);
> @@ -733,7 +808,11 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
>  	mutex_lock(&fprobe_mutex);
>  
>  	hlist_array = fp->hlist_array;
> -	ret = fprobe_graph_add_ips(addrs, num);
> +	if (fp->exit_handler)
> +		ret = fprobe_graph_add_ips(addrs, num);
> +	else
> +		ret = fprobe_ftrace_add_ips(addrs, num);
> +
>  	if (!ret) {
>  		add_fprobe_hash(fp);
>  		for (i = 0; i < hlist_array->size; i++) {
> @@ -829,7 +908,10 @@ int unregister_fprobe(struct fprobe *fp)
>  	}
>  	del_fprobe_hash(fp);
>  
> -	fprobe_graph_remove_ips(addrs, count);
> +	if (fp->exit_handler)
> +		fprobe_graph_remove_ips(addrs, count);
> +	else
> +		fprobe_ftrace_remove_ips(addrs, count);
>  
>  	kfree_rcu(hlist_array, rcu);
>  	fp->hlist_array = NULL;
> -- 
> 2.51.0
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>


* Re: [PATCH v2 1/2] tracing: fprobe: optimization for entry only case
  2025-10-12  4:07   ` Masami Hiramatsu
@ 2025-10-13  1:20     ` Menglong Dong
  0 siblings, 0 replies; 7+ messages in thread
From: Menglong Dong @ 2025-10-13  1:20 UTC (permalink / raw)
  To: Menglong Dong, Masami Hiramatsu
  Cc: rostedt, mathieu.desnoyers, jiang.biao, linux-kernel,
	linux-trace-kernel, bpf

On 2025/10/12 12:07, Masami Hiramatsu wrote:
> Hi Menglong,
> 
> On Fri, 10 Oct 2025 11:38:46 +0800
> Menglong Dong <menglong8.dong@gmail.com> wrote:
> 
> > For now, fgraph is used for the fprobe even if we need to trace the entry
> > only. However, the performance of ftrace is better than fgraph, and we
> > can use ftrace_ops for this case.
> > 
> > The performance of kprobe-multi then increases from 54M/s to 69M/s. Before this
> > commit:
> > 
> >   $ ./benchs/run_bench_trigger.sh kprobe-multi
> >   kprobe-multi   :   54.663 ± 0.493M/s
> > 
> > After this commit:
> > 
> >   $ ./benchs/run_bench_trigger.sh kprobe-multi
> >   kprobe-multi   :   69.447 ± 0.143M/s
> > 
> > Mitigations are disabled during the benchmark testing above.
> > 
> 
> Thanks for updating!
> 
> This looks good to me. Just a nit comment below;
> 
> [...]
> > @@ -379,11 +380,82 @@ static void fprobe_return(struct ftrace_graph_ret *trace,
> >  NOKPROBE_SYMBOL(fprobe_return);
> >  
> >  static struct fgraph_ops fprobe_graph_ops = {
> > -	.entryfunc	= fprobe_entry,
> > +	.entryfunc	= fprobe_fgraph_entry,
> >  	.retfunc	= fprobe_return,
> >  };
> >  static int fprobe_graph_active;
> >  
> > +/* ftrace_ops callback, this processes fprobes which have only entry_handler. */
> > +static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
> > +	struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > +{
> > +	struct fprobe_hlist_node *node;
> > +	struct rhlist_head *head, *pos;
> > +	struct fprobe *fp;
> > +	int bit;
> > +
> > +	bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > +	if (bit < 0)
> > +		return;
> > +
> 
> nit: We'd better explain why we need this here too:
> 
> 	/*
> 	 * ftrace_test_recursion_trylock() disables preemption, but
> 	 * rhltable_lookup() checks whether rcu_read_lock is held.
> 	 * So we take rcu_read_lock() here.
> 	 */

It's very nice! I'll send a V3 now.

Thanks!
Menglong Dong

> 
> > +	rcu_read_lock();
> > +	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
> > +
> > +	rhl_for_each_entry_rcu(node, pos, head, hlist) {
> > +		if (node->addr != ip)
> > +			break;
> > +		fp = READ_ONCE(node->fp);
> > +		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
> > +			continue;
> > +
> > +		if (fprobe_shared_with_kprobes(fp))
> > +			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
> > +		else
> > +			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
> > +	}
> > +	rcu_read_unlock();
> > +	ftrace_test_recursion_unlock(bit);
> > +}
> > +NOKPROBE_SYMBOL(fprobe_ftrace_entry);
> 
> Thank you,
> 
> > +
> > +static struct ftrace_ops fprobe_ftrace_ops = {
> > +	.func	= fprobe_ftrace_entry,
> > +	.flags	= FTRACE_OPS_FL_SAVE_REGS,
> > +};
> > +static int fprobe_ftrace_active;
> > +
> > +static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
> > +{
> > +	int ret;
> > +
> > +	lockdep_assert_held(&fprobe_mutex);
> > +
> > +	ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (!fprobe_ftrace_active) {
> > +		ret = register_ftrace_function(&fprobe_ftrace_ops);
> > +		if (ret) {
> > +			ftrace_free_filter(&fprobe_ftrace_ops);
> > +			return ret;
> > +		}
> > +	}
> > +	fprobe_ftrace_active++;
> > +	return 0;
> > +}
> > +
> > +static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
> > +{
> > +	lockdep_assert_held(&fprobe_mutex);
> > +
> > +	fprobe_ftrace_active--;
> > +	if (!fprobe_ftrace_active)
> > +		unregister_ftrace_function(&fprobe_ftrace_ops);
> > +	if (num)
> > +		ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
> > +}
> > +
> >  /* Add @addrs to the ftrace filter and register fgraph if needed. */
> >  static int fprobe_graph_add_ips(unsigned long *addrs, int num)
> >  {
> > @@ -498,9 +570,12 @@ static int fprobe_module_callback(struct notifier_block *nb,
> >  	} while (node == ERR_PTR(-EAGAIN));
> >  	rhashtable_walk_exit(&iter);
> >  
> > -	if (alist.index > 0)
> > +	if (alist.index > 0) {
> >  		ftrace_set_filter_ips(&fprobe_graph_ops.ops,
> >  				      alist.addrs, alist.index, 1, 0);
> > +		ftrace_set_filter_ips(&fprobe_ftrace_ops,
> > +				      alist.addrs, alist.index, 1, 0);
> > +	}
> >  	mutex_unlock(&fprobe_mutex);
> >  
> >  	kfree(alist.addrs);
> > @@ -733,7 +808,11 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
> >  	mutex_lock(&fprobe_mutex);
> >  
> >  	hlist_array = fp->hlist_array;
> > -	ret = fprobe_graph_add_ips(addrs, num);
> > +	if (fp->exit_handler)
> > +		ret = fprobe_graph_add_ips(addrs, num);
> > +	else
> > +		ret = fprobe_ftrace_add_ips(addrs, num);
> > +
> >  	if (!ret) {
> >  		add_fprobe_hash(fp);
> >  		for (i = 0; i < hlist_array->size; i++) {
> > @@ -829,7 +908,10 @@ int unregister_fprobe(struct fprobe *fp)
> >  	}
> >  	del_fprobe_hash(fp);
> >  
> > -	fprobe_graph_remove_ips(addrs, count);
> > +	if (fp->exit_handler)
> > +		fprobe_graph_remove_ips(addrs, count);
> > +	else
> > +		fprobe_ftrace_remove_ips(addrs, count);
> >  
> >  	kfree_rcu(hlist_array, rcu);
> >  	fp->hlist_array = NULL;
> 
> 
> 






* Re: [PATCH v2 1/2] tracing: fprobe: optimization for entry only case
  2025-10-10  3:38 ` [PATCH v2 1/2] " Menglong Dong
  2025-10-12  4:07   ` Masami Hiramatsu
@ 2025-10-14 14:51   ` Masami Hiramatsu
  2025-10-15  7:25     ` Menglong Dong
  1 sibling, 1 reply; 7+ messages in thread
From: Masami Hiramatsu @ 2025-10-14 14:51 UTC (permalink / raw)
  To: Menglong Dong
  Cc: rostedt, mathieu.desnoyers, jiang.biao, linux-kernel,
	linux-trace-kernel, bpf

Hi Menglong,

I remember why I haven't implemented this.

On Fri, 10 Oct 2025 11:38:46 +0800
Menglong Dong <menglong8.dong@gmail.com> wrote:

> +
> +static struct ftrace_ops fprobe_ftrace_ops = {
> +	.func	= fprobe_ftrace_entry,
> +	.flags	= FTRACE_OPS_FL_SAVE_REGS,

Actually, this flag is the problem. This can make fprobe fail on
architectures which do not support CONFIG_DYNAMIC_FTRACE_WITH_REGS
(e.g. arm64, riscv).

 * SAVE_REGS - The ftrace_ops wants regs saved at each function called
 *            and passed to the callback. If this flag is set, but the
 *            architecture does not support passing regs
 *            (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the
 *            ftrace_ops will fail to register, unless the next flag
 *            is set.

fgraph has special entry code for saving ftrace_regs.
So at least we need to fall back to fgraph if the arch does not
support CONFIG_DYNAMIC_FTRACE_WITH_REGS.
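
Just to illustrate the idea (this is only a sketch, not code from the
patch, and the helper name is made up): the attach path could be chosen
so that entry-only fprobes use ftrace_ops only when the arch can
provide regs for it:

	/* Sketch: use the entry-only ftrace_ops path only when possible. */
	static bool fprobe_use_ftrace_ops(struct fprobe *fp)
	{
		if (fp->exit_handler)
			return false;
		/* FTRACE_OPS_FL_SAVE_REGS needs CONFIG_DYNAMIC_FTRACE_WITH_REGS. */
		return IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS);
	}

and then register_fprobe_ips()/unregister_fprobe() would branch on
fprobe_use_ftrace_ops(fp) instead of fp->exit_handler.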

Thank you,

-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>


* Re: [PATCH v2 1/2] tracing: fprobe: optimization for entry only case
  2025-10-14 14:51   ` Masami Hiramatsu
@ 2025-10-15  7:25     ` Menglong Dong
  0 siblings, 0 replies; 7+ messages in thread
From: Menglong Dong @ 2025-10-15  7:25 UTC (permalink / raw)
  To: Menglong Dong, Masami Hiramatsu
  Cc: rostedt, mathieu.desnoyers, jiang.biao, linux-kernel,
	linux-trace-kernel, bpf

On 2025/10/14 22:51, Masami Hiramatsu wrote:
> Hi Menglong,
> 
> I remember why I haven't implemented this.
> 
> On Fri, 10 Oct 2025 11:38:46 +0800
> Menglong Dong <menglong8.dong@gmail.com> wrote:
> 
> > +
> > +static struct ftrace_ops fprobe_ftrace_ops = {
> > +	.func	= fprobe_ftrace_entry,
> > +	.flags	= FTRACE_OPS_FL_SAVE_REGS,
> 
> Actually, this flag is the problem. This can make fprobe fail on
> architectures which do not support CONFIG_DYNAMIC_FTRACE_WITH_REGS
> (e.g. arm64, riscv).
> 
>  * SAVE_REGS - The ftrace_ops wants regs saved at each function called
>  *            and passed to the callback. If this flag is set, but the
>  *            architecture does not support passing regs
>  *            (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the
>  *            ftrace_ops will fail to register, unless the next flag
>  *            is set.
> 
> fgraph has special entry code for saving ftrace_regs.
> So at least we need to fall back to fgraph if the arch does not
> support CONFIG_DYNAMIC_FTRACE_WITH_REGS.

Ah, I have been working on x86_64 and didn't notice it. You are
right, we do need a fallback if CONFIG_DYNAMIC_FTRACE_WITH_REGS is
not supported. I'll send a V4 later.

BTW, is FTRACE_OPS_FL_SAVE_REGS necessary here? I guess not all
architectures save the function argument regs in fentry_caller()
like x86_64 does; that's why we need it here :/

Thanks!
Menglong Dong

> 
> Thank you,
> 
> 





