From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
To: Menglong Dong <menglong8.dong@gmail.com>
Cc: rostedt@goodmis.org, mathieu.desnoyers@efficios.com,
linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
bpf@vger.kernel.org
Subject: Re: [PATCH 2/2] tracing: fprobe: optimization for entry only case
Date: Wed, 24 Sep 2025 09:23:14 +0900 [thread overview]
Message-ID: <20250924092314.4b790ff9fbdb7693717669c2@kernel.org> (raw)
In-Reply-To: <20250923092001.1087678-2-dongml2@chinatelecom.cn>
Hi Menglong,
Please add a cover letter if you make a series of patches.
On Tue, 23 Sep 2025 17:20:01 +0800
Menglong Dong <menglong8.dong@gmail.com> wrote:
> For now, fgraph is used for the fprobe, even if we need trace the entry
> only. However, the performance of ftrace is better than fgraph, and we
> can use ftrace_ops for this case.
>
> Then performance of kprobe-multi increases from 54M to 69M. Before this
> commit:
>
> $ ./benchs/run_bench_trigger.sh kprobe-multi
> kprobe-multi : 54.663 ± 0.493M/s
>
> After this commit:
>
> $ ./benchs/run_bench_trigger.sh kprobe-multi
> kprobe-multi : 69.447 ± 0.143M/s
>
> Mitigation is disable during the bench testing above.
Hmm, indeed. If it is used only for entry, it can use ftrace.
Also, please merge [1/2] and [2/2]. [1/2] is meaningless (and do
nothing) without this change. Moreover, it changes the same file.
You can split the patch if "that cleanup is meaningful independently"
or "that changes different subsystem/component (thus you need an Ack
from another maintainer)".
But basically looks good to me. Just have some nits.
>
> Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
> ---
> kernel/trace/fprobe.c | 88 +++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 81 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> index 1785fba367c9..de4ae075548d 100644
> --- a/kernel/trace/fprobe.c
> +++ b/kernel/trace/fprobe.c
> @@ -292,7 +292,7 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
> if (node->addr != func)
> continue;
> fp = READ_ONCE(node->fp);
> - if (fp && !fprobe_disabled(fp))
> + if (fp && !fprobe_disabled(fp) && fp->exit_handler)
> fp->nmissed++;
> }
> return 0;
> @@ -312,11 +312,11 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
> if (node->addr != func)
> continue;
> fp = READ_ONCE(node->fp);
> - if (!fp || fprobe_disabled(fp))
> + if (unlikely(!fp || fprobe_disabled(fp) || !fp->exit_handler))
> continue;
>
> data_size = fp->entry_data_size;
> - if (data_size && fp->exit_handler)
> + if (data_size)
> data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
> else
> data = NULL;
> @@ -327,7 +327,7 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
> ret = __fprobe_handler(func, ret_ip, fp, fregs, data);
>
> /* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */
> - if (!ret && fp->exit_handler) {
> + if (!ret) {
> int size_words = SIZE_IN_LONG(data_size);
>
> if (write_fprobe_header(&fgraph_data[used], fp, size_words))
> @@ -384,6 +384,70 @@ static struct fgraph_ops fprobe_graph_ops = {
> };
> static int fprobe_graph_active;
>
> +/* ftrace_ops backend (entry-only) */
^ callback ?
Also, add similar comments on top of fprobe_fgraph_entry.
/* fgraph_ops callback, this processes fprobes which have exit_handler. */
> +static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
> + struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +{
> + struct fprobe_hlist_node *node;
> + struct rhlist_head *head, *pos;
> + struct fprobe *fp;
> +
> + guard(rcu)();
> + head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
> +
> + rhl_for_each_entry_rcu(node, pos, head, hlist) {
> + if (node->addr != ip)
> + break;
> + fp = READ_ONCE(node->fp);
> + if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
> + continue;
> + /* entry-only path: no exit_handler nor per-call data */
> + if (fprobe_shared_with_kprobes(fp))
> + __fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
> + else
> + __fprobe_handler(ip, parent_ip, fp, fregs, NULL);
> + }
> +}
> +NOKPROBE_SYMBOL(fprobe_ftrace_entry);
OK.
> +
> +static struct ftrace_ops fprobe_ftrace_ops = {
> + .func = fprobe_ftrace_entry,
> + .flags = FTRACE_OPS_FL_SAVE_REGS,
[OT] I just wonder we can have FTRACE_OPS_FL_SAVE_FTRACE_REGS instead.
> +};
> +static int fprobe_ftrace_active;
> +
> +static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
> +{
> + int ret;
> +
> + lockdep_assert_held(&fprobe_mutex);
> +
> + ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
> + if (ret)
> + return ret;
> +
> + if (!fprobe_ftrace_active) {
> + ret = register_ftrace_function(&fprobe_ftrace_ops);
> + if (ret) {
> + ftrace_free_filter(&fprobe_ftrace_ops);
> + return ret;
> + }
> + }
> + fprobe_ftrace_active++;
> + return 0;
> +}
> +
> +static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
> +{
> + lockdep_assert_held(&fprobe_mutex);
> +
> + fprobe_ftrace_active--;
> + if (!fprobe_ftrace_active)
> + unregister_ftrace_function(&fprobe_ftrace_ops);
> + if (num)
> + ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
> +}
> +
> /* Add @addrs to the ftrace filter and register fgraph if needed. */
> static int fprobe_graph_add_ips(unsigned long *addrs, int num)
> {
> @@ -500,9 +564,12 @@ static int fprobe_module_callback(struct notifier_block *nb,
> } while (node == ERR_PTR(-EAGAIN));
> rhashtable_walk_exit(&iter);
>
> - if (alist.index < alist.size && alist.index > 0)
> + if (alist.index < alist.size && alist.index > 0) {
Oops, here is my bug. Let me fix it.
Thank you,
> ftrace_set_filter_ips(&fprobe_graph_ops.ops,
> alist.addrs, alist.index, 1, 0);
> + ftrace_set_filter_ips(&fprobe_ftrace_ops,
> + alist.addrs, alist.index, 1, 0);
> + }
> mutex_unlock(&fprobe_mutex);
>
> kfree(alist.addrs);
> @@ -735,7 +802,11 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
> mutex_lock(&fprobe_mutex);
>
> hlist_array = fp->hlist_array;
> - ret = fprobe_graph_add_ips(addrs, num);
> + if (fp->exit_handler)
> + ret = fprobe_graph_add_ips(addrs, num);
> + else
> + ret = fprobe_ftrace_add_ips(addrs, num);
> +
> if (!ret) {
> add_fprobe_hash(fp);
> for (i = 0; i < hlist_array->size; i++) {
> @@ -831,7 +902,10 @@ int unregister_fprobe(struct fprobe *fp)
> }
> del_fprobe_hash(fp);
>
> - fprobe_graph_remove_ips(addrs, count);
> + if (fp->exit_handler)
> + fprobe_graph_remove_ips(addrs, count);
> + else
> + fprobe_ftrace_remove_ips(addrs, count);
>
> kfree_rcu(hlist_array, rcu);
> fp->hlist_array = NULL;
> --
> 2.51.0
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
next prev parent reply other threads:[~2025-09-24 0:23 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-23 9:20 [PATCH 1/2] tracing: fprobe: rename fprobe_entry to fprobe_fgraph_entry Menglong Dong
2025-09-23 9:20 ` [PATCH 2/2] tracing: fprobe: optimization for entry only case Menglong Dong
2025-09-23 11:10 ` Jiri Olsa
2025-09-23 11:16 ` menglong.dong
2025-09-23 12:25 ` Jiri Olsa
2025-09-23 13:34 ` Menglong Dong
2025-09-23 16:25 ` Jiri Olsa
2025-09-24 0:23 ` Masami Hiramatsu [this message]
2025-09-24 1:33 ` menglong.dong
2025-09-23 9:38 ` [PATCH 1/2] tracing: fprobe: rename fprobe_entry to fprobe_fgraph_entry Steven Rostedt
2025-09-23 10:27 ` menglong.dong
2025-09-23 23:07 ` Masami Hiramatsu
2025-09-24 0:17 ` Menglong Dong
2025-09-24 8:13 ` Masami Hiramatsu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250924092314.4b790ff9fbdb7693717669c2@kernel.org \
--to=mhiramat@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=menglong8.dong@gmail.com \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).