All of lore.kernel.org
 help / color / mirror / Atom feed
From: menglong.dong@linux.dev
To: Masami Hiramatsu <mhiramat@kernel.org>
Cc: rostedt@goodmis.org, mathieu.desnoyers@efficios.com,
	linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
	bpf@vger.kernel.org
Subject: Re: [PATCH 2/2] tracing: fprobe: optimization for entry only case
Date: Wed, 24 Sep 2025 09:33:16 +0800	[thread overview]
Message-ID: <5014869.GXAFRqVoOG@7940hx> (raw)
In-Reply-To: <20250924092314.4b790ff9fbdb7693717669c2@kernel.org>

On 2025/9/24 08:23 Masami Hiramatsu <mhiramat@kernel.org> write:
> Hi Menglong,
> 
> Please add a cover letter if you make a series of patches.
> 
> On Tue, 23 Sep 2025 17:20:01 +0800
> Menglong Dong <menglong8.dong@gmail.com> wrote:
> 
> > For now, fgraph is used for the fprobe, even if we need trace the entry
> > only. However, the performance of ftrace is better than fgraph, and we
> > can use ftrace_ops for this case.
> > 
> > Then performance of kprobe-multi increases from 54M to 69M. Before this
> > commit:
> > 
> >   $ ./benchs/run_bench_trigger.sh kprobe-multi
> >   kprobe-multi   :   54.663 ± 0.493M/s
> > 
> > After this commit:
> > 
> >   $ ./benchs/run_bench_trigger.sh kprobe-multi
> >   kprobe-multi   :   69.447 ± 0.143M/s
> > 
> > Mitigation is disable during the bench testing above.
> 
> Hmm, indeed. If it is used only for entry, it can use ftrace.
> 
> Also, please merge [1/2] and [2/2]. [1/2] is meaningless (and do
> nothing) without this change. Moreover, it changes the same file.
> 
> You can split the patch if "that cleanup is meaningful independently"
> or "that changes different subsystem/component (thus you need an Ack
> from another maintainer)".

OK, I see now :)

> 
> But basically looks good to me. Just have some nits.
> 
> > 
> > Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
> > ---
> >  kernel/trace/fprobe.c | 88 +++++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 81 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> > index 1785fba367c9..de4ae075548d 100644
> > --- a/kernel/trace/fprobe.c
> > +++ b/kernel/trace/fprobe.c
> > @@ -292,7 +292,7 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
> >  				if (node->addr != func)
> >  					continue;
> >  				fp = READ_ONCE(node->fp);
> > -				if (fp && !fprobe_disabled(fp))
> > +				if (fp && !fprobe_disabled(fp) && fp->exit_handler)
> >  					fp->nmissed++;
> >  			}
> >  			return 0;
> > @@ -312,11 +312,11 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
> >  		if (node->addr != func)
> >  			continue;
> >  		fp = READ_ONCE(node->fp);
> > -		if (!fp || fprobe_disabled(fp))
> > +		if (unlikely(!fp || fprobe_disabled(fp) || !fp->exit_handler))
> >  			continue;
> >  
> >  		data_size = fp->entry_data_size;
> > -		if (data_size && fp->exit_handler)
> > +		if (data_size)
> >  			data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
> >  		else
> >  			data = NULL;
> > @@ -327,7 +327,7 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
> >  			ret = __fprobe_handler(func, ret_ip, fp, fregs, data);
> >  
> >  		/* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */
> > -		if (!ret && fp->exit_handler) {
> > +		if (!ret) {
> >  			int size_words = SIZE_IN_LONG(data_size);
> >  
> >  			if (write_fprobe_header(&fgraph_data[used], fp, size_words))
> > @@ -384,6 +384,70 @@ static struct fgraph_ops fprobe_graph_ops = {
> >  };
> >  static int fprobe_graph_active;
> >  
> 
> > +/* ftrace_ops backend (entry-only) */
>                  ^ callback ?

ACK

> 
> Also, add similar comments on top of fprobe_fgraph_entry. 
> 
> /* fgraph_ops callback, this processes fprobes which have exit_handler. */

ACK

> 
> > +static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
> > +	struct ftrace_ops *ops, struct ftrace_regs *fregs)
> > +{
> > +	struct fprobe_hlist_node *node;
> > +	struct rhlist_head *head, *pos;
> > +	struct fprobe *fp;
> > +
> > +	guard(rcu)();
> > +	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
> > +
> > +	rhl_for_each_entry_rcu(node, pos, head, hlist) {
> > +		if (node->addr != ip)
> > +			break;
> > +		fp = READ_ONCE(node->fp);
> > +		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
> > +			continue;
> > +		/* entry-only path: no exit_handler nor per-call data */
> > +		if (fprobe_shared_with_kprobes(fp))
> > +			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
> > +		else
> > +			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
> > +	}
> > +}
> > +NOKPROBE_SYMBOL(fprobe_ftrace_entry);
> 
> OK.
> 
> > +
> > +static struct ftrace_ops fprobe_ftrace_ops = {
> > +	.func	= fprobe_ftrace_entry,
> > +	.flags	= FTRACE_OPS_FL_SAVE_REGS,
> 
> [OT] I just wonder we can have FTRACE_OPS_FL_SAVE_FTRACE_REGS instead.

I'll give it a try.

Thanks!
Menglong Dong

> 
> > +};
> > +static int fprobe_ftrace_active;
> > +
> > +static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
> > +{
> > +	int ret;
> > +
> > +	lockdep_assert_held(&fprobe_mutex);
> > +
> > +	ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (!fprobe_ftrace_active) {
> > +		ret = register_ftrace_function(&fprobe_ftrace_ops);
> > +		if (ret) {
> > +			ftrace_free_filter(&fprobe_ftrace_ops);
> > +			return ret;
> > +		}
> > +	}
> > +	fprobe_ftrace_active++;
> > +	return 0;
> > +}
> > +
> > +static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
> > +{
> > +	lockdep_assert_held(&fprobe_mutex);
> > +
> > +	fprobe_ftrace_active--;
> > +	if (!fprobe_ftrace_active)
> > +		unregister_ftrace_function(&fprobe_ftrace_ops);
> > +	if (num)
> > +		ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
> > +}
> > +
> >  /* Add @addrs to the ftrace filter and register fgraph if needed. */
> >  static int fprobe_graph_add_ips(unsigned long *addrs, int num)
> >  {
> > @@ -500,9 +564,12 @@ static int fprobe_module_callback(struct notifier_block *nb,
> >  	} while (node == ERR_PTR(-EAGAIN));
> >  	rhashtable_walk_exit(&iter);
> >  
> > -	if (alist.index < alist.size && alist.index > 0)
> > +	if (alist.index < alist.size && alist.index > 0) {
> 
> Oops, here is my bug. Let me fix it.
> 
> Thank you,
> 
> >  		ftrace_set_filter_ips(&fprobe_graph_ops.ops,
> >  				      alist.addrs, alist.index, 1, 0);
> > +		ftrace_set_filter_ips(&fprobe_ftrace_ops,
> > +				      alist.addrs, alist.index, 1, 0);
> > +	}
> >  	mutex_unlock(&fprobe_mutex);
> >  
> >  	kfree(alist.addrs);
> > @@ -735,7 +802,11 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
> >  	mutex_lock(&fprobe_mutex);
> >  
> >  	hlist_array = fp->hlist_array;
> > -	ret = fprobe_graph_add_ips(addrs, num);
> > +	if (fp->exit_handler)
> > +		ret = fprobe_graph_add_ips(addrs, num);
> > +	else
> > +		ret = fprobe_ftrace_add_ips(addrs, num);
> > +
> >  	if (!ret) {
> >  		add_fprobe_hash(fp);
> >  		for (i = 0; i < hlist_array->size; i++) {
> > @@ -831,7 +902,10 @@ int unregister_fprobe(struct fprobe *fp)
> >  	}
> >  	del_fprobe_hash(fp);
> >  
> > -	fprobe_graph_remove_ips(addrs, count);
> > +	if (fp->exit_handler)
> > +		fprobe_graph_remove_ips(addrs, count);
> > +	else
> > +		fprobe_ftrace_remove_ips(addrs, count);
> >  
> >  	kfree_rcu(hlist_array, rcu);
> >  	fp->hlist_array = NULL;
> > -- 
> > 2.51.0
> > 
> 
> 
> -- 
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
> 
> 





  reply	other threads:[~2025-09-24  1:33 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-23  9:20 [PATCH 1/2] tracing: fprobe: rename fprobe_entry to fprobe_fgraph_entry Menglong Dong
2025-09-23  9:20 ` [PATCH 2/2] tracing: fprobe: optimization for entry only case Menglong Dong
2025-09-23 11:10   ` Jiri Olsa
2025-09-23 11:16     ` menglong.dong
2025-09-23 12:25       ` Jiri Olsa
2025-09-23 13:34         ` Menglong Dong
2025-09-23 16:25           ` Jiri Olsa
2025-09-24  0:23   ` Masami Hiramatsu
2025-09-24  1:33     ` menglong.dong [this message]
2025-09-23  9:38 ` [PATCH 1/2] tracing: fprobe: rename fprobe_entry to fprobe_fgraph_entry Steven Rostedt
2025-09-23 10:27   ` menglong.dong
2025-09-23 23:07 ` Masami Hiramatsu
2025-09-24  0:17   ` Menglong Dong
2025-09-24  8:13     ` Masami Hiramatsu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5014869.GXAFRqVoOG@7940hx \
    --to=menglong.dong@linux.dev \
    --cc=bpf@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mhiramat@kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.