* [PATCH 1/3] tracing: Expose functions to trace a synth event
@ 2025-03-18 18:08 Douglas RAILLARD
2025-03-18 18:08 ` [PATCH 2/3] tracing: Rename find_synth_event() into synth_event_find() Douglas RAILLARD
2025-03-18 18:08 ` [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2() Douglas RAILLARD
0 siblings, 2 replies; 12+ messages in thread
From: Douglas RAILLARD @ 2025-03-18 18:08 UTC (permalink / raw)
To: rostedt
Cc: douglas.raillard, Masami Hiramatsu, Mathieu Desnoyers,
linux-kernel, linux-trace-kernel
From: Douglas Raillard <douglas.raillard@arm.com>
The current API for synth event only allow tracing by getting a "struct
trace_event_file *", which is associated with a specific ftrace instance
that has to be looked up ahead of time. In order to be able to emit such
synth event in all instances where the event has been enabled by a user,
another function is required, using a "struct synth_event *" that then
uses the underlying tracepoint system that the tracefs interface
manipulates.
Such function already exists for the histogram feature, so simply move
it to the common trace_events_synth.c code.
Signed-off-by: Douglas Raillard <douglas.raillard@arm.com>
---
include/linux/trace_events.h | 7 +++++++
kernel/trace/trace_events_hist.c | 27 ---------------------------
kernel/trace/trace_events_synth.c | 29 +++++++++++++++++++++++++++++
kernel/trace/trace_synth.h | 2 --
4 files changed, 36 insertions(+), 29 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 5caea596fef0..cbe389d0e144 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -517,6 +517,13 @@ struct dynevent_cmd {
extern int dynevent_create(struct dynevent_cmd *cmd);
+struct synth_event;
+
+extern struct synth_event *find_synth_event(const char *name);
+
+extern void trace_synth(struct synth_event *event, u64 *var_ref_vals,
+ unsigned int *var_ref_idx);
+
extern int synth_event_delete(const char *name);
extern void synth_event_cmd_init(struct dynevent_cmd *cmd,
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 53dc6719181e..a2bc7a972763 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -814,33 +814,6 @@ static void hist_err_clear(void)
last_cmd_loc[0] = '\0';
}
-typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals,
- unsigned int *var_ref_idx);
-
-static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals,
- unsigned int *var_ref_idx)
-{
- struct tracepoint *tp = event->tp;
-
- if (unlikely(static_key_enabled(&tp->key))) {
- struct tracepoint_func *probe_func_ptr;
- synth_probe_func_t probe_func;
- void *__data;
-
- if (!(cpu_online(raw_smp_processor_id())))
- return;
-
- probe_func_ptr = rcu_dereference_sched((tp)->funcs);
- if (probe_func_ptr) {
- do {
- probe_func = probe_func_ptr->func;
- __data = probe_func_ptr->data;
- probe_func(__data, var_ref_vals, var_ref_idx);
- } while ((++probe_func_ptr)->func);
- }
- }
-}
-
static void action_trace(struct hist_trigger_data *hist_data,
struct tracing_map_elt *elt,
struct trace_buffer *buffer, void *rec,
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index e3f7d09e5512..9f0817eec3c2 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -845,6 +845,35 @@ struct synth_event *find_synth_event(const char *name)
return NULL;
}
+EXPORT_SYMBOL_GPL(find_synth_event);
+
+typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals,
+ unsigned int *var_ref_idx);
+
+void trace_synth(struct synth_event *event, u64 *var_ref_vals,
+ unsigned int *var_ref_idx)
+{
+ struct tracepoint *tp = event->tp;
+
+ if (unlikely(static_key_enabled(&tp->key))) {
+ struct tracepoint_func *probe_func_ptr;
+ synth_probe_func_t probe_func;
+ void *__data;
+
+ if (!(cpu_online(raw_smp_processor_id())))
+ return;
+
+ probe_func_ptr = rcu_dereference_sched((tp)->funcs);
+ if (probe_func_ptr) {
+ do {
+ probe_func = probe_func_ptr->func;
+ __data = probe_func_ptr->data;
+ probe_func(__data, var_ref_vals, var_ref_idx);
+ } while ((++probe_func_ptr)->func);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(trace_synth);
static struct trace_event_fields synth_event_fields_array[] = {
{ .type = TRACE_FUNCTION_TYPE,
diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h
index 43f6fb6078db..425a0ec7c773 100644
--- a/kernel/trace/trace_synth.h
+++ b/kernel/trace/trace_synth.h
@@ -36,6 +36,4 @@ struct synth_event {
struct module *mod;
};
-extern struct synth_event *find_synth_event(const char *name);
-
#endif /* __TRACE_SYNTH_H */
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/3] tracing: Rename find_synth_event() into synth_event_find()
2025-03-18 18:08 [PATCH 1/3] tracing: Expose functions to trace a synth event Douglas RAILLARD
@ 2025-03-18 18:08 ` Douglas RAILLARD
2025-03-18 18:08 ` [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2() Douglas RAILLARD
1 sibling, 0 replies; 12+ messages in thread
From: Douglas RAILLARD @ 2025-03-18 18:08 UTC (permalink / raw)
To: rostedt
Cc: douglas.raillard, Masami Hiramatsu, Mathieu Desnoyers,
linux-kernel, linux-trace-kernel
From: Douglas Raillard <douglas.raillard@arm.com>
Rename the freshly exposed function to comply with the existing naming
convention for synth event public API.
Signed-off-by: Douglas Raillard <douglas.raillard@arm.com>
---
include/linux/trace_events.h | 2 +-
kernel/trace/trace_events_hist.c | 8 ++++----
kernel/trace/trace_events_synth.c | 10 +++++-----
3 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index cbe389d0e144..e069d84a73f0 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -519,7 +519,7 @@ extern int dynevent_create(struct dynevent_cmd *cmd);
struct synth_event;
-extern struct synth_event *find_synth_event(const char *name);
+extern struct synth_event *synth_event_find(const char *name);
extern void trace_synth(struct synth_event *event, u64 *var_ref_vals,
unsigned int *var_ref_idx);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index a2bc7a972763..7067f6fedb1a 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3964,7 +3964,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data,
else
synth_event_name = data->action_name;
- event = find_synth_event(synth_event_name);
+ event = synth_event_find(synth_event_name);
if (!event) {
hist_err(tr, HIST_ERR_SYNTH_EVENT_NOT_FOUND, errpos(synth_event_name));
return -EINVAL;
@@ -6580,7 +6580,7 @@ static void hist_unreg_all(struct trace_event_file *file)
trace_event_trigger_enable_disable(file, 0);
se_name = trace_event_name(file->event_call);
- se = find_synth_event(se_name);
+ se = synth_event_find(se_name);
if (se)
se->ref--;
@@ -6699,7 +6699,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
event_trigger_unregister(cmd_ops, file, glob+1, trigger_data);
se_name = trace_event_name(file->event_call);
- se = find_synth_event(se_name);
+ se = synth_event_find(se_name);
if (se)
se->ref--;
ret = 0;
@@ -6735,7 +6735,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
goto out_unreg;
se_name = trace_event_name(file->event_call);
- se = find_synth_event(se_name);
+ se = synth_event_find(se_name);
if (se)
se->ref++;
out:
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 9f0817eec3c2..4a9a44d37ffc 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -830,7 +830,7 @@ static struct tracepoint *alloc_synth_tracepoint(char *name)
return tp;
}
-struct synth_event *find_synth_event(const char *name)
+struct synth_event *synth_event_find(const char *name)
{
struct dyn_event *pos;
struct synth_event *event;
@@ -845,7 +845,7 @@ struct synth_event *find_synth_event(const char *name)
return NULL;
}
-EXPORT_SYMBOL_GPL(find_synth_event);
+EXPORT_SYMBOL_GPL(synth_event_find);
typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals,
unsigned int *var_ref_idx);
@@ -1317,7 +1317,7 @@ static int __create_synth_event(const char *name, const char *raw_fields)
mutex_lock(&event_mutex);
- event = find_synth_event(name);
+ event = synth_event_find(name);
if (event) {
synth_err(SYNTH_ERR_EVENT_EXISTS, errpos(name));
ret = -EEXIST;
@@ -1513,7 +1513,7 @@ int synth_event_delete(const char *event_name)
int ret = -ENOENT;
mutex_lock(&event_mutex);
- se = find_synth_event(event_name);
+ se = synth_event_find(event_name);
if (se) {
mod = se->mod;
ret = destroy_synth_event(se);
@@ -1622,7 +1622,7 @@ static int synth_event_run_command(struct dynevent_cmd *cmd)
if (ret)
return ret;
- se = find_synth_event(cmd->event_name);
+ se = synth_event_find(cmd->event_name);
if (WARN_ON(!se))
return -ENOENT;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2()
2025-03-18 18:08 [PATCH 1/3] tracing: Expose functions to trace a synth event Douglas RAILLARD
2025-03-18 18:08 ` [PATCH 2/3] tracing: Rename find_synth_event() into synth_event_find() Douglas RAILLARD
@ 2025-03-18 18:08 ` Douglas RAILLARD
2025-03-19 13:37 ` Masami Hiramatsu
1 sibling, 1 reply; 12+ messages in thread
From: Douglas RAILLARD @ 2025-03-18 18:08 UTC (permalink / raw)
To: rostedt
Cc: douglas.raillard, Masami Hiramatsu, Mathieu Desnoyers,
linux-kernel, linux-trace-kernel
From: Douglas Raillard <douglas.raillard@arm.com>
Rename the frehsly exposed trace_synth() to synth_event_trace2() to
comply with the existing naming convention. Since synth_event_trace()
already exists (and operates on a "struct trace_event_file *"), use a
new name for it.
Signed-off-by: Douglas Raillard <douglas.raillard@arm.com>
---
include/linux/trace_events.h | 2 +-
kernel/trace/trace_events_hist.c | 2 +-
kernel/trace/trace_events_synth.c | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index e069d84a73f0..753ce8aecfe4 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -521,7 +521,7 @@ struct synth_event;
extern struct synth_event *synth_event_find(const char *name);
-extern void trace_synth(struct synth_event *event, u64 *var_ref_vals,
+extern void synth_event_trace2(struct synth_event *event, u64 *var_ref_vals,
unsigned int *var_ref_idx);
extern int synth_event_delete(const char *name);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 7067f6fedb1a..ee0fee123c91 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -822,7 +822,7 @@ static void action_trace(struct hist_trigger_data *hist_data,
{
struct synth_event *event = data->synth_event;
- trace_synth(event, var_ref_vals, data->var_ref_idx);
+ synth_event_trace2(event, var_ref_vals, data->var_ref_idx);
}
struct hist_var_data {
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 4a9a44d37ffc..8837aa258479 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -850,7 +850,7 @@ EXPORT_SYMBOL_GPL(synth_event_find);
typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals,
unsigned int *var_ref_idx);
-void trace_synth(struct synth_event *event, u64 *var_ref_vals,
+void synth_event_trace2(struct synth_event *event, u64 *var_ref_vals,
unsigned int *var_ref_idx)
{
struct tracepoint *tp = event->tp;
@@ -873,7 +873,7 @@ void trace_synth(struct synth_event *event, u64 *var_ref_vals,
}
}
}
-EXPORT_SYMBOL_GPL(trace_synth);
+EXPORT_SYMBOL_GPL(synth_event_trace2);
static struct trace_event_fields synth_event_fields_array[] = {
{ .type = TRACE_FUNCTION_TYPE,
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2()
2025-03-18 18:08 ` [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2() Douglas RAILLARD
@ 2025-03-19 13:37 ` Masami Hiramatsu
2025-03-19 14:51 ` Douglas Raillard
0 siblings, 1 reply; 12+ messages in thread
From: Masami Hiramatsu @ 2025-03-19 13:37 UTC (permalink / raw)
To: Douglas RAILLARD
Cc: rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-kernel,
linux-trace-kernel
On Tue, 18 Mar 2025 18:08:12 +0000
Douglas RAILLARD <douglas.raillard@arm.com> wrote:
> From: Douglas Raillard <douglas.raillard@arm.com>
>
> Rename the frehsly exposed trace_synth() to synth_event_trace2() to
> comply with the existing naming convention. Since synth_event_trace()
> already exists (and operates on a "struct trace_event_file *"), use a
> new name for it.
>
I don't like this '2' and similar version digit naming for the functions.
Can you choose another better name?
BTW, can you also write a cover mail so that what is the goal of this
series, background and results?
Thank you,
> Signed-off-by: Douglas Raillard <douglas.raillard@arm.com>
> ---
> include/linux/trace_events.h | 2 +-
> kernel/trace/trace_events_hist.c | 2 +-
> kernel/trace/trace_events_synth.c | 4 ++--
> 3 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index e069d84a73f0..753ce8aecfe4 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -521,7 +521,7 @@ struct synth_event;
>
> extern struct synth_event *synth_event_find(const char *name);
>
> -extern void trace_synth(struct synth_event *event, u64 *var_ref_vals,
> +extern void synth_event_trace2(struct synth_event *event, u64 *var_ref_vals,
> unsigned int *var_ref_idx);
>
> extern int synth_event_delete(const char *name);
> diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
> index 7067f6fedb1a..ee0fee123c91 100644
> --- a/kernel/trace/trace_events_hist.c
> +++ b/kernel/trace/trace_events_hist.c
> @@ -822,7 +822,7 @@ static void action_trace(struct hist_trigger_data *hist_data,
> {
> struct synth_event *event = data->synth_event;
>
> - trace_synth(event, var_ref_vals, data->var_ref_idx);
> + synth_event_trace2(event, var_ref_vals, data->var_ref_idx);
> }
>
> struct hist_var_data {
> diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
> index 4a9a44d37ffc..8837aa258479 100644
> --- a/kernel/trace/trace_events_synth.c
> +++ b/kernel/trace/trace_events_synth.c
> @@ -850,7 +850,7 @@ EXPORT_SYMBOL_GPL(synth_event_find);
> typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals,
> unsigned int *var_ref_idx);
>
> -void trace_synth(struct synth_event *event, u64 *var_ref_vals,
> +void synth_event_trace2(struct synth_event *event, u64 *var_ref_vals,
> unsigned int *var_ref_idx)
> {
> struct tracepoint *tp = event->tp;
> @@ -873,7 +873,7 @@ void trace_synth(struct synth_event *event, u64 *var_ref_vals,
> }
> }
> }
> -EXPORT_SYMBOL_GPL(trace_synth);
> +EXPORT_SYMBOL_GPL(synth_event_trace2);
>
> static struct trace_event_fields synth_event_fields_array[] = {
> { .type = TRACE_FUNCTION_TYPE,
> --
> 2.43.0
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2()
2025-03-19 13:37 ` Masami Hiramatsu
@ 2025-03-19 14:51 ` Douglas Raillard
2025-03-24 6:29 ` Masami Hiramatsu
0 siblings, 1 reply; 12+ messages in thread
From: Douglas Raillard @ 2025-03-19 14:51 UTC (permalink / raw)
To: Masami Hiramatsu (Google)
Cc: rostedt, Mathieu Desnoyers, linux-kernel, linux-trace-kernel
On 19-03-2025 13:37, Masami Hiramatsu (Google) wrote:
> On Tue, 18 Mar 2025 18:08:12 +0000
> Douglas RAILLARD <douglas.raillard@arm.com> wrote:
>
>> From: Douglas Raillard <douglas.raillard@arm.com>
>>
>> Rename the frehsly exposed trace_synth() to synth_event_trace2() to
>> comply with the existing naming convention. Since synth_event_trace()
>> already exists (and operates on a "struct trace_event_file *"), use a
>> new name for it.
>>
>
> I don't like this '2' and similar version digit naming for the functions.
> Can you choose another better name?
I was hoping for some suggestions as I don't like it either :)
The natural prefix for functions operating on "struct synth_event *" would by "synth_event_*",
but most of the existing API already uses the "synth_event_*" prefix, and is using
"struct trace_event_file*".
> BTW, can you also write a cover mail so that what is the goal of this
> series, background and results?
Ok, I'll respin the series with a proper cover letter. The gist is I was following the doc [1] on how
to create a synthetic event, trying to apply that to a kernel module we have that needs to create new events.
Unfortunately, it turned out that all the exposed APIs to emit the events (such as synth_event_trace()) require
getting a "struct trace_event_file*" before the call. This breaks when a user starts creating instances in tracefs,
as each instance will have its own "struct trace_event_file*". The way this is done for normal trace events is
that each instance registers a probe on the tracepoint with the struct trace_event_file* as the void *data pointer.
Then when the tracepoint is called, all the probes are called and the event data is copied in all instances in which
it was enabled.
Although the synthetic event API does create that tracepoint and has an appropriate probe function, none of the exposed API
I could find make use of it. The exception is trace_events_hist.c had its own private version of it that actually calls all
the probes of the tracepoint "manually", so that the event is correctly emitted in all the instances it was enabled in. This
is the function touched by this patch.
So that means that as it stands:
1. The exposed API is only really usable with the "NULL" struct trace_event_file*, which maps to the top-level one.
2. If a user creates an instance and enables the event in it using tracefs, the code that emits the event using the existing API
will completely ignore that and keep emitting the event in the top-level instance that it was wired to do.
Approximately nothing in the synth event API that takes a "struct trace_event_file*" will work properly with user-created instances.
[1] https://docs.kernel.org/trace/events.html#dyamically-creating-synthetic-event-definitions
>
> Thank you,
>
>> Signed-off-by: Douglas Raillard <douglas.raillard@arm.com>
>> ---
>> include/linux/trace_events.h | 2 +-
>> kernel/trace/trace_events_hist.c | 2 +-
>> kernel/trace/trace_events_synth.c | 4 ++--
>> 3 files changed, 4 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
>> index e069d84a73f0..753ce8aecfe4 100644
>> --- a/include/linux/trace_events.h
>> +++ b/include/linux/trace_events.h
>> @@ -521,7 +521,7 @@ struct synth_event;
>>
>> extern struct synth_event *synth_event_find(const char *name);
>>
>> -extern void trace_synth(struct synth_event *event, u64 *var_ref_vals,
>> +extern void synth_event_trace2(struct synth_event *event, u64 *var_ref_vals,
>> unsigned int *var_ref_idx);
>>
>> extern int synth_event_delete(const char *name);
>> diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
>> index 7067f6fedb1a..ee0fee123c91 100644
>> --- a/kernel/trace/trace_events_hist.c
>> +++ b/kernel/trace/trace_events_hist.c
>> @@ -822,7 +822,7 @@ static void action_trace(struct hist_trigger_data *hist_data,
>> {
>> struct synth_event *event = data->synth_event;
>>
>> - trace_synth(event, var_ref_vals, data->var_ref_idx);
>> + synth_event_trace2(event, var_ref_vals, data->var_ref_idx);
>> }
>>
>> struct hist_var_data {
>> diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
>> index 4a9a44d37ffc..8837aa258479 100644
>> --- a/kernel/trace/trace_events_synth.c
>> +++ b/kernel/trace/trace_events_synth.c
>> @@ -850,7 +850,7 @@ EXPORT_SYMBOL_GPL(synth_event_find);
>> typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals,
>> unsigned int *var_ref_idx);
>>
>> -void trace_synth(struct synth_event *event, u64 *var_ref_vals,
>> +void synth_event_trace2(struct synth_event *event, u64 *var_ref_vals,
>> unsigned int *var_ref_idx)
>> {
>> struct tracepoint *tp = event->tp;
>> @@ -873,7 +873,7 @@ void trace_synth(struct synth_event *event, u64 *var_ref_vals,
>> }
>> }
>> }
>> -EXPORT_SYMBOL_GPL(trace_synth);
>> +EXPORT_SYMBOL_GPL(synth_event_trace2);
>>
>> static struct trace_event_fields synth_event_fields_array[] = {
>> { .type = TRACE_FUNCTION_TYPE,
>> --
>> 2.43.0
>>
>
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2()
2025-03-19 14:51 ` Douglas Raillard
@ 2025-03-24 6:29 ` Masami Hiramatsu
2025-03-24 14:30 ` Steven Rostedt
2025-03-25 16:05 ` Douglas Raillard
0 siblings, 2 replies; 12+ messages in thread
From: Masami Hiramatsu @ 2025-03-24 6:29 UTC (permalink / raw)
To: Douglas Raillard
Cc: rostedt, Mathieu Desnoyers, linux-kernel, linux-trace-kernel
On Wed, 19 Mar 2025 14:51:42 +0000
Douglas Raillard <douglas.raillard@arm.com> wrote:
> On 19-03-2025 13:37, Masami Hiramatsu (Google) wrote:
> > On Tue, 18 Mar 2025 18:08:12 +0000
> > Douglas RAILLARD <douglas.raillard@arm.com> wrote:
> >
> >> From: Douglas Raillard <douglas.raillard@arm.com>
> >>
> >> Rename the frehsly exposed trace_synth() to synth_event_trace2() to
> >> comply with the existing naming convention. Since synth_event_trace()
> >> already exists (and operates on a "struct trace_event_file *"), use a
> >> new name for it.
> >>
> >
> > I don't like this '2' and similar version digit naming for the functions.
> > Can you choose another better name?
>
> I was hoping for some suggestions as I don't like it either :)
>
> The natural prefix for functions operating on "struct synth_event *" would by "synth_event_*",
> but most of the existing API already uses the "synth_event_*" prefix, and is using
> "struct trace_event_file*".
>
> > BTW, can you also write a cover mail so that what is the goal of this
> > series, background and results?
>
> Ok, I'll respin the series with a proper cover letter. The gist is I was following the doc [1] on how
> to create a synthetic event, trying to apply that to a kernel module we have that needs to create new events.
>
> Unfortunately, it turned out that all the exposed APIs to emit the events (such as synth_event_trace()) require
> getting a "struct trace_event_file*" before the call. This breaks when a user starts creating instances in tracefs,
> as each instance will have its own "struct trace_event_file*".
Yeah, because those are mainly for the tests, and we are expecting that if
any modules wants to emit its events, it will define new trace-events and
use it instead of synthetic events. The synthetic events are for
programming via tracefs, not reporting from the kernel modules.
It is confusing if any synthetic events are reported without any origin of
real trace event. (so, it is an intermadiate event type.) IOW, We expect
that synthetic event is reported by other events via event trigger.
The current APIs are just for testing.
Hmm, I should hide those by CONFIG_SYNTH_EVENT_TESTS.
> The way this is done for normal trace events is
> that each instance registers a probe on the tracepoint with the struct trace_event_file* as the void *data pointer.
> Then when the tracepoint is called, all the probes are called and the event data is copied in all instances in which
> it was enabled.
>
> Although the synthetic event API does create that tracepoint and has an appropriate probe function, none of the exposed API
> I could find make use of it. The exception is trace_events_hist.c had its own private version of it that actually calls all
> the probes of the tracepoint "manually", so that the event is correctly emitted in all the instances it was enabled in. This
> is the function touched by this patch.
No, please don't touch the synthetic events for reporting any events via
kernel modules. Those should use normal trace events for avoiding the
confusion.
Would you have any reason not to use normal trace events?
Thank you,
>
> So that means that as it stands:
> 1. The exposed API is only really usable with the "NULL" struct trace_event_file*, which maps to the top-level one.
> 2. If a user creates an instance and enables the event in it using tracefs, the code that emits the event using the existing API
> will completely ignore that and keep emitting the event in the top-level instance that it was wired to do.
>
> Approximately nothing in the synth event API that takes a "struct trace_event_file*" will work properly with user-created instances.
>
> [1] https://docs.kernel.org/trace/events.html#dyamically-creating-synthetic-event-definitions
>
> >
> > Thank you,
> >
> >> Signed-off-by: Douglas Raillard <douglas.raillard@arm.com>
> >> ---
> >> include/linux/trace_events.h | 2 +-
> >> kernel/trace/trace_events_hist.c | 2 +-
> >> kernel/trace/trace_events_synth.c | 4 ++--
> >> 3 files changed, 4 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> >> index e069d84a73f0..753ce8aecfe4 100644
> >> --- a/include/linux/trace_events.h
> >> +++ b/include/linux/trace_events.h
> >> @@ -521,7 +521,7 @@ struct synth_event;
> >>
> >> extern struct synth_event *synth_event_find(const char *name);
> >>
> >> -extern void trace_synth(struct synth_event *event, u64 *var_ref_vals,
> >> +extern void synth_event_trace2(struct synth_event *event, u64 *var_ref_vals,
> >> unsigned int *var_ref_idx);
> >>
> >> extern int synth_event_delete(const char *name);
> >> diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
> >> index 7067f6fedb1a..ee0fee123c91 100644
> >> --- a/kernel/trace/trace_events_hist.c
> >> +++ b/kernel/trace/trace_events_hist.c
> >> @@ -822,7 +822,7 @@ static void action_trace(struct hist_trigger_data *hist_data,
> >> {
> >> struct synth_event *event = data->synth_event;
> >>
> >> - trace_synth(event, var_ref_vals, data->var_ref_idx);
> >> + synth_event_trace2(event, var_ref_vals, data->var_ref_idx);
> >> }
> >>
> >> struct hist_var_data {
> >> diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
> >> index 4a9a44d37ffc..8837aa258479 100644
> >> --- a/kernel/trace/trace_events_synth.c
> >> +++ b/kernel/trace/trace_events_synth.c
> >> @@ -850,7 +850,7 @@ EXPORT_SYMBOL_GPL(synth_event_find);
> >> typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals,
> >> unsigned int *var_ref_idx);
> >>
> >> -void trace_synth(struct synth_event *event, u64 *var_ref_vals,
> >> +void synth_event_trace2(struct synth_event *event, u64 *var_ref_vals,
> >> unsigned int *var_ref_idx)
> >> {
> >> struct tracepoint *tp = event->tp;
> >> @@ -873,7 +873,7 @@ void trace_synth(struct synth_event *event, u64 *var_ref_vals,
> >> }
> >> }
> >> }
> >> -EXPORT_SYMBOL_GPL(trace_synth);
> >> +EXPORT_SYMBOL_GPL(synth_event_trace2);
> >>
> >> static struct trace_event_fields synth_event_fields_array[] = {
> >> { .type = TRACE_FUNCTION_TYPE,
> >> --
> >> 2.43.0
> >>
> >
> >
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2()
2025-03-24 6:29 ` Masami Hiramatsu
@ 2025-03-24 14:30 ` Steven Rostedt
2025-03-25 16:05 ` Douglas Raillard
1 sibling, 0 replies; 12+ messages in thread
From: Steven Rostedt @ 2025-03-24 14:30 UTC (permalink / raw)
To: Masami Hiramatsu (Google)
Cc: Douglas Raillard, Mathieu Desnoyers, linux-kernel,
linux-trace-kernel
On Mon, 24 Mar 2025 15:29:45 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> Yeah, because those are mainly for the tests, and we are expecting that if
> any modules wants to emit its events, it will define new trace-events and
> use it instead of synthetic events. The synthetic events are for
> programming via tracefs, not reporting from the kernel modules.
> It is confusing if any synthetic events are reported without any origin of
> real trace event. (so, it is an intermadiate event type.) IOW, We expect
> that synthetic event is reported by other events via event trigger.
> The current APIs are just for testing.
>
> Hmm, I should hide those by CONFIG_SYNTH_EVENT_TESTS.
Perhaps we should remove synth_event_trace() from the include/linux header
file, and move it to kernel/trace/trace.h? That way it is only used for
internal purposes, and not exposed to modules. You could wrap the export
with a #ifdef CONFIG_SYNTH_EVENT_TESTS.
-- Steve
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2()
2025-03-24 6:29 ` Masami Hiramatsu
2025-03-24 14:30 ` Steven Rostedt
@ 2025-03-25 16:05 ` Douglas Raillard
2025-03-25 16:25 ` Steven Rostedt
1 sibling, 1 reply; 12+ messages in thread
From: Douglas Raillard @ 2025-03-25 16:05 UTC (permalink / raw)
To: Masami Hiramatsu (Google)
Cc: rostedt, Mathieu Desnoyers, linux-kernel, linux-trace-kernel
On 24-03-2025 06:29, Masami Hiramatsu (Google) wrote:
> On Wed, 19 Mar 2025 14:51:42 +0000
> Douglas Raillard <douglas.raillard@arm.com> wrote:
>
>> On 19-03-2025 13:37, Masami Hiramatsu (Google) wrote:
>>> On Tue, 18 Mar 2025 18:08:12 +0000
>>> Douglas RAILLARD <douglas.raillard@arm.com> wrote:
>>>
>>>> From: Douglas Raillard <douglas.raillard@arm.com>
>>>>
>>>> Rename the frehsly exposed trace_synth() to synth_event_trace2() to
>>>> comply with the existing naming convention. Since synth_event_trace()
>>>> already exists (and operates on a "struct trace_event_file *"), use a
>>>> new name for it.
>>>>
>>>
>>> I don't like this '2' and similar version digit naming for the functions.
>>> Can you choose another better name?
>>
>> I was hoping for some suggestions as I don't like it either :)
>>
>> The natural prefix for functions operating on "struct synth_event *" would by "synth_event_*",
>> but most of the existing API already uses the "synth_event_*" prefix, and is using
>> "struct trace_event_file*".
>>
>>> BTW, can you also write a cover mail so that what is the goal of this
>>> series, background and results?
>>
>> Ok, I'll respin the series with a proper cover letter. The gist is I was following the doc [1] on how
>> to create a synthetic event, trying to apply that to a kernel module we have that needs to create new events.
>>
>> Unfortunately, it turned out that all the exposed APIs to emit the events (such as synth_event_trace()) require
>> getting a "struct trace_event_file*" before the call. This breaks when a user starts creating instances in tracefs,
>> as each instance will have its own "struct trace_event_file*".
>
> Yeah, because those are mainly for the tests, and we are expecting that if
> any modules wants to emit its events, it will define new trace-events and
> use it instead of synthetic events. The synthetic events are for
> programming via tracefs, not reporting from the kernel modules.
> It is confusing if any synthetic events are reported without any origin of
> real trace event. (so, it is an intermadiate event type.) IOW, We expect
> that synthetic event is reported by other events via event trigger.
> The current APIs are just for testing.
>
> Hmm, I should hide those by CONFIG_SYNTH_EVENT_TESTS.
>
>> The way this is done for normal trace events is
>> that each instance registers a probe on the tracepoint with the struct trace_event_file* as the void *data pointer.
>> Then when the tracepoint is called, all the probes are called and the event data is copied in all instances in which
>> it was enabled.
>>
>> Although the synthetic event API does create that tracepoint and has an appropriate probe function, none of the exposed API
>> I could find make use of it. The exception is trace_events_hist.c had its own private version of it that actually calls all
>> the probes of the tracepoint "manually", so that the event is correctly emitted in all the instances it was enabled in. This
>> is the function touched by this patch.
>
> No, please don't touch the synthetic events for reporting any events via
> kernel modules. Those should use normal trace events for avoiding the
> confusion.
>
> Would you have any reason not to use normal trace events?
Yes, the dynamic API was exactly what I needed unfortunately. I'm porting the kernel module we have to Rust (using our own bindings as
we cannot mandate CONFIG_RUST=y). While I have some support for C code generation based on the Rust source, it is significantly more
convenient to simply use a dynamic API. In the current state of my code, defining an event is as simple as:
let f = new_event! {
lisa__myevent2,
fields: {
field1: u8,
field3: &CStr,
field2: u64,
}
}?;
// Emit the event
f(1, c"hello", 2);
f(3, c"world", 4);
So it's as non-confusing can be: the event name is stated plainly and the field names and types are mentioned once, with no repetition.
The result can simply be called to emit the event, and when dropped, the event is unregistered.
On top of that in ways unrelated to Rust:
1. We have some really high traffic event (PELT-related) that include some data that is not always needed (e.g. taskgroup name).
We currently regularly hit memory size limitation on our test device (pixel 6), and using trace-cmd record instead of
trace-cmd start is not always a good idea as this will have an effect on scheduling, disturbing the very thing we are trying
to observe. Having the dynamic API means we could simply omit the fields in the event that we don't care about in a specific
experiment via dynamic configuration.
2. Some events may or may not be supported on the system based on some runtime condition. Currently, there is no great way for
the module to feed back that info. No matter what, the event exists, even if the machinery that is supposed to emit it is
disabled for whatever reason. If some user starts tracing the event "everything will work" and there will be no event in the
trace. That may make the user think a condition did not trigger, whereas in fact the whole machinery was simply not operating.
Being able to register or not the event dynamically solves that cleanly, as enabling the event will simply fail as it won't
have been registered in the first place.
3. We will likely at some point relay events coming from some non-CPU part of the system into the ftrace buffer. If and when that
happens, it would be ideal if that producer can simply declare the events it supports, and our module dynamically create the
matching synthetic events. That would avoid any problem coming from mismatching source that would otherwise plague such setup.
So considering things seem to work overall with not much tuning needed, it would be sad to have to revert to TRACE_EVENT() macro
and end up having to do more work for a worse result. If that's the route you choose though, it may be nice to amend the
documentation to clearly state this API is testing-only and not supported in normal use case, as it currently reads:
The trace event subsystem provides an in-kernel API allowing modules
or other kernel code to generate user-defined 'synthetic' events at
will, which can be used to either augment the existing trace stream
and/or signal that a particular important state has occurred.
>
> Thank you,
>
Thank you,
Douglas
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2()
2025-03-25 16:05 ` Douglas Raillard
@ 2025-03-25 16:25 ` Steven Rostedt
2025-03-25 18:16 ` Douglas Raillard
0 siblings, 1 reply; 12+ messages in thread
From: Steven Rostedt @ 2025-03-25 16:25 UTC (permalink / raw)
To: Douglas Raillard
Cc: Masami Hiramatsu (Google), Mathieu Desnoyers, linux-kernel,
linux-trace-kernel, Alice Ryhl
On Tue, 25 Mar 2025 16:05:00 +0000
Douglas Raillard <douglas.raillard@arm.com> wrote:
> Yes, the dynamic API was exactly what I needed unfortunately. I'm porting the kernel module we have to Rust (using our own bindings as
> we cannot mandate CONFIG_RUST=y). While I have some support for C code generation based on the Rust source, it is significantly more
> convenient to simply use a dynamic API. In the current state of my code, defining an event is as simple as:
>
> let f = new_event! {
> lisa__myevent2,
> fields: {
> field1: u8,
> field3: &CStr,
> field2: u64,
> }
> }?;
>
> // Emit the event
> f(1, c"hello", 2);
> f(3, c"world", 4);
>
> So it's as non-confusing can be: the event name is stated plainly and the field names and types are mentioned once, with no repetition.
> The result can simply be called to emit the event, and when dropped, the event is unregistered.
Interesting.
>
>
> On top of that in ways unrelated to Rust:
> 1. We have some really high traffic event (PELT-related) that include some data that is not always needed (e.g. taskgroup name).
> We currently regularly hit memory size limitation on our test device (pixel 6), and using trace-cmd record instead of
> trace-cmd start is not always a good idea as this will have an effect on scheduling, disturbing the very thing we are trying
> to observe. Having the dynamic API means we could simply omit the fields in the event that we don't care about in a specific
> experiment via dynamic configuration.
>
> 2. Some events may or may not be supported on the system based on some runtime condition. Currently, there is no great way for
> the module to feed back that info. No matter what, the event exists, even if the machinery that is supposed to emit it is
> disabled for whatever reason. If some user starts tracing the event "everything will work" and there will be no event in the
> trace. That may make the user think a condition did not trigger, whereas in fact the whole machinery was simply not operating.
> Being able to register or not the event dynamically solves that cleanly, as enabling the event will simply fail as it won't
> have been registered in the first place.
>
> 3. We will likely at some point relay events coming from some non-CPU part of the system into the ftrace buffer. If and when that
> happens, it would be ideal if that producer can simply declare the events it supports, and our module dynamically create the
> matching synthetic events. That would avoid any problem coming from mismatching source that would otherwise plague such setup.
>
> So considering things seem to work overall with not much tuning needed, it would be sad to have to revert to TRACE_EVENT() macro
> and end up having to do more work for a worse result. If that's the route you choose though, it may be nice to amend the
> documentation to clearly state this API is testing-only and not supported in normal use case, as it currently reads:
>
> The trace event subsystem provides an in-kernel API allowing modules
> or other kernel code to generate user-defined 'synthetic' events at
> will, which can be used to either augment the existing trace stream
> and/or signal that a particular important state has occurred.
Note, there is also a CUSTOM_TRACE_EVENT() macro that can attach to any
tracepoint (including those that have a TRACE_EVENT() already attached to
them) and create a new trace event that shows up into tracefs.
I still do not think "synthetic events" should be used for this purpose.
They are complex enough with the user interface we shouldn't have them
created in the kernel. That documentation should be rewritten to not make
it sound like the kernel or modules can create them.
That said, it doesn't mean you can't use dynamic events for this purpose.
Now what exactly is that "new_event!" doing?
I guess, what's different to that than a kprobe event? A kprobe event is a
dynamic trace event in the kernel. Could you do the same with that? Or is
this adding a nop in the code like a real event would?
I'm not against rust dynamic events, but I do not think synthetic events
are the answer. It will just cause more confusion.
I also added Alice to the Cc, as she has created trace events for Rust.
-- Steve
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2()
2025-03-25 16:25 ` Steven Rostedt
@ 2025-03-25 18:16 ` Douglas Raillard
2025-03-25 20:13 ` Douglas Raillard
0 siblings, 1 reply; 12+ messages in thread
From: Douglas Raillard @ 2025-03-25 18:16 UTC (permalink / raw)
To: Steven Rostedt
Cc: Masami Hiramatsu (Google), Mathieu Desnoyers, linux-kernel,
linux-trace-kernel, Alice Ryhl
On 25-03-2025 16:25, Steven Rostedt wrote:
> On Tue, 25 Mar 2025 16:05:00 +0000
> Douglas Raillard <douglas.raillard@arm.com> wrote:
>
>> Yes, the dynamic API was exactly what I needed unfortunately. I'm porting the kernel module we have to Rust (using our own bindings as
>> we cannot mandate CONFIG_RUST=y). While I have some support for C code generation based on the Rust source, it is significantly more
>> convenient to simply use a dynamic API. In the current state of my code, defining an event is as simple as:
>>
>> let f = new_event! {
>> lisa__myevent2,
>> fields: {
>> field1: u8,
>> field3: &CStr,
>> field2: u64,
>> }
>> }?;
>>
>> // Emit the event
>> f(1, c"hello", 2);
>> f(3, c"world", 4);
>>
>> So it's as non-confusing can be: the event name is stated plainly and the field names and types are mentioned once, with no repetition.
>> The result can simply be called to emit the event, and when dropped, the event is unregistered.
>
> Interesting.
>
>>
>>
>> On top of that in ways unrelated to Rust:
>> 1. We have some really high traffic event (PELT-related) that include some data that is not always needed (e.g. taskgroup name).
>> We currently regularly hit memory size limitation on our test device (pixel 6), and using trace-cmd record instead of
>> trace-cmd start is not always a good idea as this will have an effect on scheduling, disturbing the very thing we are trying
>> to observe. Having the dynamic API means we could simply omit the fields in the event that we don't care about in a specific
>> experiment via dynamic configuration.
>>
>> 2. Some events may or may not be supported on the system based on some runtime condition. Currently, there is no great way for
>> the module to feed back that info. No matter what, the event exists, even if the machinery that is supposed to emit it is
>> disabled for whatever reason. If some user starts tracing the event "everything will work" and there will be no event in the
>> trace. That may make the user think a condition did not trigger, whereas in fact the whole machinery was simply not operating.
>> Being able to register or not the event dynamically solves that cleanly, as enabling the event will simply fail as it won't
>> have been registered in the first place.
>>
>> 3. We will likely at some point relay events coming from some non-CPU part of the system into the ftrace buffer. If and when that
>> happens, it would be ideal if that producer can simply declare the events it supports, and our module dynamically create the
>> matching synthetic events. That would avoid any problem coming from mismatching source that would otherwise plague such setup.
>>
>> So considering things seem to work overall with not much tuning needed, it would be sad to have to revert to TRACE_EVENT() macro
>> and end up having to do more work for a worse result. If that's the route you choose though, it may be nice to amend the
>> documentation to clearly state this API is testing-only and not supported in normal use case, as it currently reads:
>>
>> The trace event subsystem provides an in-kernel API allowing modules
>> or other kernel code to generate user-defined 'synthetic' events at
>> will, which can be used to either augment the existing trace stream
>> and/or signal that a particular important state has occurred.
>
> Note, there is also a CUSTOM_TRACE_EVENT() macro that can attach to any
> tracepoint (including those that have a TRACE_EVENT() already attached to
> them) and create a new trace event that shows up into tracefs.
I think I came across that at some point indeed. We probably could make use
of it for some of the events we emit, but not all. Also this was introduced
in 2022, but I need to support v5.15 kernels (2021), so maybe one day but not today :(
The event that does not fit this use case is emitted from a workqueue worker that
fires at regular interval to poll for some data. So there is no particular tracepoint
to attach to, we just emit an event from our own code. In the future, we will
probably end up having more of that sort.
> I still do not think "synthetic events" should be used for this purpose.
> They are complex enough with the user interface we shouldn't have them
> created in the kernel. That documentation should be rewritten to not make
> it sound like the kernel or modules can create them.
>
> That said, it doesn't mean you can't use dynamic events for this purpose.
> Now what exactly is that "new_event!" doing?
That new_event!() macro takes the name and the list of fields and creates two related
things:
1. An EventDesc value that contains the a Vec of the field names and their type.
Upon creation, this registers the synthetic event and unregisters it when dropped.
2. An associated closure that takes one parameter per field, builds an array out of them,
and using the captured EventDesc emits the events.
Since the closure captures the EventDesc, the EventDesc cannot be dropped while some code
is able to call the closure, and since the EventDesc is responsible for the lifecycle of
the kernel synthetic event, there is no risk of "use after unregister".
The value returned by the macro is the closure, so it can just be called like a normal
function, but dropping it will also drop the captured EventDesc and unregister the event.
The field types have to implement a FieldTy trait, which exposes both the type name
as expected by the synthetic events API and conversion of a value to u64 for preparing
the array passed to synth_event_trace_array():
pub trait FieldTy {
const NAME: &'static str;
fn to_u64(self) -> u64;
}
Once the whole thing is shipping, I plan on adding a way to pass a runtime list of fields
to actually enable to new_event!(), so that the event size can be dynamically reduced to
what is needed and save precious MB of RAM in the buffer, e.g.:
// Coming from some runtime user config, via module parameter or sysfs
let enabled_fields = vec!["field", "field2"];
let f = crate::runtime::traceevent::new_event! {
lisa__myevent2,
fields: {
field1: u8,
field3: &CStr,
field2: u64,
},
enabled_fields,
}?;
> I guess, what's different to that than a kprobe event? A kprobe event is a
> dynamic trace event in the kernel. Could you do the same with that? Or is
> this adding a nop in the code like a real event would?
Considering we not only need to emit events from existing tracepoints but also just from
our own code (workqueue worker gathering data not obtained via tracepoints), I don't think
kprobe-based solution is best unless I missed some usage of it.
>
> I'm not against rust dynamic events, but I do not think synthetic events
> are the answer. It will just cause more confusion.
Is there some way of creating an event that would not hardcode the detail at compile-time
like the TRACE_EVENT() macro does ? In ways that are not designed to specifically feed a
well-known upstream data source into ftrace like tracepoints or kprobes. Our kernel module
is essentially sitting at the same layer as TRACE_CUSTOM_EVENT() or kprobe events, where
it takes some data source (some of it being from our own module) and feeds it into ftrace.
Maybe what I'm looking for is the dynevent_cmd API ? From the kernel doc:
Both the synthetic event and k/ret/probe event APIs are built on top of a
lower-level “dynevent_cmd” event command API, which is also available for more
specialized applications, or as the basis of other higher-level trace event
APIs.
I'd rather avoid shipping an out-of-tree reimplementation of synthetic events if possible,
but if that API is public and usable from a module I could live with that as long as it allows
creating "well behaved" events (available in tracefs, enable/disable working,
instances working etc) and is stable enough that the same code can reasonably be expected to work
from v5.15 to now with minor tweaks only.
> I also added Alice to the Cc, as she has created trace events for Rust.
Interesting, I could not find anything in the upstream bindings. All there seems to be is
a crude tracepoint binding that only allows emitting a tracepoint already defined in C.
If that's of any interest, I also have code to attach probes to tracepoints dynamically from Rust [1]
My plan B for emitting events is to use the infrastructure I have to include C-based function in the
Rust codebase to sneakily use the TRACE_EVENT() macro like any non-confusing module [2].
[1] The tracepoint probe bindings I have looks like that:
// Create a probe that increments a captured atomic counter.
//
// The new_probe!() macro creates:
// 1. The closure
// 2. A probe function that uses the first void* arg to pass the closure pointer
// and then calls it
// 3. A Probe value that ties together both 1. and 2.. This is the value returned
// by the macro.
let x = AtomicUsize::new(0);
let probe = new_probe!(
// That dropper is responsible for dropping all the probes attached to it,
// so that we pay only once for tracepoint_synchronize_unregister() rather
// than for each probe.
&dropper,
// Essentially a closure, with slightly odd syntax for boring reasons.
(preempt: bool, prev: *const c_void, next:* const c_void, prev_state: c_ulong) {
let x = x.fetch_add(1, Ordering::SeqCst);
crate::runtime::printk::pr_info!("SCHED_SWITCH {x}");
}
);
// Lookup the tracepoint dynamically. It's unsafe as the lifetime of a
// struct tracepoint is unknown if defined in a module, 'static otherwise.
// Also unsafe since the tp signature is not checked, as the lookup is 100% dynamic.
// I could probably make use of typeof(trace_foobar) function to typecheck that against the
// C API if the lookup was not dynamic, at the cost of failing to load the module rather than
// being able to dynamically deal with the failure if the tp does not exist.
let tp = unsafe {
Tracepoint::<(bool, *const c_void, * const c_void, c_ulong)>::lookup("sched_switch").expect("tp not found")
};
// Attach the probe to the tracepoint. If the signature does not match, it won't typecheck.
let registered = tp.register_probe(&probe);
// When dropped, the RegisteredProbe value detaches the probe from the
// tracepoint. The probe will only be actually deallocated when its
// associated dropper is dropped, after which the dropper calls
// tracepoint_synchronize_unregister().
drop(registered);
[2] FFI infrastructure to include C code inside the Rust code for bindings writing.
This is a function defined and usable inside the Rust code that has a body written in C:
[cfunc]
fn myfunction(param: u64) -> bool {
r#"
// Any C code allowed, we are just before the function
// definition
#include <linux/foo.h>
";
r#"
// Any C code allowed, we are inside a function here.
return call_foo_function(param);
";
}
The r#" "# syntax is just Rust multiline string literals. The optional first literal gets dumped before the function,
the mandatory second one inside, and an optional 3rd one after. The #[cfunc] proc macro takes care of generating the C
prototype matching the Rust one and to wire everything in a way that CFI is happy with. Since the C code is generated
at compile time as a string, any const operation is allowed on it, so I could work my way to a snippet of C that
uses TRACE_EVENT() in the first string literal and calls the trace_* function in the function body.
The C code ends up in a section of the Rust object file that is extracted with objdump in the Makefile and fed back to
Kbuild. That's very convenient to use and avoids splitting away one-liners like that, does not break randomly
like cbindgen on some Rust code I don't control coming from 3rd party crates, and since the module is built at runtime
by the user, it also has the big advantage of not requiring to build cbindgen in the "tepid" path.
> -- Steve
Douglas
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2()
2025-03-25 18:16 ` Douglas Raillard
@ 2025-03-25 20:13 ` Douglas Raillard
0 siblings, 0 replies; 12+ messages in thread
From: Douglas Raillard @ 2025-03-25 20:13 UTC (permalink / raw)
To: Steven Rostedt
Cc: Masami Hiramatsu (Google), Mathieu Desnoyers, linux-kernel,
linux-trace-kernel, Alice Ryhl
On 25-03-2025 18:16, Douglas Raillard wrote:
> On 25-03-2025 16:25, Steven Rostedt wrote:
>> On Tue, 25 Mar 2025 16:05:00 +0000
>> Douglas Raillard <douglas.raillard@arm.com> wrote:
>>
>>> Yes, the dynamic API was exactly what I needed unfortunately. I'm porting the kernel module we have to Rust (using our own bindings as
>>> we cannot mandate CONFIG_RUST=y). While I have some support for C code generation based on the Rust source, it is significantly more
>>> convenient to simply use a dynamic API. In the current state of my code, defining an event is as simple as:
>>>
>>> let f = new_event! {
>>> lisa__myevent2,
>>> fields: {
>>> field1: u8,
>>> field3: &CStr,
>>> field2: u64,
>>> }
>>> }?;
>>> // Emit the event
>>> f(1, c"hello", 2);
>>> f(3, c"world", 4);
>>>
>>> So it's as non-confusing can be: the event name is stated plainly and the field names and types are mentioned once, with no repetition.
>>> The result can simply be called to emit the event, and when dropped, the event is unregistered.
>>
>> Interesting.
>>
>>>
>>>
>>> On top of that in ways unrelated to Rust:
>>> 1. We have some really high traffic event (PELT-related) that include some data that is not always needed (e.g. taskgroup name).
>>> We currently regularly hit memory size limitation on our test device (pixel 6), and using trace-cmd record instead of
>>> trace-cmd start is not always a good idea as this will have an effect on scheduling, disturbing the very thing we are trying
>>> to observe. Having the dynamic API means we could simply omit the fields in the event that we don't care about in a specific
>>> experiment via dynamic configuration.
>>>
>>> 2. Some events may or may not be supported on the system based on some runtime condition. Currently, there is no great way for
>>> the module to feed back that info. No matter what, the event exists, even if the machinery that is supposed to emit it is
>>> disabled for whatever reason. If some user starts tracing the event "everything will work" and there will be no event in the
>>> trace. That may make the user think a condition did not trigger, whereas in fact the whole machinery was simply not operating.
>>> Being able to register or not the event dynamically solves that cleanly, as enabling the event will simply fail as it won't
>>> have been registered in the first place.
>>>
>>> 3. We will likely at some point relay events coming from some non-CPU part of the system into the ftrace buffer. If and when that
>>> happens, it would be ideal if that producer can simply declare the events it supports, and our module dynamically create the
>>> matching synthetic events. That would avoid any problem coming from mismatching source that would otherwise plague such setup.
>>>
>>> So considering things seem to work overall with not much tuning needed, it would be sad to have to revert to TRACE_EVENT() macro
>>> and end up having to do more work for a worse result. If that's the route you choose though, it may be nice to amend the
>>> documentation to clearly state this API is testing-only and not supported in normal use case, as it currently reads:
>>>
>>> The trace event subsystem provides an in-kernel API allowing modules
>>> or other kernel code to generate user-defined 'synthetic' events at
>>> will, which can be used to either augment the existing trace stream
>>> and/or signal that a particular important state has occurred.
>>
>> Note, there is also a CUSTOM_TRACE_EVENT() macro that can attach to any
>> tracepoint (including those that have a TRACE_EVENT() already attached to
>> them) and create a new trace event that shows up into tracefs.
>
> I think I came across that at some point indeed. We probably could make use
> of it for some of the events we emit, but not all. Also this was introduced
> in 2022, but I need to support v5.15 kernels (2021), so maybe one day but not today :(
>
> The event that does not fit this use case is emitted from a workqueue worker that
> fires at regular interval to poll for some data. So there is no particular tracepoint
> to attach to, we just emit an event from our own code. In the future, we will
> probably end up having more of that sort.
>
>> I still do not think "synthetic events" should be used for this purpose.
>> They are complex enough with the user interface we shouldn't have them
>> created in the kernel. That documentation should be rewritten to not make
>> it sound like the kernel or modules can create them.
>>
>> That said, it doesn't mean you can't use dynamic events for this purpose.
>> Now what exactly is that "new_event!" doing?
>
> That new_event!() macro takes the name and the list of fields and creates two related
> things:
> 1. An EventDesc value that contains the a Vec of the field names and their type.
> Upon creation, this registers the synthetic event and unregisters it when dropped.
>
> 2. An associated closure that takes one parameter per field, builds an array out of them,
> and using the captured EventDesc emits the events.
>
> Since the closure captures the EventDesc, the EventDesc cannot be dropped while some code
> is able to call the closure, and since the EventDesc is responsible for the lifecycle of
> the kernel synthetic event, there is no risk of "use after unregister".
>
> The value returned by the macro is the closure, so it can just be called like a normal
> function, but dropping it will also drop the captured EventDesc and unregister the event.
>
> The field types have to implement a FieldTy trait, which exposes both the type name
> as expected by the synthetic events API and conversion of a value to u64 for preparing
> the array passed to synth_event_trace_array():
>
> pub trait FieldTy {
> const NAME: &'static str;
> fn to_u64(self) -> u64;
> }
>
> Once the whole thing is shipping, I plan on adding a way to pass a runtime list of fields
> to actually enable to new_event!(), so that the event size can be dynamically reduced to
> what is needed and save precious MB of RAM in the buffer, e.g.:
>
> // Coming from some runtime user config, via module parameter or sysfs
> let enabled_fields = vec!["field", "field2"];
>
> let f = crate::runtime::traceevent::new_event! {
> lisa__myevent2,
> fields: {
> field1: u8,
> field3: &CStr,
> field2: u64,
> },
> enabled_fields,
> }?;
>
>> I guess, what's different to that than a kprobe event? A kprobe event is a
>> dynamic trace event in the kernel. Could you do the same with that? Or is
>> this adding a nop in the code like a real event would?
>
> Considering we not only need to emit events from existing tracepoints but also just from
> our own code (workqueue worker gathering data not obtained via tracepoints), I don't think
> kprobe-based solution is best unless I missed some usage of it.
>
>>
>> I'm not against rust dynamic events, but I do not think synthetic events
>> are the answer. It will just cause more confusion.
>
> Is there some way of creating an event that would not hardcode the detail at compile-time
> like the TRACE_EVENT() macro does ? In ways that are not designed to specifically feed a
> well-known upstream data source into ftrace like tracepoints or kprobes. Our kernel module
> is essentially sitting at the same layer as TRACE_CUSTOM_EVENT() or kprobe events, where
> it takes some data source (some of it being from our own module) and feeds it into ftrace.
>
> Maybe what I'm looking for is the dynevent_cmd API ? From the kernel doc:
>
> Both the synthetic event and k/ret/probe event APIs are built on top of a
> lower-level “dynevent_cmd” event command API, which is also available for more
> specialized applications, or as the basis of other higher-level trace event
> APIs.
Answering to myself, this is a no-go, as this is indeed the basis of other high-level trace event
APIs, but not ones created from modules since almost none of that API is exported.
>
> I'd rather avoid shipping an out-of-tree reimplementation of synthetic events if possible,
> but if that API is public and usable from a module I could live with that as long as it allows
> creating "well behaved" events (available in tracefs, enable/disable working,
> instances working etc) and is stable enough that the same code can reasonably be expected to work
> from v5.15 to now with minor tweaks only.
>
>> I also added Alice to the Cc, as she has created trace events for Rust.
>
> Interesting, I could not find anything in the upstream bindings. All there seems to be is
> a crude tracepoint binding that only allows emitting a tracepoint already defined in C.
> If that's of any interest, I also have code to attach probes to tracepoints dynamically from Rust [1]
>
> My plan B for emitting events is to use the infrastructure I have to include C-based function in the
> Rust codebase to sneakily use the TRACE_EVENT() macro like any non-confusing module [2].
Actually plan B can't fly, there is too much similar but different repetition in the TRACE_EVENT() macro
(TP_ARGS() and TP_PROTO()) and the C syntax is not helping either (trailing comma forbidden).
This will have to be plan C with a proc macro defining a custom syntax and generating the string literal
at compile time, or defining it in C sources and dealing with copy/pasted Rust bindings.
> [1] The tracepoint probe bindings I have looks like that:
>
> // Create a probe that increments a captured atomic counter.
> //
> // The new_probe!() macro creates:
> // 1. The closure
> // 2. A probe function that uses the first void* arg to pass the closure pointer
> // and then calls it
> // 3. A Probe value that ties together both 1. and 2.. This is the value returned
> // by the macro.
> let x = AtomicUsize::new(0);
> let probe = new_probe!(
> // That dropper is responsible for dropping all the probes attached to it,
> // so that we pay only once for tracepoint_synchronize_unregister() rather
> // than for each probe.
> &dropper,
> // Essentially a closure, with slightly odd syntax for boring reasons.
> (preempt: bool, prev: *const c_void, next:* const c_void, prev_state: c_ulong) {
> let x = x.fetch_add(1, Ordering::SeqCst);
> crate::runtime::printk::pr_info!("SCHED_SWITCH {x}");
> }
> );
>
> // Lookup the tracepoint dynamically. It's unsafe as the lifetime of a
> // struct tracepoint is unknown if defined in a module, 'static otherwise.
> // Also unsafe since the tp signature is not checked, as the lookup is 100% dynamic.
> // I could probably make use of typeof(trace_foobar) function to typecheck that against the
> // C API if the lookup was not dynamic, at the cost of failing to load the module rather than
> // being able to dynamically deal with the failure if the tp does not exist.
> let tp = unsafe {
> Tracepoint::<(bool, *const c_void, * const c_void, c_ulong)>::lookup("sched_switch").expect("tp not found")
> };
>
> // Attach the probe to the tracepoint. If the signature does not match, it won't typecheck.
> let registered = tp.register_probe(&probe);
>
> // When dropped, the RegisteredProbe value detaches the probe from the
> // tracepoint. The probe will only be actually deallocated when its
> // associated dropper is dropped, after which the dropper calls
> // tracepoint_synchronize_unregister().
> drop(registered);
>
>
> [2] FFI infrastructure to include C code inside the Rust code for bindings writing.
> This is a function defined and usable inside the Rust code that has a body written in C:
>
> [cfunc]
> fn myfunction(param: u64) -> bool {
> r#"
> // Any C code allowed, we are just before the function
> // definition
> #include <linux/foo.h>
> ";
> r#"
> // Any C code allowed, we are inside a function here.
> return call_foo_function(param);
> ";
> }
>
> The r#" "# syntax is just Rust multiline string literals. The optional first literal gets dumped before the function,
> the mandatory second one inside, and an optional 3rd one after. The #[cfunc] proc macro takes care of generating the C
> prototype matching the Rust one and to wire everything in a way that CFI is happy with. Since the C code is generated
> at compile time as a string, any const operation is allowed on it, so I could work my way to a snippet of C that
> uses TRACE_EVENT() in the first string literal and calls the trace_* function in the function body.
>
> The C code ends up in a section of the Rust object file that is extracted with objdump in the Makefile and fed back to
> Kbuild. That's very convenient to use and avoids splitting away one-liners like that, does not break randomly
> like cbindgen on some Rust code I don't control coming from 3rd party crates, and since the module is built at runtime
> by the user, it also has the big advantage of not requiring to build cbindgen in the "tepid" path.
>
>> -- Steve
>
>
> Douglas
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2()
@ 2025-03-25 21:40 Adnan-khan Ruzwan1
0 siblings, 0 replies; 12+ messages in thread
From: Adnan-khan Ruzwan1 @ 2025-03-25 21:40 UTC (permalink / raw)
To: douglas.raillard
Cc: aliceryhl, linux-kernel, linux-trace-kernel, mathieu.desnoyers,
mhiramat, rostedt
Sent from my iPhone
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2025-03-25 21:40 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-03-18 18:08 [PATCH 1/3] tracing: Expose functions to trace a synth event Douglas RAILLARD
2025-03-18 18:08 ` [PATCH 2/3] tracing: Rename find_synth_event() into synth_event_find() Douglas RAILLARD
2025-03-18 18:08 ` [PATCH 3/3] tracing: Rename trace_synth() to synth_event_trace2() Douglas RAILLARD
2025-03-19 13:37 ` Masami Hiramatsu
2025-03-19 14:51 ` Douglas Raillard
2025-03-24 6:29 ` Masami Hiramatsu
2025-03-24 14:30 ` Steven Rostedt
2025-03-25 16:05 ` Douglas Raillard
2025-03-25 16:25 ` Steven Rostedt
2025-03-25 18:16 ` Douglas Raillard
2025-03-25 20:13 ` Douglas Raillard
-- strict thread matches above, loose matches on Subject: below --
2025-03-25 21:40 Adnan-khan Ruzwan1
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).