* [PATCH v2] tracing: Allow perf to read synthetic events
@ 2026-05-13 19:00 Steven Rostedt
2026-05-14 18:28 ` Namhyung Kim
2026-05-14 18:47 ` Steven Rostedt
0 siblings, 2 replies; 3+ messages in thread
From: Steven Rostedt @ 2026-05-13 19:00 UTC (permalink / raw)
To: LKML, Linux Trace Kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Arnaldo Carvalho de Melo,
Jiri Olsa, Namhyung Kim, Peter Zijlstra, Ian Rogers
From: Steven Rostedt <rostedt@goodmis.org>
Currently, perf can not enable synthetic events. When it does, it either
causes a warning in the kernel or errors with "no such device".
Add the necessary code to allow perf to also attach to synthetic events.
Reported-by: Ian Rogers <irogers@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
Changes since v1: https://patch.msgid.link/20251217113920.50b56246@gandalf.local.home
- Forward ported to v7.1-rc2
kernel/trace/trace_events_synth.c | 121 +++++++++++++++++++++++-------
1 file changed, 94 insertions(+), 27 deletions(-)
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 39ac4eba0702..e6871230bde9 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -499,28 +499,19 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
return len;
}
-static void trace_event_raw_event_synth(void *__data,
- u64 *var_ref_vals,
- unsigned int *var_ref_idx)
+static __always_inline int get_field_size(struct synth_event *event,
+ u64 *var_ref_vals,
+ unsigned int *var_ref_idx)
{
- unsigned int i, n_u64, val_idx, len, data_size = 0;
- struct trace_event_file *trace_file = __data;
- struct synth_trace_event *entry;
- struct trace_event_buffer fbuffer;
- struct trace_buffer *buffer;
- struct synth_event *event;
- int fields_size = 0;
-
- event = trace_file->event_call->data;
-
- if (trace_trigger_soft_disabled(trace_file))
- return;
+ int fields_size;
fields_size = event->n_u64 * sizeof(u64);
- for (i = 0; i < event->n_dynamic_fields; i++) {
+ for (int i = 0; i < event->n_dynamic_fields; i++) {
unsigned int field_pos = event->dynamic_fields[i]->field_pos;
char *str_val;
+ int val_idx;
+ int len;
val_idx = var_ref_idx[field_pos];
str_val = (char *)(long)var_ref_vals[val_idx];
@@ -535,18 +526,18 @@ static void trace_event_raw_event_synth(void *__data,
fields_size += len;
}
+ return fields_size;
+}
- /*
- * Avoid ring buffer recursion detection, as this event
- * is being performed within another event.
- */
- buffer = trace_file->tr->array_buffer.buffer;
- guard(ring_buffer_nest)(buffer);
-
- entry = trace_event_buffer_reserve(&fbuffer, trace_file,
- sizeof(*entry) + fields_size);
- if (!entry)
- return;
+static __always_inline void write_synth_entry(struct synth_event *event,
+ struct synth_trace_event *entry,
+ u64 *var_ref_vals,
+ unsigned int *var_ref_idx)
+{
+ int data_size = 0;
+ int i, n_u64;
+ int val_idx;
+ int len;
for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
val_idx = var_ref_idx[i];
@@ -587,10 +578,83 @@ static void trace_event_raw_event_synth(void *__data,
n_u64++;
}
}
+}
+
+static void trace_event_raw_event_synth(void *__data,
+ u64 *var_ref_vals,
+ unsigned int *var_ref_idx)
+{
+ struct trace_event_file *trace_file = __data;
+ struct synth_trace_event *entry;
+ struct trace_event_buffer fbuffer;
+ struct trace_buffer *buffer;
+ struct synth_event *event;
+ int fields_size;
+
+ event = trace_file->event_call->data;
+
+ if (trace_trigger_soft_disabled(trace_file))
+ return;
+
+ fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
+
+ /*
+ * Avoid ring buffer recursion detection, as this event
+ * is being performed within another event.
+ */
+ buffer = trace_file->tr->array_buffer.buffer;
+ guard(ring_buffer_nest)(buffer);
+
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+ sizeof(*entry) + fields_size);
+ if (!entry)
+ return;
+
+ write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
trace_event_buffer_commit(&fbuffer);
}
+#ifdef CONFIG_PERF_EVENTS
+static void perf_event_raw_event_synth(void *__data,
+ u64 *var_ref_vals,
+ unsigned int *var_ref_idx)
+{
+ struct trace_event_call *call = __data;
+ struct synth_trace_event *entry;
+ struct hlist_head *perf_head;
+ struct synth_event *event;
+ struct pt_regs *regs;
+ int fields_size;
+ size_t size;
+ int context;
+
+ event = call->data;
+
+ perf_head = this_cpu_ptr(call->perf_events);
+
+ if (!perf_head || hlist_empty(perf_head))
+ return;
+
+ fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
+
+ size = ALIGN(sizeof(*entry) + fields_size, 8);
+
+ entry = perf_trace_buf_alloc(size, &regs, &context);
+
+ if (unlikely(!entry))
+ return;
+
+ write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
+
+ perf_fetch_caller_regs(regs);
+
+ perf_trace_buf_submit(entry, size, context,
+ call->event.type, 1, regs,
+ perf_head, NULL);
+}
+#endif
+
static void free_synth_event_print_fmt(struct trace_event_call *call)
{
if (call) {
@@ -917,6 +981,9 @@ static int register_synth_event(struct synth_event *event)
call->flags = TRACE_EVENT_FL_TRACEPOINT;
call->class->reg = synth_event_reg;
call->class->probe = trace_event_raw_event_synth;
+#ifdef CONFIG_PERF_EVENTS
+ call->class->perf_probe = perf_event_raw_event_synth;
+#endif
call->data = event;
call->tp = event->tp;
--
2.53.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v2] tracing: Allow perf to read synthetic events
2026-05-13 19:00 [PATCH v2] tracing: Allow perf to read synthetic events Steven Rostedt
@ 2026-05-14 18:28 ` Namhyung Kim
2026-05-14 18:47 ` Steven Rostedt
1 sibling, 0 replies; 3+ messages in thread
From: Namhyung Kim @ 2026-05-14 18:28 UTC (permalink / raw)
To: Steven Rostedt
Cc: LKML, Linux Trace Kernel, Masami Hiramatsu, Mathieu Desnoyers,
Arnaldo Carvalho de Melo, Jiri Olsa, Peter Zijlstra, Ian Rogers
On Wed, May 13, 2026 at 03:00:07PM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
>
> Currently, perf can not enable synthetic events. When it does, it either
> causes a warning in the kernel or errors with "no such device".
>
> Add the necessary code to allow perf to also attach to synthetic events.
>
> Reported-by: Ian Rogers <irogers@google.com>
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Thanks,
Namhyung
> ---
> Changes since v1: https://patch.msgid.link/20251217113920.50b56246@gandalf.local.home
>
> - Forward ported to v7.1-rc2
>
> kernel/trace/trace_events_synth.c | 121 +++++++++++++++++++++++-------
> 1 file changed, 94 insertions(+), 27 deletions(-)
>
> diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
> index 39ac4eba0702..e6871230bde9 100644
> --- a/kernel/trace/trace_events_synth.c
> +++ b/kernel/trace/trace_events_synth.c
> @@ -499,28 +499,19 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
> return len;
> }
>
> -static void trace_event_raw_event_synth(void *__data,
> - u64 *var_ref_vals,
> - unsigned int *var_ref_idx)
> +static __always_inline int get_field_size(struct synth_event *event,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> {
> - unsigned int i, n_u64, val_idx, len, data_size = 0;
> - struct trace_event_file *trace_file = __data;
> - struct synth_trace_event *entry;
> - struct trace_event_buffer fbuffer;
> - struct trace_buffer *buffer;
> - struct synth_event *event;
> - int fields_size = 0;
> -
> - event = trace_file->event_call->data;
> -
> - if (trace_trigger_soft_disabled(trace_file))
> - return;
> + int fields_size;
>
> fields_size = event->n_u64 * sizeof(u64);
>
> - for (i = 0; i < event->n_dynamic_fields; i++) {
> + for (int i = 0; i < event->n_dynamic_fields; i++) {
> unsigned int field_pos = event->dynamic_fields[i]->field_pos;
> char *str_val;
> + int val_idx;
> + int len;
>
> val_idx = var_ref_idx[field_pos];
> str_val = (char *)(long)var_ref_vals[val_idx];
> @@ -535,18 +526,18 @@ static void trace_event_raw_event_synth(void *__data,
>
> fields_size += len;
> }
> + return fields_size;
> +}
>
> - /*
> - * Avoid ring buffer recursion detection, as this event
> - * is being performed within another event.
> - */
> - buffer = trace_file->tr->array_buffer.buffer;
> - guard(ring_buffer_nest)(buffer);
> -
> - entry = trace_event_buffer_reserve(&fbuffer, trace_file,
> - sizeof(*entry) + fields_size);
> - if (!entry)
> - return;
> +static __always_inline void write_synth_entry(struct synth_event *event,
> + struct synth_trace_event *entry,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + int data_size = 0;
> + int i, n_u64;
> + int val_idx;
> + int len;
>
> for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
> val_idx = var_ref_idx[i];
> @@ -587,10 +578,83 @@ static void trace_event_raw_event_synth(void *__data,
> n_u64++;
> }
> }
> +}
> +
> +static void trace_event_raw_event_synth(void *__data,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + struct trace_event_file *trace_file = __data;
> + struct synth_trace_event *entry;
> + struct trace_event_buffer fbuffer;
> + struct trace_buffer *buffer;
> + struct synth_event *event;
> + int fields_size;
> +
> + event = trace_file->event_call->data;
> +
> + if (trace_trigger_soft_disabled(trace_file))
> + return;
> +
> + fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
> +
> + /*
> + * Avoid ring buffer recursion detection, as this event
> + * is being performed within another event.
> + */
> + buffer = trace_file->tr->array_buffer.buffer;
> + guard(ring_buffer_nest)(buffer);
> +
> + entry = trace_event_buffer_reserve(&fbuffer, trace_file,
> + sizeof(*entry) + fields_size);
> + if (!entry)
> + return;
> +
> + write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
>
> trace_event_buffer_commit(&fbuffer);
> }
>
> +#ifdef CONFIG_PERF_EVENTS
> +static void perf_event_raw_event_synth(void *__data,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + struct trace_event_call *call = __data;
> + struct synth_trace_event *entry;
> + struct hlist_head *perf_head;
> + struct synth_event *event;
> + struct pt_regs *regs;
> + int fields_size;
> + size_t size;
> + int context;
> +
> + event = call->data;
> +
> + perf_head = this_cpu_ptr(call->perf_events);
> +
> + if (!perf_head || hlist_empty(perf_head))
> + return;
> +
> + fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
> +
> + size = ALIGN(sizeof(*entry) + fields_size, 8);
> +
> + entry = perf_trace_buf_alloc(size, &regs, &context);
> +
> + if (unlikely(!entry))
> + return;
> +
> + write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
> +
> + perf_fetch_caller_regs(regs);
> +
> + perf_trace_buf_submit(entry, size, context,
> + call->event.type, 1, regs,
> + perf_head, NULL);
> +}
> +#endif
> +
> static void free_synth_event_print_fmt(struct trace_event_call *call)
> {
> if (call) {
> @@ -917,6 +981,9 @@ static int register_synth_event(struct synth_event *event)
> call->flags = TRACE_EVENT_FL_TRACEPOINT;
> call->class->reg = synth_event_reg;
> call->class->probe = trace_event_raw_event_synth;
> +#ifdef CONFIG_PERF_EVENTS
> + call->class->perf_probe = perf_event_raw_event_synth;
> +#endif
> call->data = event;
> call->tp = event->tp;
>
> --
> 2.53.0
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v2] tracing: Allow perf to read synthetic events
2026-05-13 19:00 [PATCH v2] tracing: Allow perf to read synthetic events Steven Rostedt
2026-05-14 18:28 ` Namhyung Kim
@ 2026-05-14 18:47 ` Steven Rostedt
1 sibling, 0 replies; 3+ messages in thread
From: Steven Rostedt @ 2026-05-14 18:47 UTC (permalink / raw)
To: LKML, Linux Trace Kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Arnaldo Carvalho de Melo,
Jiri Olsa, Namhyung Kim, Peter Zijlstra, Ian Rogers, sashiko
Replying to Sashiko's comments:
https://sashiko.dev/?list=org.kernel.vger.linux-trace-kernel#/patchset/20260513150007.3b280e87%40gandalf.local.home
> commit b3956023bf5f1ccf5838187c1c14f3dcf5e37c3d
> Author: Steven Rostedt <rostedt@goodmis.org>
> tracing: Allow perf to read synthetic events
> This commit adds support for perf to read synthetic events. It extracts the
> field sizing and entry writing logic into separate helper functions, and
> implements a new probe function to attach perf events.
> > diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
> > index 39ac4eba07024..e6871230bde96 100644
> > --- a/kernel/trace/trace_events_synth.c
> > +++ b/kernel/trace/trace_events_synth.c
> > @@ -499,28 +499,19 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
> [ ... ]
> > +static __always_inline void write_synth_entry(struct synth_event *event,
> > + struct synth_trace_event *entry,
> > + u64 *var_ref_vals,
> > + unsigned int *var_ref_idx)
> > +{
> > + int data_size = 0;
> Since get_field_size() and write_synth_entry() are being refactored here, is
> there a risk of a time-of-check to time-of-use buffer overflow with
> dynamic string fields?
> get_field_size() computes the string length for allocation by calling
> fetch_store_strlen(). The ring buffer event slot is then allocated based on
> this length.
> write_synth_entry() populates the buffer by calling trace_string(), which
> calls fetch_store_strlen() a second time to recalculate the string's length,
> assigning it to data->as_dynamic.len.
> If a string from user space grows between the two fetch_store_strlen() calls,
> fetch_store_string() will blindly copy more bytes than the allocated buffer
> space. Can this cause out-of-bounds writes and corrupt kernel memory?
> This regression wasn't introduced by this patch, but pre-existed in the
> ftrace implementation.
The string cannot grow between the two calls. When a dynamic event is
enabled (as it is required to be here) deleting or modifying it will be
denied via an -EBUSY return. That's the only way I'm aware of that can have
user space change the size of the string.
> [ ... ]
> > +#ifdef CONFIG_PERF_EVENTS
> > +static void perf_event_raw_event_synth(void *__data,
> > + u64 *var_ref_vals,
> > + unsigned int *var_ref_idx)
> > +{
> [ ... ]
> > + size = ALIGN(sizeof(*entry) + fields_size, 8);
> > +
> > + entry = perf_trace_buf_alloc(size, &regs, &context);
> > +
> > + if (unlikely(!entry))
> > + return;
> > +
> > + write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
> > +
> > + perf_fetch_caller_regs(regs);
> > +
> > + perf_trace_buf_submit(entry, size, context,
> > + call->event.type, 1, regs,
> > + perf_head, NULL);
> > +}
> > +#endif
> Could this leak uninitialized per-CPU kernel memory to userspace?
> perf_trace_buf_alloc() and trace_event_buffer_reserve() allocate memory from
> per-CPU buffers without zero-initializing it (except for trailing alignment
> padding).
Both of these per-CPU memory locations are zeroed when created. It is only
populated with trace data. Yeah, it may leak previous trace data, but that
data should also be visible for whoever created it in the first place.
-- Steve
> write_synth_entry() populates an array of 8-byte union trace_synth_field.
> When a field is smaller than 8 bytes (e.g., as_u8), only those specific bytes
> are written, leaving the remaining padding bytes uninitialized. Similarly, for
> empty stacktraces, get_field_size() reserves 8 bytes, but trace_stack()
> terminates immediately and writes 0 bytes, leaving the 8-byte gap completely
> uninitialized.
> perf_trace_buf_submit() copies the entire requested buffer size to the perf
> ring buffer. Can userspace read this buffer, leaking uninitialized kernel
> memory from previous events or kernel operations?
> This regression also wasn't introduced by this patch, but pre-existed in the
> ftrace implementation.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-05-14 18:47 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-13 19:00 [PATCH v2] tracing: Allow perf to read synthetic events Steven Rostedt
2026-05-14 18:28 ` Namhyung Kim
2026-05-14 18:47 ` Steven Rostedt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox