* [PATCH] tracing: Allow perf to read synthetic events
@ 2025-12-17 16:39 Steven Rostedt
2025-12-17 18:58 ` Ian Rogers
2025-12-28 22:59 ` Jiri Olsa
0 siblings, 2 replies; 6+ messages in thread
From: Steven Rostedt @ 2025-12-17 16:39 UTC (permalink / raw)
To: LKML, Linux Trace Kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Ian Rogers,
Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Peter Zijlstra
From: Steven Rostedt <rostedt@goodmis.org>
Currently, perf can not enable synthetic events. When it does, it either
causes a warning in the kernel or errors with "no such device".
Add the necessary code to allow perf to also attach to synthetic events.
Reported-by: Ian Rogers <irogers@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
kernel/trace/trace_events_synth.c | 121 +++++++++++++++++++++++-------
1 file changed, 94 insertions(+), 27 deletions(-)
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 4554c458b78c..026e06f28958 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -493,28 +493,19 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
return len;
}
-static notrace void trace_event_raw_event_synth(void *__data,
- u64 *var_ref_vals,
- unsigned int *var_ref_idx)
+static __always_inline int get_field_size(struct synth_event *event,
+ u64 *var_ref_vals,
+ unsigned int *var_ref_idx)
{
- unsigned int i, n_u64, val_idx, len, data_size = 0;
- struct trace_event_file *trace_file = __data;
- struct synth_trace_event *entry;
- struct trace_event_buffer fbuffer;
- struct trace_buffer *buffer;
- struct synth_event *event;
- int fields_size = 0;
-
- event = trace_file->event_call->data;
-
- if (trace_trigger_soft_disabled(trace_file))
- return;
+ int fields_size;
fields_size = event->n_u64 * sizeof(u64);
- for (i = 0; i < event->n_dynamic_fields; i++) {
+ for (int i = 0; i < event->n_dynamic_fields; i++) {
unsigned int field_pos = event->dynamic_fields[i]->field_pos;
char *str_val;
+ int val_idx;
+ int len;
val_idx = var_ref_idx[field_pos];
str_val = (char *)(long)var_ref_vals[val_idx];
@@ -529,18 +520,18 @@ static notrace void trace_event_raw_event_synth(void *__data,
fields_size += len;
}
+ return fields_size;
+}
- /*
- * Avoid ring buffer recursion detection, as this event
- * is being performed within another event.
- */
- buffer = trace_file->tr->array_buffer.buffer;
- guard(ring_buffer_nest)(buffer);
-
- entry = trace_event_buffer_reserve(&fbuffer, trace_file,
- sizeof(*entry) + fields_size);
- if (!entry)
- return;
+static __always_inline void write_synth_entry(struct synth_event *event,
+ struct synth_trace_event *entry,
+ u64 *var_ref_vals,
+ unsigned int *var_ref_idx)
+{
+ int data_size = 0;
+ int i, n_u64;
+ int val_idx;
+ int len;
for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
val_idx = var_ref_idx[i];
@@ -581,10 +572,83 @@ static notrace void trace_event_raw_event_synth(void *__data,
n_u64++;
}
}
+}
+
+static notrace void trace_event_raw_event_synth(void *__data,
+ u64 *var_ref_vals,
+ unsigned int *var_ref_idx)
+{
+ struct trace_event_file *trace_file = __data;
+ struct synth_trace_event *entry;
+ struct trace_event_buffer fbuffer;
+ struct trace_buffer *buffer;
+ struct synth_event *event;
+ int fields_size;
+
+ event = trace_file->event_call->data;
+
+ if (trace_trigger_soft_disabled(trace_file))
+ return;
+
+ fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
+
+ /*
+ * Avoid ring buffer recursion detection, as this event
+ * is being performed within another event.
+ */
+ buffer = trace_file->tr->array_buffer.buffer;
+ guard(ring_buffer_nest)(buffer);
+
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+ sizeof(*entry) + fields_size);
+ if (!entry)
+ return;
+
+ write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
trace_event_buffer_commit(&fbuffer);
}
+#ifdef CONFIG_PERF_EVENTS
+static notrace void perf_event_raw_event_synth(void *__data,
+ u64 *var_ref_vals,
+ unsigned int *var_ref_idx)
+{
+ struct trace_event_call *call = __data;
+ struct synth_trace_event *entry;
+ struct hlist_head *perf_head;
+ struct synth_event *event;
+ struct pt_regs *regs;
+ int fields_size;
+ size_t size;
+ int context;
+
+ event = call->data;
+
+ perf_head = this_cpu_ptr(call->perf_events);
+
+ if (!perf_head || hlist_empty(perf_head))
+ return;
+
+ fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
+
+ size = ALIGN(sizeof(*entry) + fields_size, 8);
+
+ entry = perf_trace_buf_alloc(size, &regs, &context);
+
+ if (unlikely(!entry))
+ return;
+
+ write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
+
+ perf_fetch_caller_regs(regs);
+
+ perf_trace_buf_submit(entry, size, context,
+ call->event.type, 1, regs,
+ perf_head, NULL);
+}
+#endif
+
static void free_synth_event_print_fmt(struct trace_event_call *call)
{
if (call) {
@@ -911,6 +975,9 @@ static int register_synth_event(struct synth_event *event)
call->flags = TRACE_EVENT_FL_TRACEPOINT;
call->class->reg = synth_event_reg;
call->class->probe = trace_event_raw_event_synth;
+#ifdef CONFIG_PERF_EVENTS
+ call->class->perf_probe = perf_event_raw_event_synth;
+#endif
call->data = event;
call->tp = event->tp;
--
2.51.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] tracing: Allow perf to read synthetic events
2025-12-17 16:39 [PATCH] tracing: Allow perf to read synthetic events Steven Rostedt
@ 2025-12-17 18:58 ` Ian Rogers
2025-12-17 19:08 ` Steven Rostedt
2025-12-28 22:59 ` Jiri Olsa
1 sibling, 1 reply; 6+ messages in thread
From: Ian Rogers @ 2025-12-17 18:58 UTC (permalink / raw)
To: Steven Rostedt
Cc: LKML, Linux Trace Kernel, Masami Hiramatsu, Mathieu Desnoyers,
Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Peter Zijlstra
On Wed, Dec 17, 2025 at 8:37 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> From: Steven Rostedt <rostedt@goodmis.org>
>
> Currently, perf can not enable synthetic events. When it does, it either
> causes a warning in the kernel or errors with "no such device".
>
> Add the necessary code to allow perf to also attach to synthetic events.
>
> Reported-by: Ian Rogers <irogers@google.com>
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
This is great! Any chance of an example of using it? I'd like to add a test.
Thanks,
Ian
> ---
> kernel/trace/trace_events_synth.c | 121 +++++++++++++++++++++++-------
> 1 file changed, 94 insertions(+), 27 deletions(-)
>
> diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
> index 4554c458b78c..026e06f28958 100644
> --- a/kernel/trace/trace_events_synth.c
> +++ b/kernel/trace/trace_events_synth.c
> @@ -493,28 +493,19 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
> return len;
> }
>
> -static notrace void trace_event_raw_event_synth(void *__data,
> - u64 *var_ref_vals,
> - unsigned int *var_ref_idx)
> +static __always_inline int get_field_size(struct synth_event *event,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> {
> - unsigned int i, n_u64, val_idx, len, data_size = 0;
> - struct trace_event_file *trace_file = __data;
> - struct synth_trace_event *entry;
> - struct trace_event_buffer fbuffer;
> - struct trace_buffer *buffer;
> - struct synth_event *event;
> - int fields_size = 0;
> -
> - event = trace_file->event_call->data;
> -
> - if (trace_trigger_soft_disabled(trace_file))
> - return;
> + int fields_size;
>
> fields_size = event->n_u64 * sizeof(u64);
>
> - for (i = 0; i < event->n_dynamic_fields; i++) {
> + for (int i = 0; i < event->n_dynamic_fields; i++) {
> unsigned int field_pos = event->dynamic_fields[i]->field_pos;
> char *str_val;
> + int val_idx;
> + int len;
>
> val_idx = var_ref_idx[field_pos];
> str_val = (char *)(long)var_ref_vals[val_idx];
> @@ -529,18 +520,18 @@ static notrace void trace_event_raw_event_synth(void *__data,
>
> fields_size += len;
> }
> + return fields_size;
> +}
>
> - /*
> - * Avoid ring buffer recursion detection, as this event
> - * is being performed within another event.
> - */
> - buffer = trace_file->tr->array_buffer.buffer;
> - guard(ring_buffer_nest)(buffer);
> -
> - entry = trace_event_buffer_reserve(&fbuffer, trace_file,
> - sizeof(*entry) + fields_size);
> - if (!entry)
> - return;
> +static __always_inline void write_synth_entry(struct synth_event *event,
> + struct synth_trace_event *entry,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + int data_size = 0;
> + int i, n_u64;
> + int val_idx;
> + int len;
>
> for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
> val_idx = var_ref_idx[i];
> @@ -581,10 +572,83 @@ static notrace void trace_event_raw_event_synth(void *__data,
> n_u64++;
> }
> }
> +}
> +
> +static notrace void trace_event_raw_event_synth(void *__data,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + struct trace_event_file *trace_file = __data;
> + struct synth_trace_event *entry;
> + struct trace_event_buffer fbuffer;
> + struct trace_buffer *buffer;
> + struct synth_event *event;
> + int fields_size;
> +
> + event = trace_file->event_call->data;
> +
> + if (trace_trigger_soft_disabled(trace_file))
> + return;
> +
> + fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
> +
> + /*
> + * Avoid ring buffer recursion detection, as this event
> + * is being performed within another event.
> + */
> + buffer = trace_file->tr->array_buffer.buffer;
> + guard(ring_buffer_nest)(buffer);
> +
> + entry = trace_event_buffer_reserve(&fbuffer, trace_file,
> + sizeof(*entry) + fields_size);
> + if (!entry)
> + return;
> +
> + write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
>
> trace_event_buffer_commit(&fbuffer);
> }
>
> +#ifdef CONFIG_PERF_EVENTS
> +static notrace void perf_event_raw_event_synth(void *__data,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + struct trace_event_call *call = __data;
> + struct synth_trace_event *entry;
> + struct hlist_head *perf_head;
> + struct synth_event *event;
> + struct pt_regs *regs;
> + int fields_size;
> + size_t size;
> + int context;
> +
> + event = call->data;
> +
> + perf_head = this_cpu_ptr(call->perf_events);
> +
> + if (!perf_head || hlist_empty(perf_head))
> + return;
> +
> + fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
> +
> + size = ALIGN(sizeof(*entry) + fields_size, 8);
> +
> + entry = perf_trace_buf_alloc(size, &regs, &context);
> +
> + if (unlikely(!entry))
> + return;
> +
> + write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
> +
> + perf_fetch_caller_regs(regs);
> +
> + perf_trace_buf_submit(entry, size, context,
> + call->event.type, 1, regs,
> + perf_head, NULL);
> +}
> +#endif
> +
> static void free_synth_event_print_fmt(struct trace_event_call *call)
> {
> if (call) {
> @@ -911,6 +975,9 @@ static int register_synth_event(struct synth_event *event)
> call->flags = TRACE_EVENT_FL_TRACEPOINT;
> call->class->reg = synth_event_reg;
> call->class->probe = trace_event_raw_event_synth;
> +#ifdef CONFIG_PERF_EVENTS
> + call->class->perf_probe = perf_event_raw_event_synth;
> +#endif
> call->data = event;
> call->tp = event->tp;
>
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] tracing: Allow perf to read synthetic events
2025-12-17 18:58 ` Ian Rogers
@ 2025-12-17 19:08 ` Steven Rostedt
0 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2025-12-17 19:08 UTC (permalink / raw)
To: Ian Rogers
Cc: LKML, Linux Trace Kernel, Masami Hiramatsu, Mathieu Desnoyers,
Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Peter Zijlstra
On Wed, 17 Dec 2025 10:58:25 -0800
Ian Rogers <irogers@google.com> wrote:
> This is great! Any chance of an example of using it? I'd like to add a test.
I tested it with:
# trace-cmd sqlhist -e -n futex_wait select TIMESTAMP_DELTA_USECS as lat from sys_enter_futex as start join sys_exit_futex as end on start.common_pid = end.common_pid
# perf record -e synthetic:futex_wait
# perf script
bash 1043 [002] 49.501707: synthetic:futex_wait: lat=8
in:imuxsock 610 [005] 58.904344: synthetic:futex_wait: lat=27
rs:main Q:Reg 612 [007] 58.904478: synthetic:futex_wait: lat=10847333
rs:main Q:Reg 612 [007] 58.904507: synthetic:futex_wait: lat=6
ls 1050 [004] 59.283561: synthetic:futex_wait: lat=8
in:imklog 611 [004] 69.084959: synthetic:futex_wait: lat=55
rs:main Q:Reg 612 [007] 69.085014: synthetic:futex_wait: lat=10180378
rs:main Q:Reg 612 [007] 69.085041: synthetic:futex_wait: lat=5
But feel free to use any other synthetic event or perf command.
-- Steve
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] tracing: Allow perf to read synthetic events
2025-12-17 16:39 [PATCH] tracing: Allow perf to read synthetic events Steven Rostedt
2025-12-17 18:58 ` Ian Rogers
@ 2025-12-28 22:59 ` Jiri Olsa
2025-12-29 22:49 ` Steven Rostedt
1 sibling, 1 reply; 6+ messages in thread
From: Jiri Olsa @ 2025-12-28 22:59 UTC (permalink / raw)
To: Steven Rostedt
Cc: LKML, Linux Trace Kernel, Masami Hiramatsu, Mathieu Desnoyers,
Ian Rogers, Arnaldo Carvalho de Melo, Namhyung Kim,
Peter Zijlstra
On Wed, Dec 17, 2025 at 11:39:20AM -0500, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
>
> Currently, perf can not enable synthetic events. When it does, it either
> causes a warning in the kernel or errors with "no such device".
>
> Add the necessary code to allow perf to also attach to synthetic events.
>
> Reported-by: Ian Rogers <irogers@google.com>
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
hi,
I don't see the crash, but perf record/script gives me 'FAILED TO PARSE' in perf script:
# cd /sys/kernel/tracing
# echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
# echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=common_stacktrace if prev_state == 2' >> events/sched/sched_switch/trigger
# echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(block_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
# echo 1 > events/synthetic/block_lat/enable
# perf record -e 'synthetic:block_lat' -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.259 MB perf.data (1 samples) ]
# perf script
kworker/u33:2-w 244 [000] 1707.836263: synthetic:block_lat: [FAILED TO PARSE] pid=244 delta=21 stack=ARRAY[0b, 00, 00, 00, 00, 00, 00, 00, 1d, 72, 9d, 82, ff, ff, ff, ff, 0d, 7d, 9d, 82, ff, ff, ff, ff, 3d, 3d, 9e, 82, ff, ff, ff, ff, 05, 91, 9d, 82, ff, ff, ff, ff, 40, 7a, 42, 81, ff, ff, ff, ff, 5e, f4, 0c, 82, ff, ff, ff, ff, 43, 8d, 0c, 82, ff, ff, ff, ff, 82, 2d, 89, 81, ff, ff, ff, ff, 9b, 39, 89, 81, ff, ff, ff, ff, a6, 5a, 9c, 82, ff, ff, ff, ff, 2f, 01, 00, 81, ff, ff, ff, ff]
not sure it's fixed in the latest libtraceevent, mine is
libtraceevent-1.8.4-3.fc42.x86_64
jirka
> ---
> kernel/trace/trace_events_synth.c | 121 +++++++++++++++++++++++-------
> 1 file changed, 94 insertions(+), 27 deletions(-)
>
> diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
> index 4554c458b78c..026e06f28958 100644
> --- a/kernel/trace/trace_events_synth.c
> +++ b/kernel/trace/trace_events_synth.c
> @@ -493,28 +493,19 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
> return len;
> }
>
> -static notrace void trace_event_raw_event_synth(void *__data,
> - u64 *var_ref_vals,
> - unsigned int *var_ref_idx)
> +static __always_inline int get_field_size(struct synth_event *event,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> {
> - unsigned int i, n_u64, val_idx, len, data_size = 0;
> - struct trace_event_file *trace_file = __data;
> - struct synth_trace_event *entry;
> - struct trace_event_buffer fbuffer;
> - struct trace_buffer *buffer;
> - struct synth_event *event;
> - int fields_size = 0;
> -
> - event = trace_file->event_call->data;
> -
> - if (trace_trigger_soft_disabled(trace_file))
> - return;
> + int fields_size;
>
> fields_size = event->n_u64 * sizeof(u64);
>
> - for (i = 0; i < event->n_dynamic_fields; i++) {
> + for (int i = 0; i < event->n_dynamic_fields; i++) {
> unsigned int field_pos = event->dynamic_fields[i]->field_pos;
> char *str_val;
> + int val_idx;
> + int len;
>
> val_idx = var_ref_idx[field_pos];
> str_val = (char *)(long)var_ref_vals[val_idx];
> @@ -529,18 +520,18 @@ static notrace void trace_event_raw_event_synth(void *__data,
>
> fields_size += len;
> }
> + return fields_size;
> +}
>
> - /*
> - * Avoid ring buffer recursion detection, as this event
> - * is being performed within another event.
> - */
> - buffer = trace_file->tr->array_buffer.buffer;
> - guard(ring_buffer_nest)(buffer);
> -
> - entry = trace_event_buffer_reserve(&fbuffer, trace_file,
> - sizeof(*entry) + fields_size);
> - if (!entry)
> - return;
> +static __always_inline void write_synth_entry(struct synth_event *event,
> + struct synth_trace_event *entry,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + int data_size = 0;
> + int i, n_u64;
> + int val_idx;
> + int len;
>
> for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
> val_idx = var_ref_idx[i];
> @@ -581,10 +572,83 @@ static notrace void trace_event_raw_event_synth(void *__data,
> n_u64++;
> }
> }
> +}
> +
> +static notrace void trace_event_raw_event_synth(void *__data,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + struct trace_event_file *trace_file = __data;
> + struct synth_trace_event *entry;
> + struct trace_event_buffer fbuffer;
> + struct trace_buffer *buffer;
> + struct synth_event *event;
> + int fields_size;
> +
> + event = trace_file->event_call->data;
> +
> + if (trace_trigger_soft_disabled(trace_file))
> + return;
> +
> + fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
> +
> + /*
> + * Avoid ring buffer recursion detection, as this event
> + * is being performed within another event.
> + */
> + buffer = trace_file->tr->array_buffer.buffer;
> + guard(ring_buffer_nest)(buffer);
> +
> + entry = trace_event_buffer_reserve(&fbuffer, trace_file,
> + sizeof(*entry) + fields_size);
> + if (!entry)
> + return;
> +
> + write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
>
> trace_event_buffer_commit(&fbuffer);
> }
>
> +#ifdef CONFIG_PERF_EVENTS
> +static notrace void perf_event_raw_event_synth(void *__data,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + struct trace_event_call *call = __data;
> + struct synth_trace_event *entry;
> + struct hlist_head *perf_head;
> + struct synth_event *event;
> + struct pt_regs *regs;
> + int fields_size;
> + size_t size;
> + int context;
> +
> + event = call->data;
> +
> + perf_head = this_cpu_ptr(call->perf_events);
> +
> + if (!perf_head || hlist_empty(perf_head))
> + return;
> +
> + fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
> +
> + size = ALIGN(sizeof(*entry) + fields_size, 8);
> +
> + entry = perf_trace_buf_alloc(size, &regs, &context);
> +
> + if (unlikely(!entry))
> + return;
> +
> + write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
> +
> + perf_fetch_caller_regs(regs);
> +
> + perf_trace_buf_submit(entry, size, context,
> + call->event.type, 1, regs,
> + perf_head, NULL);
> +}
> +#endif
> +
> static void free_synth_event_print_fmt(struct trace_event_call *call)
> {
> if (call) {
> @@ -911,6 +975,9 @@ static int register_synth_event(struct synth_event *event)
> call->flags = TRACE_EVENT_FL_TRACEPOINT;
> call->class->reg = synth_event_reg;
> call->class->probe = trace_event_raw_event_synth;
> +#ifdef CONFIG_PERF_EVENTS
> + call->class->perf_probe = perf_event_raw_event_synth;
> +#endif
> call->data = event;
> call->tp = event->tp;
>
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] tracing: Allow perf to read synthetic events
2025-12-28 22:59 ` Jiri Olsa
@ 2025-12-29 22:49 ` Steven Rostedt
2025-12-29 23:36 ` Steven Rostedt
0 siblings, 1 reply; 6+ messages in thread
From: Steven Rostedt @ 2025-12-29 22:49 UTC (permalink / raw)
To: Jiri Olsa
Cc: LKML, Linux Trace Kernel, Masami Hiramatsu, Mathieu Desnoyers,
Ian Rogers, Arnaldo Carvalho de Melo, Namhyung Kim,
Peter Zijlstra
On Sun, 28 Dec 2025 23:59:34 +0100
Jiri Olsa <olsajiri@gmail.com> wrote:
> hi,
> I don't see the crash, but perf record/script gives me 'FAILED TO PARSE' in perf script:
>
> # cd /sys/kernel/tracing
> # echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
> # echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=common_stacktrace if prev_state == 2' >> events/sched/sched_switch/trigger
> # echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(block_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
> # echo 1 > events/synthetic/block_lat/enable
>
> # perf record -e 'synthetic:block_lat' -a
> ^C[ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.259 MB perf.data (1 samples) ]
> # perf script
> kworker/u33:2-w 244 [000] 1707.836263: synthetic:block_lat: [FAILED TO PARSE] pid=244 delta=21 stack=ARRAY[0b, 00, 00, 00, 00, 00, 00, 00, 1d, 72, 9d, 82, ff, ff, ff, ff, 0d, 7d, 9d, 82, ff, ff, ff, ff, 3d, 3d, 9e, 82, ff, ff, ff, ff, 05, 91, 9d, 82, ff, ff, ff, ff, 40, 7a, 42, 81, ff, ff, ff, ff, 5e, f4, 0c, 82, ff, ff, ff, ff, 43, 8d, 0c, 82, ff, ff, ff, ff, 82, 2d, 89, 81, ff, ff, ff, ff, 9b, 39, 89, 81, ff, ff, ff, ff, a6, 5a, 9c, 82, ff, ff, ff, ff, 2f, 01, 00, 81, ff, ff, ff, ff]
>
> not sure it's fixed in the latest libtraceevent, mine is
> libtraceevent-1.8.4-3.fc42.x86_64
Yeah, it's not supported yet by libtraceevent (fails for trace-cmd record
as well). But this should be easily fixed:
># trace-cmd list -e block_lat -F --full
system: synthetic
name: block_lat
ID: 1871
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:pid_t pid; offset:8; size:4; signed:1;
field:u64 delta; offset:16; size:8; signed:0;
field:__data_loc unsigned long[] stack; offset:24; size:8; signed:1;
print fmt: "pid=%d delta=%llu stack=%s", REC->pid, REC->delta, __get_stacktrace(stack)
Looks like I only need to add "__get_stacktrace()" to the libtraceevent parsing.
Thanks,
-- Steve
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] tracing: Allow perf to read synthetic events
2025-12-29 22:49 ` Steven Rostedt
@ 2025-12-29 23:36 ` Steven Rostedt
0 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2025-12-29 23:36 UTC (permalink / raw)
To: Jiri Olsa
Cc: LKML, Linux Trace Kernel, Masami Hiramatsu, Mathieu Desnoyers,
Ian Rogers, Arnaldo Carvalho de Melo, Namhyung Kim,
Peter Zijlstra
On Mon, 29 Dec 2025 17:49:01 -0500
Steven Rostedt <steven@rostedt.org> wrote:
> Looks like I only need to add "__get_stacktrace()" to the libtraceevent parsing.
The below patch appears to work:
<...>-22 [001] d..5. 2891.837516: block_lat: pid=22 delta=159 stack=
=> __schedule+0xded
=> schedule+0x123
=> io_schedule+0x44
=> bit_wait_io+0x11
=> __wait_on_bit+0x4a
=> out_of_line_wait_on_bit+0x9d
=> ext4_read_bh+0x95
=> ext4_bread+0x51
=> __ext4_read_dirblock+0x45
=> htree_dirblock_to_tree+0x76
=> ext4_htree_fill_tree+0x3b1
=> ext4_readdir+0xa9b
=> iterate_dir+0xef
=> __se_sys_getdents64+0x76
=> do_syscall_64+0x93
<...>-254 [004] d..5. 2892.173786: block_lat: pid=254 delta=57 stack=
=> __schedule+0xded
=> schedule+0x123
=> io_schedule+0x44
=> bit_wait_io+0x11
=> __wait_on_bit+0x4a
=> out_of_line_wait_on_bit+0x9d
=> ext4_read_bh+0x95
=> ext4_bread+0x51
=> __ext4_read_dirblock+0x45
=> htree_dirblock_to_tree+0x76
=> ext4_htree_fill_tree+0x1e5
=> ext4_readdir+0xa9b
=> iterate_dir+0xef
=> __se_sys_getdents64+0x76
=> do_syscall_64+0x93
-- Steve
diff --git a/include/traceevent/event-parse.h b/include/traceevent/event-parse.h
index ebfc2c7..9c1abfa 100644
--- a/include/traceevent/event-parse.h
+++ b/include/traceevent/event-parse.h
@@ -138,6 +138,11 @@ struct tep_print_arg_bitmask {
struct tep_format_field *field;
};
+struct tep_print_arg_stacktrace {
+ char *stacktrace;
+ struct tep_format_field *field;
+};
+
struct tep_print_arg_field {
char *name;
struct tep_format_field *field;
@@ -215,6 +220,7 @@ enum tep_print_arg_type {
TEP_PRINT_DYNAMIC_ARRAY_LEN,
TEP_PRINT_HEX_STR,
TEP_PRINT_CPUMASK,
+ TEP_PRINT_STACKTRACE,
};
struct tep_print_arg {
@@ -231,6 +237,7 @@ struct tep_print_arg {
struct tep_print_arg_func func;
struct tep_print_arg_string string;
struct tep_print_arg_bitmask bitmask;
+ struct tep_print_arg_stacktrace stacktrace;
struct tep_print_arg_op op;
struct tep_print_arg_dynarray dynarray;
};
diff --git a/src/event-parse.c b/src/event-parse.c
index 939b0a8..09d9092 100644
--- a/src/event-parse.c
+++ b/src/event-parse.c
@@ -1129,6 +1129,9 @@ static void free_arg(struct tep_print_arg *arg)
free_arg(arg->op.left);
free_arg(arg->op.right);
break;
+ case TEP_PRINT_STACKTRACE:
+ free(arg->stacktrace.stacktrace);
+ break;
case TEP_PRINT_FUNC:
while (arg->func.args) {
farg = arg->func.args;
@@ -2895,6 +2898,7 @@ static int arg_num_eval(struct tep_print_arg *arg, long long *val)
case TEP_PRINT_BSTRING:
case TEP_PRINT_BITMASK:
case TEP_PRINT_CPUMASK:
+ case TEP_PRINT_STACKTRACE:
default:
do_warning("invalid eval type %d", arg->type);
ret = 0;
@@ -2925,6 +2929,7 @@ static char *arg_eval (struct tep_print_arg *arg)
case TEP_PRINT_BSTRING:
case TEP_PRINT_BITMASK:
case TEP_PRINT_CPUMASK:
+ case TEP_PRINT_STACKTRACE:
default:
do_warning("invalid eval type %d", arg->type);
break;
@@ -3462,6 +3467,34 @@ process_cpumask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar
return type;
}
+static enum tep_event_type
+process_stacktrace(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+ enum tep_event_type type;
+ char *token;
+
+ if (read_expect_type(event->tep, TEP_EVENT_ITEM, &token) < 0)
+ goto out_free;
+
+ arg->type = TEP_PRINT_STACKTRACE;
+ arg->stacktrace.stacktrace = token;
+ arg->stacktrace.field = NULL;
+
+ if (read_expected(event->tep, TEP_EVENT_DELIM, ")") < 0)
+ goto out_err;
+
+ type = read_token(event->tep, &token);
+ *tok = token;
+
+ return type;
+
+ out_free:
+ free_token(token);
+ out_err:
+ *tok = NULL;
+ return TEP_EVENT_ERROR;
+}
+
static struct tep_function_handler *
find_func_handler(struct tep_handle *tep, char *func_name)
{
@@ -3750,6 +3783,10 @@ process_function(struct tep_event *event, struct tep_print_arg *arg,
free_token(token);
return process_dynamic_array_len(event, arg, tok);
}
+ if (strcmp(token, "__get_stacktrace") == 0) {
+ free_token(token);
+ return process_stacktrace(event, arg, tok);
+ }
if (strcmp(token, "__builtin_expect") == 0) {
free_token(token);
return process_builtin_expect(event, arg, tok);
@@ -4414,6 +4451,7 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg
case TEP_PRINT_BSTRING:
case TEP_PRINT_BITMASK:
case TEP_PRINT_CPUMASK:
+ case TEP_PRINT_STACKTRACE:
return 0;
case TEP_PRINT_FUNC: {
struct trace_seq s;
@@ -4859,6 +4897,33 @@ more:
free(str);
}
+static void print_stacktrace_to_seq(struct tep_handle *tep,
+ struct trace_seq *s, const char *format,
+ int len_arg, const void *data, int size)
+{
+ int nr_funcs = size / tep->long_size;
+ struct func_map *func;
+ unsigned long long val;
+
+ trace_seq_putc(s, '\n');
+
+ /* The first entry is a counter, skip it */
+ data += tep->long_size;
+
+ for (int i = 1; i < nr_funcs; i++) {
+ trace_seq_puts(s, "=> ");
+ val = tep_read_number(tep, data, tep->long_size);
+ func = find_func(tep, val);
+ if (func) {
+ trace_seq_puts(s, func->func);
+ trace_seq_printf(s, "+0x%llx\n", val - func->addr);
+ } else {
+ trace_seq_printf(s, "%llx\n", val);
+ }
+ data += tep->long_size;
+ }
+}
+
static void print_str_arg(struct trace_seq *s, void *data, int size,
struct tep_event *event, const char *format,
int len_arg, struct tep_print_arg *arg)
@@ -5097,6 +5162,17 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
data + offset, len);
break;
}
+ case TEP_PRINT_STACKTRACE: {
+ if (!arg->stacktrace.field) {
+ arg->stacktrace.field = tep_find_any_field(event, arg->stacktrace.stacktrace);
+ if (!arg->stacktrace.field)
+ break;
+ }
+ dynamic_offset_field(tep, arg->stacktrace.field, data, size, &offset, &len);
+ print_stacktrace_to_seq(tep, s, format, len_arg,
+ data + offset, len);
+ break;
+ }
case TEP_PRINT_OP:
/*
* The only op for string should be ? :
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2025-12-29 23:36 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-12-17 16:39 [PATCH] tracing: Allow perf to read synthetic events Steven Rostedt
2025-12-17 18:58 ` Ian Rogers
2025-12-17 19:08 ` Steven Rostedt
2025-12-28 22:59 ` Jiri Olsa
2025-12-29 22:49 ` Steven Rostedt
2025-12-29 23:36 ` Steven Rostedt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).