linux-trace-kernel.vger.kernel.org archive mirror
* [PATCH] tracing: Allow perf to read synthetic events
@ 2025-12-17 16:39 Steven Rostedt
  2025-12-17 18:58 ` Ian Rogers
  2025-12-28 22:59 ` Jiri Olsa
  0 siblings, 2 replies; 6+ messages in thread
From: Steven Rostedt @ 2025-12-17 16:39 UTC (permalink / raw)
  To: LKML, Linux Trace Kernel
  Cc: Masami Hiramatsu, Mathieu Desnoyers, Ian Rogers,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Peter Zijlstra

From: Steven Rostedt <rostedt@goodmis.org>

Currently, perf cannot enable synthetic events. When it tries, it either
triggers a warning in the kernel or fails with "no such device".

Add the necessary code to allow perf to also attach to synthetic events.

Reported-by: Ian Rogers <irogers@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 121 +++++++++++++++++++++++-------
 1 file changed, 94 insertions(+), 27 deletions(-)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 4554c458b78c..026e06f28958 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -493,28 +493,19 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
 	return len;
 }
 
-static notrace void trace_event_raw_event_synth(void *__data,
-						u64 *var_ref_vals,
-						unsigned int *var_ref_idx)
+static __always_inline int get_field_size(struct synth_event *event,
+					  u64 *var_ref_vals,
+					  unsigned int *var_ref_idx)
 {
-	unsigned int i, n_u64, val_idx, len, data_size = 0;
-	struct trace_event_file *trace_file = __data;
-	struct synth_trace_event *entry;
-	struct trace_event_buffer fbuffer;
-	struct trace_buffer *buffer;
-	struct synth_event *event;
-	int fields_size = 0;
-
-	event = trace_file->event_call->data;
-
-	if (trace_trigger_soft_disabled(trace_file))
-		return;
+	int fields_size;
 
 	fields_size = event->n_u64 * sizeof(u64);
 
-	for (i = 0; i < event->n_dynamic_fields; i++) {
+	for (int i = 0; i < event->n_dynamic_fields; i++) {
 		unsigned int field_pos = event->dynamic_fields[i]->field_pos;
 		char *str_val;
+		int val_idx;
+		int len;
 
 		val_idx = var_ref_idx[field_pos];
 		str_val = (char *)(long)var_ref_vals[val_idx];
@@ -529,18 +520,18 @@ static notrace void trace_event_raw_event_synth(void *__data,
 
 		fields_size += len;
 	}
+	return fields_size;
+}
 
-	/*
-	 * Avoid ring buffer recursion detection, as this event
-	 * is being performed within another event.
-	 */
-	buffer = trace_file->tr->array_buffer.buffer;
-	guard(ring_buffer_nest)(buffer);
-
-	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
-					   sizeof(*entry) + fields_size);
-	if (!entry)
-		return;
+static __always_inline void write_synth_entry(struct synth_event *event,
+					      struct synth_trace_event *entry,
+					      u64 *var_ref_vals,
+					      unsigned int *var_ref_idx)
+{
+	int data_size = 0;
+	int i, n_u64;
+	int val_idx;
+	int len;
 
 	for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
 		val_idx = var_ref_idx[i];
@@ -581,10 +572,83 @@ static notrace void trace_event_raw_event_synth(void *__data,
 			n_u64++;
 		}
 	}
+}
+
+static notrace void trace_event_raw_event_synth(void *__data,
+						u64 *var_ref_vals,
+						unsigned int *var_ref_idx)
+{
+	struct trace_event_file *trace_file = __data;
+	struct synth_trace_event *entry;
+	struct trace_event_buffer fbuffer;
+	struct trace_buffer *buffer;
+	struct synth_event *event;
+	int fields_size;
+
+	event = trace_file->event_call->data;
+
+	if (trace_trigger_soft_disabled(trace_file))
+		return;
+
+	fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
+
+	/*
+	 * Avoid ring buffer recursion detection, as this event
+	 * is being performed within another event.
+	 */
+	buffer = trace_file->tr->array_buffer.buffer;
+	guard(ring_buffer_nest)(buffer);
+
+	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+					   sizeof(*entry) + fields_size);
+	if (!entry)
+		return;
+
+	write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
 
 	trace_event_buffer_commit(&fbuffer);
 }
 
+#ifdef CONFIG_PERF_EVENTS
+static notrace void perf_event_raw_event_synth(void *__data,
+					       u64 *var_ref_vals,
+					       unsigned int *var_ref_idx)
+{
+	struct trace_event_call *call = __data;
+	struct synth_trace_event *entry;
+	struct hlist_head *perf_head;
+	struct synth_event *event;
+	struct pt_regs *regs;
+	int fields_size;
+	size_t size;
+	int context;
+
+	event = call->data;
+
+	perf_head = this_cpu_ptr(call->perf_events);
+
+	if (!perf_head || hlist_empty(perf_head))
+		return;
+
+	fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
+
+	size = ALIGN(sizeof(*entry) + fields_size, 8);
+
+	entry = perf_trace_buf_alloc(size, &regs, &context);
+
+	if (unlikely(!entry))
+		return;
+
+	write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
+
+	perf_fetch_caller_regs(regs);
+
+	perf_trace_buf_submit(entry, size, context,
+			      call->event.type, 1, regs,
+			      perf_head, NULL);
+}
+#endif
+
 static void free_synth_event_print_fmt(struct trace_event_call *call)
 {
 	if (call) {
@@ -911,6 +975,9 @@ static int register_synth_event(struct synth_event *event)
 	call->flags = TRACE_EVENT_FL_TRACEPOINT;
 	call->class->reg = synth_event_reg;
 	call->class->probe = trace_event_raw_event_synth;
+#ifdef CONFIG_PERF_EVENTS
+	call->class->perf_probe = perf_event_raw_event_synth;
+#endif
 	call->data = event;
 	call->tp = event->tp;
 
-- 
2.51.0


* Re: [PATCH] tracing: Allow perf to read synthetic events
  2025-12-17 16:39 [PATCH] tracing: Allow perf to read synthetic events Steven Rostedt
@ 2025-12-17 18:58 ` Ian Rogers
  2025-12-17 19:08   ` Steven Rostedt
  2025-12-28 22:59 ` Jiri Olsa
  1 sibling, 1 reply; 6+ messages in thread
From: Ian Rogers @ 2025-12-17 18:58 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: LKML, Linux Trace Kernel, Masami Hiramatsu, Mathieu Desnoyers,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Peter Zijlstra

On Wed, Dec 17, 2025 at 8:37 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> From: Steven Rostedt <rostedt@goodmis.org>
>
> Currently, perf cannot enable synthetic events. When it tries, it either
> triggers a warning in the kernel or fails with "no such device".
>
> Add the necessary code to allow perf to also attach to synthetic events.
>
> Reported-by: Ian Rogers <irogers@google.com>
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

This is great! Any chance of an example of using it? I'd like to add a test.

Thanks,
Ian


* Re: [PATCH] tracing: Allow perf to read synthetic events
  2025-12-17 18:58 ` Ian Rogers
@ 2025-12-17 19:08   ` Steven Rostedt
  0 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2025-12-17 19:08 UTC (permalink / raw)
  To: Ian Rogers
  Cc: LKML, Linux Trace Kernel, Masami Hiramatsu, Mathieu Desnoyers,
	Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim, Peter Zijlstra

On Wed, 17 Dec 2025 10:58:25 -0800
Ian Rogers <irogers@google.com> wrote:

> This is great! Any chance of an example of using it? I'd like to add a test.

I tested it with:

  # trace-cmd sqlhist -e -n futex_wait select TIMESTAMP_DELTA_USECS as lat from sys_enter_futex as start join sys_exit_futex as end on start.common_pid = end.common_pid
  # perf record -e synthetic:futex_wait
  # perf script
            bash    1043 [002]    49.501707: synthetic:futex_wait: lat=8
     in:imuxsock     610 [005]    58.904344: synthetic:futex_wait: lat=27
   rs:main Q:Reg     612 [007]    58.904478: synthetic:futex_wait: lat=10847333
   rs:main Q:Reg     612 [007]    58.904507: synthetic:futex_wait: lat=6
              ls    1050 [004]    59.283561: synthetic:futex_wait: lat=8
       in:imklog     611 [004]    69.084959: synthetic:futex_wait: lat=55
   rs:main Q:Reg     612 [007]    69.085014: synthetic:futex_wait: lat=10180378
   rs:main Q:Reg     612 [007]    69.085041: synthetic:futex_wait: lat=5


But feel free to use any other synthetic event or perf command.
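
If it helps for writing the test: attaching from C works the same as for
any other tracepoint. Something like this (just a sketch, not compiled
here; the event name and tracefs path come from the example above):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	FILE *f;
	int id, fd;

	/* A synthetic event gets a tracepoint id like any other event */
	f = fopen("/sys/kernel/tracing/events/synthetic/futex_wait/id", "r");
	if (!f || fscanf(f, "%d", &id) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.size = sizeof(attr);
	attr.config = id;
	attr.sample_period = 1;
	attr.sample_type = PERF_SAMPLE_RAW;

	/* all tasks on CPU 0; needs root */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* ... mmap the ring buffer and read the samples as usual ... */
	close(fd);
	return 0;
}

That is basically what perf record does internally when it is handed
synthetic:futex_wait.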

-- Steve

* Re: [PATCH] tracing: Allow perf to read synthetic events
  2025-12-17 16:39 [PATCH] tracing: Allow perf to read synthetic events Steven Rostedt
  2025-12-17 18:58 ` Ian Rogers
@ 2025-12-28 22:59 ` Jiri Olsa
  2025-12-29 22:49   ` Steven Rostedt
  1 sibling, 1 reply; 6+ messages in thread
From: Jiri Olsa @ 2025-12-28 22:59 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: LKML, Linux Trace Kernel, Masami Hiramatsu, Mathieu Desnoyers,
	Ian Rogers, Arnaldo Carvalho de Melo, Namhyung Kim,
	Peter Zijlstra

On Wed, Dec 17, 2025 at 11:39:20AM -0500, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
> 
> Currently, perf cannot enable synthetic events. When it tries, it either
> triggers a warning in the kernel or fails with "no such device".
> 
> Add the necessary code to allow perf to also attach to synthetic events.
> 
> Reported-by: Ian Rogers <irogers@google.com>
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

hi,
I don't see the crash, but after perf record, perf script gives me 'FAILED TO PARSE':

  # cd /sys/kernel/tracing
  # echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
  # echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=common_stacktrace  if prev_state == 2' >> events/sched/sched_switch/trigger
  # echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(block_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
  # echo 1 > events/synthetic/block_lat/enable

  # perf record -e 'synthetic:block_lat' -a
  ^C[ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.259 MB perf.data (1 samples) ]
  # perf script
  kworker/u33:2-w     244 [000]  1707.836263: synthetic:block_lat: [FAILED TO PARSE] pid=244 delta=21 stack=ARRAY[0b, 00, 00, 00, 00, 00, 00, 00, 1d, 72, 9d, 82, ff, ff, ff, ff, 0d, 7d, 9d, 82, ff, ff, ff, ff, 3d, 3d, 9e, 82, ff, ff, ff, ff, 05, 91, 9d, 82, ff, ff, ff, ff, 40, 7a, 42, 81, ff, ff, ff, ff, 5e, f4, 0c, 82, ff, ff, ff, ff, 43, 8d, 0c, 82, ff, ff, ff, ff, 82, 2d, 89, 81, ff, ff, ff, ff, 9b, 39, 89, 81, ff, ff, ff, ff, a6, 5a, 9c, 82, ff, ff, ff, ff, 2f, 01, 00, 81, ff, ff, ff, ff]

not sure it's fixed in the latest libtraceevent, mine is
  libtraceevent-1.8.4-3.fc42.x86_64

jirka


* Re: [PATCH] tracing: Allow perf to read synthetic events
  2025-12-28 22:59 ` Jiri Olsa
@ 2025-12-29 22:49   ` Steven Rostedt
  2025-12-29 23:36     ` Steven Rostedt
  0 siblings, 1 reply; 6+ messages in thread
From: Steven Rostedt @ 2025-12-29 22:49 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: LKML, Linux Trace Kernel, Masami Hiramatsu, Mathieu Desnoyers,
	Ian Rogers, Arnaldo Carvalho de Melo, Namhyung Kim,
	Peter Zijlstra

On Sun, 28 Dec 2025 23:59:34 +0100
Jiri Olsa <olsajiri@gmail.com> wrote:

> hi,
> I don't see the crash, but after perf record, perf script gives me 'FAILED TO PARSE':
> 
>   # cd /sys/kernel/tracing
>   # echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
>   # echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=common_stacktrace  if prev_state == 2' >> events/sched/sched_switch/trigger
>   # echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(block_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
>   # echo 1 > events/synthetic/block_lat/enable
> 
>   # perf record -e 'synthetic:block_lat' -a
>   ^C[ perf record: Woken up 1 times to write data ]
>   [ perf record: Captured and wrote 0.259 MB perf.data (1 samples) ]
>   # perf script
>   kworker/u33:2-w     244 [000]  1707.836263: synthetic:block_lat: [FAILED TO PARSE] pid=244 delta=21 stack=ARRAY[0b, 00, 00, 00, 00, 00, 00, 00, 1d, 72, 9d, 82, ff, ff, ff, ff, 0d, 7d, 9d, 82, ff, ff, ff, ff, 3d, 3d, 9e, 82, ff, ff, ff, ff, 05, 91, 9d, 82, ff, ff, ff, ff, 40, 7a, 42, 81, ff, ff, ff, ff, 5e, f4, 0c, 82, ff, ff, ff, ff, 43, 8d, 0c, 82, ff, ff, ff, ff, 82, 2d, 89, 81, ff, ff, ff, ff, 9b, 39, 89, 81, ff, ff, ff, ff, a6, 5a, 9c, 82, ff, ff, ff, ff, 2f, 01, 00, 81, ff, ff, ff, ff]
> 
> not sure it's fixed in the latest libtraceevent, mine is
>   libtraceevent-1.8.4-3.fc42.x86_64

Yeah, it's not supported yet by libtraceevent (fails for trace-cmd record
as well). But this should be easily fixed:

 ># trace-cmd list -e block_lat -F --full
 system: synthetic
 name: block_lat
 ID: 1871
 format:
	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
	field:int common_pid;	offset:4;	size:4;	signed:1;

	field:pid_t pid;	offset:8;	size:4;	signed:1;
	field:u64 delta;	offset:16;	size:8;	signed:0;
	field:__data_loc unsigned long[] stack;	offset:24;	size:8;	signed:1;

 print fmt: "pid=%d delta=%llu stack=%s", REC->pid, REC->delta, __get_stacktrace(stack)

Looks like I only need to add "__get_stacktrace()" to the libtraceevent parsing.
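
The payload itself is simple: once the __data_loc resolves to (offset, len)
within the record, it is an array of longs whose first element is the entry
count. That matches your dump above: the first long is 0b == 11, followed by
11 return addresses. Roughly this (a sketch of the decode, not actual
libtraceevent code):

#include <stdio.h>

static void dump_synth_stack(const char *data, int offset, int len)
{
	const unsigned long *stack = (const unsigned long *)(data + offset);
	long nr = len / sizeof(long);	/* your dump: 96 bytes = 12 longs */

	/* stack[0] is the count; elements 1..nr-1 are return addresses */
	for (long i = 1; i < nr; i++)
		printf("=> 0x%lx\n", stack[i]);
}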

Thanks,

-- Steve


* Re: [PATCH] tracing: Allow perf to read synthetic events
  2025-12-29 22:49   ` Steven Rostedt
@ 2025-12-29 23:36     ` Steven Rostedt
  0 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2025-12-29 23:36 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: LKML, Linux Trace Kernel, Masami Hiramatsu, Mathieu Desnoyers,
	Ian Rogers, Arnaldo Carvalho de Melo, Namhyung Kim,
	Peter Zijlstra

On Mon, 29 Dec 2025 17:49:01 -0500
Steven Rostedt <steven@rostedt.org> wrote:

> Looks like I only need to add "__get_stacktrace()" to the libtraceevent parsing.

The below patch appears to work:

           <...>-22    [001] d..5.  2891.837516: block_lat:            pid=22 delta=159 stack=
=> __schedule+0xded
=> schedule+0x123
=> io_schedule+0x44
=> bit_wait_io+0x11
=> __wait_on_bit+0x4a
=> out_of_line_wait_on_bit+0x9d
=> ext4_read_bh+0x95
=> ext4_bread+0x51
=> __ext4_read_dirblock+0x45
=> htree_dirblock_to_tree+0x76
=> ext4_htree_fill_tree+0x3b1
=> ext4_readdir+0xa9b
=> iterate_dir+0xef
=> __se_sys_getdents64+0x76
=> do_syscall_64+0x93
           <...>-254   [004] d..5.  2892.173786: block_lat:            pid=254 delta=57 stack=
=> __schedule+0xded
=> schedule+0x123
=> io_schedule+0x44
=> bit_wait_io+0x11
=> __wait_on_bit+0x4a
=> out_of_line_wait_on_bit+0x9d
=> ext4_read_bh+0x95
=> ext4_bread+0x51
=> __ext4_read_dirblock+0x45
=> htree_dirblock_to_tree+0x76
=> ext4_htree_fill_tree+0x1e5
=> ext4_readdir+0xa9b
=> iterate_dir+0xef
=> __se_sys_getdents64+0x76
=> do_syscall_64+0x93

-- Steve

diff --git a/include/traceevent/event-parse.h b/include/traceevent/event-parse.h
index ebfc2c7..9c1abfa 100644
--- a/include/traceevent/event-parse.h
+++ b/include/traceevent/event-parse.h
@@ -138,6 +138,11 @@ struct tep_print_arg_bitmask {
 	struct tep_format_field	*field;
 };
 
+struct tep_print_arg_stacktrace {
+	char			*stacktrace;
+	struct tep_format_field	*field;
+};
+
 struct tep_print_arg_field {
 	char			*name;
 	struct tep_format_field	*field;
@@ -215,6 +220,7 @@ enum tep_print_arg_type {
 	TEP_PRINT_DYNAMIC_ARRAY_LEN,
 	TEP_PRINT_HEX_STR,
 	TEP_PRINT_CPUMASK,
+	TEP_PRINT_STACKTRACE,
 };
 
 struct tep_print_arg {
@@ -231,6 +237,7 @@ struct tep_print_arg {
 		struct tep_print_arg_func	func;
 		struct tep_print_arg_string	string;
 		struct tep_print_arg_bitmask	bitmask;
+		struct tep_print_arg_stacktrace	stacktrace;
 		struct tep_print_arg_op		op;
 		struct tep_print_arg_dynarray	dynarray;
 	};
diff --git a/src/event-parse.c b/src/event-parse.c
index 939b0a8..09d9092 100644
--- a/src/event-parse.c
+++ b/src/event-parse.c
@@ -1129,6 +1129,9 @@ static void free_arg(struct tep_print_arg *arg)
 		free_arg(arg->op.left);
 		free_arg(arg->op.right);
 		break;
+	case TEP_PRINT_STACKTRACE:
+		free(arg->stacktrace.stacktrace);
+		break;
 	case TEP_PRINT_FUNC:
 		while (arg->func.args) {
 			farg = arg->func.args;
@@ -2895,6 +2898,7 @@ static int arg_num_eval(struct tep_print_arg *arg, long long *val)
 	case TEP_PRINT_BSTRING:
 	case TEP_PRINT_BITMASK:
 	case TEP_PRINT_CPUMASK:
+	case TEP_PRINT_STACKTRACE:
 	default:
 		do_warning("invalid eval type %d", arg->type);
 		ret = 0;
@@ -2925,6 +2929,7 @@ static char *arg_eval (struct tep_print_arg *arg)
 	case TEP_PRINT_BSTRING:
 	case TEP_PRINT_BITMASK:
 	case TEP_PRINT_CPUMASK:
+	case TEP_PRINT_STACKTRACE:
 	default:
 		do_warning("invalid eval type %d", arg->type);
 		break;
@@ -3462,6 +3467,34 @@ process_cpumask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar
 	return type;
 }
 
+static enum tep_event_type
+process_stacktrace(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+	enum tep_event_type type;
+	char *token;
+
+	if (read_expect_type(event->tep, TEP_EVENT_ITEM, &token) < 0)
+		goto out_free;
+
+	arg->type = TEP_PRINT_STACKTRACE;
+	arg->stacktrace.stacktrace = token;
+	arg->stacktrace.field = NULL;
+
+	if (read_expected(event->tep, TEP_EVENT_DELIM, ")") < 0)
+		goto out_err;
+
+	type = read_token(event->tep, &token);
+	*tok = token;
+
+	return type;
+
+ out_free:
+	free_token(token);
+ out_err:
+	*tok = NULL;
+	return TEP_EVENT_ERROR;
+}
+
 static struct tep_function_handler *
 find_func_handler(struct tep_handle *tep, char *func_name)
 {
@@ -3750,6 +3783,10 @@ process_function(struct tep_event *event, struct tep_print_arg *arg,
 		free_token(token);
 		return process_dynamic_array_len(event, arg, tok);
 	}
+	if (strcmp(token, "__get_stacktrace") == 0) {
+		free_token(token);
+		return process_stacktrace(event, arg, tok);
+	}
 	if (strcmp(token, "__builtin_expect") == 0) {
 		free_token(token);
 		return process_builtin_expect(event, arg, tok);
@@ -4414,6 +4451,7 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg
 	case TEP_PRINT_BSTRING:
 	case TEP_PRINT_BITMASK:
 	case TEP_PRINT_CPUMASK:
+	case TEP_PRINT_STACKTRACE:
 		return 0;
 	case TEP_PRINT_FUNC: {
 		struct trace_seq s;
@@ -4859,6 +4897,33 @@ more:
 	free(str);
 }
 
+static void print_stacktrace_to_seq(struct tep_handle *tep,
+				    struct trace_seq *s, const char *format,
+				    int len_arg, const void *data, int size)
+{
+	int nr_funcs = size / tep->long_size;
+	struct func_map *func;
+	unsigned long long val;
+
+	trace_seq_putc(s, '\n');
+
+	/* The first entry is a counter, skip it */
+	data += tep->long_size;
+
+	for (int i = 1; i < nr_funcs; i++) {
+		trace_seq_puts(s, "=> ");
+		val = tep_read_number(tep, data, tep->long_size);
+		func = find_func(tep, val);
+		if (func) {
+			trace_seq_puts(s, func->func);
+			trace_seq_printf(s, "+0x%llx\n", val - func->addr);
+		} else {
+			trace_seq_printf(s, "%llx\n", val);
+		}
+		data += tep->long_size;
+	}
+}
+
 static void print_str_arg(struct trace_seq *s, void *data, int size,
 			  struct tep_event *event, const char *format,
 			  int len_arg, struct tep_print_arg *arg)
@@ -5097,6 +5162,17 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 				     data + offset, len);
 		break;
 	}
+	case TEP_PRINT_STACKTRACE: {
+		if (!arg->stacktrace.field) {
+			arg->stacktrace.field = tep_find_any_field(event, arg->stacktrace.stacktrace);
+			if (!arg->stacktrace.field)
+				break;
+		}
+		dynamic_offset_field(tep, arg->stacktrace.field, data, size, &offset, &len);
+		print_stacktrace_to_seq(tep, s, format, len_arg,
+					data + offset, len);
+		break;
+	}
 	case TEP_PRINT_OP:
 		/*
 		 * The only op for string should be ? :
