* [PATCH v4 1/5] tools headers UAPI: Sync linux/perf_event.h for deferred callchains
2025-11-15 23:41 [PATCHSET v4 0/5] perf tools: Add deferred callchain support Namhyung Kim
@ 2025-11-15 23:41 ` Namhyung Kim
2025-11-19 16:49 ` Ian Rogers
2025-11-15 23:41 ` [PATCH v4 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support Namhyung Kim
` (3 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Namhyung Kim @ 2025-11-15 23:41 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, James Clark
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Steven Rostedt, Josh Poimboeuf, Indu Bhagat,
Jens Remus, Mathieu Desnoyers, linux-trace-kernel, bpf
It needs to sync with the kernel to support user space changes for the
deferred callchains.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/include/uapi/linux/perf_event.h | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 78a362b8002776e5..d292f96bc06f86bc 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -463,7 +463,9 @@ struct perf_event_attr {
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
- __reserved_1 : 26;
+ defer_callchain: 1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records */
+ defer_output : 1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records */
+ __reserved_1 : 24;
union {
__u32 wakeup_events; /* wake up every n events */
@@ -1239,6 +1241,22 @@ enum perf_event_type {
*/
PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
+ /*
+ * This user callchain capture was deferred until shortly before
+ * returning to user space. Previous samples would have kernel
+ * callchains only and they need to be stitched with this to make full
+ * callchains.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 cookie;
+ * u64 nr;
+ * u64 ips[nr];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_CALLCHAIN_DEFERRED = 22,
+
PERF_RECORD_MAX, /* non-ABI */
};
@@ -1269,6 +1287,7 @@ enum perf_callchain_context {
PERF_CONTEXT_HV = (__u64)-32,
PERF_CONTEXT_KERNEL = (__u64)-128,
PERF_CONTEXT_USER = (__u64)-512,
+ PERF_CONTEXT_USER_DEFERRED = (__u64)-640,
PERF_CONTEXT_GUEST = (__u64)-2048,
PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
--
2.52.0.rc1.455.g30608eb744-goog
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v4 1/5] tools headers UAPI: Sync linux/perf_event.h for deferred callchains
2025-11-15 23:41 ` [PATCH v4 1/5] tools headers UAPI: Sync linux/perf_event.h for deferred callchains Namhyung Kim
@ 2025-11-19 16:49 ` Ian Rogers
0 siblings, 0 replies; 13+ messages in thread
From: Ian Rogers @ 2025-11-19 16:49 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users,
Steven Rostedt, Josh Poimboeuf, Indu Bhagat, Jens Remus,
Mathieu Desnoyers, linux-trace-kernel, bpf
On Sat, Nov 15, 2025 at 3:41 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> It needs to sync with the kernel to support user space changes for the
> deferred callchains.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Thanks,
Ian
> ---
> tools/include/uapi/linux/perf_event.h | 21 ++++++++++++++++++++-
> 1 file changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
> index 78a362b8002776e5..d292f96bc06f86bc 100644
> --- a/tools/include/uapi/linux/perf_event.h
> +++ b/tools/include/uapi/linux/perf_event.h
> @@ -463,7 +463,9 @@ struct perf_event_attr {
> inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
> remove_on_exec : 1, /* event is removed from task on exec */
> sigtrap : 1, /* send synchronous SIGTRAP on event */
> - __reserved_1 : 26;
> + defer_callchain: 1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records */
> + defer_output : 1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records */
> + __reserved_1 : 24;
>
> union {
> __u32 wakeup_events; /* wake up every n events */
> @@ -1239,6 +1241,22 @@ enum perf_event_type {
> */
> PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
>
> + /*
> + * This user callchain capture was deferred until shortly before
> + * returning to user space. Previous samples would have kernel
> + * callchains only and they need to be stitched with this to make full
> + * callchains.
> + *
> + * struct {
> + * struct perf_event_header header;
> + * u64 cookie;
> + * u64 nr;
> + * u64 ips[nr];
> + * struct sample_id sample_id;
> + * };
> + */
> + PERF_RECORD_CALLCHAIN_DEFERRED = 22,
> +
> PERF_RECORD_MAX, /* non-ABI */
> };
>
> @@ -1269,6 +1287,7 @@ enum perf_callchain_context {
> PERF_CONTEXT_HV = (__u64)-32,
> PERF_CONTEXT_KERNEL = (__u64)-128,
> PERF_CONTEXT_USER = (__u64)-512,
> + PERF_CONTEXT_USER_DEFERRED = (__u64)-640,
>
> PERF_CONTEXT_GUEST = (__u64)-2048,
> PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
> --
> 2.52.0.rc1.455.g30608eb744-goog
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v4 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support
2025-11-15 23:41 [PATCHSET v4 0/5] perf tools: Add deferred callchain support Namhyung Kim
2025-11-15 23:41 ` [PATCH v4 1/5] tools headers UAPI: Sync linux/perf_event.h for deferred callchains Namhyung Kim
@ 2025-11-15 23:41 ` Namhyung Kim
2025-11-19 16:48 ` Ian Rogers
2025-11-15 23:41 ` [PATCH v4 3/5] perf record: Add --call-graph fp,defer option for deferred callchains Namhyung Kim
` (2 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Namhyung Kim @ 2025-11-15 23:41 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, James Clark
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Steven Rostedt, Josh Poimboeuf, Indu Bhagat,
Jens Remus, Mathieu Desnoyers, linux-trace-kernel, bpf
Add a new event type for deferred callchains and a new callback for the
struct perf_tool. For now it doesn't actually handle the deferred
callchains but it just marks the sample if it has the PERF_CONTEXT_
USER_DEFERRED in the callchain array.
At least, perf report can dump the raw data with this change. Actually
this requires the next commit to enable attr.defer_callchain, but if you
already have a data file, it'll show the following result.
$ perf report -D
...
0x2158@perf.data [0x40]: event: 22
.
. ... raw event: size 64 bytes
. 0000: 16 00 00 00 02 00 40 00 06 00 00 00 0b 00 00 00 ......@.........
. 0010: 03 00 00 00 00 00 00 00 a7 7f 33 fe 18 7f 00 00 ..........3.....
. 0020: 0f 0e 33 fe 18 7f 00 00 48 14 33 fe 18 7f 00 00 ..3.....H.3.....
. 0030: 08 09 00 00 08 09 00 00 e6 7a e7 35 1c 00 00 00 .........z.5....
121163447014 0x2158 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 2312/2312: 0xb00000006
... FP chain: nr:3
..... 0: 00007f18fe337fa7
..... 1: 00007f18fe330e0f
..... 2: 00007f18fe331448
: unhandled!
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/lib/perf/include/perf/event.h | 13 ++++++++++
tools/perf/util/event.c | 1 +
tools/perf/util/evsel.c | 31 +++++++++++++++++++++--
tools/perf/util/machine.c | 1 +
tools/perf/util/perf_event_attr_fprintf.c | 2 ++
tools/perf/util/sample.h | 2 ++
tools/perf/util/session.c | 20 +++++++++++++++
tools/perf/util/tool.c | 1 +
tools/perf/util/tool.h | 3 ++-
9 files changed, 71 insertions(+), 3 deletions(-)
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index aa1e91c97a226e1a..43a8cb04994fa033 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -151,6 +151,18 @@ struct perf_record_switch {
__u32 next_prev_tid;
};
+struct perf_record_callchain_deferred {
+ struct perf_event_header header;
+ /*
+ * This is to match kernel and (deferred) user stacks together.
+ * The kernel part will be in the sample callchain array after
+ * the PERF_CONTEXT_USER_DEFERRED entry.
+ */
+ __u64 cookie;
+ __u64 nr;
+ __u64 ips[];
+};
+
struct perf_record_header_attr {
struct perf_event_header header;
struct perf_event_attr attr;
@@ -523,6 +535,7 @@ union perf_event {
struct perf_record_read read;
struct perf_record_throttle throttle;
struct perf_record_sample sample;
+ struct perf_record_callchain_deferred callchain_deferred;
struct perf_record_bpf_event bpf;
struct perf_record_ksymbol ksymbol;
struct perf_record_text_poke_event text_poke;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index fcf44149feb20c35..4c92cc1a952c1d9f 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -61,6 +61,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_CGROUP] = "CGROUP",
[PERF_RECORD_TEXT_POKE] = "TEXT_POKE",
[PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID",
+ [PERF_RECORD_CALLCHAIN_DEFERRED] = "CALLCHAIN_DEFERRED",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 989c56d4a23f74f4..5ee3e7dee93fbbcb 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3089,6 +3089,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
data->data_src = PERF_MEM_DATA_SRC_NONE;
data->vcpu = -1;
+ if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) {
+ const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
+
+ data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr;
+ if (data->callchain->nr > max_callchain_nr)
+ return -EFAULT;
+
+ data->deferred_cookie = event->callchain_deferred.cookie;
+
+ if (evsel->core.attr.sample_id_all)
+ perf_evsel__parse_id_sample(evsel, event, data);
+ return 0;
+ }
+
if (event->header.type != PERF_RECORD_SAMPLE) {
if (!evsel->core.attr.sample_id_all)
return 0;
@@ -3213,12 +3227,25 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
if (type & PERF_SAMPLE_CALLCHAIN) {
const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
+ u64 callchain_nr;
OVERFLOW_CHECK_u64(array);
data->callchain = (struct ip_callchain *)array++;
- if (data->callchain->nr > max_callchain_nr)
+ callchain_nr = data->callchain->nr;
+ if (callchain_nr > max_callchain_nr)
return -EFAULT;
- sz = data->callchain->nr * sizeof(u64);
+ sz = callchain_nr * sizeof(u64);
+ /*
+ * Save the cookie for the deferred user callchain. The last 2
+ * entries in the callchain should be the context marker and the
+ * cookie. The cookie will be used to match PERF_RECORD_
+ * CALLCHAIN_DEFERRED later.
+ */
+ if (evsel->core.attr.defer_callchain && callchain_nr >= 2 &&
+ data->callchain->ips[callchain_nr - 2] == PERF_CONTEXT_USER_DEFERRED) {
+ data->deferred_cookie = data->callchain->ips[callchain_nr - 1];
+ data->deferred_callchain = true;
+ }
OVERFLOW_CHECK(array, sz, max_size);
array = (void *)array + sz;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b5dd42588c916d91..841b711d970e9457 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2124,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread,
*cpumode = PERF_RECORD_MISC_KERNEL;
break;
case PERF_CONTEXT_USER:
+ case PERF_CONTEXT_USER_DEFERRED:
*cpumode = PERF_RECORD_MISC_USER;
break;
default:
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 66b666d9ce649dd7..741c3d657a8b6ae7 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -343,6 +343,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(inherit_thread, p_unsigned);
PRINT_ATTRf(remove_on_exec, p_unsigned);
PRINT_ATTRf(sigtrap, p_unsigned);
+ PRINT_ATTRf(defer_callchain, p_unsigned);
+ PRINT_ATTRf(defer_output, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false);
PRINT_ATTRf(bp_type, p_unsigned);
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index fae834144ef42105..a8307b20a9ea8066 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -107,6 +107,8 @@ struct perf_sample {
/** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */
u16 weight3;
bool no_hw_idx; /* No hw_idx collected in branch_stack */
+ bool deferred_callchain; /* Has deferred user callchains */
+ u64 deferred_cookie;
char insn[MAX_INSN];
void *raw_data;
struct ip_callchain *callchain;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4b0236b2df2913e1..361e15c1f26a96d0 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -720,6 +720,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_CGROUP] = perf_event__cgroup_swap,
[PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap,
[PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap,
+ [PERF_RECORD_CALLCHAIN_DEFERRED] = perf_event__all64_swap,
[PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
[PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -854,6 +855,9 @@ static void callchain__printf(struct evsel *evsel,
for (i = 0; i < callchain->nr; i++)
printf("..... %2d: %016" PRIx64 "\n",
i, callchain->ips[i]);
+
+ if (sample->deferred_callchain)
+ printf("...... (deferred)\n");
}
static void branch_stack__printf(struct perf_sample *sample,
@@ -1123,6 +1127,19 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
sample_read__printf(sample, evsel->core.attr.read_format);
}
+static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event,
+ struct perf_sample *sample)
+{
+ if (!dump_trace)
+ return;
+
+ printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n",
+ event->header.misc, sample->pid, sample->tid, sample->deferred_cookie);
+
+ if (evsel__has_callchain(evsel))
+ callchain__printf(evsel, sample);
+}
+
static void dump_read(struct evsel *evsel, union perf_event *event)
{
struct perf_record_read *read_event = &event->read;
@@ -1353,6 +1370,9 @@ static int machines__deliver_event(struct machines *machines,
return tool->text_poke(tool, event, sample, machine);
case PERF_RECORD_AUX_OUTPUT_HW_ID:
return tool->aux_output_hw_id(tool, event, sample, machine);
+ case PERF_RECORD_CALLCHAIN_DEFERRED:
+ dump_deferred_callchain(evsel, event, sample);
+ return tool->callchain_deferred(tool, event, sample, evsel, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
index 22a8a4ffe05f778e..f732d33e7f895ed4 100644
--- a/tools/perf/util/tool.c
+++ b/tools/perf/util/tool.c
@@ -287,6 +287,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
tool->read = process_event_sample_stub;
tool->throttle = process_event_stub;
tool->unthrottle = process_event_stub;
+ tool->callchain_deferred = process_event_sample_stub;
tool->attr = process_event_synth_attr_stub;
tool->event_update = process_event_synth_event_update_stub;
tool->tracing_data = process_event_synth_tracing_data_stub;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 88337cee1e3e2be3..9b9f0a8cbf3de4b5 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -44,7 +44,8 @@ enum show_feature_header {
struct perf_tool {
event_sample sample,
- read;
+ read,
+ callchain_deferred;
event_op mmap,
mmap2,
comm,
--
2.52.0.rc1.455.g30608eb744-goog
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v4 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support
2025-11-15 23:41 ` [PATCH v4 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support Namhyung Kim
@ 2025-11-19 16:48 ` Ian Rogers
2025-11-20 0:56 ` Namhyung Kim
0 siblings, 1 reply; 13+ messages in thread
From: Ian Rogers @ 2025-11-19 16:48 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users,
Steven Rostedt, Josh Poimboeuf, Indu Bhagat, Jens Remus,
Mathieu Desnoyers, linux-trace-kernel, bpf
On Sat, Nov 15, 2025 at 3:41 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Add a new event type for deferred callchains and a new callback for the
> struct perf_tool. For now it doesn't actually handle the deferred
> callchains but it just marks the sample if it has the PERF_CONTEXT_
> USER_DEFERRED in the callchain array.
>
> At least, perf report can dump the raw data with this change. Actually
> this requires the next commit to enable attr.defer_callchain, but if you
> already have a data file, it'll show the following result.
>
> $ perf report -D
> ...
> 0x2158@perf.data [0x40]: event: 22
> .
> . ... raw event: size 64 bytes
> . 0000: 16 00 00 00 02 00 40 00 06 00 00 00 0b 00 00 00 ......@.........
> . 0010: 03 00 00 00 00 00 00 00 a7 7f 33 fe 18 7f 00 00 ..........3.....
> . 0020: 0f 0e 33 fe 18 7f 00 00 48 14 33 fe 18 7f 00 00 ..3.....H.3.....
> . 0030: 08 09 00 00 08 09 00 00 e6 7a e7 35 1c 00 00 00 .........z.5....
>
> 121163447014 0x2158 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 2312/2312: 0xb00000006
> ... FP chain: nr:3
> ..... 0: 00007f18fe337fa7
> ..... 1: 00007f18fe330e0f
> ..... 2: 00007f18fe331448
> : unhandled!
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> tools/lib/perf/include/perf/event.h | 13 ++++++++++
> tools/perf/util/event.c | 1 +
> tools/perf/util/evsel.c | 31 +++++++++++++++++++++--
> tools/perf/util/machine.c | 1 +
> tools/perf/util/perf_event_attr_fprintf.c | 2 ++
> tools/perf/util/sample.h | 2 ++
> tools/perf/util/session.c | 20 +++++++++++++++
> tools/perf/util/tool.c | 1 +
> tools/perf/util/tool.h | 3 ++-
> 9 files changed, 71 insertions(+), 3 deletions(-)
>
> diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
> index aa1e91c97a226e1a..43a8cb04994fa033 100644
> --- a/tools/lib/perf/include/perf/event.h
> +++ b/tools/lib/perf/include/perf/event.h
> @@ -151,6 +151,18 @@ struct perf_record_switch {
> __u32 next_prev_tid;
> };
>
> +struct perf_record_callchain_deferred {
> + struct perf_event_header header;
> + /*
> + * This is to match kernel and (deferred) user stacks together.
> + * The kernel part will be in the sample callchain array after
> + * the PERF_CONTEXT_USER_DEFERRED entry.
> + */
> + __u64 cookie;
> + __u64 nr;
> + __u64 ips[];
> +};
> +
> struct perf_record_header_attr {
> struct perf_event_header header;
> struct perf_event_attr attr;
> @@ -523,6 +535,7 @@ union perf_event {
> struct perf_record_read read;
> struct perf_record_throttle throttle;
> struct perf_record_sample sample;
> + struct perf_record_callchain_deferred callchain_deferred;
> struct perf_record_bpf_event bpf;
> struct perf_record_ksymbol ksymbol;
> struct perf_record_text_poke_event text_poke;
> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> index fcf44149feb20c35..4c92cc1a952c1d9f 100644
> --- a/tools/perf/util/event.c
> +++ b/tools/perf/util/event.c
> @@ -61,6 +61,7 @@ static const char *perf_event__names[] = {
> [PERF_RECORD_CGROUP] = "CGROUP",
> [PERF_RECORD_TEXT_POKE] = "TEXT_POKE",
> [PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID",
> + [PERF_RECORD_CALLCHAIN_DEFERRED] = "CALLCHAIN_DEFERRED",
> [PERF_RECORD_HEADER_ATTR] = "ATTR",
> [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
> [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 989c56d4a23f74f4..5ee3e7dee93fbbcb 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -3089,6 +3089,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> data->data_src = PERF_MEM_DATA_SRC_NONE;
> data->vcpu = -1;
>
> + if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) {
> + const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
> +
> + data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr;
> + if (data->callchain->nr > max_callchain_nr)
> + return -EFAULT;
> +
> + data->deferred_cookie = event->callchain_deferred.cookie;
> +
> + if (evsel->core.attr.sample_id_all)
> + perf_evsel__parse_id_sample(evsel, event, data);
> + return 0;
> + }
> +
> if (event->header.type != PERF_RECORD_SAMPLE) {
> if (!evsel->core.attr.sample_id_all)
> return 0;
> @@ -3213,12 +3227,25 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>
> if (type & PERF_SAMPLE_CALLCHAIN) {
> const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
> + u64 callchain_nr;
>
> OVERFLOW_CHECK_u64(array);
> data->callchain = (struct ip_callchain *)array++;
> - if (data->callchain->nr > max_callchain_nr)
> + callchain_nr = data->callchain->nr;
> + if (callchain_nr > max_callchain_nr)
> return -EFAULT;
> - sz = data->callchain->nr * sizeof(u64);
> + sz = callchain_nr * sizeof(u64);
> + /*
> + * Save the cookie for the deferred user callchain. The last 2
> + * entries in the callchain should be the context marker and the
> + * cookie. The cookie will be used to match PERF_RECORD_
> + * CALLCHAIN_DEFERRED later.
> + */
> + if (evsel->core.attr.defer_callchain && callchain_nr >= 2 &&
> + data->callchain->ips[callchain_nr - 2] == PERF_CONTEXT_USER_DEFERRED) {
> + data->deferred_cookie = data->callchain->ips[callchain_nr - 1];
> + data->deferred_callchain = true;
> + }
> OVERFLOW_CHECK(array, sz, max_size);
> array = (void *)array + sz;
> }
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index b5dd42588c916d91..841b711d970e9457 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -2124,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread,
> *cpumode = PERF_RECORD_MISC_KERNEL;
> break;
> case PERF_CONTEXT_USER:
> + case PERF_CONTEXT_USER_DEFERRED:
> *cpumode = PERF_RECORD_MISC_USER;
> break;
> default:
> diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
> index 66b666d9ce649dd7..741c3d657a8b6ae7 100644
> --- a/tools/perf/util/perf_event_attr_fprintf.c
> +++ b/tools/perf/util/perf_event_attr_fprintf.c
> @@ -343,6 +343,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
> PRINT_ATTRf(inherit_thread, p_unsigned);
> PRINT_ATTRf(remove_on_exec, p_unsigned);
> PRINT_ATTRf(sigtrap, p_unsigned);
> + PRINT_ATTRf(defer_callchain, p_unsigned);
> + PRINT_ATTRf(defer_output, p_unsigned);
>
> PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false);
> PRINT_ATTRf(bp_type, p_unsigned);
> diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
> index fae834144ef42105..a8307b20a9ea8066 100644
> --- a/tools/perf/util/sample.h
> +++ b/tools/perf/util/sample.h
> @@ -107,6 +107,8 @@ struct perf_sample {
> /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */
> u16 weight3;
> bool no_hw_idx; /* No hw_idx collected in branch_stack */
> + bool deferred_callchain; /* Has deferred user callchains */
> + u64 deferred_cookie;
> char insn[MAX_INSN];
> void *raw_data;
> struct ip_callchain *callchain;
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 4b0236b2df2913e1..361e15c1f26a96d0 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -720,6 +720,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
> [PERF_RECORD_CGROUP] = perf_event__cgroup_swap,
> [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap,
> [PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap,
> + [PERF_RECORD_CALLCHAIN_DEFERRED] = perf_event__all64_swap,
> [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
> [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
> [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
> @@ -854,6 +855,9 @@ static void callchain__printf(struct evsel *evsel,
> for (i = 0; i < callchain->nr; i++)
> printf("..... %2d: %016" PRIx64 "\n",
> i, callchain->ips[i]);
> +
> + if (sample->deferred_callchain)
> + printf("...... (deferred)\n");
> }
>
> static void branch_stack__printf(struct perf_sample *sample,
> @@ -1123,6 +1127,19 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
> sample_read__printf(sample, evsel->core.attr.read_format);
> }
>
> +static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event,
> + struct perf_sample *sample)
> +{
> + if (!dump_trace)
> + return;
> +
> + printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n",
> + event->header.misc, sample->pid, sample->tid, sample->deferred_cookie);
> +
> + if (evsel__has_callchain(evsel))
> + callchain__printf(evsel, sample);
> +}
> +
> static void dump_read(struct evsel *evsel, union perf_event *event)
> {
> struct perf_record_read *read_event = &event->read;
> @@ -1353,6 +1370,9 @@ static int machines__deliver_event(struct machines *machines,
> return tool->text_poke(tool, event, sample, machine);
> case PERF_RECORD_AUX_OUTPUT_HW_ID:
> return tool->aux_output_hw_id(tool, event, sample, machine);
> + case PERF_RECORD_CALLCHAIN_DEFERRED:
> + dump_deferred_callchain(evsel, event, sample);
> + return tool->callchain_deferred(tool, event, sample, evsel, machine);
> default:
> ++evlist->stats.nr_unknown_events;
> return -1;
> diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
> index 22a8a4ffe05f778e..f732d33e7f895ed4 100644
> --- a/tools/perf/util/tool.c
> +++ b/tools/perf/util/tool.c
> @@ -287,6 +287,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
> tool->read = process_event_sample_stub;
> tool->throttle = process_event_stub;
> tool->unthrottle = process_event_stub;
> + tool->callchain_deferred = process_event_sample_stub;
nit: there's a similar change needed to delegate_tool__init, that code
is currently unused.
Reviewed-by: Ian Rogers <irogers@google.com>
Thanks,
Ian
> tool->attr = process_event_synth_attr_stub;
> tool->event_update = process_event_synth_event_update_stub;
> tool->tracing_data = process_event_synth_tracing_data_stub;
> diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
> index 88337cee1e3e2be3..9b9f0a8cbf3de4b5 100644
> --- a/tools/perf/util/tool.h
> +++ b/tools/perf/util/tool.h
> @@ -44,7 +44,8 @@ enum show_feature_header {
>
> struct perf_tool {
> event_sample sample,
> - read;
> + read,
> + callchain_deferred;
> event_op mmap,
> mmap2,
> comm,
> --
> 2.52.0.rc1.455.g30608eb744-goog
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v4 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support
2025-11-19 16:48 ` Ian Rogers
@ 2025-11-20 0:56 ` Namhyung Kim
0 siblings, 0 replies; 13+ messages in thread
From: Namhyung Kim @ 2025-11-20 0:56 UTC (permalink / raw)
To: Ian Rogers
Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users,
Steven Rostedt, Josh Poimboeuf, Indu Bhagat, Jens Remus,
Mathieu Desnoyers, linux-trace-kernel, bpf
On Wed, Nov 19, 2025 at 08:48:35AM -0800, Ian Rogers wrote:
> On Sat, Nov 15, 2025 at 3:41 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > Add a new event type for deferred callchains and a new callback for the
> > struct perf_tool. For now it doesn't actually handle the deferred
> > callchains but it just marks the sample if it has the
> > PERF_CONTEXT_USER_DEFERRED marker in the callchain array.
> >
> > At least, perf report can dump the raw data with this change. Actually
> > this requires the next commit to enable attr.defer_callchain, but if you
> > already have a data file, it'll show the following result.
> >
> > $ perf report -D
> > ...
> > 0x2158@perf.data [0x40]: event: 22
> > .
> > . ... raw event: size 64 bytes
> > . 0000: 16 00 00 00 02 00 40 00 06 00 00 00 0b 00 00 00 ......@.........
> > . 0010: 03 00 00 00 00 00 00 00 a7 7f 33 fe 18 7f 00 00 ..........3.....
> > . 0020: 0f 0e 33 fe 18 7f 00 00 48 14 33 fe 18 7f 00 00 ..3.....H.3.....
> > . 0030: 08 09 00 00 08 09 00 00 e6 7a e7 35 1c 00 00 00 .........z.5....
> >
> > 121163447014 0x2158 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 2312/2312: 0xb00000006
> > ... FP chain: nr:3
> > ..... 0: 00007f18fe337fa7
> > ..... 1: 00007f18fe330e0f
> > ..... 2: 00007f18fe331448
> > : unhandled!
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> > tools/lib/perf/include/perf/event.h | 13 ++++++++++
> > tools/perf/util/event.c | 1 +
> > tools/perf/util/evsel.c | 31 +++++++++++++++++++++--
> > tools/perf/util/machine.c | 1 +
> > tools/perf/util/perf_event_attr_fprintf.c | 2 ++
> > tools/perf/util/sample.h | 2 ++
> > tools/perf/util/session.c | 20 +++++++++++++++
> > tools/perf/util/tool.c | 1 +
> > tools/perf/util/tool.h | 3 ++-
> > 9 files changed, 71 insertions(+), 3 deletions(-)
> >
> > diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
> > index aa1e91c97a226e1a..43a8cb04994fa033 100644
> > --- a/tools/lib/perf/include/perf/event.h
> > +++ b/tools/lib/perf/include/perf/event.h
> > @@ -151,6 +151,18 @@ struct perf_record_switch {
> > __u32 next_prev_tid;
> > };
> >
> > +struct perf_record_callchain_deferred {
> > + struct perf_event_header header;
> > + /*
> > + * This is to match kernel and (deferred) user stacks together.
> > + * The kernel part will be in the sample callchain array after
> > + * the PERF_CONTEXT_USER_DEFERRED entry.
> > + */
> > + __u64 cookie;
> > + __u64 nr;
> > + __u64 ips[];
> > +};
> > +
> > struct perf_record_header_attr {
> > struct perf_event_header header;
> > struct perf_event_attr attr;
> > @@ -523,6 +535,7 @@ union perf_event {
> > struct perf_record_read read;
> > struct perf_record_throttle throttle;
> > struct perf_record_sample sample;
> > + struct perf_record_callchain_deferred callchain_deferred;
> > struct perf_record_bpf_event bpf;
> > struct perf_record_ksymbol ksymbol;
> > struct perf_record_text_poke_event text_poke;
> > diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> > index fcf44149feb20c35..4c92cc1a952c1d9f 100644
> > --- a/tools/perf/util/event.c
> > +++ b/tools/perf/util/event.c
> > @@ -61,6 +61,7 @@ static const char *perf_event__names[] = {
> > [PERF_RECORD_CGROUP] = "CGROUP",
> > [PERF_RECORD_TEXT_POKE] = "TEXT_POKE",
> > [PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID",
> > + [PERF_RECORD_CALLCHAIN_DEFERRED] = "CALLCHAIN_DEFERRED",
> > [PERF_RECORD_HEADER_ATTR] = "ATTR",
> > [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
> > [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index 989c56d4a23f74f4..5ee3e7dee93fbbcb 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -3089,6 +3089,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> > data->data_src = PERF_MEM_DATA_SRC_NONE;
> > data->vcpu = -1;
> >
> > + if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) {
> > + const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
> > +
> > + data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr;
> > + if (data->callchain->nr > max_callchain_nr)
> > + return -EFAULT;
> > +
> > + data->deferred_cookie = event->callchain_deferred.cookie;
> > +
> > + if (evsel->core.attr.sample_id_all)
> > + perf_evsel__parse_id_sample(evsel, event, data);
> > + return 0;
> > + }
> > +
> > if (event->header.type != PERF_RECORD_SAMPLE) {
> > if (!evsel->core.attr.sample_id_all)
> > return 0;
> > @@ -3213,12 +3227,25 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> >
> > if (type & PERF_SAMPLE_CALLCHAIN) {
> > const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
> > + u64 callchain_nr;
> >
> > OVERFLOW_CHECK_u64(array);
> > data->callchain = (struct ip_callchain *)array++;
> > - if (data->callchain->nr > max_callchain_nr)
> > + callchain_nr = data->callchain->nr;
> > + if (callchain_nr > max_callchain_nr)
> > return -EFAULT;
> > - sz = data->callchain->nr * sizeof(u64);
> > + sz = callchain_nr * sizeof(u64);
> > + /*
> > + * Save the cookie for the deferred user callchain. The last 2
> > + * entries in the callchain should be the context marker and the
> > + * cookie. The cookie will be used to match PERF_RECORD_
> > + * CALLCHAIN_DEFERRED later.
> > + */
> > + if (evsel->core.attr.defer_callchain && callchain_nr >= 2 &&
> > + data->callchain->ips[callchain_nr - 2] == PERF_CONTEXT_USER_DEFERRED) {
> > + data->deferred_cookie = data->callchain->ips[callchain_nr - 1];
> > + data->deferred_callchain = true;
> > + }
> > OVERFLOW_CHECK(array, sz, max_size);
> > array = (void *)array + sz;
> > }
> > diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> > index b5dd42588c916d91..841b711d970e9457 100644
> > --- a/tools/perf/util/machine.c
> > +++ b/tools/perf/util/machine.c
> > @@ -2124,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread,
> > *cpumode = PERF_RECORD_MISC_KERNEL;
> > break;
> > case PERF_CONTEXT_USER:
> > + case PERF_CONTEXT_USER_DEFERRED:
> > *cpumode = PERF_RECORD_MISC_USER;
> > break;
> > default:
> > diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
> > index 66b666d9ce649dd7..741c3d657a8b6ae7 100644
> > --- a/tools/perf/util/perf_event_attr_fprintf.c
> > +++ b/tools/perf/util/perf_event_attr_fprintf.c
> > @@ -343,6 +343,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
> > PRINT_ATTRf(inherit_thread, p_unsigned);
> > PRINT_ATTRf(remove_on_exec, p_unsigned);
> > PRINT_ATTRf(sigtrap, p_unsigned);
> > + PRINT_ATTRf(defer_callchain, p_unsigned);
> > + PRINT_ATTRf(defer_output, p_unsigned);
> >
> > PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false);
> > PRINT_ATTRf(bp_type, p_unsigned);
> > diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
> > index fae834144ef42105..a8307b20a9ea8066 100644
> > --- a/tools/perf/util/sample.h
> > +++ b/tools/perf/util/sample.h
> > @@ -107,6 +107,8 @@ struct perf_sample {
> > /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */
> > u16 weight3;
> > bool no_hw_idx; /* No hw_idx collected in branch_stack */
> > + bool deferred_callchain; /* Has deferred user callchains */
> > + u64 deferred_cookie;
> > char insn[MAX_INSN];
> > void *raw_data;
> > struct ip_callchain *callchain;
> > diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> > index 4b0236b2df2913e1..361e15c1f26a96d0 100644
> > --- a/tools/perf/util/session.c
> > +++ b/tools/perf/util/session.c
> > @@ -720,6 +720,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
> > [PERF_RECORD_CGROUP] = perf_event__cgroup_swap,
> > [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap,
> > [PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap,
> > + [PERF_RECORD_CALLCHAIN_DEFERRED] = perf_event__all64_swap,
> > [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
> > [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
> > [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
> > @@ -854,6 +855,9 @@ static void callchain__printf(struct evsel *evsel,
> > for (i = 0; i < callchain->nr; i++)
> > printf("..... %2d: %016" PRIx64 "\n",
> > i, callchain->ips[i]);
> > +
> > + if (sample->deferred_callchain)
> > + printf("...... (deferred)\n");
> > }
> >
> > static void branch_stack__printf(struct perf_sample *sample,
> > @@ -1123,6 +1127,19 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
> > sample_read__printf(sample, evsel->core.attr.read_format);
> > }
> >
> > +static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event,
> > + struct perf_sample *sample)
> > +{
> > + if (!dump_trace)
> > + return;
> > +
> > + printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n",
> > + event->header.misc, sample->pid, sample->tid, sample->deferred_cookie);
> > +
> > + if (evsel__has_callchain(evsel))
> > + callchain__printf(evsel, sample);
> > +}
> > +
> > static void dump_read(struct evsel *evsel, union perf_event *event)
> > {
> > struct perf_record_read *read_event = &event->read;
> > @@ -1353,6 +1370,9 @@ static int machines__deliver_event(struct machines *machines,
> > return tool->text_poke(tool, event, sample, machine);
> > case PERF_RECORD_AUX_OUTPUT_HW_ID:
> > return tool->aux_output_hw_id(tool, event, sample, machine);
> > + case PERF_RECORD_CALLCHAIN_DEFERRED:
> > + dump_deferred_callchain(evsel, event, sample);
> > + return tool->callchain_deferred(tool, event, sample, evsel, machine);
> > default:
> > ++evlist->stats.nr_unknown_events;
> > return -1;
> > diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
> > index 22a8a4ffe05f778e..f732d33e7f895ed4 100644
> > --- a/tools/perf/util/tool.c
> > +++ b/tools/perf/util/tool.c
> > @@ -287,6 +287,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
> > tool->read = process_event_sample_stub;
> > tool->throttle = process_event_stub;
> > tool->unthrottle = process_event_stub;
> > + tool->callchain_deferred = process_event_sample_stub;
>
> nit: there's a similar change needed to delegate_tool__init, that code
> is currently unused.
Will update in the next version.
>
> Reviewed-by: Ian Rogers <irogers@google.com>
Thanks for the review!
Namhyung
>
> > tool->attr = process_event_synth_attr_stub;
> > tool->event_update = process_event_synth_event_update_stub;
> > tool->tracing_data = process_event_synth_tracing_data_stub;
> > diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
> > index 88337cee1e3e2be3..9b9f0a8cbf3de4b5 100644
> > --- a/tools/perf/util/tool.h
> > +++ b/tools/perf/util/tool.h
> > @@ -44,7 +44,8 @@ enum show_feature_header {
> >
> > struct perf_tool {
> > event_sample sample,
> > - read;
> > + read,
> > + callchain_deferred;
> > event_op mmap,
> > mmap2,
> > comm,
> > --
> > 2.52.0.rc1.455.g30608eb744-goog
> >
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v4 3/5] perf record: Add --call-graph fp,defer option for deferred callchains
2025-11-15 23:41 [PATCHSET v4 0/5] perf tools: Add deferred callchain support Namhyung Kim
2025-11-15 23:41 ` [PATCH v4 1/5] tools headers UAPI: Sync linux/perf_event.h for deferred callchains Namhyung Kim
2025-11-15 23:41 ` [PATCH v4 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support Namhyung Kim
@ 2025-11-15 23:41 ` Namhyung Kim
2025-11-19 16:50 ` Ian Rogers
2025-11-15 23:41 ` [PATCH v4 4/5] perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED Namhyung Kim
2025-11-15 23:41 ` [PATCH v4 5/5] perf tools: Merge deferred user callchains Namhyung Kim
4 siblings, 1 reply; 13+ messages in thread
From: Namhyung Kim @ 2025-11-15 23:41 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, James Clark
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Steven Rostedt, Josh Poimboeuf, Indu Bhagat,
Jens Remus, Mathieu Desnoyers, linux-trace-kernel, bpf
Add a new callchain record mode option for deferred callchains. For now
it only works with FP (frame-pointer) mode.
Also add the missing-feature detection logic to clear the defer_callchain
and defer_output flags on old kernels.
$ perf record --call-graph fp,defer -vv true
...
------------------------------------------------------------
perf_event_attr:
type 0 (PERF_TYPE_HARDWARE)
size 136
config 0 (PERF_COUNT_HW_CPU_CYCLES)
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|CALLCHAIN|PERIOD
read_format ID|LOST
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
mmap2 1
comm_exec 1
ksymbol 1
bpf_event 1
defer_callchain 1
defer_output 1
------------------------------------------------------------
sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open failed, error -22
switching off deferred callchain support
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/Documentation/perf-config.txt | 3 +++
tools/perf/Documentation/perf-record.txt | 4 ++++
tools/perf/util/callchain.c | 16 +++++++++++++---
tools/perf/util/callchain.h | 1 +
tools/perf/util/evsel.c | 19 +++++++++++++++++++
tools/perf/util/evsel.h | 1 +
6 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c6f33565966735fe..642d1c490d9e3bcd 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -452,6 +452,9 @@ Variables
kernel space is controlled not by this option but by the
kernel config (CONFIG_UNWINDER_*).
+ The 'defer' mode can be used with 'fp' mode to enable deferred
+ user callchains (like 'fp,defer').
+
call-graph.dump-size::
The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
When using dwarf into record-mode, the default size will be used if omitted.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 067891bd7da6edc8..e8b9aadbbfa50574 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -325,6 +325,10 @@ OPTIONS
by default. User can change the number by passing it after comma
like "--call-graph fp,32".
+ Also "defer" can be used with "fp" (like "--call-graph fp,defer") to
+ enable deferred user callchains, which collect user-space callchains
+ when the thread returns to user space.
+
-q::
--quiet::
Don't print any warnings or messages, useful for scripting.
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index d7b7eef740b9d6ed..2884187ccbbecfdc 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -275,9 +275,13 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
if (tok) {
unsigned long size;
- size = strtoul(tok, &name, 0);
- if (size < (unsigned) sysctl__max_stack())
- param->max_stack = size;
+ if (!strncmp(tok, "defer", sizeof("defer"))) {
+ param->defer = true;
+ } else {
+ size = strtoul(tok, &name, 0);
+ if (size < (unsigned) sysctl__max_stack())
+ param->max_stack = size;
+ }
}
break;
@@ -314,6 +318,12 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
} while (0);
free(buf);
+
+ if (param->defer && param->record_mode != CALLCHAIN_FP) {
+ pr_err("callchain: deferred callchain only works with FP\n");
+ return -EINVAL;
+ }
+
return ret;
}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 86ed9e4d04f9ee7b..d5ae4fbb7ce5fa44 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -98,6 +98,7 @@ extern bool dwarf_callchain_users;
struct callchain_param {
bool enabled;
+ bool defer;
enum perf_call_graph_mode record_mode;
u32 dump_size;
enum chain_mode mode;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 5ee3e7dee93fbbcb..7772ee9cfe3ac1c7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1065,6 +1065,9 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
pr_info("Disabling user space callchains for function trace event.\n");
attr->exclude_callchain_user = 1;
}
+
+ if (param->defer && !attr->exclude_callchain_user)
+ attr->defer_callchain = 1;
}
void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
@@ -1511,6 +1514,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
attr->build_id = track && opts->build_id;
+ attr->defer_output = track && callchain->defer;
/*
* ksymbol is tracked separately with text poke because it needs to be
@@ -2199,6 +2203,10 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
static void evsel__disable_missing_features(struct evsel *evsel)
{
+ if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain)
+ evsel->core.attr.defer_callchain = 0;
+ if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output)
+ evsel->core.attr.defer_output = 0;
if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
(evsel->core.attr.sample_type & PERF_SAMPLE_READ))
evsel->core.attr.inherit = 0;
@@ -2473,6 +2481,13 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
/* Please add new feature detection here. */
+ attr.defer_callchain = true;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.defer_callchain = true;
+ pr_debug2("switching off deferred callchain support\n");
+ attr.defer_callchain = false;
+
attr.inherit = true;
attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
if (has_attr_feature(&attr, /*flags=*/0))
@@ -2584,6 +2599,10 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
errno = old_errno;
check:
+ if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) &&
+ perf_missing_features.defer_callchain)
+ return true;
+
if (evsel->core.attr.inherit &&
(evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
perf_missing_features.inherit_sample_read)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 3ae4ac8f9a37e009..a08130ff2e47a887 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -221,6 +221,7 @@ struct perf_missing_features {
bool branch_counters;
bool aux_action;
bool inherit_sample_read;
+ bool defer_callchain;
};
extern struct perf_missing_features perf_missing_features;
--
2.52.0.rc1.455.g30608eb744-goog
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v4 3/5] perf record: Add --call-graph fp,defer option for deferred callchains
2025-11-15 23:41 ` [PATCH v4 3/5] perf record: Add --call-graph fp,defer option for deferred callchains Namhyung Kim
@ 2025-11-19 16:50 ` Ian Rogers
0 siblings, 0 replies; 13+ messages in thread
From: Ian Rogers @ 2025-11-19 16:50 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users,
Steven Rostedt, Josh Poimboeuf, Indu Bhagat, Jens Remus,
Mathieu Desnoyers, linux-trace-kernel, bpf
On Sat, Nov 15, 2025 at 3:41 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Add a new callchain record mode option for deferred callchains. For now
> it only works with FP (frame-pointer) mode.
>
> And add the missing feature detection logic to clear the flag on old
> kernels.
>
> $ perf record --call-graph fp,defer -vv true
> ...
> ------------------------------------------------------------
> perf_event_attr:
> type 0 (PERF_TYPE_HARDWARE)
> size 136
> config 0 (PERF_COUNT_HW_CPU_CYCLES)
> { sample_period, sample_freq } 4000
> sample_type IP|TID|TIME|CALLCHAIN|PERIOD
> read_format ID|LOST
> disabled 1
> inherit 1
> mmap 1
> comm 1
> freq 1
> enable_on_exec 1
> task 1
> sample_id_all 1
> mmap2 1
> comm_exec 1
> ksymbol 1
> bpf_event 1
> defer_callchain 1
> defer_output 1
> ------------------------------------------------------------
> sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8
> sys_perf_event_open failed, error -22
> switching off deferred callchain support
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Thanks,
Ian
> ---
> tools/perf/Documentation/perf-config.txt | 3 +++
> tools/perf/Documentation/perf-record.txt | 4 ++++
> tools/perf/util/callchain.c | 16 +++++++++++++---
> tools/perf/util/callchain.h | 1 +
> tools/perf/util/evsel.c | 19 +++++++++++++++++++
> tools/perf/util/evsel.h | 1 +
> 6 files changed, 41 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
> index c6f33565966735fe..642d1c490d9e3bcd 100644
> --- a/tools/perf/Documentation/perf-config.txt
> +++ b/tools/perf/Documentation/perf-config.txt
> @@ -452,6 +452,9 @@ Variables
> kernel space is controlled not by this option but by the
> kernel config (CONFIG_UNWINDER_*).
>
> + The 'defer' mode can be used with 'fp' mode to enable deferred
> + user callchains (like 'fp,defer').
> +
> call-graph.dump-size::
> The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
> When using dwarf into record-mode, the default size will be used if omitted.
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index 067891bd7da6edc8..e8b9aadbbfa50574 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -325,6 +325,10 @@ OPTIONS
> by default. User can change the number by passing it after comma
> like "--call-graph fp,32".
>
> + Also "defer" can be used with "fp" (like "--call-graph fp,defer") to
> + enable deferred user callchain which will collect user-space callchains
> + when the thread returns to the user space.
> +
> -q::
> --quiet::
> Don't print any warnings or messages, useful for scripting.
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index d7b7eef740b9d6ed..2884187ccbbecfdc 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -275,9 +275,13 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
> if (tok) {
> unsigned long size;
>
> - size = strtoul(tok, &name, 0);
> - if (size < (unsigned) sysctl__max_stack())
> - param->max_stack = size;
> + if (!strncmp(tok, "defer", sizeof("defer"))) {
> + param->defer = true;
> + } else {
> + size = strtoul(tok, &name, 0);
> + if (size < (unsigned) sysctl__max_stack())
> + param->max_stack = size;
> + }
> }
> break;
>
> @@ -314,6 +318,12 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
> } while (0);
>
> free(buf);
> +
> + if (param->defer && param->record_mode != CALLCHAIN_FP) {
> + pr_err("callchain: deferred callchain only works with FP\n");
> + return -EINVAL;
> + }
> +
> return ret;
> }
>
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index 86ed9e4d04f9ee7b..d5ae4fbb7ce5fa44 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -98,6 +98,7 @@ extern bool dwarf_callchain_users;
>
> struct callchain_param {
> bool enabled;
> + bool defer;
> enum perf_call_graph_mode record_mode;
> u32 dump_size;
> enum chain_mode mode;
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 5ee3e7dee93fbbcb..7772ee9cfe3ac1c7 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1065,6 +1065,9 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
> pr_info("Disabling user space callchains for function trace event.\n");
> attr->exclude_callchain_user = 1;
> }
> +
> + if (param->defer && !attr->exclude_callchain_user)
> + attr->defer_callchain = 1;
> }
>
> void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
> @@ -1511,6 +1514,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
> attr->mmap2 = track && !perf_missing_features.mmap2;
> attr->comm = track;
> attr->build_id = track && opts->build_id;
> + attr->defer_output = track && callchain->defer;
>
> /*
> * ksymbol is tracked separately with text poke because it needs to be
> @@ -2199,6 +2203,10 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>
> static void evsel__disable_missing_features(struct evsel *evsel)
> {
> + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain)
> + evsel->core.attr.defer_callchain = 0;
> + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output)
> + evsel->core.attr.defer_output = 0;
> if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
> (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
> evsel->core.attr.inherit = 0;
> @@ -2473,6 +2481,13 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
>
> /* Please add new feature detection here. */
>
> + attr.defer_callchain = true;
> + if (has_attr_feature(&attr, /*flags=*/0))
> + goto found;
> + perf_missing_features.defer_callchain = true;
> + pr_debug2("switching off deferred callchain support\n");
> + attr.defer_callchain = false;
> +
> attr.inherit = true;
> attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
> if (has_attr_feature(&attr, /*flags=*/0))
> @@ -2584,6 +2599,10 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
> errno = old_errno;
>
> check:
> + if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) &&
> + perf_missing_features.defer_callchain)
> + return true;
> +
> if (evsel->core.attr.inherit &&
> (evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
> perf_missing_features.inherit_sample_read)
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 3ae4ac8f9a37e009..a08130ff2e47a887 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -221,6 +221,7 @@ struct perf_missing_features {
> bool branch_counters;
> bool aux_action;
> bool inherit_sample_read;
> + bool defer_callchain;
> };
>
> extern struct perf_missing_features perf_missing_features;
> --
> 2.52.0.rc1.455.g30608eb744-goog
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v4 4/5] perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED
2025-11-15 23:41 [PATCHSET v4 0/5] perf tools: Add deferred callchain support Namhyung Kim
` (2 preceding siblings ...)
2025-11-15 23:41 ` [PATCH v4 3/5] perf record: Add --call-graph fp,defer option for deferred callchains Namhyung Kim
@ 2025-11-15 23:41 ` Namhyung Kim
2025-11-19 16:50 ` Ian Rogers
2025-11-15 23:41 ` [PATCH v4 5/5] perf tools: Merge deferred user callchains Namhyung Kim
4 siblings, 1 reply; 13+ messages in thread
From: Namhyung Kim @ 2025-11-15 23:41 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, James Clark
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Steven Rostedt, Josh Poimboeuf, Indu Bhagat,
Jens Remus, Mathieu Desnoyers, linux-trace-kernel, bpf
Handle the deferred callchains in the script output.
$ perf script
...
pwd 2312 121.163435: 249113 cpu/cycles/P:
ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms])
ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms])
ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms])
ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms])
ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms])
ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms])
ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])
b00000006 (cookie) ([unknown])
pwd 2312 121.163447: DEFERRED CALLCHAIN [cookie: b00000006]
7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/builtin-script.c | 89 +++++++++++++++++++++++++++++++++
tools/perf/util/evsel_fprintf.c | 5 +-
2 files changed, 93 insertions(+), 1 deletion(-)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 011962e1ee0f6898..85b42205a71b3993 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2706,6 +2706,94 @@ static int process_sample_event(const struct perf_tool *tool,
return ret;
}
+static int process_deferred_sample_event(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine)
+{
+ struct perf_script *scr = container_of(tool, struct perf_script, tool);
+ struct perf_event_attr *attr = &evsel->core.attr;
+ struct evsel_script *es = evsel->priv;
+ unsigned int type = output_type(attr->type);
+ struct addr_location al;
+ FILE *fp = es->fp;
+ int ret = 0;
+
+ if (output[type].fields == 0)
+ return 0;
+
+ /* Set thread to NULL to indicate al is not initialized */
+ addr_location__init(&al);
+
+ if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num,
+ sample->time)) {
+ goto out_put;
+ }
+
+ if (debug_mode) {
+ if (sample->time < last_timestamp) {
+ pr_err("Samples misordered, previous: %" PRIu64
+ " this: %" PRIu64 "\n", last_timestamp,
+ sample->time);
+ nr_unordered++;
+ }
+ last_timestamp = sample->time;
+ goto out_put;
+ }
+
+ if (filter_cpu(sample))
+ goto out_put;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_err("problem processing %d event, skipping it.\n",
+ event->header.type);
+ ret = -1;
+ goto out_put;
+ }
+
+ if (al.filtered)
+ goto out_put;
+
+ if (!show_event(sample, evsel, al.thread, &al, NULL))
+ goto out_put;
+
+ if (evswitch__discard(&scr->evswitch, evsel))
+ goto out_put;
+
+ perf_sample__fprintf_start(scr, sample, al.thread, evsel,
+ PERF_RECORD_CALLCHAIN_DEFERRED, fp);
+ fprintf(fp, "DEFERRED CALLCHAIN [cookie: %llx]",
+ (unsigned long long)event->callchain_deferred.cookie);
+
+ if (PRINT_FIELD(IP)) {
+ struct callchain_cursor *cursor = NULL;
+
+ if (symbol_conf.use_callchain && sample->callchain) {
+ cursor = get_tls_callchain_cursor();
+ if (thread__resolve_callchain(al.thread, cursor, evsel,
+ sample, NULL, NULL,
+ scripting_max_stack)) {
+ pr_info("cannot resolve deferred callchains\n");
+ cursor = NULL;
+ }
+ }
+
+ fputc(cursor ? '\n' : ' ', fp);
+ sample__fprintf_sym(sample, &al, 0, output[type].print_ip_opts,
+ cursor, symbol_conf.bt_stop_list, fp);
+ }
+
+ fprintf(fp, "\n");
+
+ if (verbose > 0)
+ fflush(fp);
+
+out_put:
+ addr_location__exit(&al);
+ return ret;
+}
+
// Used when scr->per_event_dump is not set
static struct evsel_script es_stdout;
@@ -4303,6 +4391,7 @@ int cmd_script(int argc, const char **argv)
perf_tool__init(&script.tool, !unsorted_dump);
script.tool.sample = process_sample_event;
+ script.tool.callchain_deferred = process_deferred_sample_event;
script.tool.mmap = perf_event__process_mmap;
script.tool.mmap2 = perf_event__process_mmap2;
script.tool.comm = perf_event__process_comm;
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 103984b29b1e10ae..10f1a03c28601e36 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -168,7 +168,10 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
node_al.addr = addr;
node_al.map = map__get(map);
- if (print_symoffset) {
+ if (sample->deferred_callchain &&
+ sample->deferred_cookie == node->ip) {
+ printed += fprintf(fp, "(cookie)");
+ } else if (print_symoffset) {
printed += __symbol__fprintf_symname_offs(sym, &node_al,
print_unknown_as_addr,
true, fp);
--
2.52.0.rc1.455.g30608eb744-goog
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v4 4/5] perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED
2025-11-15 23:41 ` [PATCH v4 4/5] perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED Namhyung Kim
@ 2025-11-19 16:50 ` Ian Rogers
0 siblings, 0 replies; 13+ messages in thread
From: Ian Rogers @ 2025-11-19 16:50 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users,
Steven Rostedt, Josh Poimboeuf, Indu Bhagat, Jens Remus,
Mathieu Desnoyers, linux-trace-kernel, bpf
On Sat, Nov 15, 2025 at 3:41 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Handle the deferred callchains in the script output.
>
> $ perf script
> ...
> pwd 2312 121.163435: 249113 cpu/cycles/P:
> ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms])
> ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms])
> ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms])
> ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms])
> ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms])
> ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms])
> ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])
> b00000006 (cookie) ([unknown])
>
> pwd 2312 121.163447: DEFERRED CALLCHAIN [cookie: b00000006]
> 7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
> 7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
> 7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Thanks,
Ian
> ---
> tools/perf/builtin-script.c | 89 +++++++++++++++++++++++++++++++++
> tools/perf/util/evsel_fprintf.c | 5 +-
> 2 files changed, 93 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 011962e1ee0f6898..85b42205a71b3993 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -2706,6 +2706,94 @@ static int process_sample_event(const struct perf_tool *tool,
> return ret;
> }
>
> +static int process_deferred_sample_event(const struct perf_tool *tool,
> + union perf_event *event,
> + struct perf_sample *sample,
> + struct evsel *evsel,
> + struct machine *machine)
> +{
> + struct perf_script *scr = container_of(tool, struct perf_script, tool);
> + struct perf_event_attr *attr = &evsel->core.attr;
> + struct evsel_script *es = evsel->priv;
> + unsigned int type = output_type(attr->type);
> + struct addr_location al;
> + FILE *fp = es->fp;
> + int ret = 0;
> +
> + if (output[type].fields == 0)
> + return 0;
> +
> + /* Set thread to NULL to indicate addr_al and al are not initialized */
> + addr_location__init(&al);
> +
> + if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num,
> + sample->time)) {
> + goto out_put;
> + }
> +
> + if (debug_mode) {
> + if (sample->time < last_timestamp) {
> + pr_err("Samples misordered, previous: %" PRIu64
> + " this: %" PRIu64 "\n", last_timestamp,
> + sample->time);
> + nr_unordered++;
> + }
> + last_timestamp = sample->time;
> + goto out_put;
> + }
> +
> + if (filter_cpu(sample))
> + goto out_put;
> +
> + if (machine__resolve(machine, &al, sample) < 0) {
> + pr_err("problem processing %d event, skipping it.\n",
> + event->header.type);
> + ret = -1;
> + goto out_put;
> + }
> +
> + if (al.filtered)
> + goto out_put;
> +
> + if (!show_event(sample, evsel, al.thread, &al, NULL))
> + goto out_put;
> +
> + if (evswitch__discard(&scr->evswitch, evsel))
> + goto out_put;
> +
> + perf_sample__fprintf_start(scr, sample, al.thread, evsel,
> + PERF_RECORD_CALLCHAIN_DEFERRED, fp);
> + fprintf(fp, "DEFERRED CALLCHAIN [cookie: %llx]",
> + (unsigned long long)event->callchain_deferred.cookie);
> +
> + if (PRINT_FIELD(IP)) {
> + struct callchain_cursor *cursor = NULL;
> +
> + if (symbol_conf.use_callchain && sample->callchain) {
> + cursor = get_tls_callchain_cursor();
> + if (thread__resolve_callchain(al.thread, cursor, evsel,
> + sample, NULL, NULL,
> + scripting_max_stack)) {
> + pr_info("cannot resolve deferred callchains\n");
> + cursor = NULL;
> + }
> + }
> +
> + fputc(cursor ? '\n' : ' ', fp);
> + sample__fprintf_sym(sample, &al, 0, output[type].print_ip_opts,
> + cursor, symbol_conf.bt_stop_list, fp);
> + }
> +
> + fprintf(fp, "\n");
> +
> + if (verbose > 0)
> + fflush(fp);
> +
> +out_put:
> + addr_location__exit(&al);
> + return ret;
> +}
> +
> // Used when scr->per_event_dump is not set
> static struct evsel_script es_stdout;
>
> @@ -4303,6 +4391,7 @@ int cmd_script(int argc, const char **argv)
>
> perf_tool__init(&script.tool, !unsorted_dump);
> script.tool.sample = process_sample_event;
> + script.tool.callchain_deferred = process_deferred_sample_event;
> script.tool.mmap = perf_event__process_mmap;
> script.tool.mmap2 = perf_event__process_mmap2;
> script.tool.comm = perf_event__process_comm;
> diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
> index 103984b29b1e10ae..10f1a03c28601e36 100644
> --- a/tools/perf/util/evsel_fprintf.c
> +++ b/tools/perf/util/evsel_fprintf.c
> @@ -168,7 +168,10 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
> node_al.addr = addr;
> node_al.map = map__get(map);
>
> - if (print_symoffset) {
> + if (sample->deferred_callchain &&
> + sample->deferred_cookie == node->ip) {
> + printed += fprintf(fp, "(cookie)");
> + } else if (print_symoffset) {
> printed += __symbol__fprintf_symname_offs(sym, &node_al,
> print_unknown_as_addr,
> true, fp);
> --
> 2.52.0.rc1.455.g30608eb744-goog
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v4 5/5] perf tools: Merge deferred user callchains
2025-11-15 23:41 [PATCHSET v4 0/5] perf tools: Add deferred callchain support Namhyung Kim
` (3 preceding siblings ...)
2025-11-15 23:41 ` [PATCH v4 4/5] perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED Namhyung Kim
@ 2025-11-15 23:41 ` Namhyung Kim
2025-11-19 16:54 ` Ian Rogers
4 siblings, 1 reply; 13+ messages in thread
From: Namhyung Kim @ 2025-11-15 23:41 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, James Clark
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Steven Rostedt, Josh Poimboeuf, Indu Bhagat,
Jens Remus, Mathieu Desnoyers, linux-trace-kernel, bpf
Save samples with deferred callchains in a separate list and deliver
them after merging the user callchains. If users don't want to merge
they can set tool->merge_deferred_callchains to false to prevent the
behavior.
Compared with the previous output, perf script now shows the merged callchains.
$ perf script
...
pwd 2312 121.163435: 249113 cpu/cycles/P:
ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms])
ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms])
ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms])
ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms])
ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms])
ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms])
ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])
7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
...
The old output can be obtained using the --no-merge-callchains option.
perf report also shows the user callchain entries at the end of the chain.
$ perf report --no-children --stdio -q -S __build_id_parse.isra.0
# symbol: __build_id_parse.isra.0
8.40% pwd [kernel.kallsyms]
|
---__build_id_parse.isra.0
perf_event_mmap
mprotect_fixup
do_mprotect_pkey
__x64_sys_mprotect
do_syscall_64
entry_SYSCALL_64_after_hwframe
mprotect
_dl_sysdep_start
_dl_start_user
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/Documentation/perf-script.txt | 5 ++
tools/perf/builtin-inject.c | 1 +
tools/perf/builtin-report.c | 1 +
tools/perf/builtin-script.c | 4 ++
tools/perf/util/callchain.c | 29 ++++++++++
tools/perf/util/callchain.h | 3 ++
tools/perf/util/evlist.c | 1 +
tools/perf/util/evlist.h | 2 +
tools/perf/util/session.c | 67 +++++++++++++++++++++++-
tools/perf/util/tool.c | 1 +
tools/perf/util/tool.h | 1 +
11 files changed, 114 insertions(+), 1 deletion(-)
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 28bec7e78bc858ba..03d1129606328d6d 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -527,6 +527,11 @@ include::itrace.txt[]
The known limitations include exception handing such as
setjmp/longjmp will have calls/returns not match.
+--merge-callchains::
+ Enable merging deferred user callchains if available. This is the
+ default behavior. If you want to see separate CALLCHAIN_DEFERRED
+ records for some reason, use --no-merge-callchains explicitly.
+
:GMEXAMPLECMD: script
:GMEXAMPLESUBCMD:
include::guest-files.txt[]
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index bd9245d2dd41aa48..51d2721b6db9dccb 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2527,6 +2527,7 @@ int cmd_inject(int argc, const char **argv)
inject.tool.auxtrace = perf_event__repipe_auxtrace;
inject.tool.bpf_metadata = perf_event__repipe_op2_synth;
inject.tool.dont_split_sample_group = true;
+ inject.tool.merge_deferred_callchains = false;
inject.session = __perf_session__new(&data, &inject.tool,
/*trace_event_repipe=*/inject.output.is_pipe,
/*host_env=*/NULL);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2bc269f5fcef8023..add6b1c2aaf04270 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1614,6 +1614,7 @@ int cmd_report(int argc, const char **argv)
report.tool.event_update = perf_event__process_event_update;
report.tool.feature = process_feature_event;
report.tool.ordering_requires_timestamps = true;
+ report.tool.merge_deferred_callchains = !dump_trace;
session = perf_session__new(&data, &report.tool);
if (IS_ERR(session)) {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 85b42205a71b3993..62e43d3c5ad731a0 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -4009,6 +4009,7 @@ int cmd_script(int argc, const char **argv)
bool header_only = false;
bool script_started = false;
bool unsorted_dump = false;
+ bool merge_deferred_callchains = true;
char *rec_script_path = NULL;
char *rep_script_path = NULL;
struct perf_session *session;
@@ -4162,6 +4163,8 @@ int cmd_script(int argc, const char **argv)
"Guest code can be found in hypervisor process"),
OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
"Enable LBR callgraph stitching approach"),
+ OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains,
+ "Enable merge deferred user callchains"),
OPTS_EVSWITCH(&script.evswitch),
OPT_END()
};
@@ -4418,6 +4421,7 @@ int cmd_script(int argc, const char **argv)
script.tool.throttle = process_throttle_event;
script.tool.unthrottle = process_throttle_event;
script.tool.ordering_requires_timestamps = true;
+ script.tool.merge_deferred_callchains = merge_deferred_callchains;
session = perf_session__new(&data, &script.tool);
if (IS_ERR(session))
return PTR_ERR(session);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 2884187ccbbecfdc..71dc5a070065dd2a 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1838,3 +1838,32 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
}
return 0;
}
+
+int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
+ struct perf_sample *sample_callchain)
+{
+ u64 nr_orig = sample_orig->callchain->nr - 1;
+ u64 nr_deferred = sample_callchain->callchain->nr;
+ struct ip_callchain *callchain;
+
+ if (sample_orig->callchain->nr < 2) {
+ sample_orig->deferred_callchain = false;
+ return -EINVAL;
+ }
+
+ callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64));
+ if (callchain == NULL) {
+ sample_orig->deferred_callchain = false;
+ return -ENOMEM;
+ }
+
+ callchain->nr = nr_orig + nr_deferred;
+ /* copy original including PERF_CONTEXT_USER_DEFERRED (but not the cookie) */
+ memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64));
+ /* copy deferred user callchains */
+ memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips,
+ nr_deferred * sizeof(u64));
+
+ sample_orig->callchain = callchain;
+ return 0;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index d5ae4fbb7ce5fa44..2a52af8c80ace33c 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -318,4 +318,7 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
struct perf_sample *sample, int max_stack,
bool symbols, callchain_iter_fn cb, void *data);
+int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
+ struct perf_sample *sample_callchain);
+
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e8217efdda5323c6..03674d2cbd015e4f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -85,6 +85,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
evlist->ctl_fd.pos = -1;
evlist->nr_br_cntr = -1;
metricgroup__rblist_init(&evlist->metric_events);
+ INIT_LIST_HEAD(&evlist->deferred_samples);
}
struct evlist *evlist__new(void)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 5e71e3dc60423079..911834ae7c2a6f76 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -92,6 +92,8 @@ struct evlist {
* of struct metric_expr.
*/
struct rblist metric_events;
+ /* samples with deferred_callchain would wait here. */
+ struct list_head deferred_samples;
};
struct evsel_str_handler {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 361e15c1f26a96d0..2e777fd1bcf6707b 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1285,6 +1285,60 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool
per_thread);
}
+struct deferred_event {
+ struct list_head list;
+ union perf_event *event;
+};
+
+static int evlist__deliver_deferred_samples(struct evlist *evlist,
+ const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct deferred_event *de, *tmp;
+ struct evsel *evsel;
+ int ret = 0;
+
+ if (!tool->merge_deferred_callchains) {
+ evsel = evlist__id2evsel(evlist, sample->id);
+ return tool->callchain_deferred(tool, event, sample,
+ evsel, machine);
+ }
+
+ list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) {
+ struct perf_sample orig_sample;
+
+ ret = evlist__parse_sample(evlist, de->event, &orig_sample);
+ if (ret < 0) {
+ pr_err("failed to parse original sample\n");
+ break;
+ }
+
+ if (sample->tid != orig_sample.tid)
+ continue;
+
+ if (event->callchain_deferred.cookie == orig_sample.deferred_cookie)
+ sample__merge_deferred_callchain(&orig_sample, sample);
+ else
+ orig_sample.deferred_callchain = false;
+
+ evsel = evlist__id2evsel(evlist, orig_sample.id);
+ ret = evlist__deliver_sample(evlist, tool, de->event,
+ &orig_sample, evsel, machine);
+
+ if (orig_sample.deferred_callchain)
+ free(orig_sample.callchain);
+
+ list_del(&de->list);
+ free(de);
+
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
static int machines__deliver_event(struct machines *machines,
struct evlist *evlist,
union perf_event *event,
@@ -1313,6 +1367,16 @@ static int machines__deliver_event(struct machines *machines,
return 0;
}
dump_sample(evsel, event, sample, perf_env__arch(machine->env));
+ if (sample->deferred_callchain && tool->merge_deferred_callchains) {
+ struct deferred_event *de = malloc(sizeof(*de));
+
+ if (de == NULL)
+ return -ENOMEM;
+
+ de->event = event;
+ list_add_tail(&de->list, &evlist->deferred_samples);
+ return 0;
+ }
return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
case PERF_RECORD_MMAP:
return tool->mmap(tool, event, sample, machine);
@@ -1372,7 +1436,8 @@ static int machines__deliver_event(struct machines *machines,
return tool->aux_output_hw_id(tool, event, sample, machine);
case PERF_RECORD_CALLCHAIN_DEFERRED:
dump_deferred_callchain(evsel, event, sample);
- return tool->callchain_deferred(tool, event, sample, evsel, machine);
+ return evlist__deliver_deferred_samples(evlist, tool, event,
+ sample, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
index f732d33e7f895ed4..c5d3b464b2a433b3 100644
--- a/tools/perf/util/tool.c
+++ b/tools/perf/util/tool.c
@@ -266,6 +266,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
tool->cgroup_events = false;
tool->no_warn = false;
tool->show_feat_hdr = SHOW_FEAT_NO_HEADER;
+ tool->merge_deferred_callchains = true;
tool->sample = process_event_sample_stub;
tool->mmap = process_event_stub;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 9b9f0a8cbf3de4b5..e96b69d25a5b737d 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -90,6 +90,7 @@ struct perf_tool {
bool cgroup_events;
bool no_warn;
bool dont_split_sample_group;
+ bool merge_deferred_callchains;
enum show_feature_header show_feat_hdr;
};
--
2.52.0.rc1.455.g30608eb744-goog
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v4 5/5] perf tools: Merge deferred user callchains
2025-11-15 23:41 ` [PATCH v4 5/5] perf tools: Merge deferred user callchains Namhyung Kim
@ 2025-11-19 16:54 ` Ian Rogers
2025-11-20 1:07 ` Namhyung Kim
0 siblings, 1 reply; 13+ messages in thread
From: Ian Rogers @ 2025-11-19 16:54 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users,
Steven Rostedt, Josh Poimboeuf, Indu Bhagat, Jens Remus,
Mathieu Desnoyers, linux-trace-kernel, bpf
On Sat, Nov 15, 2025 at 3:41 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Save samples with deferred callchains in a separate list and deliver
> them after merging the user callchains. If users don't want to merge
> they can set tool->merge_deferred_callchains to false to prevent the
> behavior.
>
> With previous result, now perf script will show the merged callchains.
>
> $ perf script
> ...
> pwd 2312 121.163435: 249113 cpu/cycles/P:
> ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms])
> ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms])
> ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms])
> ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms])
> ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms])
> ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms])
> ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])
> 7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
> 7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
> 7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
> ...
>
> The old output can be get using --no-merge-callchain option.
> Also perf report can get the user callchain entry at the end.
>
> $ perf report --no-children --stdio -q -S __build_id_parse.isra.0
> # symbol: __build_id_parse.isra.0
> 8.40% pwd [kernel.kallsyms]
> |
> ---__build_id_parse.isra.0
> perf_event_mmap
> mprotect_fixup
> do_mprotect_pkey
> __x64_sys_mprotect
> do_syscall_64
> entry_SYSCALL_64_after_hwframe
> mprotect
> _dl_sysdep_start
> _dl_start_user
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> tools/perf/Documentation/perf-script.txt | 5 ++
> tools/perf/builtin-inject.c | 1 +
> tools/perf/builtin-report.c | 1 +
> tools/perf/builtin-script.c | 4 ++
> tools/perf/util/callchain.c | 29 ++++++++++
> tools/perf/util/callchain.h | 3 ++
> tools/perf/util/evlist.c | 1 +
> tools/perf/util/evlist.h | 2 +
> tools/perf/util/session.c | 67 +++++++++++++++++++++++-
> tools/perf/util/tool.c | 1 +
> tools/perf/util/tool.h | 1 +
> 11 files changed, 114 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
> index 28bec7e78bc858ba..03d1129606328d6d 100644
> --- a/tools/perf/Documentation/perf-script.txt
> +++ b/tools/perf/Documentation/perf-script.txt
> @@ -527,6 +527,11 @@ include::itrace.txt[]
> The known limitations include exception handing such as
> setjmp/longjmp will have calls/returns not match.
>
> +--merge-callchains::
> + Enable merging deferred user callchains if available. This is the
> + default behavior. If you want to see separate CALLCHAIN_DEFERRED
> + records for some reason, use --no-merge-callchains explicitly.
> +
> :GMEXAMPLECMD: script
> :GMEXAMPLESUBCMD:
> include::guest-files.txt[]
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index bd9245d2dd41aa48..51d2721b6db9dccb 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2527,6 +2527,7 @@ int cmd_inject(int argc, const char **argv)
> inject.tool.auxtrace = perf_event__repipe_auxtrace;
> inject.tool.bpf_metadata = perf_event__repipe_op2_synth;
> inject.tool.dont_split_sample_group = true;
> + inject.tool.merge_deferred_callchains = false;
> inject.session = __perf_session__new(&data, &inject.tool,
> /*trace_event_repipe=*/inject.output.is_pipe,
> /*host_env=*/NULL);
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index 2bc269f5fcef8023..add6b1c2aaf04270 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -1614,6 +1614,7 @@ int cmd_report(int argc, const char **argv)
> report.tool.event_update = perf_event__process_event_update;
> report.tool.feature = process_feature_event;
> report.tool.ordering_requires_timestamps = true;
> + report.tool.merge_deferred_callchains = !dump_trace;
>
> session = perf_session__new(&data, &report.tool);
> if (IS_ERR(session)) {
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 85b42205a71b3993..62e43d3c5ad731a0 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -4009,6 +4009,7 @@ int cmd_script(int argc, const char **argv)
> bool header_only = false;
> bool script_started = false;
> bool unsorted_dump = false;
> + bool merge_deferred_callchains = true;
> char *rec_script_path = NULL;
> char *rep_script_path = NULL;
> struct perf_session *session;
> @@ -4162,6 +4163,8 @@ int cmd_script(int argc, const char **argv)
> "Guest code can be found in hypervisor process"),
> OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
> "Enable LBR callgraph stitching approach"),
> + OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains,
> + "Enable merge deferred user callchains"),
> OPTS_EVSWITCH(&script.evswitch),
> OPT_END()
> };
> @@ -4418,6 +4421,7 @@ int cmd_script(int argc, const char **argv)
> script.tool.throttle = process_throttle_event;
> script.tool.unthrottle = process_throttle_event;
> script.tool.ordering_requires_timestamps = true;
> + script.tool.merge_deferred_callchains = merge_deferred_callchains;
> session = perf_session__new(&data, &script.tool);
> if (IS_ERR(session))
> return PTR_ERR(session);
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index 2884187ccbbecfdc..71dc5a070065dd2a 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -1838,3 +1838,32 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
> }
> return 0;
> }
> +
> +int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
> + struct perf_sample *sample_callchain)
> +{
> + u64 nr_orig = sample_orig->callchain->nr - 1;
> + u64 nr_deferred = sample_callchain->callchain->nr;
> + struct ip_callchain *callchain;
> +
> + if (sample_orig->callchain->nr < 2) {
> + sample_orig->deferred_callchain = false;
> + return -EINVAL;
> + }
> +
> + callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64));
> + if (callchain == NULL) {
> + sample_orig->deferred_callchain = false;
> + return -ENOMEM;
> + }
> +
> + callchain->nr = nr_orig + nr_deferred;
> + /* copy original including PERF_CONTEXT_USER_DEFERRED (but the cookie) */
> + memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64));
> + /* copy deferred user callchains */
> + memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips,
> + nr_deferred * sizeof(u64));
> +
> + sample_orig->callchain = callchain;
> + return 0;
> +}
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index d5ae4fbb7ce5fa44..2a52af8c80ace33c 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -318,4 +318,7 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
> struct perf_sample *sample, int max_stack,
> bool symbols, callchain_iter_fn cb, void *data);
>
> +int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
> + struct perf_sample *sample_callchain);
> +
> #endif /* __PERF_CALLCHAIN_H */
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index e8217efdda5323c6..03674d2cbd015e4f 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -85,6 +85,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
> evlist->ctl_fd.pos = -1;
> evlist->nr_br_cntr = -1;
> metricgroup__rblist_init(&evlist->metric_events);
> + INIT_LIST_HEAD(&evlist->deferred_samples);
> }
>
> struct evlist *evlist__new(void)
> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> index 5e71e3dc60423079..911834ae7c2a6f76 100644
> --- a/tools/perf/util/evlist.h
> +++ b/tools/perf/util/evlist.h
> @@ -92,6 +92,8 @@ struct evlist {
> * of struct metric_expr.
> */
> struct rblist metric_events;
> + /* samples with deferred_callchain would wait here. */
> + struct list_head deferred_samples;
> };
>
> struct evsel_str_handler {
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 361e15c1f26a96d0..2e777fd1bcf6707b 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -1285,6 +1285,60 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool
> per_thread);
> }
>
> +struct deferred_event {
> + struct list_head list;
> + union perf_event *event;
What's going on with the memory here? Doesn't the event need to be a
copy to avoid the memory going away in between processing events? It
is definitely worth commenting. Note, I don't see a copy being made
for the event here in machines__deliver_event.
Thanks,
Ian
> +};
> +
> +static int evlist__deliver_deferred_samples(struct evlist *evlist,
> + const struct perf_tool *tool,
> + union perf_event *event,
> + struct perf_sample *sample,
> + struct machine *machine)
> +{
> + struct deferred_event *de, *tmp;
> + struct evsel *evsel;
> + int ret = 0;
> +
> + if (!tool->merge_deferred_callchains) {
> + evsel = evlist__id2evsel(evlist, sample->id);
> + return tool->callchain_deferred(tool, event, sample,
> + evsel, machine);
> + }
> +
> + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) {
> + struct perf_sample orig_sample;
> +
> + ret = evlist__parse_sample(evlist, de->event, &orig_sample);
> + if (ret < 0) {
> + pr_err("failed to parse original sample\n");
> + break;
> + }
> +
> + if (sample->tid != orig_sample.tid)
> + continue;
> +
> + if (event->callchain_deferred.cookie == orig_sample.deferred_cookie)
> + sample__merge_deferred_callchain(&orig_sample, sample);
> + else
> + orig_sample.deferred_callchain = false;
> +
> + evsel = evlist__id2evsel(evlist, orig_sample.id);
> + ret = evlist__deliver_sample(evlist, tool, de->event,
> + &orig_sample, evsel, machine);
> +
> + if (orig_sample.deferred_callchain)
> + free(orig_sample.callchain);
> +
> + list_del(&de->list);
> + free(de);
> +
> + if (ret)
> + break;
> + }
> + return ret;
> +}
> +
> static int machines__deliver_event(struct machines *machines,
> struct evlist *evlist,
> union perf_event *event,
> @@ -1313,6 +1367,16 @@ static int machines__deliver_event(struct machines *machines,
> return 0;
> }
> dump_sample(evsel, event, sample, perf_env__arch(machine->env));
> + if (sample->deferred_callchain && tool->merge_deferred_callchains) {
> + struct deferred_event *de = malloc(sizeof(*de));
> +
> + if (de == NULL)
> + return -ENOMEM;
> +
> + de->event = event;
> + list_add_tail(&de->list, &evlist->deferred_samples);
> + return 0;
> + }
> return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
> case PERF_RECORD_MMAP:
> return tool->mmap(tool, event, sample, machine);
> @@ -1372,7 +1436,8 @@ static int machines__deliver_event(struct machines *machines,
> return tool->aux_output_hw_id(tool, event, sample, machine);
> case PERF_RECORD_CALLCHAIN_DEFERRED:
> dump_deferred_callchain(evsel, event, sample);
> - return tool->callchain_deferred(tool, event, sample, evsel, machine);
> + return evlist__deliver_deferred_samples(evlist, tool, event,
> + sample, machine);
> default:
> ++evlist->stats.nr_unknown_events;
> return -1;
> diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
> index f732d33e7f895ed4..c5d3b464b2a433b3 100644
> --- a/tools/perf/util/tool.c
> +++ b/tools/perf/util/tool.c
> @@ -266,6 +266,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
> tool->cgroup_events = false;
> tool->no_warn = false;
> tool->show_feat_hdr = SHOW_FEAT_NO_HEADER;
> + tool->merge_deferred_callchains = true;
>
> tool->sample = process_event_sample_stub;
> tool->mmap = process_event_stub;
> diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
> index 9b9f0a8cbf3de4b5..e96b69d25a5b737d 100644
> --- a/tools/perf/util/tool.h
> +++ b/tools/perf/util/tool.h
> @@ -90,6 +90,7 @@ struct perf_tool {
> bool cgroup_events;
> bool no_warn;
> bool dont_split_sample_group;
> + bool merge_deferred_callchains;
> enum show_feature_header show_feat_hdr;
> };
>
> --
> 2.52.0.rc1.455.g30608eb744-goog
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v4 5/5] perf tools: Merge deferred user callchains
2025-11-19 16:54 ` Ian Rogers
@ 2025-11-20 1:07 ` Namhyung Kim
0 siblings, 0 replies; 13+ messages in thread
From: Namhyung Kim @ 2025-11-20 1:07 UTC (permalink / raw)
To: Ian Rogers
Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users,
Steven Rostedt, Josh Poimboeuf, Indu Bhagat, Jens Remus,
Mathieu Desnoyers, linux-trace-kernel, bpf
On Wed, Nov 19, 2025 at 08:54:26AM -0800, Ian Rogers wrote:
> On Sat, Nov 15, 2025 at 3:41 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > Save samples with deferred callchains in a separate list and deliver
> > them after merging the user callchains. If users don't want to merge
> > they can set tool->merge_deferred_callchains to false to prevent the
> > behavior.
> >
> > > With the previous result, perf script will now show the merged callchains.
> >
> > $ perf script
> > ...
> > pwd 2312 121.163435: 249113 cpu/cycles/P:
> > ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms])
> > ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms])
> > ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms])
> > ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms])
> > ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms])
> > ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms])
> > ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])
> > 7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
> > 7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
> > 7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
> > ...
> >
> > > The old output can be obtained using the --no-merge-callchains option.
> > > Also, perf report can get the user callchain entry at the end.
> >
> > $ perf report --no-children --stdio -q -S __build_id_parse.isra.0
> > # symbol: __build_id_parse.isra.0
> > 8.40% pwd [kernel.kallsyms]
> > |
> > ---__build_id_parse.isra.0
> > perf_event_mmap
> > mprotect_fixup
> > do_mprotect_pkey
> > __x64_sys_mprotect
> > do_syscall_64
> > entry_SYSCALL_64_after_hwframe
> > mprotect
> > _dl_sysdep_start
> > _dl_start_user
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> > tools/perf/Documentation/perf-script.txt | 5 ++
> > tools/perf/builtin-inject.c | 1 +
> > tools/perf/builtin-report.c | 1 +
> > tools/perf/builtin-script.c | 4 ++
> > tools/perf/util/callchain.c | 29 ++++++++++
> > tools/perf/util/callchain.h | 3 ++
> > tools/perf/util/evlist.c | 1 +
> > tools/perf/util/evlist.h | 2 +
> > tools/perf/util/session.c | 67 +++++++++++++++++++++++-
> > tools/perf/util/tool.c | 1 +
> > tools/perf/util/tool.h | 1 +
> > 11 files changed, 114 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
> > index 28bec7e78bc858ba..03d1129606328d6d 100644
> > --- a/tools/perf/Documentation/perf-script.txt
> > +++ b/tools/perf/Documentation/perf-script.txt
> > @@ -527,6 +527,11 @@ include::itrace.txt[]
> > The known limitations include exception handing such as
> > setjmp/longjmp will have calls/returns not match.
> >
> > +--merge-callchains::
> > + Enable merging deferred user callchains if available. This is the
> > + default behavior. If you want to see separate CALLCHAIN_DEFERRED
> > + records for some reason, use --no-merge-callchains explicitly.
> > +
> > :GMEXAMPLECMD: script
> > :GMEXAMPLESUBCMD:
> > include::guest-files.txt[]
> > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> > index bd9245d2dd41aa48..51d2721b6db9dccb 100644
> > --- a/tools/perf/builtin-inject.c
> > +++ b/tools/perf/builtin-inject.c
> > @@ -2527,6 +2527,7 @@ int cmd_inject(int argc, const char **argv)
> > inject.tool.auxtrace = perf_event__repipe_auxtrace;
> > inject.tool.bpf_metadata = perf_event__repipe_op2_synth;
> > inject.tool.dont_split_sample_group = true;
> > + inject.tool.merge_deferred_callchains = false;
> > inject.session = __perf_session__new(&data, &inject.tool,
> > /*trace_event_repipe=*/inject.output.is_pipe,
> > /*host_env=*/NULL);
> > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> > index 2bc269f5fcef8023..add6b1c2aaf04270 100644
> > --- a/tools/perf/builtin-report.c
> > +++ b/tools/perf/builtin-report.c
> > @@ -1614,6 +1614,7 @@ int cmd_report(int argc, const char **argv)
> > report.tool.event_update = perf_event__process_event_update;
> > report.tool.feature = process_feature_event;
> > report.tool.ordering_requires_timestamps = true;
> > + report.tool.merge_deferred_callchains = !dump_trace;
> >
> > session = perf_session__new(&data, &report.tool);
> > if (IS_ERR(session)) {
> > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> > index 85b42205a71b3993..62e43d3c5ad731a0 100644
> > --- a/tools/perf/builtin-script.c
> > +++ b/tools/perf/builtin-script.c
> > @@ -4009,6 +4009,7 @@ int cmd_script(int argc, const char **argv)
> > bool header_only = false;
> > bool script_started = false;
> > bool unsorted_dump = false;
> > + bool merge_deferred_callchains = true;
> > char *rec_script_path = NULL;
> > char *rep_script_path = NULL;
> > struct perf_session *session;
> > @@ -4162,6 +4163,8 @@ int cmd_script(int argc, const char **argv)
> > "Guest code can be found in hypervisor process"),
> > OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
> > "Enable LBR callgraph stitching approach"),
> > + OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains,
> > + "Enable merge deferred user callchains"),
> > OPTS_EVSWITCH(&script.evswitch),
> > OPT_END()
> > };
> > @@ -4418,6 +4421,7 @@ int cmd_script(int argc, const char **argv)
> > script.tool.throttle = process_throttle_event;
> > script.tool.unthrottle = process_throttle_event;
> > script.tool.ordering_requires_timestamps = true;
> > + script.tool.merge_deferred_callchains = merge_deferred_callchains;
> > session = perf_session__new(&data, &script.tool);
> > if (IS_ERR(session))
> > return PTR_ERR(session);
> > diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> > index 2884187ccbbecfdc..71dc5a070065dd2a 100644
> > --- a/tools/perf/util/callchain.c
> > +++ b/tools/perf/util/callchain.c
> > @@ -1838,3 +1838,32 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
> > }
> > return 0;
> > }
> > +
> > +int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
> > + struct perf_sample *sample_callchain)
> > +{
> > + u64 nr_orig = sample_orig->callchain->nr - 1;
> > + u64 nr_deferred = sample_callchain->callchain->nr;
> > + struct ip_callchain *callchain;
> > +
> > + if (sample_orig->callchain->nr < 2) {
> > + sample_orig->deferred_callchain = false;
> > + return -EINVAL;
> > + }
> > +
> > + callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64));
> > + if (callchain == NULL) {
> > + sample_orig->deferred_callchain = false;
> > + return -ENOMEM;
> > + }
> > +
> > + callchain->nr = nr_orig + nr_deferred;
> > + /* copy original including PERF_CONTEXT_USER_DEFERRED (but the cookie) */
> > + memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64));
> > + /* copy deferred user callchains */
> > + memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips,
> > + nr_deferred * sizeof(u64));
> > +
> > + sample_orig->callchain = callchain;
> > + return 0;
> > +}
> > diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> > index d5ae4fbb7ce5fa44..2a52af8c80ace33c 100644
> > --- a/tools/perf/util/callchain.h
> > +++ b/tools/perf/util/callchain.h
> > @@ -318,4 +318,7 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
> > struct perf_sample *sample, int max_stack,
> > bool symbols, callchain_iter_fn cb, void *data);
> >
> > +int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
> > + struct perf_sample *sample_callchain);
> > +
> > #endif /* __PERF_CALLCHAIN_H */
> > diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> > index e8217efdda5323c6..03674d2cbd015e4f 100644
> > --- a/tools/perf/util/evlist.c
> > +++ b/tools/perf/util/evlist.c
> > @@ -85,6 +85,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
> > evlist->ctl_fd.pos = -1;
> > evlist->nr_br_cntr = -1;
> > metricgroup__rblist_init(&evlist->metric_events);
> > + INIT_LIST_HEAD(&evlist->deferred_samples);
> > }
> >
> > struct evlist *evlist__new(void)
> > diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> > index 5e71e3dc60423079..911834ae7c2a6f76 100644
> > --- a/tools/perf/util/evlist.h
> > +++ b/tools/perf/util/evlist.h
> > @@ -92,6 +92,8 @@ struct evlist {
> > * of struct metric_expr.
> > */
> > struct rblist metric_events;
> > + /* samples with deferred_callchain would wait here. */
> > + struct list_head deferred_samples;
> > };
> >
> > struct evsel_str_handler {
> > diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> > index 361e15c1f26a96d0..2e777fd1bcf6707b 100644
> > --- a/tools/perf/util/session.c
> > +++ b/tools/perf/util/session.c
> > @@ -1285,6 +1285,60 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool
> > per_thread);
> > }
> >
> > +struct deferred_event {
> > + struct list_head list;
> > + union perf_event *event;
>
> What's going on with the memory here? Doesn't the event need to be a
> copy to avoid the memory going away in between processing events? It
> is definitely worth commenting. Note, I don't see a copy being made
> for the event here in machines__deliver_event.
It doesn't allocate memory for the event. It rather points to the ring
buffer. But there are places we need to copy. Will update!
Thanks,
Namhyung
>
> > +};
> > +
> > +static int evlist__deliver_deferred_samples(struct evlist *evlist,
> > + const struct perf_tool *tool,
> > + union perf_event *event,
> > + struct perf_sample *sample,
> > + struct machine *machine)
> > +{
> > + struct deferred_event *de, *tmp;
> > + struct evsel *evsel;
> > + int ret = 0;
> > +
> > + if (!tool->merge_deferred_callchains) {
> > + evsel = evlist__id2evsel(evlist, sample->id);
> > + return tool->callchain_deferred(tool, event, sample,
> > + evsel, machine);
> > + }
> > +
> > + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) {
> > + struct perf_sample orig_sample;
> > +
> > + ret = evlist__parse_sample(evlist, de->event, &orig_sample);
> > + if (ret < 0) {
> > + pr_err("failed to parse original sample\n");
> > + break;
> > + }
> > +
> > + if (sample->tid != orig_sample.tid)
> > + continue;
> > +
> > + if (event->callchain_deferred.cookie == orig_sample.deferred_cookie)
> > + sample__merge_deferred_callchain(&orig_sample, sample);
> > + else
> > + orig_sample.deferred_callchain = false;
> > +
> > + evsel = evlist__id2evsel(evlist, orig_sample.id);
> > + ret = evlist__deliver_sample(evlist, tool, de->event,
> > + &orig_sample, evsel, machine);
> > +
> > + if (orig_sample.deferred_callchain)
> > + free(orig_sample.callchain);
> > +
> > + list_del(&de->list);
> > + free(de);
> > +
> > + if (ret)
> > + break;
> > + }
> > + return ret;
> > +}
> > +
> > static int machines__deliver_event(struct machines *machines,
> > struct evlist *evlist,
> > union perf_event *event,
> > @@ -1313,6 +1367,16 @@ static int machines__deliver_event(struct machines *machines,
> > return 0;
> > }
> > dump_sample(evsel, event, sample, perf_env__arch(machine->env));
> > + if (sample->deferred_callchain && tool->merge_deferred_callchains) {
> > + struct deferred_event *de = malloc(sizeof(*de));
> > +
> > + if (de == NULL)
> > + return -ENOMEM;
> > +
> > + de->event = event;
> > + list_add_tail(&de->list, &evlist->deferred_samples);
> > + return 0;
> > + }
> > return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
> > case PERF_RECORD_MMAP:
> > return tool->mmap(tool, event, sample, machine);
> > @@ -1372,7 +1436,8 @@ static int machines__deliver_event(struct machines *machines,
> > return tool->aux_output_hw_id(tool, event, sample, machine);
> > case PERF_RECORD_CALLCHAIN_DEFERRED:
> > dump_deferred_callchain(evsel, event, sample);
> > - return tool->callchain_deferred(tool, event, sample, evsel, machine);
> > + return evlist__deliver_deferred_samples(evlist, tool, event,
> > + sample, machine);
> > default:
> > ++evlist->stats.nr_unknown_events;
> > return -1;
> > diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
> > index f732d33e7f895ed4..c5d3b464b2a433b3 100644
> > --- a/tools/perf/util/tool.c
> > +++ b/tools/perf/util/tool.c
> > @@ -266,6 +266,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
> > tool->cgroup_events = false;
> > tool->no_warn = false;
> > tool->show_feat_hdr = SHOW_FEAT_NO_HEADER;
> > + tool->merge_deferred_callchains = true;
> >
> > tool->sample = process_event_sample_stub;
> > tool->mmap = process_event_stub;
> > diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
> > index 9b9f0a8cbf3de4b5..e96b69d25a5b737d 100644
> > --- a/tools/perf/util/tool.h
> > +++ b/tools/perf/util/tool.h
> > @@ -90,6 +90,7 @@ struct perf_tool {
> > bool cgroup_events;
> > bool no_warn;
> > bool dont_split_sample_group;
> > + bool merge_deferred_callchains;
> > enum show_feature_header show_feat_hdr;
> > };
> >
> > --
> > 2.52.0.rc1.455.g30608eb744-goog
> >
^ permalink raw reply [flat|nested] 13+ messages in thread