* [PATCH v2 1/2] perf inject: Add --convert-callchain option
@ 2026-01-10 1:17 Namhyung Kim
2026-01-10 1:17 ` [PATCH v2 2/2] perf test: Add DWARF callchain conversion test Namhyung Kim
` (3 more replies)
0 siblings, 4 replies; 9+ messages in thread
From: Namhyung Kim @ 2026-01-10 1:17 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, James Clark
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users
There are applications not built with frame pointers, so DWARF is needed
to get the stack traces. So `perf record --call-graph dwarf` saves the
stack and register data for each sample to get the stacktrace offline.
But sometimes those data may have sensitive information and we don't
want to keep them in the file.
This perf inject --convert-callchain option parses the callchains and
discards the stack and register data after that. This will save storage space
and processing time for the new data file. Of course, users should
remove the original data file. :)
The down side is that it cannot handle inlined callchain entries as they
all have the same IPs. Maybe we can add an option to perf report to
look up inlined functions using DWARF - IIUC it won't require stack and
register data.
This is an example.
$ perf record --call-graph dwarf -- perf test -w noploop
$ perf report --stdio --no-children --percent-limit=0 > output-prev
$ perf inject -i perf.data --convert-callchain -o perf.data.out
$ perf report --stdio --no-children --percent-limit=0 -i perf.data.out > output-next
$ diff -u output-prev output-next
...
0.23% perf ld-linux-x86-64.so.2 [.] _dl_relocate_object_no_relro
|
- ---elf_dynamic_do_Rela (inlined)
- _dl_relocate_object_no_relro
+ ---_dl_relocate_object_no_relro
_dl_relocate_object
dl_main
_dl_sysdep_start
- _dl_start_final (inlined)
_dl_start
_start
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
v2 changes)
* Use machine__kernel_ip() instead (James)
* Check sample types for DWARF callchains (James)
* Fix build errors (James)
* Add a new test (Ian)
tools/perf/Documentation/perf-inject.txt | 5 +
tools/perf/builtin-inject.c | 151 +++++++++++++++++++++++
2 files changed, 156 insertions(+)
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index c972032f4ca0d248..95dfdf39666efe89 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -109,6 +109,11 @@ include::itrace.txt[]
should be used, and also --buildid-all and --switch-events may be
useful.
+--convert-callchain::
+ Parse DWARF callchains and convert them to usual callchains. This also
+ discards stack and register data from the samples. This will lose
+ inlined callchain entries.
+
:GMEXAMPLECMD: inject
:GMEXAMPLESUBCMD:
include::guestmount.txt[]
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 6080afec537d2178..02bd388d602fdd75 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -122,6 +122,7 @@ struct perf_inject {
bool in_place_update;
bool in_place_update_dry_run;
bool copy_kcore_dir;
+ bool convert_callchain;
const char *input_name;
struct perf_data output;
u64 bytes_written;
@@ -133,6 +134,7 @@ struct perf_inject {
struct guest_session guest_session;
struct strlist *known_build_ids;
const struct evsel *mmap_evsel;
+ struct ip_callchain *raw_callchain;
};
struct event_entry {
@@ -383,6 +385,89 @@ static int perf_event__repipe_sample(const struct perf_tool *tool,
return perf_event__repipe_synth(tool, event);
}
+static int perf_event__convert_sample_callchain(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+ union perf_event *event_copy = (void *)inject->event_copy;
+ struct callchain_cursor_node *node;
+ struct thread *thread;
+ u64 sample_type = evsel->core.attr.sample_type;
+ u32 sample_size = event->header.size;
+ u64 i, k;
+ int ret;
+
+ if (event_copy == NULL) {
+ inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!inject->event_copy)
+ return -ENOMEM;
+
+ event_copy = (void *)inject->event_copy;
+ }
+
+ if (cursor == NULL)
+ return perf_event__repipe_synth(tool, event);
+
+ callchain_cursor_reset(cursor);
+
+ thread = machine__find_thread(machine, -1, sample->pid);
+ if (thread == NULL)
+ return perf_event__repipe_synth(tool, event);
+
+ /* this will parse DWARF using stack and register data */
+ ret = thread__resolve_callchain(thread, cursor, evsel, sample,
+ /*parent=*/NULL, /*root_al=*/NULL,
+ PERF_MAX_STACK_DEPTH);
+ thread__put(thread);
+ if (ret != 0)
+ return perf_event__repipe_synth(tool, event);
+
+ /* copy kernel callchain and context entries */
+ for (i = 0; i < sample->callchain->nr; i++) {
+ inject->raw_callchain->ips[i] = sample->callchain->ips[i];
+ if (sample->callchain->ips[i] == PERF_CONTEXT_USER) {
+ i++;
+ break;
+ }
+ }
+ if (i == 0 || inject->raw_callchain->ips[i - 1] != PERF_CONTEXT_USER)
+ inject->raw_callchain->ips[i++] = PERF_CONTEXT_USER;
+
+ node = cursor->first;
+ for (k = 0; k < cursor->nr && i < PERF_MAX_STACK_DEPTH; k++) {
+ if (machine__kernel_ip(machine, node->ip))
+ /* kernel IPs were added already */;
+ else if (node->ms.sym && node->ms.sym->inlined)
+ /* we can't handle inlined callchains */;
+ else
+ inject->raw_callchain->ips[i++] = node->ip;
+
+ node = node->next;
+ }
+
+ inject->raw_callchain->nr = i;
+ sample->callchain = inject->raw_callchain;
+
+ memcpy(event_copy, event, sizeof(event->header));
+
+ /* adjust sample size for stack and regs */
+ sample_size -= sample->user_stack.size;
+ sample_size -= (hweight64(evsel->core.attr.sample_regs_user) + 1) * sizeof(u64);
+ sample_size += (sample->callchain->nr + 1) * sizeof(u64);
+ event_copy->header.size = sample_size;
+
+ /* remove sample_type {STACK,REGS}_USER for synthesize */
+ sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER);
+
+ perf_event__synthesize_sample(event_copy, sample_type,
+ evsel->core.attr.read_format, sample);
+ return perf_event__repipe_synth(tool, event_copy);
+}
+
static struct dso *findnew_dso(int pid, int tid, const char *filename,
const struct dso_id *id, struct machine *machine)
{
@@ -2270,6 +2355,15 @@ static int __cmd_inject(struct perf_inject *inject)
/* Allow space in the header for guest attributes */
output_data_offset += gs->session->header.data_offset;
output_data_offset = roundup(output_data_offset, 4096);
+ } else if (inject->convert_callchain) {
+ inject->tool.sample = perf_event__convert_sample_callchain;
+ inject->tool.fork = perf_event__repipe_fork;
+ inject->tool.comm = perf_event__repipe_comm;
+ inject->tool.exit = perf_event__repipe_exit;
+ inject->tool.mmap = perf_event__repipe_mmap;
+ inject->tool.mmap2 = perf_event__repipe_mmap2;
+ inject->tool.ordered_events = true;
+ inject->tool.ordering_requires_timestamps = true;
}
if (!inject->itrace_synth_opts.set)
@@ -2322,6 +2416,23 @@ static int __cmd_inject(struct perf_inject *inject)
perf_header__set_feat(&session->header,
HEADER_BRANCH_STACK);
}
+
+ /*
+ * The converted data file won't have stack and registers.
+ * Update the perf_event_attr to remove them before writing.
+ */
+ if (inject->convert_callchain) {
+ struct evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ evsel__reset_sample_bit(evsel, REGS_USER);
+ evsel__reset_sample_bit(evsel, STACK_USER);
+ evsel->core.attr.sample_regs_user = 0;
+ evsel->core.attr.sample_stack_user = 0;
+ evsel->core.attr.exclude_callchain_user = 0;
+ }
+ }
+
session->header.data_offset = output_data_offset;
session->header.data_size = inject->bytes_written;
perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
@@ -2346,6 +2457,18 @@ static int __cmd_inject(struct perf_inject *inject)
return ret;
}
+static bool evsel__has_dwarf_callchain(struct evsel *evsel)
+{
+ struct perf_event_attr *attr = &evsel->core.attr;
+ const u64 dwarf_callchain_flags =
+ PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN;
+
+ if (!attr->exclude_callchain_user)
+ return false;
+
+ return (attr->sample_type & dwarf_callchain_flags) == dwarf_callchain_flags;
+}
+
int cmd_inject(int argc, const char **argv)
{
struct perf_inject inject = {
@@ -2414,6 +2537,8 @@ int cmd_inject(int argc, const char **argv)
OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
"guest mount directory under which every guest os"
" instance has a subdir"),
+ OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain,
+ "Generate callchains using DWARF and drop register/stack data"),
OPT_END()
};
const char * const inject_usage[] = {
@@ -2429,6 +2554,9 @@ int cmd_inject(int argc, const char **argv)
#ifndef HAVE_JITDUMP
set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
+#endif
+#ifndef HAVE_LIBDW_SUPPORT
+ set_option_nobuild(options, 0, "convert-callchain", "NO_LIBDW=1", true);
#endif
argc = parse_options(argc, argv, options, inject_usage, 0);
@@ -2588,6 +2716,28 @@ int cmd_inject(int argc, const char **argv)
}
}
+ if (inject.convert_callchain) {
+ struct evsel *evsel;
+
+ if (inject.output.is_pipe || inject.session->data->is_pipe) {
+ pr_err("--convert-callchain cannot work with pipe\n");
+ goto out_delete;
+ }
+
+ evlist__for_each_entry(inject.session->evlist, evsel) {
+ if (!evsel__has_dwarf_callchain(evsel)) {
+ pr_err("--convert-callchain requires DWARF call graph.\n");
+ goto out_delete;
+ }
+ }
+
+ inject.raw_callchain = calloc(PERF_MAX_STACK_DEPTH, sizeof(u64));
+ if (inject.raw_callchain == NULL) {
+ pr_err("callchain allocation failed\n");
+ goto out_delete;
+ }
+ }
+
#ifdef HAVE_JITDUMP
if (inject.jit_mode) {
inject.tool.mmap2 = perf_event__repipe_mmap2;
@@ -2618,5 +2768,6 @@ int cmd_inject(int argc, const char **argv)
free(inject.itrace_synth_opts.vm_tm_corr_args);
free(inject.event_copy);
free(inject.guest_session.ev.event_buf);
+ free(inject.raw_callchain);
return ret;
}
--
2.52.0.457.g6b5491de43-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v2 2/2] perf test: Add DWARF callchain conversion test
2026-01-10 1:17 [PATCH v2 1/2] perf inject: Add --convert-callchain option Namhyung Kim
@ 2026-01-10 1:17 ` Namhyung Kim
2026-01-12 23:15 ` Ian Rogers
2026-01-12 23:15 ` [PATCH v2 1/2] perf inject: Add --convert-callchain option Ian Rogers
` (2 subsequent siblings)
3 siblings, 1 reply; 9+ messages in thread
From: Namhyung Kim @ 2026-01-10 1:17 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, James Clark
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users
$ ./perf test -vv "DWARF callchain"
87: perf inject to convert DWARF callchains to regular ones:
--- start ---
test child forked, pid 1560328
recording data with DWARF callchain
[ perf record: Woken up 4 times to write data ]
[ perf record: Captured and wrote 0.908 MB /tmp/perf-test.nM3WoW (105 samples) ]
convert DWARF callchain using perf inject
compare the both result excluding inlined functions
---- end(0) ----
87: perf inject to convert DWARF callchains to regular ones : Ok
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/tests/shell/inject-callchain.sh | 45 ++++++++++++++++++++++
1 file changed, 45 insertions(+)
create mode 100755 tools/perf/tests/shell/inject-callchain.sh
diff --git a/tools/perf/tests/shell/inject-callchain.sh b/tools/perf/tests/shell/inject-callchain.sh
new file mode 100755
index 0000000000000000..a1cba8010f954d21
--- /dev/null
+++ b/tools/perf/tests/shell/inject-callchain.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# perf inject to convert DWARF callchains to regular ones
+# SPDX-License-Identifier: GPL-2.0
+
+if ! perf check feature -q dwarf; then
+ echo "SKIP: DWARF support is not available"
+ exit 2
+fi
+
+TESTDATA=$(mktemp /tmp/perf-test.XXXXXX)
+
+err=0
+
+cleanup()
+{
+ trap - EXIT TERM INT
+ rm -f ${TESTDATA}*
+}
+
+trap_cleanup()
+{
+ cleanup
+ exit 1
+}
+
+trap trap_cleanup EXIT TERM INT
+
+echo "recording data with DWARF callchain"
+perf record -F 999 --call-graph dwarf -o "${TESTDATA}" -- perf test -w noploop
+
+echo "convert DWARF callchain using perf inject"
+perf inject -i "${TESTDATA}" --convert-callchain -o "${TESTDATA}.new"
+
+perf report -i "${TESTDATA}" --no-children -q --percent-limit=1 > ${TESTDATA}.out
+perf report -i "${TESTDATA}.new" --no-children -q --percent-limit=1 > ${TESTDATA}.new.out
+
+echo "compare the both result excluding inlined functions"
+if diff -u "${TESTDATA}.out" "${TESTDATA}.new.out" | grep "^- " | grep -qv "(inlined)"; then
+ echo "Found some differences"
+ diff -u "${TESTDATA}.out" "${TESTDATA}.new.out"
+ err=1
+fi
+
+cleanup
+exit $err
--
2.52.0.457.g6b5491de43-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v2 1/2] perf inject: Add --convert-callchain option
2026-01-10 1:17 [PATCH v2 1/2] perf inject: Add --convert-callchain option Namhyung Kim
2026-01-10 1:17 ` [PATCH v2 2/2] perf test: Add DWARF callchain conversion test Namhyung Kim
@ 2026-01-12 23:15 ` Ian Rogers
2026-01-13 19:38 ` Arnaldo Carvalho de Melo
2026-01-13 21:35 ` Arnaldo Carvalho de Melo
3 siblings, 0 replies; 9+ messages in thread
From: Ian Rogers @ 2026-01-12 23:15 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users
On Fri, Jan 9, 2026 at 5:17 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> There are applications not built with frame pointers, so DWARF is needed
> to get the stack traces. So `perf record --call-graph dwarf` saves the
> stack and register data for each sample to get the stacktrace offline.
> But sometimes those data may have sensitive information and we don't
> want to keep them in the file.
>
> This perf inject --convert-callchain option parses the callchains and
> discard the stack and register after that. This will save storage space
> and processing time for the new data file. Of course, users should
> remove the original data file. :)
>
> The down side is that it cannot handle inlined callchain entries as they
> all have the same IPs. Maybe we can add an option to perf report to
> look up inlined functions using DWARF - IIUC it won't requires stack and
> register data.
>
> This is an example.
>
> $ perf record --call-graph dwarf -- perf test -w noploop
>
> $ perf report --stdio --no-children --percent-limit=0 > output-prev
>
> $ perf inject -i perf.data --convert-callchain -o perf.data.out
>
> $ perf report --stdio --no-children --percent-limit=0 -i perf.data.out > output-next
>
> $ diff -u output-prev output-next
> ...
> 0.23% perf ld-linux-x86-64.so.2 [.] _dl_relocate_object_no_relro
> |
> - ---elf_dynamic_do_Rela (inlined)
> - _dl_relocate_object_no_relro
> + ---_dl_relocate_object_no_relro
> _dl_relocate_object
> dl_main
> _dl_sysdep_start
> - _dl_start_final (inlined)
> _dl_start
> _start
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
As I mentioned in v1, I think things can be better with a delegate
tool, but this is definitely a cool new feature.
Reviewed-by: Ian Rogers <irogers@google.com>
Thanks,
Ian
> ---
> v2 changes)
> * Use machine__kernel_ip() instead (James)
> * Check sample types for DWARF callchains (James)
> * Fix build errors (James)
> * Add a new test (Ian)
>
> tools/perf/Documentation/perf-inject.txt | 5 +
> tools/perf/builtin-inject.c | 151 +++++++++++++++++++++++
> 2 files changed, 156 insertions(+)
>
> diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
> index c972032f4ca0d248..95dfdf39666efe89 100644
> --- a/tools/perf/Documentation/perf-inject.txt
> +++ b/tools/perf/Documentation/perf-inject.txt
> @@ -109,6 +109,11 @@ include::itrace.txt[]
> should be used, and also --buildid-all and --switch-events may be
> useful.
>
> +--convert-callchain::
> + Parse DWARF callchains and convert them to usual callchains. This also
> + discards stack and register data from the samples. This will lose
> + inlined callchain entries.
> +
> :GMEXAMPLECMD: inject
> :GMEXAMPLESUBCMD:
> include::guestmount.txt[]
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index 6080afec537d2178..02bd388d602fdd75 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -122,6 +122,7 @@ struct perf_inject {
> bool in_place_update;
> bool in_place_update_dry_run;
> bool copy_kcore_dir;
> + bool convert_callchain;
> const char *input_name;
> struct perf_data output;
> u64 bytes_written;
> @@ -133,6 +134,7 @@ struct perf_inject {
> struct guest_session guest_session;
> struct strlist *known_build_ids;
> const struct evsel *mmap_evsel;
> + struct ip_callchain *raw_callchain;
> };
>
> struct event_entry {
> @@ -383,6 +385,89 @@ static int perf_event__repipe_sample(const struct perf_tool *tool,
> return perf_event__repipe_synth(tool, event);
> }
>
> +static int perf_event__convert_sample_callchain(const struct perf_tool *tool,
> + union perf_event *event,
> + struct perf_sample *sample,
> + struct evsel *evsel,
> + struct machine *machine)
> +{
> + struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
> + struct callchain_cursor *cursor = get_tls_callchain_cursor();
> + union perf_event *event_copy = (void *)inject->event_copy;
> + struct callchain_cursor_node *node;
> + struct thread *thread;
> + u64 sample_type = evsel->core.attr.sample_type;
> + u32 sample_size = event->header.size;
> + u64 i, k;
> + int ret;
> +
> + if (event_copy == NULL) {
> + inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
> + if (!inject->event_copy)
> + return -ENOMEM;
> +
> + event_copy = (void *)inject->event_copy;
> + }
> +
> + if (cursor == NULL)
> + return perf_event__repipe_synth(tool, event);
> +
> + callchain_cursor_reset(cursor);
> +
> + thread = machine__find_thread(machine, -1, sample->pid);
> + if (thread == NULL)
> + return perf_event__repipe_synth(tool, event);
> +
> + /* this will parse DWARF using stack and register data */
> + ret = thread__resolve_callchain(thread, cursor, evsel, sample,
> + /*parent=*/NULL, /*root_al=*/NULL,
> + PERF_MAX_STACK_DEPTH);
> + thread__put(thread);
> + if (ret != 0)
> + return perf_event__repipe_synth(tool, event);
> +
> + /* copy kernel callchain and context entries */
> + for (i = 0; i < sample->callchain->nr; i++) {
> + inject->raw_callchain->ips[i] = sample->callchain->ips[i];
> + if (sample->callchain->ips[i] == PERF_CONTEXT_USER) {
> + i++;
> + break;
> + }
> + }
> + if (i == 0 || inject->raw_callchain->ips[i - 1] != PERF_CONTEXT_USER)
> + inject->raw_callchain->ips[i++] = PERF_CONTEXT_USER;
> +
> + node = cursor->first;
> + for (k = 0; k < cursor->nr && i < PERF_MAX_STACK_DEPTH; k++) {
> + if (machine__kernel_ip(machine, node->ip))
> + /* kernel IPs were added already */;
> + else if (node->ms.sym && node->ms.sym->inlined)
> + /* we can't handle inlined callchains */;
> + else
> + inject->raw_callchain->ips[i++] = node->ip;
> +
> + node = node->next;
> + }
> +
> + inject->raw_callchain->nr = i;
> + sample->callchain = inject->raw_callchain;
> +
> + memcpy(event_copy, event, sizeof(event->header));
> +
> + /* adjust sample size for stack and regs */
> + sample_size -= sample->user_stack.size;
> + sample_size -= (hweight64(evsel->core.attr.sample_regs_user) + 1) * sizeof(u64);
> + sample_size += (sample->callchain->nr + 1) * sizeof(u64);
> + event_copy->header.size = sample_size;
> +
> + /* remove sample_type {STACK,REGS}_USER for synthesize */
> + sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER);
> +
> + perf_event__synthesize_sample(event_copy, sample_type,
> + evsel->core.attr.read_format, sample);
> + return perf_event__repipe_synth(tool, event_copy);
> +}
> +
> static struct dso *findnew_dso(int pid, int tid, const char *filename,
> const struct dso_id *id, struct machine *machine)
> {
> @@ -2270,6 +2355,15 @@ static int __cmd_inject(struct perf_inject *inject)
> /* Allow space in the header for guest attributes */
> output_data_offset += gs->session->header.data_offset;
> output_data_offset = roundup(output_data_offset, 4096);
> + } else if (inject->convert_callchain) {
> + inject->tool.sample = perf_event__convert_sample_callchain;
> + inject->tool.fork = perf_event__repipe_fork;
> + inject->tool.comm = perf_event__repipe_comm;
> + inject->tool.exit = perf_event__repipe_exit;
> + inject->tool.mmap = perf_event__repipe_mmap;
> + inject->tool.mmap2 = perf_event__repipe_mmap2;
> + inject->tool.ordered_events = true;
> + inject->tool.ordering_requires_timestamps = true;
> }
>
> if (!inject->itrace_synth_opts.set)
> @@ -2322,6 +2416,23 @@ static int __cmd_inject(struct perf_inject *inject)
> perf_header__set_feat(&session->header,
> HEADER_BRANCH_STACK);
> }
> +
> + /*
> + * The converted data file won't have stack and registers.
> + * Update the perf_event_attr to remove them before writing.
> + */
> + if (inject->convert_callchain) {
> + struct evsel *evsel;
> +
> + evlist__for_each_entry(session->evlist, evsel) {
> + evsel__reset_sample_bit(evsel, REGS_USER);
> + evsel__reset_sample_bit(evsel, STACK_USER);
> + evsel->core.attr.sample_regs_user = 0;
> + evsel->core.attr.sample_stack_user = 0;
> + evsel->core.attr.exclude_callchain_user = 0;
> + }
> + }
> +
> session->header.data_offset = output_data_offset;
> session->header.data_size = inject->bytes_written;
> perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
> @@ -2346,6 +2457,18 @@ static int __cmd_inject(struct perf_inject *inject)
> return ret;
> }
>
> +static bool evsel__has_dwarf_callchain(struct evsel *evsel)
> +{
> + struct perf_event_attr *attr = &evsel->core.attr;
> + const u64 dwarf_callchain_flags =
> + PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN;
> +
> + if (!attr->exclude_callchain_user)
> + return false;
> +
> + return (attr->sample_type & dwarf_callchain_flags) == dwarf_callchain_flags;
> +}
> +
> int cmd_inject(int argc, const char **argv)
> {
> struct perf_inject inject = {
> @@ -2414,6 +2537,8 @@ int cmd_inject(int argc, const char **argv)
> OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
> "guest mount directory under which every guest os"
> " instance has a subdir"),
> + OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain,
> + "Generate callchains using DWARF and drop register/stack data"),
> OPT_END()
> };
> const char * const inject_usage[] = {
> @@ -2429,6 +2554,9 @@ int cmd_inject(int argc, const char **argv)
>
> #ifndef HAVE_JITDUMP
> set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
> +#endif
> +#ifndef HAVE_LIBDW_SUPPORT
> + set_option_nobuild(options, 0, "convert-callchain", "NO_LIBDW=1", true);
> #endif
> argc = parse_options(argc, argv, options, inject_usage, 0);
>
> @@ -2588,6 +2716,28 @@ int cmd_inject(int argc, const char **argv)
> }
> }
>
> + if (inject.convert_callchain) {
> + struct evsel *evsel;
> +
> + if (inject.output.is_pipe || inject.session->data->is_pipe) {
> + pr_err("--convert-callchain cannot work with pipe\n");
> + goto out_delete;
> + }
> +
> + evlist__for_each_entry(inject.session->evlist, evsel) {
> + if (!evsel__has_dwarf_callchain(evsel)) {
> + pr_err("--convert-callchain requires DWARF call graph.\n");
> + goto out_delete;
> + }
> + }
> +
> + inject.raw_callchain = calloc(PERF_MAX_STACK_DEPTH, sizeof(u64));
> + if (inject.raw_callchain == NULL) {
> + pr_err("callchain allocation failed\n");
> + goto out_delete;
> + }
> + }
> +
> #ifdef HAVE_JITDUMP
> if (inject.jit_mode) {
> inject.tool.mmap2 = perf_event__repipe_mmap2;
> @@ -2618,5 +2768,6 @@ int cmd_inject(int argc, const char **argv)
> free(inject.itrace_synth_opts.vm_tm_corr_args);
> free(inject.event_copy);
> free(inject.guest_session.ev.event_buf);
> + free(inject.raw_callchain);
> return ret;
> }
> --
> 2.52.0.457.g6b5491de43-goog
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/2] perf test: Add DWARF callchain conversion test
2026-01-10 1:17 ` [PATCH v2 2/2] perf test: Add DWARF callchain conversion test Namhyung Kim
@ 2026-01-12 23:15 ` Ian Rogers
0 siblings, 0 replies; 9+ messages in thread
From: Ian Rogers @ 2026-01-12 23:15 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, James Clark, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users
On Fri, Jan 9, 2026 at 5:17 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> $ ./perf test -vv "DWARF callchain"
> 87: perf inject to convert DWARF callchains to regular ones:
> --- start ---
> test child forked, pid 1560328
> recording data with DWARF callchain
> [ perf record: Woken up 4 times to write data ]
> [ perf record: Captured and wrote 0.908 MB /tmp/perf-test.nM3WoW (105 samples) ]
> convert DWARF callchain using perf inject
> compare the both result excluding inlined functions
> ---- end(0) ----
> 87: perf inject to convert DWARF callchains to regular ones : Ok
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Thanks,
Ian
> ---
> tools/perf/tests/shell/inject-callchain.sh | 45 ++++++++++++++++++++++
> 1 file changed, 45 insertions(+)
> create mode 100755 tools/perf/tests/shell/inject-callchain.sh
>
> diff --git a/tools/perf/tests/shell/inject-callchain.sh b/tools/perf/tests/shell/inject-callchain.sh
> new file mode 100755
> index 0000000000000000..a1cba8010f954d21
> --- /dev/null
> +++ b/tools/perf/tests/shell/inject-callchain.sh
> @@ -0,0 +1,45 @@
> +#!/bin/bash
> +# perf inject to convert DWARF callchains to regular ones
> +# SPDX-License-Identifier: GPL-2.0
> +
> +if ! perf check feature -q dwarf; then
> + echo "SKIP: DWARF support is not available"
> + exit 2
> +fi
> +
> +TESTDATA=$(mktemp /tmp/perf-test.XXXXXX)
> +
> +err=0
> +
> +cleanup()
> +{
> + trap - EXIT TERM INT
> + rm -f ${TESTDATA}*
> +}
> +
> +trap_cleanup()
> +{
> + cleanup
> + exit 1
> +}
> +
> +trap trap_cleanup EXIT TERM INT
> +
> +echo "recording data with DWARF callchain"
> +perf record -F 999 --call-graph dwarf -o "${TESTDATA}" -- perf test -w noploop
> +
> +echo "convert DWARF callchain using perf inject"
> +perf inject -i "${TESTDATA}" --convert-callchain -o "${TESTDATA}.new"
> +
> +perf report -i "${TESTDATA}" --no-children -q --percent-limit=1 > ${TESTDATA}.out
> +perf report -i "${TESTDATA}.new" --no-children -q --percent-limit=1 > ${TESTDATA}.new.out
> +
> +echo "compare the both result excluding inlined functions"
> +if diff -u "${TESTDATA}.out" "${TESTDATA}.new.out" | grep "^- " | grep -qv "(inlined)"; then
> + echo "Found some differences"
> + diff -u "${TESTDATA}.out" "${TESTDATA}.new.out"
> + err=1
> +fi
> +
> +cleanup
> +exit $err
> --
> 2.52.0.457.g6b5491de43-goog
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 1/2] perf inject: Add --convert-callchain option
2026-01-10 1:17 [PATCH v2 1/2] perf inject: Add --convert-callchain option Namhyung Kim
2026-01-10 1:17 ` [PATCH v2 2/2] perf test: Add DWARF callchain conversion test Namhyung Kim
2026-01-12 23:15 ` [PATCH v2 1/2] perf inject: Add --convert-callchain option Ian Rogers
@ 2026-01-13 19:38 ` Arnaldo Carvalho de Melo
2026-01-13 21:15 ` Namhyung Kim
2026-01-13 21:35 ` Arnaldo Carvalho de Melo
3 siblings, 1 reply; 9+ messages in thread
From: Arnaldo Carvalho de Melo @ 2026-01-13 19:38 UTC (permalink / raw)
To: Namhyung Kim
Cc: Ian Rogers, James Clark, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
Ingo Molnar, LKML, linux-perf-users
On Fri, Jan 09, 2026 at 05:17:14PM -0800, Namhyung Kim wrote:
> There are applications not built with frame pointers, so DWARF is needed
> to get the stack traces. So `perf record --call-graph dwarf` saves the
> stack and register data for each sample to get the stacktrace offline.
> But sometimes those data may have sensitive information and we don't
> want to keep them in the file.
>
> This perf inject --convert-callchain option parses the callchains and
> discard the stack and register after that. This will save storage space
> and processing time for the new data file. Of course, users should
> remove the original data file. :)
This made me think for a while before I finally realized this is not a
general purpose "convert callchain" option, but one that specifically
converts to IP-based callchains. It is useful and can probably stay with
this name, or maybe we could use --resolve-callchains, as we use
thread__resolve_callchain() for that anyway?
- Arnaldo
> The down side is that it cannot handle inlined callchain entries as they
> all have the same IPs. Maybe we can add an option to perf report to
> look up inlined functions using DWARF - IIUC it won't requires stack and
> register data.
>
> This is an example.
>
> $ perf record --call-graph dwarf -- perf test -w noploop
>
> $ perf report --stdio --no-children --percent-limit=0 > output-prev
>
> $ perf inject -i perf.data --convert-callchain -o perf.data.out
>
> $ perf report --stdio --no-children --percent-limit=0 -i perf.data.out > output-next
>
> $ diff -u output-prev output-next
> ...
> 0.23% perf ld-linux-x86-64.so.2 [.] _dl_relocate_object_no_relro
> |
> - ---elf_dynamic_do_Rela (inlined)
> - _dl_relocate_object_no_relro
> + ---_dl_relocate_object_no_relro
> _dl_relocate_object
> dl_main
> _dl_sysdep_start
> - _dl_start_final (inlined)
> _dl_start
> _start
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> v2 changes)
> * Use machine__kernel_ip() instead (James)
> * Check sample types for DWARF callchains (James)
> * Fix build errors (James)
> * Add a new test (Ian)
>
> tools/perf/Documentation/perf-inject.txt | 5 +
> tools/perf/builtin-inject.c | 151 +++++++++++++++++++++++
> 2 files changed, 156 insertions(+)
>
> diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
> index c972032f4ca0d248..95dfdf39666efe89 100644
> --- a/tools/perf/Documentation/perf-inject.txt
> +++ b/tools/perf/Documentation/perf-inject.txt
> @@ -109,6 +109,11 @@ include::itrace.txt[]
> should be used, and also --buildid-all and --switch-events may be
> useful.
>
> +--convert-callchain::
> + Parse DWARF callchains and convert them to usual callchains. This also
> + discards stack and register data from the samples. This will lose
> + inlined callchain entries.
> +
> :GMEXAMPLECMD: inject
> :GMEXAMPLESUBCMD:
> include::guestmount.txt[]
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index 6080afec537d2178..02bd388d602fdd75 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -122,6 +122,7 @@ struct perf_inject {
> bool in_place_update;
> bool in_place_update_dry_run;
> bool copy_kcore_dir;
> + bool convert_callchain;
> const char *input_name;
> struct perf_data output;
> u64 bytes_written;
> @@ -133,6 +134,7 @@ struct perf_inject {
> struct guest_session guest_session;
> struct strlist *known_build_ids;
> const struct evsel *mmap_evsel;
> + struct ip_callchain *raw_callchain;
> };
>
> struct event_entry {
> @@ -383,6 +385,89 @@ static int perf_event__repipe_sample(const struct perf_tool *tool,
> return perf_event__repipe_synth(tool, event);
> }
>
> +static int perf_event__convert_sample_callchain(const struct perf_tool *tool,
> + union perf_event *event,
> + struct perf_sample *sample,
> + struct evsel *evsel,
> + struct machine *machine)
> +{
> + struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
> + struct callchain_cursor *cursor = get_tls_callchain_cursor();
> + union perf_event *event_copy = (void *)inject->event_copy;
> + struct callchain_cursor_node *node;
> + struct thread *thread;
> + u64 sample_type = evsel->core.attr.sample_type;
> + u32 sample_size = event->header.size;
> + u64 i, k;
> + int ret;
> +
> + if (event_copy == NULL) {
> + inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
> + if (!inject->event_copy)
> + return -ENOMEM;
> +
> + event_copy = (void *)inject->event_copy;
> + }
> +
> + if (cursor == NULL)
> + return perf_event__repipe_synth(tool, event);
> +
> + callchain_cursor_reset(cursor);
> +
> + thread = machine__find_thread(machine, -1, sample->pid);
> + if (thread == NULL)
> + return perf_event__repipe_synth(tool, event);
> +
> + /* this will parse DWARF using stack and register data */
> + ret = thread__resolve_callchain(thread, cursor, evsel, sample,
> + /*parent=*/NULL, /*root_al=*/NULL,
> + PERF_MAX_STACK_DEPTH);
> + thread__put(thread);
> + if (ret != 0)
> + return perf_event__repipe_synth(tool, event);
> +
> + /* copy kernel callchain and context entries */
> + for (i = 0; i < sample->callchain->nr; i++) {
> + inject->raw_callchain->ips[i] = sample->callchain->ips[i];
> + if (sample->callchain->ips[i] == PERF_CONTEXT_USER) {
> + i++;
> + break;
> + }
> + }
> + if (i == 0 || inject->raw_callchain->ips[i - 1] != PERF_CONTEXT_USER)
> + inject->raw_callchain->ips[i++] = PERF_CONTEXT_USER;
> +
> + node = cursor->first;
> + for (k = 0; k < cursor->nr && i < PERF_MAX_STACK_DEPTH; k++) {
> + if (machine__kernel_ip(machine, node->ip))
> + /* kernel IPs were added already */;
> + else if (node->ms.sym && node->ms.sym->inlined)
> + /* we can't handle inlined callchains */;
> + else
> + inject->raw_callchain->ips[i++] = node->ip;
> +
> + node = node->next;
> + }
> +
> + inject->raw_callchain->nr = i;
> + sample->callchain = inject->raw_callchain;
> +
> + memcpy(event_copy, event, sizeof(event->header));
> +
> + /* adjust sample size for stack and regs */
> + sample_size -= sample->user_stack.size;
> + sample_size -= (hweight64(evsel->core.attr.sample_regs_user) + 1) * sizeof(u64);
> + sample_size += (sample->callchain->nr + 1) * sizeof(u64);
> + event_copy->header.size = sample_size;
> +
> + /* remove sample_type {STACK,REGS}_USER for synthesize */
> + sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER);
> +
> + perf_event__synthesize_sample(event_copy, sample_type,
> + evsel->core.attr.read_format, sample);
> + return perf_event__repipe_synth(tool, event_copy);
> +}
> +
> static struct dso *findnew_dso(int pid, int tid, const char *filename,
> const struct dso_id *id, struct machine *machine)
> {
> @@ -2270,6 +2355,15 @@ static int __cmd_inject(struct perf_inject *inject)
> /* Allow space in the header for guest attributes */
> output_data_offset += gs->session->header.data_offset;
> output_data_offset = roundup(output_data_offset, 4096);
> + } else if (inject->convert_callchain) {
> + inject->tool.sample = perf_event__convert_sample_callchain;
> + inject->tool.fork = perf_event__repipe_fork;
> + inject->tool.comm = perf_event__repipe_comm;
> + inject->tool.exit = perf_event__repipe_exit;
> + inject->tool.mmap = perf_event__repipe_mmap;
> + inject->tool.mmap2 = perf_event__repipe_mmap2;
> + inject->tool.ordered_events = true;
> + inject->tool.ordering_requires_timestamps = true;
> }
>
> if (!inject->itrace_synth_opts.set)
> @@ -2322,6 +2416,23 @@ static int __cmd_inject(struct perf_inject *inject)
> perf_header__set_feat(&session->header,
> HEADER_BRANCH_STACK);
> }
> +
> + /*
> + * The converted data file won't have stack and registers.
> + * Update the perf_event_attr to remove them before writing.
> + */
> + if (inject->convert_callchain) {
> + struct evsel *evsel;
> +
> + evlist__for_each_entry(session->evlist, evsel) {
> + evsel__reset_sample_bit(evsel, REGS_USER);
> + evsel__reset_sample_bit(evsel, STACK_USER);
> + evsel->core.attr.sample_regs_user = 0;
> + evsel->core.attr.sample_stack_user = 0;
> + evsel->core.attr.exclude_callchain_user = 0;
> + }
> + }
> +
> session->header.data_offset = output_data_offset;
> session->header.data_size = inject->bytes_written;
> perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
> @@ -2346,6 +2457,18 @@ static int __cmd_inject(struct perf_inject *inject)
> return ret;
> }
>
> +static bool evsel__has_dwarf_callchain(struct evsel *evsel)
> +{
> + struct perf_event_attr *attr = &evsel->core.attr;
> + const u64 dwarf_callchain_flags =
> + PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN;
> +
> + if (!attr->exclude_callchain_user)
> + return false;
> +
> + return (attr->sample_type & dwarf_callchain_flags) == dwarf_callchain_flags;
> +}
> +
> int cmd_inject(int argc, const char **argv)
> {
> struct perf_inject inject = {
> @@ -2414,6 +2537,8 @@ int cmd_inject(int argc, const char **argv)
> OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
> "guest mount directory under which every guest os"
> " instance has a subdir"),
> + OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain,
> + "Generate callchains using DWARF and drop register/stack data"),
> OPT_END()
> };
> const char * const inject_usage[] = {
> @@ -2429,6 +2554,9 @@ int cmd_inject(int argc, const char **argv)
>
> #ifndef HAVE_JITDUMP
> set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
> +#endif
> +#ifndef HAVE_LIBDW_SUPPORT
> + set_option_nobuild(options, 0, "convert-callchain", "NO_LIBDW=1", true);
> #endif
> argc = parse_options(argc, argv, options, inject_usage, 0);
>
> @@ -2588,6 +2716,28 @@ int cmd_inject(int argc, const char **argv)
> }
> }
>
> + if (inject.convert_callchain) {
> + struct evsel *evsel;
> +
> + if (inject.output.is_pipe || inject.session->data->is_pipe) {
> + pr_err("--convert-callchain cannot work with pipe\n");
> + goto out_delete;
> + }
> +
> + evlist__for_each_entry(inject.session->evlist, evsel) {
> + if (!evsel__has_dwarf_callchain(evsel)) {
> + pr_err("--convert-callchain requires DWARF call graph.\n");
> + goto out_delete;
> + }
> + }
> +
> + inject.raw_callchain = calloc(PERF_MAX_STACK_DEPTH, sizeof(u64));
> + if (inject.raw_callchain == NULL) {
> + pr_err("callchain allocation failed\n");
> + goto out_delete;
> + }
> + }
> +
> #ifdef HAVE_JITDUMP
> if (inject.jit_mode) {
> inject.tool.mmap2 = perf_event__repipe_mmap2;
> @@ -2618,5 +2768,6 @@ int cmd_inject(int argc, const char **argv)
> free(inject.itrace_synth_opts.vm_tm_corr_args);
> free(inject.event_copy);
> free(inject.guest_session.ev.event_buf);
> + free(inject.raw_callchain);
> return ret;
> }
> --
> 2.52.0.457.g6b5491de43-goog
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 1/2] perf inject: Add --convert-callchain option
2026-01-13 19:38 ` Arnaldo Carvalho de Melo
@ 2026-01-13 21:15 ` Namhyung Kim
2026-01-13 21:24 ` Arnaldo Carvalho de Melo
0 siblings, 1 reply; 9+ messages in thread
From: Namhyung Kim @ 2026-01-13 21:15 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Ian Rogers, James Clark, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
Ingo Molnar, LKML, linux-perf-users
Hi Arnaldo,
On Tue, Jan 13, 2026 at 04:38:31PM -0300, Arnaldo Carvalho de Melo wrote:
> On Fri, Jan 09, 2026 at 05:17:14PM -0800, Namhyung Kim wrote:
> > There are applications not built with frame pointers, so DWARF is needed
> > to get the stack traces. So `perf record --call-graph dwarf` saves the
> > stack and register data for each sample to get the stacktrace offline.
> > But sometimes those data may have sensitive information and we don't
> > want to keep them in the file.
> >
> > This perf inject --convert-callchain option parses the callchains and
> > discard the stack and register after that. This will save storage space
> > and processing time for the new data file. Of course, users should
> > remove the original data file. :)
>
> This made me think for a while to finally realize this is not a general
> purpose "convert callchain" option, but one that converts to ip-based
> callchains specificaly, useful and probably can stay with this name, or
> maybe we could use --resolve-callchains as we use
> thread__resolve_callchain() for that anyway?
IIUC resolving a callchain means getting the DSO/map and symbol from raw data.
I admit the name is too general, but I have no better idea. :)
Thanks,
Namhyung
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 1/2] perf inject: Add --convert-callchain option
2026-01-13 21:15 ` Namhyung Kim
@ 2026-01-13 21:24 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 9+ messages in thread
From: Arnaldo Carvalho de Melo @ 2026-01-13 21:24 UTC (permalink / raw)
To: Namhyung Kim
Cc: Ian Rogers, James Clark, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
Ingo Molnar, LKML, linux-perf-users
On Tue, Jan 13, 2026 at 01:15:06PM -0800, Namhyung Kim wrote:
> On Tue, Jan 13, 2026 at 04:38:31PM -0300, Arnaldo Carvalho de Melo wrote:
> > On Fri, Jan 09, 2026 at 05:17:14PM -0800, Namhyung Kim wrote:
> > > There are applications not built with frame pointers, so DWARF is needed
> > > to get the stack traces. So `perf record --call-graph dwarf` saves the
> > > stack and register data for each sample to get the stacktrace offline.
> > > But sometimes those data may have sensitive information and we don't
> > > want to keep them in the file.
> > > This perf inject --convert-callchain option parses the callchains and
> > > discard the stack and register after that. This will save storage space
> > > and processing time for the new data file. Of course, users should
> > > remove the original data file. :)
> > This made me think for a while to finally realize this is not a general
> > purpose "convert callchain" option, but one that converts to ip-based
> > callchains specificaly, useful and probably can stay with this name, or
> > maybe we could use --resolve-callchains as we use
> > thread__resolve_callchain() for that anyway?
> IIUC resolving callchain is to get DSO/map and symbol from raw data.
> But I admit the name is general but have no other idea. :)
Naming is difficult. I think we can keep it as --convert-callchain; if we
ever want to do some other kind of conversion than from DWARF to IP, we can
just pass arguments to this option, with the option without arguments, as it
is now, meaning the most common conversion, the one being done in your patch.
I'll apply the patch as is, thanks.
- Arnaldo
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 1/2] perf inject: Add --convert-callchain option
2026-01-10 1:17 [PATCH v2 1/2] perf inject: Add --convert-callchain option Namhyung Kim
` (2 preceding siblings ...)
2026-01-13 19:38 ` Arnaldo Carvalho de Melo
@ 2026-01-13 21:35 ` Arnaldo Carvalho de Melo
2026-01-13 22:45 ` Namhyung Kim
3 siblings, 1 reply; 9+ messages in thread
From: Arnaldo Carvalho de Melo @ 2026-01-13 21:35 UTC (permalink / raw)
To: Namhyung Kim
Cc: Ian Rogers, James Clark, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
Ingo Molnar, LKML, linux-perf-users
On Fri, Jan 09, 2026 at 05:17:14PM -0800, Namhyung Kim wrote:
> There are applications not built with frame pointers, so DWARF is needed
> to get the stack traces. So `perf record --call-graph dwarf` saves the
> stack and register data for each sample to get the stacktrace offline.
> But sometimes those data may have sensitive information and we don't
> want to keep them in the file.
>
> This perf inject --convert-callchain option parses the callchains and
> discard the stack and register after that. This will save storage space
> and processing time for the new data file. Of course, users should
> remove the original data file. :)
>
> The down side is that it cannot handle inlined callchain entries as they
> all have the same IPs. Maybe we can add an option to perf report to
> look up inlined functions using DWARF - IIUC it won't requires stack and
> register data.
>
> This is an example.
>
> $ perf record --call-graph dwarf -- perf test -w noploop
>
> $ perf report --stdio --no-children --percent-limit=0 > output-prev
>
> $ perf inject -i perf.data --convert-callchain -o perf.data.out
>
> $ perf report --stdio --no-children --percent-limit=0 -i perf.data.out > output-next
>
> $ diff -u output-prev output-next
> ...
> 0.23% perf ld-linux-x86-64.so.2 [.] _dl_relocate_object_no_relro
> |
> - ---elf_dynamic_do_Rela (inlined)
> - _dl_relocate_object_no_relro
> + ---_dl_relocate_object_no_relro
> _dl_relocate_object
> dl_main
> _dl_sysdep_start
> - _dl_start_final (inlined)
> _dl_start
> _start
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> v2 changes)
> * Use machine__kernel_ip() instead (James)
> * Check sample types for DWARF callchains (James)
> * Fix build errors (James)
> * Add a new test (Ian)
>
> tools/perf/Documentation/perf-inject.txt | 5 +
> tools/perf/builtin-inject.c | 151 +++++++++++++++++++++++
> 2 files changed, 156 insertions(+)
>
> diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
> index c972032f4ca0d248..95dfdf39666efe89 100644
> --- a/tools/perf/Documentation/perf-inject.txt
> +++ b/tools/perf/Documentation/perf-inject.txt
> @@ -109,6 +109,11 @@ include::itrace.txt[]
> should be used, and also --buildid-all and --switch-events may be
> useful.
>
> +--convert-callchain::
> + Parse DWARF callchains and convert them to usual callchains. This also
> + discards stack and register data from the samples. This will lose
> + inlined callchain entries.
> +
> :GMEXAMPLECMD: inject
> :GMEXAMPLESUBCMD:
> include::guestmount.txt[]
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index 6080afec537d2178..02bd388d602fdd75 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -122,6 +122,7 @@ struct perf_inject {
> bool in_place_update;
> bool in_place_update_dry_run;
> bool copy_kcore_dir;
> + bool convert_callchain;
> const char *input_name;
> struct perf_data output;
> u64 bytes_written;
> @@ -133,6 +134,7 @@ struct perf_inject {
> struct guest_session guest_session;
> struct strlist *known_build_ids;
> const struct evsel *mmap_evsel;
> + struct ip_callchain *raw_callchain;
> };
>
> struct event_entry {
> @@ -383,6 +385,89 @@ static int perf_event__repipe_sample(const struct perf_tool *tool,
> return perf_event__repipe_synth(tool, event);
> }
>
> +static int perf_event__convert_sample_callchain(const struct perf_tool *tool,
> + union perf_event *event,
> + struct perf_sample *sample,
> + struct evsel *evsel,
> + struct machine *machine)
> +{
> + struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
> + struct callchain_cursor *cursor = get_tls_callchain_cursor();
> + union perf_event *event_copy = (void *)inject->event_copy;
> + struct callchain_cursor_node *node;
> + struct thread *thread;
> + u64 sample_type = evsel->core.attr.sample_type;
> + u32 sample_size = event->header.size;
> + u64 i, k;
> + int ret;
> +
> + if (event_copy == NULL) {
> + inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
> + if (!inject->event_copy)
> + return -ENOMEM;
> +
> + event_copy = (void *)inject->event_copy;
> + }
> +
> + if (cursor == NULL)
> + return perf_event__repipe_synth(tool, event);
So when you don't manage to convert you just repipe the whole event,
with all the stack that you're supposed to discard? Shouldn't we do this
adjustment anyway?
+ /* adjust sample size for stack and regs */
+ sample_size -= sample->user_stack.size;
+ sample_size -= (hweight64(evsel->core.attr.sample_regs_user) + 1) * sizeof(u64);
+ sample_size += (sample->callchain->nr + 1) * sizeof(u64);
+ event_copy->header.size = sample_size;
I.e. we either manage to convert the stack or we throw it away?
- Arnaldo
> +
> + callchain_cursor_reset(cursor);
> +
> + thread = machine__find_thread(machine, -1, sample->pid);
> + if (thread == NULL)
> + return perf_event__repipe_synth(tool, event);
> +
> + /* this will parse DWARF using stack and register data */
> + ret = thread__resolve_callchain(thread, cursor, evsel, sample,
> + /*parent=*/NULL, /*root_al=*/NULL,
> + PERF_MAX_STACK_DEPTH);
> + thread__put(thread);
> + if (ret != 0)
> + return perf_event__repipe_synth(tool, event);
> +
> + /* copy kernel callchain and context entries */
> + for (i = 0; i < sample->callchain->nr; i++) {
> + inject->raw_callchain->ips[i] = sample->callchain->ips[i];
> + if (sample->callchain->ips[i] == PERF_CONTEXT_USER) {
> + i++;
> + break;
> + }
> + }
> + if (i == 0 || inject->raw_callchain->ips[i - 1] != PERF_CONTEXT_USER)
> + inject->raw_callchain->ips[i++] = PERF_CONTEXT_USER;
> +
> + node = cursor->first;
> + for (k = 0; k < cursor->nr && i < PERF_MAX_STACK_DEPTH; k++) {
> + if (machine__kernel_ip(machine, node->ip))
> + /* kernel IPs were added already */;
> + else if (node->ms.sym && node->ms.sym->inlined)
> + /* we can't handle inlined callchains */;
> + else
> + inject->raw_callchain->ips[i++] = node->ip;
> +
> + node = node->next;
> + }
> +
> + inject->raw_callchain->nr = i;
> + sample->callchain = inject->raw_callchain;
> +
> + memcpy(event_copy, event, sizeof(event->header));
> +
> + /* adjust sample size for stack and regs */
> + sample_size -= sample->user_stack.size;
> + sample_size -= (hweight64(evsel->core.attr.sample_regs_user) + 1) * sizeof(u64);
> + sample_size += (sample->callchain->nr + 1) * sizeof(u64);
> + event_copy->header.size = sample_size;
> +
> + /* remove sample_type {STACK,REGS}_USER for synthesize */
> + sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER);
> +
> + perf_event__synthesize_sample(event_copy, sample_type,
> + evsel->core.attr.read_format, sample);
> + return perf_event__repipe_synth(tool, event_copy);
> +}
> +
> static struct dso *findnew_dso(int pid, int tid, const char *filename,
> const struct dso_id *id, struct machine *machine)
> {
> @@ -2270,6 +2355,15 @@ static int __cmd_inject(struct perf_inject *inject)
> /* Allow space in the header for guest attributes */
> output_data_offset += gs->session->header.data_offset;
> output_data_offset = roundup(output_data_offset, 4096);
> + } else if (inject->convert_callchain) {
> + inject->tool.sample = perf_event__convert_sample_callchain;
> + inject->tool.fork = perf_event__repipe_fork;
> + inject->tool.comm = perf_event__repipe_comm;
> + inject->tool.exit = perf_event__repipe_exit;
> + inject->tool.mmap = perf_event__repipe_mmap;
> + inject->tool.mmap2 = perf_event__repipe_mmap2;
> + inject->tool.ordered_events = true;
> + inject->tool.ordering_requires_timestamps = true;
> }
>
> if (!inject->itrace_synth_opts.set)
> @@ -2322,6 +2416,23 @@ static int __cmd_inject(struct perf_inject *inject)
> perf_header__set_feat(&session->header,
> HEADER_BRANCH_STACK);
> }
> +
> + /*
> + * The converted data file won't have stack and registers.
> + * Update the perf_event_attr to remove them before writing.
> + */
> + if (inject->convert_callchain) {
> + struct evsel *evsel;
> +
> + evlist__for_each_entry(session->evlist, evsel) {
> + evsel__reset_sample_bit(evsel, REGS_USER);
> + evsel__reset_sample_bit(evsel, STACK_USER);
> + evsel->core.attr.sample_regs_user = 0;
> + evsel->core.attr.sample_stack_user = 0;
> + evsel->core.attr.exclude_callchain_user = 0;
> + }
> + }
> +
> session->header.data_offset = output_data_offset;
> session->header.data_size = inject->bytes_written;
> perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
> @@ -2346,6 +2457,18 @@ static int __cmd_inject(struct perf_inject *inject)
> return ret;
> }
>
> +static bool evsel__has_dwarf_callchain(struct evsel *evsel)
> +{
> + struct perf_event_attr *attr = &evsel->core.attr;
> + const u64 dwarf_callchain_flags =
> + PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN;
> +
> + if (!attr->exclude_callchain_user)
> + return false;
> +
> + return (attr->sample_type & dwarf_callchain_flags) == dwarf_callchain_flags;
> +}
> +
> int cmd_inject(int argc, const char **argv)
> {
> struct perf_inject inject = {
> @@ -2414,6 +2537,8 @@ int cmd_inject(int argc, const char **argv)
> OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
> "guest mount directory under which every guest os"
> " instance has a subdir"),
> + OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain,
> + "Generate callchains using DWARF and drop register/stack data"),
> OPT_END()
> };
> const char * const inject_usage[] = {
> @@ -2429,6 +2554,9 @@ int cmd_inject(int argc, const char **argv)
>
> #ifndef HAVE_JITDUMP
> set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
> +#endif
> +#ifndef HAVE_LIBDW_SUPPORT
> + set_option_nobuild(options, 0, "convert-callchain", "NO_LIBDW=1", true);
> #endif
> argc = parse_options(argc, argv, options, inject_usage, 0);
>
> @@ -2588,6 +2716,28 @@ int cmd_inject(int argc, const char **argv)
> }
> }
>
> + if (inject.convert_callchain) {
> + struct evsel *evsel;
> +
> + if (inject.output.is_pipe || inject.session->data->is_pipe) {
> + pr_err("--convert-callchain cannot work with pipe\n");
> + goto out_delete;
> + }
> +
> + evlist__for_each_entry(inject.session->evlist, evsel) {
> + if (!evsel__has_dwarf_callchain(evsel)) {
> + pr_err("--convert-callchain requires DWARF call graph.\n");
> + goto out_delete;
> + }
> + }
> +
> + inject.raw_callchain = calloc(PERF_MAX_STACK_DEPTH, sizeof(u64));
> + if (inject.raw_callchain == NULL) {
> + pr_err("callchain allocation failed\n");
> + goto out_delete;
> + }
> + }
> +
> #ifdef HAVE_JITDUMP
> if (inject.jit_mode) {
> inject.tool.mmap2 = perf_event__repipe_mmap2;
> @@ -2618,5 +2768,6 @@ int cmd_inject(int argc, const char **argv)
> free(inject.itrace_synth_opts.vm_tm_corr_args);
> free(inject.event_copy);
> free(inject.guest_session.ev.event_buf);
> + free(inject.raw_callchain);
> return ret;
> }
> --
> 2.52.0.457.g6b5491de43-goog
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 1/2] perf inject: Add --convert-callchain option
2026-01-13 21:35 ` Arnaldo Carvalho de Melo
@ 2026-01-13 22:45 ` Namhyung Kim
0 siblings, 0 replies; 9+ messages in thread
From: Namhyung Kim @ 2026-01-13 22:45 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Ian Rogers, James Clark, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
Ingo Molnar, LKML, linux-perf-users
On Tue, Jan 13, 2026 at 06:35:25PM -0300, Arnaldo Carvalho de Melo wrote:
> On Fri, Jan 09, 2026 at 05:17:14PM -0800, Namhyung Kim wrote:
> > There are applications not built with frame pointers, so DWARF is needed
> > to get the stack traces. So `perf record --call-graph dwarf` saves the
> > stack and register data for each sample to get the stacktrace offline.
> > But sometimes those data may have sensitive information and we don't
> > want to keep them in the file.
> >
> > This perf inject --convert-callchain option parses the callchains and
> > discards the stack and register data after that.  This will save storage
> > space and processing time for the new data file.  Of course, users should
> > remove the original data file. :)
> >
> > The downside is that it cannot handle inlined callchain entries as they
> > all have the same IPs.  Maybe we can add an option to perf report to
> > look up inlined functions using DWARF - IIUC it won't require stack and
> > register data.
> >
> > This is an example.
> >
> > $ perf record --call-graph dwarf -- perf test -w noploop
> >
> > $ perf report --stdio --no-children --percent-limit=0 > output-prev
> >
> > $ perf inject -i perf.data --convert-callchain -o perf.data.out
> >
> > $ perf report --stdio --no-children --percent-limit=0 -i perf.data.out > output-next
> >
> > $ diff -u output-prev output-next
> > ...
> > 0.23% perf ld-linux-x86-64.so.2 [.] _dl_relocate_object_no_relro
> > |
> > - ---elf_dynamic_do_Rela (inlined)
> > - _dl_relocate_object_no_relro
> > + ---_dl_relocate_object_no_relro
> > _dl_relocate_object
> > dl_main
> > _dl_sysdep_start
> > - _dl_start_final (inlined)
> > _dl_start
> > _start
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> > v2 changes)
> > * Use machine__kernel_ip() instead (James)
> > * Check sample types for DWARF callchains (James)
> > * Fix build errors (James)
> > * Add a new test (Ian)
> >
> > tools/perf/Documentation/perf-inject.txt | 5 +
> > tools/perf/builtin-inject.c | 151 +++++++++++++++++++++++
> > 2 files changed, 156 insertions(+)
> >
> > diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
> > index c972032f4ca0d248..95dfdf39666efe89 100644
> > --- a/tools/perf/Documentation/perf-inject.txt
> > +++ b/tools/perf/Documentation/perf-inject.txt
> > @@ -109,6 +109,11 @@ include::itrace.txt[]
> > should be used, and also --buildid-all and --switch-events may be
> > useful.
> >
> > +--convert-callchain::
> > + Parse DWARF callchains and convert them to usual callchains. This also
> > + discards stack and register data from the samples. This will lose
> > + inlined callchain entries.
> > +
> > :GMEXAMPLECMD: inject
> > :GMEXAMPLESUBCMD:
> > include::guestmount.txt[]
> > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> > index 6080afec537d2178..02bd388d602fdd75 100644
> > --- a/tools/perf/builtin-inject.c
> > +++ b/tools/perf/builtin-inject.c
> > @@ -122,6 +122,7 @@ struct perf_inject {
> > bool in_place_update;
> > bool in_place_update_dry_run;
> > bool copy_kcore_dir;
> > + bool convert_callchain;
> > const char *input_name;
> > struct perf_data output;
> > u64 bytes_written;
> > @@ -133,6 +134,7 @@ struct perf_inject {
> > struct guest_session guest_session;
> > struct strlist *known_build_ids;
> > const struct evsel *mmap_evsel;
> > + struct ip_callchain *raw_callchain;
> > };
> >
> > struct event_entry {
> > @@ -383,6 +385,89 @@ static int perf_event__repipe_sample(const struct perf_tool *tool,
> > return perf_event__repipe_synth(tool, event);
> > }
> >
> > +static int perf_event__convert_sample_callchain(const struct perf_tool *tool,
> > + union perf_event *event,
> > + struct perf_sample *sample,
> > + struct evsel *evsel,
> > + struct machine *machine)
> > +{
> > + struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
> > + struct callchain_cursor *cursor = get_tls_callchain_cursor();
> > + union perf_event *event_copy = (void *)inject->event_copy;
> > + struct callchain_cursor_node *node;
> > + struct thread *thread;
> > + u64 sample_type = evsel->core.attr.sample_type;
> > + u32 sample_size = event->header.size;
> > + u64 i, k;
> > + int ret;
> > +
> > + if (event_copy == NULL) {
> > + inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
> > + if (!inject->event_copy)
> > + return -ENOMEM;
> > +
> > + event_copy = (void *)inject->event_copy;
> > + }
> > +
> > + if (cursor == NULL)
> > + return perf_event__repipe_synth(tool, event);
>
> So when you don't manage to convert you just repipe the whole event,
> with all the stack that you're supposed to discard? Shouldn't we do this
> adjustment anyway?
>
> + /* adjust sample size for stack and regs */
> + sample_size -= sample->user_stack.size;
> + sample_size -= (hweight64(evsel->core.attr.sample_regs_user) + 1) * sizeof(u64);
> + sample_size += (sample->callchain->nr + 1) * sizeof(u64);
> + event_copy->header.size = sample_size;
>
> I.e. we either manage to convert the stack or we throw it away?
Oh.. good point.  I think we should just return failure if cursor is NULL,
which is unexpected.  But the thread can be missing, or resolve_callchain may
return failure for some reason.  In those cases I think we should adjust the
samples to keep kernel stacks only.
Will send v3.
Thanks,
Namhyung
>
> - Arnaldo
>
> > +
> > + callchain_cursor_reset(cursor);
> > +
> > + thread = machine__find_thread(machine, -1, sample->pid);
> > + if (thread == NULL)
> > + return perf_event__repipe_synth(tool, event);
> > +
> > + /* this will parse DWARF using stack and register data */
> > + ret = thread__resolve_callchain(thread, cursor, evsel, sample,
> > + /*parent=*/NULL, /*root_al=*/NULL,
> > + PERF_MAX_STACK_DEPTH);
> > + thread__put(thread);
> > + if (ret != 0)
> > + return perf_event__repipe_synth(tool, event);
> > +
> > + /* copy kernel callchain and context entries */
> > + for (i = 0; i < sample->callchain->nr; i++) {
> > + inject->raw_callchain->ips[i] = sample->callchain->ips[i];
> > + if (sample->callchain->ips[i] == PERF_CONTEXT_USER) {
> > + i++;
> > + break;
> > + }
> > + }
> > + if (i == 0 || inject->raw_callchain->ips[i - 1] != PERF_CONTEXT_USER)
> > + inject->raw_callchain->ips[i++] = PERF_CONTEXT_USER;
> > +
> > + node = cursor->first;
> > + for (k = 0; k < cursor->nr && i < PERF_MAX_STACK_DEPTH; k++) {
> > + if (machine__kernel_ip(machine, node->ip))
> > + /* kernel IPs were added already */;
> > + else if (node->ms.sym && node->ms.sym->inlined)
> > + /* we can't handle inlined callchains */;
> > + else
> > + inject->raw_callchain->ips[i++] = node->ip;
> > +
> > + node = node->next;
> > + }
> > +
> > + inject->raw_callchain->nr = i;
> > + sample->callchain = inject->raw_callchain;
> > +
> > + memcpy(event_copy, event, sizeof(event->header));
> > +
> > + /* adjust sample size for stack and regs */
> > + sample_size -= sample->user_stack.size;
> > + sample_size -= (hweight64(evsel->core.attr.sample_regs_user) + 1) * sizeof(u64);
> > + sample_size += (sample->callchain->nr + 1) * sizeof(u64);
> > + event_copy->header.size = sample_size;
> > +
> > + /* remove sample_type {STACK,REGS}_USER for synthesize */
> > + sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER);
> > +
> > + perf_event__synthesize_sample(event_copy, sample_type,
> > + evsel->core.attr.read_format, sample);
> > + return perf_event__repipe_synth(tool, event_copy);
> > +}
> > +
> > static struct dso *findnew_dso(int pid, int tid, const char *filename,
> > const struct dso_id *id, struct machine *machine)
> > {
> > @@ -2270,6 +2355,15 @@ static int __cmd_inject(struct perf_inject *inject)
> > /* Allow space in the header for guest attributes */
> > output_data_offset += gs->session->header.data_offset;
> > output_data_offset = roundup(output_data_offset, 4096);
> > + } else if (inject->convert_callchain) {
> > + inject->tool.sample = perf_event__convert_sample_callchain;
> > + inject->tool.fork = perf_event__repipe_fork;
> > + inject->tool.comm = perf_event__repipe_comm;
> > + inject->tool.exit = perf_event__repipe_exit;
> > + inject->tool.mmap = perf_event__repipe_mmap;
> > + inject->tool.mmap2 = perf_event__repipe_mmap2;
> > + inject->tool.ordered_events = true;
> > + inject->tool.ordering_requires_timestamps = true;
> > }
> >
> > if (!inject->itrace_synth_opts.set)
> > @@ -2322,6 +2416,23 @@ static int __cmd_inject(struct perf_inject *inject)
> > perf_header__set_feat(&session->header,
> > HEADER_BRANCH_STACK);
> > }
> > +
> > + /*
> > + * The converted data file won't have stack and registers.
> > + * Update the perf_event_attr to remove them before writing.
> > + */
> > + if (inject->convert_callchain) {
> > + struct evsel *evsel;
> > +
> > + evlist__for_each_entry(session->evlist, evsel) {
> > + evsel__reset_sample_bit(evsel, REGS_USER);
> > + evsel__reset_sample_bit(evsel, STACK_USER);
> > + evsel->core.attr.sample_regs_user = 0;
> > + evsel->core.attr.sample_stack_user = 0;
> > + evsel->core.attr.exclude_callchain_user = 0;
> > + }
> > + }
> > +
> > session->header.data_offset = output_data_offset;
> > session->header.data_size = inject->bytes_written;
> > perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
> > @@ -2346,6 +2457,18 @@ static int __cmd_inject(struct perf_inject *inject)
> > return ret;
> > }
> >
> > +static bool evsel__has_dwarf_callchain(struct evsel *evsel)
> > +{
> > + struct perf_event_attr *attr = &evsel->core.attr;
> > + const u64 dwarf_callchain_flags =
> > + PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN;
> > +
> > + if (!attr->exclude_callchain_user)
> > + return false;
> > +
> > + return (attr->sample_type & dwarf_callchain_flags) == dwarf_callchain_flags;
> > +}
> > +
> > int cmd_inject(int argc, const char **argv)
> > {
> > struct perf_inject inject = {
> > @@ -2414,6 +2537,8 @@ int cmd_inject(int argc, const char **argv)
> > OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
> > "guest mount directory under which every guest os"
> > " instance has a subdir"),
> > + OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain,
> > + "Generate callchains using DWARF and drop register/stack data"),
> > OPT_END()
> > };
> > const char * const inject_usage[] = {
> > @@ -2429,6 +2554,9 @@ int cmd_inject(int argc, const char **argv)
> >
> > #ifndef HAVE_JITDUMP
> > set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
> > +#endif
> > +#ifndef HAVE_LIBDW_SUPPORT
> > + set_option_nobuild(options, 0, "convert-callchain", "NO_LIBDW=1", true);
> > #endif
> > argc = parse_options(argc, argv, options, inject_usage, 0);
> >
> > @@ -2588,6 +2716,28 @@ int cmd_inject(int argc, const char **argv)
> > }
> > }
> >
> > + if (inject.convert_callchain) {
> > + struct evsel *evsel;
> > +
> > + if (inject.output.is_pipe || inject.session->data->is_pipe) {
> > + pr_err("--convert-callchain cannot work with pipe\n");
> > + goto out_delete;
> > + }
> > +
> > + evlist__for_each_entry(inject.session->evlist, evsel) {
> > + if (!evsel__has_dwarf_callchain(evsel)) {
> > + pr_err("--convert-callchain requires DWARF call graph.\n");
> > + goto out_delete;
> > + }
> > + }
> > +
> > + inject.raw_callchain = calloc(PERF_MAX_STACK_DEPTH, sizeof(u64));
> > + if (inject.raw_callchain == NULL) {
> > + pr_err("callchain allocation failed\n");
> > + goto out_delete;
> > + }
> > + }
> > +
> > #ifdef HAVE_JITDUMP
> > if (inject.jit_mode) {
> > inject.tool.mmap2 = perf_event__repipe_mmap2;
> > @@ -2618,5 +2768,6 @@ int cmd_inject(int argc, const char **argv)
> > free(inject.itrace_synth_opts.vm_tm_corr_args);
> > free(inject.event_copy);
> > free(inject.guest_session.ev.event_buf);
> > + free(inject.raw_callchain);
> > return ret;
> > }
> > --
> > 2.52.0.457.g6b5491de43-goog
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2026-01-13 22:45 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-01-10 1:17 [PATCH v2 1/2] perf inject: Add --convert-callchain option Namhyung Kim
2026-01-10 1:17 ` [PATCH v2 2/2] perf test: Add DWARF callchain conversion test Namhyung Kim
2026-01-12 23:15 ` Ian Rogers
2026-01-12 23:15 ` [PATCH v2 1/2] perf inject: Add --convert-callchain option Ian Rogers
2026-01-13 19:38 ` Arnaldo Carvalho de Melo
2026-01-13 21:15 ` Namhyung Kim
2026-01-13 21:24 ` Arnaldo Carvalho de Melo
2026-01-13 21:35 ` Arnaldo Carvalho de Melo
2026-01-13 22:45 ` Namhyung Kim
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox