* [PATCH v1] perf trace: Speed up startup time by bypassing the creation of kernel maps
@ 2025-04-05 3:12 Howard Chu
2025-04-06 22:54 ` Krzysztof Łopatowski
2025-04-17 8:21 ` Aditya Bodkhe
0 siblings, 2 replies; 3+ messages in thread
From: Howard Chu @ 2025-04-05 3:12 UTC (permalink / raw)
To: peterz
Cc: mingo, acme, namhyung, mark.rutland, alexander.shishkin, jolsa,
irogers, adrian.hunter, kan.liang, james.clark, howardchu95,
charlie, mpetlan, vmolnaro, linux, mhiramat, leo.yan, dima,
linux-perf-users, linux-kernel, krzysztof.m.lopatowski
Kernel maps are only needed when perf trace is run with both the
'--call-graph' and '--kernel-syscall-graph' options, e.g.
sudo ./perf trace --call-graph fp --kernel-syscall-graph -a
In every other case there is no need to create kernel maps, so skip that step.
before:
perf $ time sudo ./perf trace -- sleep 1
? ( ): sleep/3462908 ... [continued]: execve()) = 0
...
1001.459 ( ): sleep/3463166 exit_group() = ?
real 0m2.834s
user 0m0.011s
sys 0m0.012s
after:
perf $ time sudo ./perf trace -- sleep 1
? ( ): sleep/3459948 ... [continued]: execve()) = 0
...
1001.471 ( ): 3459948 exit_group() = ?
real 0m1.810s
user 0m0.008s
sys 0m0.015s
I also want to express my gratitude to Krzysztof Łopatowski—his
profiling of the perf trace [1] inspired this patch. I'm not sure why
the discussion stalled, and I apologize for not being able to answer his
questions. Since his findings significantly improve the startup time of
perf trace, I don't want to take credit for that. So Krzysztof, please
let me know if you'd like a Suggested-by: or anything else.
[1]: https://lore.kernel.org/linux-perf-users/CAOQCU67EsHyw_FsqGbRuityahZTSAtWzffU=hLUJ7K=aZ=1hhA@mail.gmail.com/
Signed-off-by: Howard Chu <howardchu95@gmail.com>
Cc: "Krzysztof Łopatowski" <krzysztof.m.lopatowski@gmail.com>
---
tools/perf/builtin-buildid-list.c | 2 +-
tools/perf/builtin-trace.c | 2 +-
tools/perf/tests/code-reading.c | 2 +-
tools/perf/tests/dlfilter-test.c | 2 +-
tools/perf/tests/dwarf-unwind.c | 2 +-
tools/perf/tests/mmap-thread-lookup.c | 2 +-
tools/perf/tests/symbols.c | 2 +-
tools/perf/util/machine.c | 6 +++---
tools/perf/util/machine.h | 2 +-
tools/perf/util/probe-event.c | 2 +-
10 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 52dfacaff8e3..357201d8ef0c 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -47,7 +47,7 @@ static void buildid__show_kernel_maps(void)
{
struct machine *machine;
- machine = machine__new_host();
+ machine = machine__new_host(true);
machine__for_each_kernel_map(machine, buildid__map_cb, NULL);
machine__delete(machine);
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index f55a8a6481f2..39f23ce39842 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1963,7 +1963,7 @@ static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
if (err)
return err;
- trace->host = machine__new_host();
+ trace->host = machine__new_host(callchain_param.enabled && trace->kernel_syscallchains);
if (trace->host == NULL)
return -ENOMEM;
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index cf6edbe697b2..17c7b9f95532 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -654,7 +654,7 @@ static int do_test_code_reading(bool try_kcore)
pid = getpid();
- machine = machine__new_host();
+ machine = machine__new_host(true);
machine->env = &perf_env;
ret = machine__create_kernel_maps(machine);
diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c
index 54f59d1246bc..11f70ebabacf 100644
--- a/tools/perf/tests/dlfilter-test.c
+++ b/tools/perf/tests/dlfilter-test.c
@@ -352,7 +352,7 @@ static int test__dlfilter_test(struct test_data *td)
return test_result("Failed to find program symbols", TEST_FAIL);
pr_debug("Creating new host machine structure\n");
- td->machine = machine__new_host();
+ td->machine = machine__new_host(true);
td->machine->env = &perf_env;
td->fd = creat(td->perf_data_file_name, 0644);
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 4803ab2d97ba..b76712f68403 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -203,7 +203,7 @@ noinline int test__dwarf_unwind(struct test_suite *test __maybe_unused,
struct thread *thread;
int err = -1;
- machine = machine__new_host();
+ machine = machine__new_host(true);
if (!machine) {
pr_err("Could not get machine\n");
return -1;
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index ddd1da9a4ba9..194b5affaa41 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -167,7 +167,7 @@ static int mmap_events(synth_cb synth)
*/
TEST_ASSERT_VAL("failed to create threads", !threads_create());
- machine = machine__new_host();
+ machine = machine__new_host(true);
dump_trace = verbose > 1 ? 1 : 0;
diff --git a/tools/perf/tests/symbols.c b/tools/perf/tests/symbols.c
index ee20a366f32f..6b22a451211a 100644
--- a/tools/perf/tests/symbols.c
+++ b/tools/perf/tests/symbols.c
@@ -19,7 +19,7 @@ struct test_info {
static int init_test_info(struct test_info *ti)
{
- ti->machine = machine__new_host();
+ ti->machine = machine__new_host(true);
if (!ti->machine) {
pr_debug("machine__new_host() failed!\n");
return TEST_FAIL;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 55d4977b9913..b1828e3922f2 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -125,14 +125,14 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
return 0;
}
-struct machine *machine__new_host(void)
+struct machine *machine__new_host(bool create_kmaps)
{
struct machine *machine = malloc(sizeof(*machine));
if (machine != NULL) {
machine__init(machine, "", HOST_KERNEL_ID);
- if (machine__create_kernel_maps(machine) < 0)
+ if (create_kmaps && machine__create_kernel_maps(machine) < 0)
goto out_delete;
machine->env = &perf_env;
@@ -146,7 +146,7 @@ struct machine *machine__new_host(void)
struct machine *machine__new_kallsyms(void)
{
- struct machine *machine = machine__new_host();
+ struct machine *machine = machine__new_host(true);
/*
* FIXME:
* 1) We should switch to machine__load_kallsyms(), i.e. not explicitly
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index ae3e5542d57d..e6faf8cd06e7 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -163,7 +163,7 @@ struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid);
void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
void machines__set_comm_exec(struct machines *machines, bool comm_exec);
-struct machine *machine__new_host(void);
+struct machine *machine__new_host(bool create_kmaps);
struct machine *machine__new_kallsyms(void);
int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
void machine__exit(struct machine *machine);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 307ad6242a4e..6b5b5542f454 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -94,7 +94,7 @@ int init_probe_symbol_maps(bool user_only)
if (symbol_conf.vmlinux_name)
pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
- host_machine = machine__new_host();
+ host_machine = machine__new_host(true);
if (!host_machine) {
pr_debug("machine__new_host() failed.\n");
symbol__exit();
--
2.45.2
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v1] perf trace: Speed up startup time by bypassing the creation of kernel maps
2025-04-05 3:12 [PATCH v1] perf trace: Speed up startup time by bypassing the creation of kernel maps Howard Chu
@ 2025-04-06 22:54 ` Krzysztof Łopatowski
2025-04-17 8:21 ` Aditya Bodkhe
1 sibling, 0 replies; 3+ messages in thread
From: Krzysztof Łopatowski @ 2025-04-06 22:54 UTC (permalink / raw)
To: Howard Chu
Cc: peterz, mingo, acme, namhyung, mark.rutland, alexander.shishkin,
jolsa, irogers, adrian.hunter, kan.liang, james.clark, charlie,
mpetlan, vmolnaro, linux, mhiramat, leo.yan, dima,
linux-perf-users, linux-kernel
Thank you Howard,
I much appreciate your work.
> I also want to express my gratitude to Krzysztof Łopatowski—his
> profiling of the perf trace [1] inspired this patch.
:-)
> I'm not sure why the discussion stalled, and I apologize for
> not being able to answer his questions.
I believe the conversation stalled because it touched a piece of code that
has been forgotten/abandoned.
Perhaps because the mechanism of setting trampolines in the kernel changed.
I'm not sure about that.
> Since his findings significantly improve the startup time of
> perf trace, I don't want to take credit for that.
> So Krzysztof, please let me know if you'd like a Suggested-by:
> or anything else.
I'd appreciate a Suggested-by: credit.
A while ago I also tried to refactor perf's code to improve the startup
time even more by merging all invocations of kallsyms__parse.
I gave up on that for now.
But I also investigated the possibility of removing dependency on
/proc/kallsyms. You can find my findings in [2].
The basic idea is to use a built-in feature of printk [3].
We can use this either in a custom kernel module or
(in a limited way) in an eBPF program to get the name of a kernel symbol
on demand from a raw kernel pointer.
A question for the more knowledgeable programmers here: could such an
approach work in perf? I didn't share it here before because
I hoped to get some feedback from the BCC guys first.
Best regards,
Krzysztof
[1]: https://lore.kernel.org/linux-perf-users/CAOQCU67EsHyw_FsqGbRuityahZTSAtWzffU=hLUJ7K=aZ=1hhA@mail.gmail.com/
[2]: https://github.com/iovisor/bcc/pull/5217
[3]: https://docs.kernel.org/core-api/printk-formats.html#symbols-function-pointers
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v1] perf trace: Speed up startup time by bypassing the creation of kernel maps
2025-04-05 3:12 [PATCH v1] perf trace: Speed up startup time by bypassing the creation of kernel maps Howard Chu
2025-04-06 22:54 ` Krzysztof Łopatowski
@ 2025-04-17 8:21 ` Aditya Bodkhe
1 sibling, 0 replies; 3+ messages in thread
From: Aditya Bodkhe @ 2025-04-17 8:21 UTC (permalink / raw)
To: Howard Chu, peterz
Cc: mingo, acme, namhyung, mark.rutland, alexander.shishkin, jolsa,
irogers, adrian.hunter, kan.liang, james.clark, charlie, mpetlan,
vmolnaro, linux, mhiramat, leo.yan, dima, linux-perf-users,
linux-kernel, krzysztof.m.lopatowski
Hi Howard
I tested this patch on a powerpc platform and got the following results:
Average real time over 10 runs : 2.130 Seconds (Without Patch)
Average real time over 10 runs : 2.062 Seconds (With Patch)
Am I missing something here?
Thanks
Aditya
On 05/04/25 8:42 am, Howard Chu wrote:
> If perf trace isn't run with '--call-graph' and '--kernel-syscall-graph'
> options, e.g.
>
> sudo ./perf trace --call-graph fp --kernel-syscall-graph -a
>
> there is no need for creating kernel maps.
>
> before:
>
> perf $ time sudo ./perf trace -- sleep 1
> ? ( ): sleep/3462908 ... [continued]: execve()) = 0
> ...
> 1001.459 ( ): sleep/3463166 exit_group() = ?
>
> real 0m2.834s
> user 0m0.011s
> sys 0m0.012s
>
> after:
>
> perf $ time sudo ./perf trace -- sleep 1
> ? ( ): sleep/3459948 ... [continued]: execve()) = 0
> ...
> 1001.471 ( ): 3459948 exit_group() = ?
>
> real 0m1.810s
> user 0m0.008s
> sys 0m0.015s
>
> I also want to express my gratitude to Krzysztof Łopatowski—his
> profiling of the perf trace [1] inspired this patch. I'm not sure why
> the discussion stalled, and I apologize for not being able to answer his
> questions. Since his findings significantly improve the startup time of
> perf trace, I don't want to take credit for that. So Krzysztof, please
> let me know if you'd like a Suggested-by: or anything else.
>
> [1]: https://lore.kernel.org/linux-perf-users/CAOQCU67EsHyw_FsqGbRuityahZTSAtWzffU=hLUJ7K=aZ=1hhA@mail.gmail.com/
> Signed-off-by: Howard Chu <howardchu95@gmail.com>
> Cc: "Krzysztof Łopatowski" <krzysztof.m.lopatowski@gmail.com>
> ---
> tools/perf/builtin-buildid-list.c | 2 +-
> tools/perf/builtin-trace.c | 2 +-
> tools/perf/tests/code-reading.c | 2 +-
> tools/perf/tests/dlfilter-test.c | 2 +-
> tools/perf/tests/dwarf-unwind.c | 2 +-
> tools/perf/tests/mmap-thread-lookup.c | 2 +-
> tools/perf/tests/symbols.c | 2 +-
> tools/perf/util/machine.c | 6 +++---
> tools/perf/util/machine.h | 2 +-
> tools/perf/util/probe-event.c | 2 +-
> 10 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
> index 52dfacaff8e3..357201d8ef0c 100644
> --- a/tools/perf/builtin-buildid-list.c
> +++ b/tools/perf/builtin-buildid-list.c
> @@ -47,7 +47,7 @@ static void buildid__show_kernel_maps(void)
> {
> struct machine *machine;
>
> - machine = machine__new_host();
> + machine = machine__new_host(true);
> machine__for_each_kernel_map(machine, buildid__map_cb, NULL);
> machine__delete(machine);
> }
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index f55a8a6481f2..39f23ce39842 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -1963,7 +1963,7 @@ static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
> if (err)
> return err;
>
> - trace->host = machine__new_host();
> + trace->host = machine__new_host(callchain_param.enabled && trace->kernel_syscallchains);
> if (trace->host == NULL)
> return -ENOMEM;
>
> diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
> index cf6edbe697b2..17c7b9f95532 100644
> --- a/tools/perf/tests/code-reading.c
> +++ b/tools/perf/tests/code-reading.c
> @@ -654,7 +654,7 @@ static int do_test_code_reading(bool try_kcore)
>
> pid = getpid();
>
> - machine = machine__new_host();
> + machine = machine__new_host(true);
> machine->env = &perf_env;
>
> ret = machine__create_kernel_maps(machine);
> diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c
> index 54f59d1246bc..11f70ebabacf 100644
> --- a/tools/perf/tests/dlfilter-test.c
> +++ b/tools/perf/tests/dlfilter-test.c
> @@ -352,7 +352,7 @@ static int test__dlfilter_test(struct test_data *td)
> return test_result("Failed to find program symbols", TEST_FAIL);
>
> pr_debug("Creating new host machine structure\n");
> - td->machine = machine__new_host();
> + td->machine = machine__new_host(true);
> td->machine->env = &perf_env;
>
> td->fd = creat(td->perf_data_file_name, 0644);
> diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
> index 4803ab2d97ba..b76712f68403 100644
> --- a/tools/perf/tests/dwarf-unwind.c
> +++ b/tools/perf/tests/dwarf-unwind.c
> @@ -203,7 +203,7 @@ noinline int test__dwarf_unwind(struct test_suite *test __maybe_unused,
> struct thread *thread;
> int err = -1;
>
> - machine = machine__new_host();
> + machine = machine__new_host(true);
> if (!machine) {
> pr_err("Could not get machine\n");
> return -1;
> diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
> index ddd1da9a4ba9..194b5affaa41 100644
> --- a/tools/perf/tests/mmap-thread-lookup.c
> +++ b/tools/perf/tests/mmap-thread-lookup.c
> @@ -167,7 +167,7 @@ static int mmap_events(synth_cb synth)
> */
> TEST_ASSERT_VAL("failed to create threads", !threads_create());
>
> - machine = machine__new_host();
> + machine = machine__new_host(true);
>
> dump_trace = verbose > 1 ? 1 : 0;
>
> diff --git a/tools/perf/tests/symbols.c b/tools/perf/tests/symbols.c
> index ee20a366f32f..6b22a451211a 100644
> --- a/tools/perf/tests/symbols.c
> +++ b/tools/perf/tests/symbols.c
> @@ -19,7 +19,7 @@ struct test_info {
>
> static int init_test_info(struct test_info *ti)
> {
> - ti->machine = machine__new_host();
> + ti->machine = machine__new_host(true);
> if (!ti->machine) {
> pr_debug("machine__new_host() failed!\n");
> return TEST_FAIL;
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index 55d4977b9913..b1828e3922f2 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -125,14 +125,14 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
> return 0;
> }
>
> -struct machine *machine__new_host(void)
> +struct machine *machine__new_host(bool create_kmaps)
> {
> struct machine *machine = malloc(sizeof(*machine));
>
> if (machine != NULL) {
> machine__init(machine, "", HOST_KERNEL_ID);
>
> - if (machine__create_kernel_maps(machine) < 0)
> + if (create_kmaps && machine__create_kernel_maps(machine) < 0)
> goto out_delete;
>
> machine->env = &perf_env;
> @@ -146,7 +146,7 @@ struct machine *machine__new_host(void)
>
> struct machine *machine__new_kallsyms(void)
> {
> - struct machine *machine = machine__new_host();
> + struct machine *machine = machine__new_host(true);
> /*
> * FIXME:
> * 1) We should switch to machine__load_kallsyms(), i.e. not explicitly
> diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
> index ae3e5542d57d..e6faf8cd06e7 100644
> --- a/tools/perf/util/machine.h
> +++ b/tools/perf/util/machine.h
> @@ -163,7 +163,7 @@ struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid);
> void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
> void machines__set_comm_exec(struct machines *machines, bool comm_exec);
>
> -struct machine *machine__new_host(void);
> +struct machine *machine__new_host(bool create_kmaps);
> struct machine *machine__new_kallsyms(void);
> int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
> void machine__exit(struct machine *machine);
> diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
> index 307ad6242a4e..6b5b5542f454 100644
> --- a/tools/perf/util/probe-event.c
> +++ b/tools/perf/util/probe-event.c
> @@ -94,7 +94,7 @@ int init_probe_symbol_maps(bool user_only)
> if (symbol_conf.vmlinux_name)
> pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
>
> - host_machine = machine__new_host();
> + host_machine = machine__new_host(true);
> if (!host_machine) {
> pr_debug("machine__new_host() failed.\n");
> symbol__exit();
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-04-17 8:22 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-04-05 3:12 [PATCH v1] perf trace: Speed up startup time by bypassing the creation of kernel maps Howard Chu
2025-04-06 22:54 ` Krzysztof Łopatowski
2025-04-17 8:21 ` Aditya Bodkhe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).