* [PATCH 1/4] perf ftrace: Add 'tail' option to --graph-opts
2024-07-29 0:41 [PATCH 0/4] perf ftrace: Add 'profile' subcommand (v1) Namhyung Kim
@ 2024-07-29 0:41 ` Namhyung Kim
2024-07-29 0:41 ` [PATCH 2/4] perf ftrace: Factor out check_ftrace_capable() Namhyung Kim
` (4 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Namhyung Kim @ 2024-07-29 0:41 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Steven Rostedt, Changbin Du
The 'graph-tail' option is to print function name as a comment at the end.
This is useful when a large function is mixed with other functions
(possibly from different CPUs).
For example,
$ sudo perf ftrace -- perf stat true
...
1) | get_unused_fd_flags() {
1) | alloc_fd() {
1) 0.178 us | _raw_spin_lock();
1) 0.187 us | expand_files();
1) 0.169 us | _raw_spin_unlock();
1) 1.211 us | }
1) 1.503 us | }
$ sudo perf ftrace --graph-opts tail -- perf stat true
...
1) | get_unused_fd_flags() {
1) | alloc_fd() {
1) 0.099 us | _raw_spin_lock();
1) 0.083 us | expand_files();
1) 0.081 us | _raw_spin_unlock();
1) 0.601 us | } /* alloc_fd */
1) 0.751 us | } /* get_unused_fd_flags */
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/Documentation/perf-ftrace.txt | 1 +
tools/perf/builtin-ftrace.c | 18 ++++++++++++++++++
tools/perf/util/ftrace.h | 1 +
3 files changed, 20 insertions(+)
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index d780b93fcf87..7ea1645a13cf 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -125,6 +125,7 @@ OPTIONS for 'perf ftrace trace'
- verbose - Show process names, PIDs, timestamps, etc.
- thresh=<n> - Setup trace duration threshold in microseconds.
- depth=<n> - Set max depth for function graph tracer to follow.
+ - tail - Print function name at the end.
OPTIONS for 'perf ftrace latency'
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index eb30c8eca488..33c52ebda2fd 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -228,6 +228,7 @@ static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused)
write_tracing_option_file("funcgraph-irqs", "1");
write_tracing_option_file("funcgraph-proc", "0");
write_tracing_option_file("funcgraph-abstime", "0");
+ write_tracing_option_file("funcgraph-tail", "0");
write_tracing_option_file("latency-format", "0");
write_tracing_option_file("irq-info", "0");
}
@@ -464,6 +465,17 @@ static int set_tracing_funcgraph_verbose(struct perf_ftrace *ftrace)
return 0;
}
+static int set_tracing_funcgraph_tail(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->graph_tail)
+ return 0;
+
+ if (write_tracing_option_file("funcgraph-tail", "1") < 0)
+ return -1;
+
+ return 0;
+}
+
static int set_tracing_thresh(struct perf_ftrace *ftrace)
{
int ret;
@@ -540,6 +552,11 @@ static int set_tracing_options(struct perf_ftrace *ftrace)
return -1;
}
+ if (set_tracing_funcgraph_tail(ftrace) < 0) {
+ pr_err("failed to set tracing option funcgraph-tail\n");
+ return -1;
+ }
+
return 0;
}
@@ -1099,6 +1116,7 @@ static int parse_graph_tracer_opts(const struct option *opt,
{ .name = "verbose", .value_ptr = &ftrace->graph_verbose },
{ .name = "thresh", .value_ptr = &ftrace->graph_thresh },
{ .name = "depth", .value_ptr = &ftrace->graph_depth },
+ { .name = "tail", .value_ptr = &ftrace->graph_tail },
{ .name = NULL, }
};
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 558efcb98d25..2515e841c64c 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -25,6 +25,7 @@ struct perf_ftrace {
int graph_noirqs;
int graph_verbose;
int graph_thresh;
+ int graph_tail;
};
struct filter_entry {
--
2.46.0.rc1.232.g9752f9e123-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH 2/4] perf ftrace: Factor out check_ftrace_capable()
2024-07-29 0:41 [PATCH 0/4] perf ftrace: Add 'profile' subcommand (v1) Namhyung Kim
2024-07-29 0:41 ` [PATCH 1/4] perf ftrace: Add 'tail' option to --graph-opts Namhyung Kim
@ 2024-07-29 0:41 ` Namhyung Kim
2024-07-29 18:22 ` Ian Rogers
2024-07-29 0:41 ` [PATCH 3/4] perf ftrace: Add 'profile' command Namhyung Kim
` (3 subsequent siblings)
5 siblings, 1 reply; 10+ messages in thread
From: Namhyung Kim @ 2024-07-29 0:41 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Steven Rostedt, Changbin Du
The check is a common part of the ftrace commands, let's move it out.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/builtin-ftrace.c | 44 +++++++++++++++++--------------------
1 file changed, 20 insertions(+), 24 deletions(-)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 33c52ebda2fd..978608ecd89c 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -59,6 +59,22 @@ static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
done = true;
}
+static int check_ftrace_capable(void)
+{
+ if (!(perf_cap__capable(CAP_PERFMON) ||
+ perf_cap__capable(CAP_SYS_ADMIN))) {
+ pr_err("ftrace only works for %s!\n",
+#ifdef HAVE_LIBCAP_SUPPORT
+ "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
+#else
+ "root"
+#endif
+ );
+ return -1;
+ }
+ return 0;
+}
+
static int __write_tracing_file(const char *name, const char *val, bool append)
{
char *file;
@@ -586,18 +602,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
.events = POLLIN,
};
- if (!(perf_cap__capable(CAP_PERFMON) ||
- perf_cap__capable(CAP_SYS_ADMIN))) {
- pr_err("ftrace only works for %s!\n",
-#ifdef HAVE_LIBCAP_SUPPORT
- "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
-#else
- "root"
-#endif
- );
- return -1;
- }
-
select_tracer(ftrace);
if (reset_tracing_files(ftrace) < 0) {
@@ -902,18 +906,6 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
};
int buckets[NUM_BUCKET] = { };
- if (!(perf_cap__capable(CAP_PERFMON) ||
- perf_cap__capable(CAP_SYS_ADMIN))) {
- pr_err("ftrace only works for %s!\n",
-#ifdef HAVE_LIBCAP_SUPPORT
- "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
-#else
- "root"
-#endif
- );
- return -1;
- }
-
trace_fd = prepare_func_latency(ftrace);
if (trace_fd < 0)
goto out;
@@ -1220,6 +1212,10 @@ int cmd_ftrace(int argc, const char **argv)
signal(SIGCHLD, sig_handler);
signal(SIGPIPE, sig_handler);
+ ret = check_ftrace_capable();
+ if (ret < 0)
+ return -1;
+
ret = perf_config(perf_ftrace_config, &ftrace);
if (ret < 0)
return -1;
--
2.46.0.rc1.232.g9752f9e123-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH 2/4] perf ftrace: Factor out check_ftrace_capable()
2024-07-29 0:41 ` [PATCH 2/4] perf ftrace: Factor out check_ftrace_capable() Namhyung Kim
@ 2024-07-29 18:22 ` Ian Rogers
2024-07-30 16:07 ` Namhyung Kim
0 siblings, 1 reply; 10+ messages in thread
From: Ian Rogers @ 2024-07-29 18:22 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, Kan Liang, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users,
Steven Rostedt, Changbin Du
On Sun, Jul 28, 2024 at 5:41 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> The check is a common part of the ftrace commands, let's move it out.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
The whole ifdefs and perf_cap__capable seemed wrong, not least as we'd
like the error message to be accurate not just when libcap is present,
so I sent out;
https://lore.kernel.org/lkml/20240729181931.2870851-1-irogers@google.com/
which I think can supersede this.
Thanks,
Ian
> ---
> tools/perf/builtin-ftrace.c | 44 +++++++++++++++++--------------------
> 1 file changed, 20 insertions(+), 24 deletions(-)
>
> diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
> index 33c52ebda2fd..978608ecd89c 100644
> --- a/tools/perf/builtin-ftrace.c
> +++ b/tools/perf/builtin-ftrace.c
> @@ -59,6 +59,22 @@ static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
> done = true;
> }
>
> +static int check_ftrace_capable(void)
> +{
> + if (!(perf_cap__capable(CAP_PERFMON) ||
> + perf_cap__capable(CAP_SYS_ADMIN))) {
> + pr_err("ftrace only works for %s!\n",
> +#ifdef HAVE_LIBCAP_SUPPORT
> + "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
> +#else
> + "root"
> +#endif
> + );
> + return -1;
> + }
> + return 0;
> +}
> +
> static int __write_tracing_file(const char *name, const char *val, bool append)
> {
> char *file;
> @@ -586,18 +602,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
> .events = POLLIN,
> };
>
> - if (!(perf_cap__capable(CAP_PERFMON) ||
> - perf_cap__capable(CAP_SYS_ADMIN))) {
> - pr_err("ftrace only works for %s!\n",
> -#ifdef HAVE_LIBCAP_SUPPORT
> - "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
> -#else
> - "root"
> -#endif
> - );
> - return -1;
> - }
> -
> select_tracer(ftrace);
>
> if (reset_tracing_files(ftrace) < 0) {
> @@ -902,18 +906,6 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
> };
> int buckets[NUM_BUCKET] = { };
>
> - if (!(perf_cap__capable(CAP_PERFMON) ||
> - perf_cap__capable(CAP_SYS_ADMIN))) {
> - pr_err("ftrace only works for %s!\n",
> -#ifdef HAVE_LIBCAP_SUPPORT
> - "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
> -#else
> - "root"
> -#endif
> - );
> - return -1;
> - }
> -
> trace_fd = prepare_func_latency(ftrace);
> if (trace_fd < 0)
> goto out;
> @@ -1220,6 +1212,10 @@ int cmd_ftrace(int argc, const char **argv)
> signal(SIGCHLD, sig_handler);
> signal(SIGPIPE, sig_handler);
>
> + ret = check_ftrace_capable();
> + if (ret < 0)
> + return -1;
> +
> ret = perf_config(perf_ftrace_config, &ftrace);
> if (ret < 0)
> return -1;
> --
> 2.46.0.rc1.232.g9752f9e123-goog
>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 2/4] perf ftrace: Factor out check_ftrace_capable()
2024-07-29 18:22 ` Ian Rogers
@ 2024-07-30 16:07 ` Namhyung Kim
0 siblings, 0 replies; 10+ messages in thread
From: Namhyung Kim @ 2024-07-30 16:07 UTC (permalink / raw)
To: Ian Rogers
Cc: Arnaldo Carvalho de Melo, Kan Liang, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users,
Steven Rostedt, Changbin Du
Hi Ian,
On Mon, Jul 29, 2024 at 11:23 AM Ian Rogers <irogers@google.com> wrote:
>
> On Sun, Jul 28, 2024 at 5:41 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > The check is a common part of the ftrace commands, let's move it out.
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
>
> The whole ifdefs and perf_cap__capable seemed wrong, not least as we'd
> like the error message to be accurate not just when libcap is present,
> so I sent out;
> https://lore.kernel.org/lkml/20240729181931.2870851-1-irogers@google.com/
> which I think can supersede this.
Thanks for the change! I'll reply to the patch.
Namhyung
>
> > ---
> > tools/perf/builtin-ftrace.c | 44 +++++++++++++++++--------------------
> > 1 file changed, 20 insertions(+), 24 deletions(-)
> >
> > diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
> > index 33c52ebda2fd..978608ecd89c 100644
> > --- a/tools/perf/builtin-ftrace.c
> > +++ b/tools/perf/builtin-ftrace.c
> > @@ -59,6 +59,22 @@ static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
> > done = true;
> > }
> >
> > +static int check_ftrace_capable(void)
> > +{
> > + if (!(perf_cap__capable(CAP_PERFMON) ||
> > + perf_cap__capable(CAP_SYS_ADMIN))) {
> > + pr_err("ftrace only works for %s!\n",
> > +#ifdef HAVE_LIBCAP_SUPPORT
> > + "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
> > +#else
> > + "root"
> > +#endif
> > + );
> > + return -1;
> > + }
> > + return 0;
> > +}
> > +
> > static int __write_tracing_file(const char *name, const char *val, bool append)
> > {
> > char *file;
> > @@ -586,18 +602,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
> > .events = POLLIN,
> > };
> >
> > - if (!(perf_cap__capable(CAP_PERFMON) ||
> > - perf_cap__capable(CAP_SYS_ADMIN))) {
> > - pr_err("ftrace only works for %s!\n",
> > -#ifdef HAVE_LIBCAP_SUPPORT
> > - "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
> > -#else
> > - "root"
> > -#endif
> > - );
> > - return -1;
> > - }
> > -
> > select_tracer(ftrace);
> >
> > if (reset_tracing_files(ftrace) < 0) {
> > @@ -902,18 +906,6 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
> > };
> > int buckets[NUM_BUCKET] = { };
> >
> > - if (!(perf_cap__capable(CAP_PERFMON) ||
> > - perf_cap__capable(CAP_SYS_ADMIN))) {
> > - pr_err("ftrace only works for %s!\n",
> > -#ifdef HAVE_LIBCAP_SUPPORT
> > - "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
> > -#else
> > - "root"
> > -#endif
> > - );
> > - return -1;
> > - }
> > -
> > trace_fd = prepare_func_latency(ftrace);
> > if (trace_fd < 0)
> > goto out;
> > @@ -1220,6 +1212,10 @@ int cmd_ftrace(int argc, const char **argv)
> > signal(SIGCHLD, sig_handler);
> > signal(SIGPIPE, sig_handler);
> >
> > + ret = check_ftrace_capable();
> > + if (ret < 0)
> > + return -1;
> > +
> > ret = perf_config(perf_ftrace_config, &ftrace);
> > if (ret < 0)
> > return -1;
> > --
> > 2.46.0.rc1.232.g9752f9e123-goog
> >
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 3/4] perf ftrace: Add 'profile' command
2024-07-29 0:41 [PATCH 0/4] perf ftrace: Add 'profile' subcommand (v1) Namhyung Kim
2024-07-29 0:41 ` [PATCH 1/4] perf ftrace: Add 'tail' option to --graph-opts Namhyung Kim
2024-07-29 0:41 ` [PATCH 2/4] perf ftrace: Factor out check_ftrace_capable() Namhyung Kim
@ 2024-07-29 0:41 ` Namhyung Kim
2024-07-29 0:41 ` [PATCH 4/4] perf ftrace: Add -s/--sort option to profile sub-command Namhyung Kim
` (2 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Namhyung Kim @ 2024-07-29 0:41 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Steven Rostedt, Changbin Du
The 'perf ftrace profile' command is to get function execution profiles
using function-graph tracer so that users can see the total, average,
max execution time as well as the number of invocation easily.
The following is a profile for perf_event_open syscall.
$ sudo perf ftrace profile -G __x64_sys_perf_event_open -- \
perf stat -e cycles -C1 true 2> /dev/null | head
# Total (us) Avg (us) Max (us) Count Function
65.611 65.611 65.611 1 __x64_sys_perf_event_open
30.527 30.527 30.527 1 anon_inode_getfile
30.260 30.260 30.260 1 __anon_inode_getfile
29.700 29.700 29.700 1 alloc_file_pseudo
17.578 17.578 17.578 1 d_alloc_pseudo
17.382 17.382 17.382 1 __d_alloc
16.738 16.738 16.738 1 kmem_cache_alloc_lru
15.686 15.686 15.686 1 perf_event_alloc
14.012 7.006 11.264 2 obj_cgroup_charge
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/Documentation/perf-ftrace.txt | 42 ++-
tools/perf/builtin-ftrace.c | 318 ++++++++++++++++++++++-
tools/perf/util/ftrace.h | 2 +
3 files changed, 359 insertions(+), 3 deletions(-)
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 7ea1645a13cf..33f32467f287 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -9,7 +9,7 @@ perf-ftrace - simple wrapper for kernel's ftrace functionality
SYNOPSIS
--------
[verse]
-'perf ftrace' {trace|latency} <command>
+'perf ftrace' {trace|latency|profile} <command>
DESCRIPTION
-----------
@@ -23,6 +23,9 @@ kernel's ftrace infrastructure.
'perf ftrace latency' calculates execution latency of a given function
(optionally with BPF) and display it as a histogram.
+ 'perf ftrace profile' show a execution profile for each function including
+ total, average, max time and the number of calls.
+
The following options apply to perf ftrace.
COMMON OPTIONS
@@ -146,6 +149,43 @@ OPTIONS for 'perf ftrace latency'
Use nano-second instead of micro-second as a base unit of the histogram.
+OPTIONS for 'perf ftrace profile'
+---------------------------------
+
+-T::
+--trace-funcs=::
+ Set function filter on the given function (or a glob pattern).
+ Multiple functions can be given by using this option more than once.
+ The function argument also can be a glob pattern. It will be passed
+ to 'set_ftrace_filter' in tracefs.
+
+-N::
+--notrace-funcs=::
+ Do not trace functions given by the argument. Like -T option, this
+ can be used more than once to specify multiple functions (or glob
+ patterns). It will be passed to 'set_ftrace_notrace' in tracefs.
+
+-G::
+--graph-funcs=::
+ Set graph filter on the given function (or a glob pattern). This is
+ useful to trace for functions executed from the given function. This
+ can be used more than once to specify multiple functions. It will be
+ passed to 'set_graph_function' in tracefs.
+
+-g::
+--nograph-funcs=::
+ Set graph notrace filter on the given function (or a glob pattern).
+ Like -G option, this is useful for the function_graph tracer only and
+ disables tracing for function executed from the given function. This
+ can be used more than once to specify multiple functions. It will be
+ passed to 'set_graph_notrace' in tracefs.
+
+-m::
+--buffer-size::
+ Set the size of per-cpu tracing buffer, <size> is expected to
+ be a number with appended unit character - B/K/M/G.
+
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-trace[1]
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 978608ecd89c..ae9389435d1b 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -13,6 +13,7 @@
#include <signal.h>
#include <stdlib.h>
#include <fcntl.h>
+#include <inttypes.h>
#include <math.h>
#include <poll.h>
#include <ctype.h>
@@ -22,15 +23,18 @@
#include "debug.h"
#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
+#include <api/io.h>
#include <api/fs/tracing_path.h>
#include "evlist.h"
#include "target.h"
#include "cpumap.h"
+#include "hashmap.h"
#include "thread_map.h"
#include "strfilter.h"
#include "util/cap.h"
#include "util/config.h"
#include "util/ftrace.h"
+#include "util/stat.h"
#include "util/units.h"
#include "util/parse-sublevel-options.h"
@@ -959,6 +963,294 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
return (done && !workload_exec_errno) ? 0 : -1;
}
+static size_t profile_hash(long func, void *ctx __maybe_unused)
+{
+ return str_hash((char *)func);
+}
+
+static bool profile_equal(long func1, long func2, void *ctx __maybe_unused)
+{
+ return !strcmp((char *)func1, (char *)func2);
+}
+
+static int prepare_func_profile(struct perf_ftrace *ftrace)
+{
+ ftrace->tracer = "function_graph";
+ ftrace->graph_tail = 1;
+
+ ftrace->profile_hash = hashmap__new(profile_hash, profile_equal, NULL);
+ if (ftrace->profile_hash == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* This is saved in a hashmap keyed by the function name */
+struct ftrace_profile_data {
+ struct stats st;
+};
+
+static int add_func_duration(struct perf_ftrace *ftrace, char *func, double time_ns)
+{
+ struct ftrace_profile_data *prof = NULL;
+
+ if (!hashmap__find(ftrace->profile_hash, func, &prof)) {
+ char *key = strdup(func);
+
+ if (key == NULL)
+ return -ENOMEM;
+
+ prof = zalloc(sizeof(*prof));
+ if (prof == NULL) {
+ free(key);
+ return -ENOMEM;
+ }
+
+ init_stats(&prof->st);
+ hashmap__add(ftrace->profile_hash, key, prof);
+ }
+
+ update_stats(&prof->st, time_ns);
+ return 0;
+}
+
+/*
+ * The ftrace function_graph text output normally looks like below:
+ *
+ * CPU DURATION FUNCTION
+ *
+ * 0) | syscall_trace_enter.isra.0() {
+ * 0) | __audit_syscall_entry() {
+ * 0) | auditd_test_task() {
+ * 0) 0.271 us | __rcu_read_lock();
+ * 0) 0.275 us | __rcu_read_unlock();
+ * 0) 1.254 us | } /\* auditd_test_task *\/
+ * 0) 0.279 us | ktime_get_coarse_real_ts64();
+ * 0) 2.227 us | } /\* __audit_syscall_entry *\/
+ * 0) 2.713 us | } /\* syscall_trace_enter.isra.0 *\/
+ *
+ * Parse the line and get the duration and function name.
+ */
+static int parse_func_duration(struct perf_ftrace *ftrace, char *line, size_t len)
+{
+ char *p;
+ char *func;
+ double duration;
+
+ /* skip CPU */
+ p = strchr(line, ')');
+ if (p == NULL)
+ return 0;
+
+ /* get duration */
+ p = skip_spaces(p + 1);
+
+ /* no duration? */
+ if (p == NULL || *p == '|')
+ return 0;
+
+ /* skip markers like '*' or '!' for longer than ms */
+ if (!isdigit(*p))
+ p++;
+
+ duration = strtod(p, &p);
+
+ if (strncmp(p, " us", 3)) {
+ pr_debug("non-usec time found.. ignoring\n");
+ return 0;
+ }
+
+ /*
+ * profile stat keeps the max and min values as integer,
+ * convert to nsec time so that we can have accurate max.
+ */
+ duration *= 1000;
+
+ /* skip to the pipe */
+ while (p < line + len && *p != '|')
+ p++;
+
+ if (*p++ != '|')
+ return -EINVAL;
+
+ /* get function name */
+ func = skip_spaces(p);
+
+ /* skip the closing bracket and the start of comment */
+ if (*func == '}')
+ func += 5;
+
+ /* remove semi-colon or end of comment at the end */
+ p = line + len - 1;
+ while (!isalnum(*p) && *p != ']') {
+ *p = '\0';
+ --p;
+ }
+
+ return add_func_duration(ftrace, func, duration);
+}
+
+static int cmp_profile_data(const void *a, const void *b)
+{
+ const struct hashmap_entry *e1 = *(const struct hashmap_entry **)a;
+ const struct hashmap_entry *e2 = *(const struct hashmap_entry **)b;
+ struct ftrace_profile_data *p1 = e1->pvalue;
+ struct ftrace_profile_data *p2 = e2->pvalue;
+
+ /* compare by total time */
+ if ((p1->st.n * p1->st.mean) > (p2->st.n * p2->st.mean))
+ return -1;
+ else
+ return 1;
+}
+
+static void print_profile_result(struct perf_ftrace *ftrace)
+{
+ struct hashmap_entry *entry, **profile;
+ size_t i, nr, bkt;
+
+ nr = hashmap__size(ftrace->profile_hash);
+ if (nr == 0)
+ return;
+
+ profile = calloc(nr, sizeof(*profile));
+ if (profile == NULL) {
+ pr_err("failed to allocate memory for the result\n");
+ return;
+ }
+
+ i = 0;
+ hashmap__for_each_entry(ftrace->profile_hash, entry, bkt)
+ profile[i++] = entry;
+
+ assert(i == nr);
+
+ //cmp_profile_data(profile[0], profile[1]);
+ qsort(profile, nr, sizeof(*profile), cmp_profile_data);
+
+ printf("# %10s %10s %10s %10s %s\n",
+ "Total (us)", "Avg (us)", "Max (us)", "Count", "Function");
+
+ for (i = 0; i < nr; i++) {
+ const char *name = profile[i]->pkey;
+ struct ftrace_profile_data *p = profile[i]->pvalue;
+
+ printf("%12.3f %10.3f %6"PRIu64".%03"PRIu64" %10.0f %s\n",
+ p->st.n * p->st.mean / 1000, p->st.mean / 1000,
+ p->st.max / 1000, p->st.max % 1000, p->st.n, name);
+ }
+
+ free(profile);
+
+ hashmap__for_each_entry(ftrace->profile_hash, entry, bkt) {
+ free((char *)entry->pkey);
+ free(entry->pvalue);
+ }
+
+ hashmap__free(ftrace->profile_hash);
+ ftrace->profile_hash = NULL;
+}
+
+static int __cmd_profile(struct perf_ftrace *ftrace)
+{
+ char *trace_file;
+ int trace_fd;
+ char buf[4096];
+ struct io io;
+ char *line = NULL;
+ size_t line_len = 0;
+
+ if (prepare_func_profile(ftrace) < 0) {
+ pr_err("failed to prepare func profiler\n");
+ goto out;
+ }
+
+ if (reset_tracing_files(ftrace) < 0) {
+ pr_err("failed to reset ftrace\n");
+ goto out;
+ }
+
+ /* reset ftrace buffer */
+ if (write_tracing_file("trace", "0") < 0)
+ goto out;
+
+ if (set_tracing_options(ftrace) < 0)
+ return -1;
+
+ if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
+ pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
+ goto out_reset;
+ }
+
+ setup_pager();
+
+ trace_file = get_tracing_file("trace_pipe");
+ if (!trace_file) {
+ pr_err("failed to open trace_pipe\n");
+ goto out_reset;
+ }
+
+ trace_fd = open(trace_file, O_RDONLY);
+
+ put_tracing_file(trace_file);
+
+ if (trace_fd < 0) {
+ pr_err("failed to open trace_pipe\n");
+ goto out_reset;
+ }
+
+ fcntl(trace_fd, F_SETFL, O_NONBLOCK);
+
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ goto out_close_fd;
+ }
+
+ evlist__start_workload(ftrace->evlist);
+
+ io__init(&io, trace_fd, buf, sizeof(buf));
+ io.timeout_ms = -1;
+
+ while (!done && !io.eof) {
+ if (io__getline(&io, &line, &line_len) < 0)
+ break;
+
+ if (parse_func_duration(ftrace, line, line_len) < 0)
+ break;
+ }
+
+ write_tracing_file("tracing_on", "0");
+
+ if (workload_exec_errno) {
+ const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
+ /* flush stdout first so below error msg appears at the end. */
+ fflush(stdout);
+ pr_err("workload failed: %s\n", emsg);
+ goto out_free_line;
+ }
+
+ /* read remaining buffer contents */
+ io.timeout_ms = 0;
+ while (!io.eof) {
+ if (io__getline(&io, &line, &line_len) < 0)
+ break;
+
+ if (parse_func_duration(ftrace, line, line_len) < 0)
+ break;
+ }
+
+ print_profile_result(ftrace);
+
+out_free_line:
+ free(line);
+out_close_fd:
+ close(trace_fd);
+out_reset:
+ reset_tracing_files(ftrace);
+out:
+ return (done && !workload_exec_errno) ? 0 : -1;
+}
+
static int perf_ftrace_config(const char *var, const char *value, void *cb)
{
struct perf_ftrace *ftrace = cb;
@@ -1126,6 +1418,7 @@ enum perf_ftrace_subcommand {
PERF_FTRACE_NONE,
PERF_FTRACE_TRACE,
PERF_FTRACE_LATENCY,
+ PERF_FTRACE_PROFILE,
};
int cmd_ftrace(int argc, const char **argv)
@@ -1191,13 +1484,28 @@ int cmd_ftrace(int argc, const char **argv)
"Use nano-second histogram"),
OPT_PARENT(common_options),
};
+ const struct option profile_options[] = {
+ OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
+ "Trace given functions using function tracer",
+ parse_filter_func),
+ OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
+ "Do not trace given functions", parse_filter_func),
+ OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
+ "Trace given functions using function_graph tracer",
+ parse_filter_func),
+ OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
+ "Set nograph filter on given functions", parse_filter_func),
+ OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size",
+ "Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
+ OPT_PARENT(common_options),
+ };
const struct option *options = ftrace_options;
const char * const ftrace_usage[] = {
"perf ftrace [<options>] [<command>]",
"perf ftrace [<options>] -- [<command>] [<options>]",
- "perf ftrace {trace|latency} [<options>] [<command>]",
- "perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
+ "perf ftrace {trace|latency|profile} [<options>] [<command>]",
+ "perf ftrace {trace|latency|profile} [<options>] -- [<command>] [<options>]",
NULL
};
enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
@@ -1226,6 +1534,9 @@ int cmd_ftrace(int argc, const char **argv)
} else if (!strcmp(argv[1], "latency")) {
subcmd = PERF_FTRACE_LATENCY;
options = latency_options;
+ } else if (!strcmp(argv[1], "profile")) {
+ subcmd = PERF_FTRACE_PROFILE;
+ options = profile_options;
}
if (subcmd != PERF_FTRACE_NONE) {
@@ -1261,6 +1572,9 @@ int cmd_ftrace(int argc, const char **argv)
}
cmd_func = __cmd_latency;
break;
+ case PERF_FTRACE_PROFILE:
+ cmd_func = __cmd_profile;
+ break;
case PERF_FTRACE_NONE:
default:
pr_err("Invalid subcommand\n");
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 2515e841c64c..bae649ef50e8 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -6,6 +6,7 @@
#include "target.h"
struct evlist;
+struct hashamp;
struct perf_ftrace {
struct evlist *evlist;
@@ -15,6 +16,7 @@ struct perf_ftrace {
struct list_head notrace;
struct list_head graph_funcs;
struct list_head nograph_funcs;
+ struct hashmap *profile_hash;
unsigned long percpu_buffer_size;
bool inherit;
bool use_nsec;
--
2.46.0.rc1.232.g9752f9e123-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH 4/4] perf ftrace: Add -s/--sort option to profile sub-command
2024-07-29 0:41 [PATCH 0/4] perf ftrace: Add 'profile' subcommand (v1) Namhyung Kim
` (2 preceding siblings ...)
2024-07-29 0:41 ` [PATCH 3/4] perf ftrace: Add 'profile' command Namhyung Kim
@ 2024-07-29 0:41 ` Namhyung Kim
2024-07-29 18:45 ` [PATCH 0/4] perf ftrace: Add 'profile' subcommand (v1) Ian Rogers
2024-07-30 19:19 ` Arnaldo Carvalho de Melo
5 siblings, 0 replies; 10+ messages in thread
From: Namhyung Kim @ 2024-07-29 0:41 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Steven Rostedt, Changbin Du
The -s/--sort option is to sort the output by given column.
$ sudo perf ftrace profile -s max sync | head
# Total (us) Avg (us) Max (us) Count Function
6301.811 6301.811 6301.811 1 __do_sys_sync
6301.328 6301.328 6301.328 1 ksys_sync
5320.300 1773.433 2858.819 3 iterate_supers
2755.875 17.012 2610.633 162 sync_fs_one_sb
2728.351 682.088 2610.413 4 ext4_sync_fs [ext4]
2603.654 2603.654 2603.654 1 jbd2_log_wait_commit [jbd2]
4750.615 593.827 2597.427 8 schedule
2164.986 26.728 2115.673 81 sync_inodes_one_sb
2143.842 26.467 2115.438 81 sync_inodes_sb
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/Documentation/perf-ftrace.txt | 5 ++
tools/perf/builtin-ftrace.c | 63 +++++++++++++++++++++++-
2 files changed, 66 insertions(+), 2 deletions(-)
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 33f32467f287..eaec8253be68 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -185,6 +185,11 @@ OPTIONS for 'perf ftrace profile'
Set the size of per-cpu tracing buffer, <size> is expected to
be a number with appended unit character - B/K/M/G.
+-s::
+--sort=::
+ Sort the result by the given field. Available values are:
+ total, avg, max, count, name. Default is 'total'.
+
SEE ALSO
--------
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index ae9389435d1b..a615c405d98f 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -1090,15 +1090,47 @@ static int parse_func_duration(struct perf_ftrace *ftrace, char *line, size_t le
return add_func_duration(ftrace, func, duration);
}
+enum perf_ftrace_profile_sort_key {
+ PFP_SORT_TOTAL = 0,
+ PFP_SORT_AVG,
+ PFP_SORT_MAX,
+ PFP_SORT_COUNT,
+ PFP_SORT_NAME,
+};
+
+static enum perf_ftrace_profile_sort_key profile_sort = PFP_SORT_TOTAL;
+
static int cmp_profile_data(const void *a, const void *b)
{
const struct hashmap_entry *e1 = *(const struct hashmap_entry **)a;
const struct hashmap_entry *e2 = *(const struct hashmap_entry **)b;
struct ftrace_profile_data *p1 = e1->pvalue;
struct ftrace_profile_data *p2 = e2->pvalue;
+ double v1, v2;
+
+ switch (profile_sort) {
+ case PFP_SORT_NAME:
+ return strcmp(e1->pkey, e2->pkey);
+ case PFP_SORT_AVG:
+ v1 = p1->st.mean;
+ v2 = p2->st.mean;
+ break;
+ case PFP_SORT_MAX:
+ v1 = p1->st.max;
+ v2 = p2->st.max;
+ break;
+ case PFP_SORT_COUNT:
+ v1 = p1->st.n;
+ v2 = p2->st.n;
+ break;
+ case PFP_SORT_TOTAL:
+ default:
+ v1 = p1->st.n * p1->st.mean;
+ v2 = p2->st.n * p2->st.mean;
+ break;
+ }
- /* compare by total time */
- if ((p1->st.n * p1->st.mean) > (p2->st.n * p2->st.mean))
+ if (v1 > v2)
return -1;
else
return 1;
@@ -1414,6 +1446,30 @@ static int parse_graph_tracer_opts(const struct option *opt,
return 0;
}
+static int parse_sort_key(const struct option *opt, const char *str, int unset)
+{
+ enum perf_ftrace_profile_sort_key *key = (void *)opt->value;
+
+ if (unset)
+ return 0;
+
+ if (!strcmp(str, "total"))
+ *key = PFP_SORT_TOTAL;
+ else if (!strcmp(str, "avg"))
+ *key = PFP_SORT_AVG;
+ else if (!strcmp(str, "max"))
+ *key = PFP_SORT_MAX;
+ else if (!strcmp(str, "count"))
+ *key = PFP_SORT_COUNT;
+ else if (!strcmp(str, "name"))
+ *key = PFP_SORT_NAME;
+ else {
+ pr_err("Unknown sort key: %s\n", str);
+ return -1;
+ }
+ return 0;
+}
+
enum perf_ftrace_subcommand {
PERF_FTRACE_NONE,
PERF_FTRACE_TRACE,
@@ -1497,6 +1553,9 @@ int cmd_ftrace(int argc, const char **argv)
"Set nograph filter on given functions", parse_filter_func),
OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size",
"Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
+ OPT_CALLBACK('s', "sort", &profile_sort, "key",
+ "Sort result by key: total (default), avg, max, count, name.",
+ parse_sort_key),
OPT_PARENT(common_options),
};
const struct option *options = ftrace_options;
--
2.46.0.rc1.232.g9752f9e123-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH 0/4] perf ftrace: Add 'profile' subcommand (v1)
2024-07-29 0:41 [PATCH 0/4] perf ftrace: Add 'profile' subcommand (v1) Namhyung Kim
` (3 preceding siblings ...)
2024-07-29 0:41 ` [PATCH 4/4] perf ftrace: Add -s/--sort option to profile sub-command Namhyung Kim
@ 2024-07-29 18:45 ` Ian Rogers
2024-07-30 19:19 ` Arnaldo Carvalho de Melo
5 siblings, 0 replies; 10+ messages in thread
From: Ian Rogers @ 2024-07-29 18:45 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, Kan Liang, Jiri Olsa, Adrian Hunter,
Peter Zijlstra, Ingo Molnar, LKML, linux-perf-users,
Steven Rostedt, Changbin Du
On Sun, Jul 28, 2024 at 5:41 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Hello,
>
> This is an attempt to extend perf ftrace command to show a kernel function
> profile using the function_graph tracer. This is useful to see detailed
> info like total, average, max time (in usec) and number of calls for each
> function.
>
> $ sudo perf ftrace profile -- sync | head
> # Total (us) Avg (us) Max (us) Count Function
> 7638.372 7638.372 7638.372 1 __do_sys_sync
> 7638.059 7638.059 7638.059 1 ksys_sync
> 5893.959 1964.653 3747.963 3 iterate_supers
> 5214.181 579.353 1688.752 9 schedule
> 3585.773 44.269 3537.329 81 sync_inodes_one_sb
> 3566.179 44.027 3537.078 81 sync_inodes_sb
> 1976.901 247.113 1968.070 8 filemap_fdatawait_keep_errors
> 1974.367 246.796 1967.895 8 __filemap_fdatawait_range
> 1935.407 37.219 1157.627 52 folio_wait_writeback
>
> While the kernel also provides the similar functionality IIRC under
> CONFIG_FUNCTION_PROFILER, it's often not enabled on disto kernels so I
> implemented it in user space.
>
> Also it can support function filters like 'perf ftrace trace' so users
> can focus on some target functions and change the buffer size if needed.
>
> $ sudo perf ftrace profile -h
>
> Usage: perf ftrace [<options>] [<command>]
> or: perf ftrace [<options>] -- [<command>] [<options>]
> or: perf ftrace {trace|latency|profile} [<options>] [<command>]
> or: perf ftrace {trace|latency|profile} [<options>] -- [<command>] [<options>]
>
> -a, --all-cpus System-wide collection from all CPUs
> -C, --cpu <cpu> List of cpus to monitor
> -G, --graph-funcs <func>
> Trace given functions using function_graph tracer
> -g, --nograph-funcs <func>
> Set nograph filter on given functions
> -m, --buffer-size <size>
> Size of per cpu buffer, needs to use a B, K, M or G suffix.
> -N, --notrace-funcs <func>
> Do not trace given functions
> -p, --pid <pid> Trace on existing process id
> -s, --sort <key> Sort result by key: total (default), avg, max, count, name.
> -T, --trace-funcs <func>
> Trace given functions using function tracer
> -v, --verbose Be more verbose
> --tid <tid> Trace on existing thread id (exclusive to --pid)
>
>
> The code is also available in 'perf/ftrace-profile-v1' branch at
> git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
>
> Thanks,
> Namhyung
Lgtm, need to think about (rebase, etc) wrt the libcap change I sent
but otherwise:
Reviewed-by: Ian Rogers <irogers@google.com>
Thanks,
Ian
> Namhyung Kim (4):
> perf ftrace: Add 'tail' option to --graph-opts
> perf ftrace: Factor out check_ftrace_capable()
> perf ftrace: Add 'profile' command
> perf ftrace: Add -s/--sort option to profile sub-command
>
> tools/perf/Documentation/perf-ftrace.txt | 48 ++-
> tools/perf/builtin-ftrace.c | 439 +++++++++++++++++++++--
> tools/perf/util/ftrace.h | 3 +
> 3 files changed, 463 insertions(+), 27 deletions(-)
>
> --
> 2.46.0.rc1.232.g9752f9e123-goog
>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 0/4] perf ftrace: Add 'profile' subcommand (v1)
2024-07-29 0:41 [PATCH 0/4] perf ftrace: Add 'profile' subcommand (v1) Namhyung Kim
` (4 preceding siblings ...)
2024-07-29 18:45 ` [PATCH 0/4] perf ftrace: Add 'profile' subcommand (v1) Ian Rogers
@ 2024-07-30 19:19 ` Arnaldo Carvalho de Melo
2024-07-31 15:33 ` Ian Rogers
5 siblings, 1 reply; 10+ messages in thread
From: Arnaldo Carvalho de Melo @ 2024-07-30 19:19 UTC (permalink / raw)
To: Namhyung Kim
Cc: Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
Ingo Molnar, LKML, linux-perf-users, Steven Rostedt, Changbin Du
On Sun, Jul 28, 2024 at 05:41:23PM -0700, Namhyung Kim wrote:
> Hello,
>
> This is an attempt to extend perf ftrace command to show a kernel function
> profile using the function_graph tracer. This is useful to see detailed
> info like total, average, max time (in usec) and number of calls for each
> function.
>
> $ sudo perf ftrace profile -- sync | head
> # Total (us) Avg (us) Max (us) Count Function
> 7638.372 7638.372 7638.372 1 __do_sys_sync
> 7638.059 7638.059 7638.059 1 ksys_sync
> 5893.959 1964.653 3747.963 3 iterate_supers
> 5214.181 579.353 1688.752 9 schedule
> 3585.773 44.269 3537.329 81 sync_inodes_one_sb
> 3566.179 44.027 3537.078 81 sync_inodes_sb
> 1976.901 247.113 1968.070 8 filemap_fdatawait_keep_errors
> 1974.367 246.796 1967.895 8 __filemap_fdatawait_range
> 1935.407 37.219 1157.627 52 folio_wait_writeback
>
> While the kernel also provides the similar functionality IIRC under
> CONFIG_FUNCTION_PROFILER, it's often not enabled on disto kernels so I
> implemented it in user space.
Great functionality, tested it all and applied to tmp.perf-tools-next,
will be in perf-tools-next after one last round of container builds.
The discussion about libcap seems to still be open, so I'm applying what
is in this series as it is small and simple, we can go on from there.
Thanks!
- Arnaldo
> Also it can support function filters like 'perf ftrace trace' so users
> can focus on some target functions and change the buffer size if needed.
>
> $ sudo perf ftrace profile -h
>
> Usage: perf ftrace [<options>] [<command>]
> or: perf ftrace [<options>] -- [<command>] [<options>]
> or: perf ftrace {trace|latency|profile} [<options>] [<command>]
> or: perf ftrace {trace|latency|profile} [<options>] -- [<command>] [<options>]
>
> -a, --all-cpus System-wide collection from all CPUs
> -C, --cpu <cpu> List of cpus to monitor
> -G, --graph-funcs <func>
> Trace given functions using function_graph tracer
> -g, --nograph-funcs <func>
> Set nograph filter on given functions
> -m, --buffer-size <size>
> Size of per cpu buffer, needs to use a B, K, M or G suffix.
> -N, --notrace-funcs <func>
> Do not trace given functions
> -p, --pid <pid> Trace on existing process id
> -s, --sort <key> Sort result by key: total (default), avg, max, count, name.
> -T, --trace-funcs <func>
> Trace given functions using function tracer
> -v, --verbose Be more verbose
> --tid <tid> Trace on existing thread id (exclusive to --pid)
>
>
> The code is also available in 'perf/ftrace-profile-v1' branch at
> git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
>
> Thanks,
> Namhyung
>
>
> Namhyung Kim (4):
> perf ftrace: Add 'tail' option to --graph-opts
> perf ftrace: Factor out check_ftrace_capable()
> perf ftrace: Add 'profile' command
> perf ftrace: Add -s/--sort option to profile sub-command
>
> tools/perf/Documentation/perf-ftrace.txt | 48 ++-
> tools/perf/builtin-ftrace.c | 439 +++++++++++++++++++++--
> tools/perf/util/ftrace.h | 3 +
> 3 files changed, 463 insertions(+), 27 deletions(-)
>
> --
> 2.46.0.rc1.232.g9752f9e123-goog
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 0/4] perf ftrace: Add 'profile' subcommand (v1)
2024-07-30 19:19 ` Arnaldo Carvalho de Melo
@ 2024-07-31 15:33 ` Ian Rogers
0 siblings, 0 replies; 10+ messages in thread
From: Ian Rogers @ 2024-07-31 15:33 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Namhyung Kim, Kan Liang, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
Ingo Molnar, LKML, linux-perf-users, Steven Rostedt, Changbin Du
On Tue, Jul 30, 2024 at 12:19 PM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> On Sun, Jul 28, 2024 at 05:41:23PM -0700, Namhyung Kim wrote:
> > Hello,
> >
> > This is an attempt to extend perf ftrace command to show a kernel function
> > profile using the function_graph tracer. This is useful to see detailed
> > info like total, average, max time (in usec) and number of calls for each
> > function.
> >
> > $ sudo perf ftrace profile -- sync | head
> > # Total (us) Avg (us) Max (us) Count Function
> > 7638.372 7638.372 7638.372 1 __do_sys_sync
> > 7638.059 7638.059 7638.059 1 ksys_sync
> > 5893.959 1964.653 3747.963 3 iterate_supers
> > 5214.181 579.353 1688.752 9 schedule
> > 3585.773 44.269 3537.329 81 sync_inodes_one_sb
> > 3566.179 44.027 3537.078 81 sync_inodes_sb
> > 1976.901 247.113 1968.070 8 filemap_fdatawait_keep_errors
> > 1974.367 246.796 1967.895 8 __filemap_fdatawait_range
> > 1935.407 37.219 1157.627 52 folio_wait_writeback
> >
> > While the kernel also provides the similar functionality IIRC under
> > CONFIG_FUNCTION_PROFILER, it's often not enabled on disto kernels so I
> > implemented it in user space.
>
> Great functionality, tested it all and applied to tmp.perf-tools-next,
> will be in perf-tools-next after one last round of container builds.
>
> The discussion about libcap seems to still be open, so I'm applying what
> is in this series as it is small and simple, we can go on from there.
Sgtm. I did the libcap cleanup on perf-tools-next, but
tmp.perf-tools-next hasn't been merged there for 3 weeks. It'd be nice
to rebase the patch on perf-tools-next, but should I just shift to
working on tmp.perf-tools-next?
Thanks,
Ian
> Thanks!
>
> - Arnaldo
>
> > Also it can support function filters like 'perf ftrace trace' so users
> > can focus on some target functions and change the buffer size if needed.
> >
> > $ sudo perf ftrace profile -h
> >
> > Usage: perf ftrace [<options>] [<command>]
> > or: perf ftrace [<options>] -- [<command>] [<options>]
> > or: perf ftrace {trace|latency|profile} [<options>] [<command>]
> > or: perf ftrace {trace|latency|profile} [<options>] -- [<command>] [<options>]
> >
> > -a, --all-cpus System-wide collection from all CPUs
> > -C, --cpu <cpu> List of cpus to monitor
> > -G, --graph-funcs <func>
> > Trace given functions using function_graph tracer
> > -g, --nograph-funcs <func>
> > Set nograph filter on given functions
> > -m, --buffer-size <size>
> > Size of per cpu buffer, needs to use a B, K, M or G suffix.
> > -N, --notrace-funcs <func>
> > Do not trace given functions
> > -p, --pid <pid> Trace on existing process id
> > -s, --sort <key> Sort result by key: total (default), avg, max, count, name.
> > -T, --trace-funcs <func>
> > Trace given functions using function tracer
> > -v, --verbose Be more verbose
> > --tid <tid> Trace on existing thread id (exclusive to --pid)
> >
> >
> > The code is also available in 'perf/ftrace-profile-v1' branch at
> > git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
> >
> > Thanks,
> > Namhyung
> >
> >
> > Namhyung Kim (4):
> > perf ftrace: Add 'tail' option to --graph-opts
> > perf ftrace: Factor out check_ftrace_capable()
> > perf ftrace: Add 'profile' command
> > perf ftrace: Add -s/--sort option to profile sub-command
> >
> > tools/perf/Documentation/perf-ftrace.txt | 48 ++-
> > tools/perf/builtin-ftrace.c | 439 +++++++++++++++++++++--
> > tools/perf/util/ftrace.h | 3 +
> > 3 files changed, 463 insertions(+), 27 deletions(-)
> >
> > --
> > 2.46.0.rc1.232.g9752f9e123-goog
^ permalink raw reply [flat|nested] 10+ messages in thread