* [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency
@ 2022-03-21 23:46 Namhyung Kim
2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo
0 siblings, 2 replies; 3+ messages in thread
From: Namhyung Kim @ 2022-03-21 23:46 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Jiri Olsa
Cc: Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen, Ian Rogers,
Stephane Eranian, Changbin Du
Sometimes we want to see nano-second granularity.
$ sudo perf ftrace latency -T dput -a sleep 1
# DURATION | COUNT | GRAPH |
0 - 1 us | 2098375 | ############################# |
1 - 2 us | 61 | |
2 - 4 us | 33 | |
4 - 8 us | 13 | |
8 - 16 us | 124 | |
16 - 32 us | 123 | |
32 - 64 us | 1 | |
64 - 128 us | 0 | |
128 - 256 us | 1 | |
256 - 512 us | 0 | |
512 - 1024 us | 0 | |
1 - 2 ms | 0 | |
2 - 4 ms | 0 | |
4 - 8 ms | 0 | |
8 - 16 ms | 0 | |
16 - 32 ms | 0 | |
32 - 64 ms | 0 | |
64 - 128 ms | 0 | |
128 - 256 ms | 0 | |
256 - 512 ms | 0 | |
512 - 1024 ms | 0 | |
1 - ... s | 0 | |
$ sudo perf ftrace latency -T dput -a -n sleep 1
# DURATION | COUNT | GRAPH |
0 - 1 us | 0 | |
1 - 2 ns | 0 | |
2 - 4 ns | 0 | |
4 - 8 ns | 0 | |
8 - 16 ns | 0 | |
16 - 32 ns | 0 | |
32 - 64 ns | 0 | |
64 - 128 ns | 1163434 | ############## |
128 - 256 ns | 914102 | ############# |
256 - 512 ns | 884 | |
512 - 1024 ns | 613 | |
1 - 2 us | 31 | |
2 - 4 us | 17 | |
4 - 8 us | 7 | |
8 - 16 us | 123 | |
16 - 32 us | 83 | |
32 - 64 us | 0 | |
64 - 128 us | 0 | |
128 - 256 us | 0 | |
256 - 512 us | 0 | |
512 - 1024 us | 0 | |
1 - ... ms | 0 | |
Cc: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/builtin-ftrace.c | 24 +++++++++++++--------
tools/perf/util/bpf_ftrace.c | 2 ++
tools/perf/util/bpf_skel/func_latency.bpf.c | 6 ++++--
tools/perf/util/ftrace.h | 1 +
4 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index a8785dec5ca6..ad9ce1bfffa1 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -680,7 +680,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
return (done && !workload_exec_errno) ? 0 : -1;
}
-static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
+static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
+ bool use_nsec)
{
char *p, *q;
char *unit;
@@ -727,6 +728,9 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
if (!unit || strncmp(unit, " us", 3))
goto next;
+ if (use_nsec)
+ num *= 1000;
+
i = log2(num);
if (i < 0)
i = 0;
@@ -744,7 +748,7 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
strcat(linebuf, p);
}
-static void display_histogram(int buckets[])
+static void display_histogram(int buckets[], bool use_nsec)
{
int i;
int total = 0;
@@ -770,12 +774,12 @@ static void display_histogram(int buckets[])
for (i = 1; i < NUM_BUCKET - 1; i++) {
int start = (1 << (i - 1));
int stop = 1 << i;
- const char *unit = "us";
+ const char *unit = use_nsec ? "ns" : "us";
if (start >= 1024) {
start >>= 10;
stop >>= 10;
- unit = "ms";
+ unit = use_nsec ? "us" : "ms";
}
bar_len = buckets[i] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
@@ -785,8 +789,8 @@ static void display_histogram(int buckets[])
bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
- 1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
- bar_total - bar_len, "");
+ 1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
+ bar_len, bar, bar_total - bar_len, "");
}
@@ -913,7 +917,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
if (n < 0)
break;
- make_histogram(buckets, buf, n, line);
+ make_histogram(buckets, buf, n, line, ftrace->use_nsec);
}
}
@@ -930,12 +934,12 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
int n = read(trace_fd, buf, sizeof(buf) - 1);
if (n <= 0)
break;
- make_histogram(buckets, buf, n, line);
+ make_histogram(buckets, buf, n, line, ftrace->use_nsec);
}
read_func_latency(ftrace, buckets);
- display_histogram(buckets);
+ display_histogram(buckets, ftrace->use_nsec);
out:
close(trace_fd);
@@ -1171,6 +1175,8 @@ int cmd_ftrace(int argc, const char **argv)
OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
"Use BPF to measure function latency"),
#endif
+ OPT_BOOLEAN('n', "--use-nsec", &ftrace.use_nsec,
+ "Use nano-second histogram"),
OPT_PARENT(common_options),
};
const struct option *options = ftrace_options;
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index d756cc66eef3..4f4d3aaff37c 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -81,6 +81,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
}
}
+ skel->bss->use_nsec = ftrace->use_nsec;
+
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
false, func->name);
if (IS_ERR(skel->links.func_begin)) {
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index ea94187fe443..9d01e3af7479 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -39,6 +39,7 @@ struct {
int enabled = 0;
int has_cpu = 0;
int has_task = 0;
+int use_nsec = 0;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -80,6 +81,7 @@ int BPF_PROG(func_end)
{
__u64 tid;
__u64 *start;
+ __u64 cmp_base = use_nsec ? 1 : 1000;
if (!enabled)
return 0;
@@ -97,9 +99,9 @@ int BPF_PROG(func_end)
if (delta < 0)
return 0;
- // calculate index using delta in usec
+ // calculate index using delta
for (key = 0; key < (NUM_BUCKET - 1); key++) {
- if (delta < ((1000UL) << key))
+ if (delta < (cmp_base << key))
break;
}
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 887f68a185f7..a34cd15733b8 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -17,6 +17,7 @@ struct perf_ftrace {
struct list_head nograph_funcs;
unsigned long percpu_buffer_size;
bool inherit;
+ bool use_nsec;
int graph_depth;
int func_stack_trace;
int func_irq_info;
--
2.35.1.894.gb6a874cedc-goog
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] perf ftrace: Update documentation of ftrace command
2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
@ 2022-03-21 23:46 ` Namhyung Kim
2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo
1 sibling, 0 replies; 3+ messages in thread
From: Namhyung Kim @ 2022-03-21 23:46 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Jiri Olsa
Cc: Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen, Ian Rogers,
Stephane Eranian, Changbin Du
Add description of perf ftrace latency subcommand.
Cc: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/Documentation/perf-ftrace.txt | 75 ++++++++++++++++--------
1 file changed, 52 insertions(+), 23 deletions(-)
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 6e82b7cc0bf0..df4595563801 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -9,32 +9,24 @@ perf-ftrace - simple wrapper for kernel's ftrace functionality
SYNOPSIS
--------
[verse]
-'perf ftrace' <command>
+'perf ftrace' {trace|latency} <command>
DESCRIPTION
-----------
-The 'perf ftrace' command is a simple wrapper of kernel's ftrace
-functionality. It only supports single thread tracing currently and
-just reads trace_pipe in text and then write it to stdout.
+The 'perf ftrace' command provides a collection of subcommands which use
+kernel's ftrace infrastructure.
-The following options apply to perf ftrace.
+ 'perf ftrace trace' is a simple wrapper of the ftrace. It only supports
+ single thread tracing currently and just reads trace_pipe in text and then
+ write it to stdout.
-OPTIONS
--------
+ 'perf ftrace latency' calculates execution latency of a given function
+ (optionally with BPF) and display it as a histogram.
--t::
---tracer=::
- Tracer to use when neither -G nor -F option is not
- specified: function_graph or function.
+The following options apply to perf ftrace.
--v::
---verbose::
- Increase the verbosity level.
-
--F::
---funcs::
- List available functions to trace. It accepts a pattern to
- only list interested functions.
+COMMON OPTIONS
+--------------
-p::
--pid=::
@@ -43,10 +35,6 @@ OPTIONS
--tid=::
Trace on existing thread id (comma separated list).
--D::
---delay::
- Time (ms) to wait before starting tracing after program start.
-
-a::
--all-cpus::
Force system-wide collection. Scripts run without a <command>
@@ -61,6 +49,28 @@ OPTIONS
Ranges of CPUs are specified with -: 0-2.
Default is to trace on all online CPUs.
+-v::
+--verbose::
+ Increase the verbosity level.
+
+
+OPTIONS for 'perf ftrace trace'
+-------------------------------
+
+-t::
+--tracer=::
+ Tracer to use when neither -G nor -F option is not
+ specified: function_graph or function.
+
+-F::
+--funcs::
+ List available functions to trace. It accepts a pattern to
+ only list interested functions.
+
+-D::
+--delay::
+ Time (ms) to wait before starting tracing after program start.
+
-m::
--buffer-size::
Set the size of per-cpu tracing buffer, <size> is expected to
@@ -114,6 +124,25 @@ OPTIONS
thresh=<n> - Setup trace duration threshold in microseconds.
depth=<n> - Set max depth for function graph tracer to follow.
+
+OPTIONS for 'perf ftrace latency'
+---------------------------------
+
+-T::
+--trace-funcs=::
+ Set the function name to get the histogram. Unlike perf ftrace trace,
+ it only allows single function to calculate the histogram.
+
+-b::
+--use-bpf::
+ Use BPF to measure function latency instead of using the ftrace (it
+ uses function_graph tracer internally).
+
+-n::
+--use-nsec::
+ Use nano-second instead of micro-second as a base unit of the histogram.
+
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-trace[1]
--
2.35.1.894.gb6a874cedc-goog
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency
2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
@ 2022-03-22 20:42 ` Arnaldo Carvalho de Melo
1 sibling, 0 replies; 3+ messages in thread
From: Arnaldo Carvalho de Melo @ 2022-03-22 20:42 UTC (permalink / raw)
To: Namhyung Kim
Cc: Jiri Olsa, Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen,
Ian Rogers, Stephane Eranian, Changbin Du
Em Mon, Mar 21, 2022 at 04:46:08PM -0700, Namhyung Kim escreveu:
> Sometimes we want to see nano-second granularity.
>
> $ sudo perf ftrace latency -T dput -a -n sleep 1
> # DURATION | COUNT | GRAPH |
> 0 - 1 us | 0 | |
> 1 - 2 ns | 0 | |
> 2 - 4 ns | 0 | |
> 4 - 8 ns | 0 | |
> 8 - 16 ns | 0 | |
> 16 - 32 ns | 0 | |
> 32 - 64 ns | 0 | |
> 64 - 128 ns | 1163434 | ############## |
> 128 - 256 ns | 914102 | ############# |
> 256 - 512 ns | 884 | |
> 512 - 1024 ns | 613 | |
> 1 - 2 us | 31 | |
> 2 - 4 us | 17 | |
> 4 - 8 us | 7 | |
> 8 - 16 us | 123 | |
Thanks, applied.
- Arnaldo
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2022-03-22 20:43 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox