* [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency
@ 2022-03-21 23:46 Namhyung Kim
2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo
0 siblings, 2 replies; 3+ messages in thread
From: Namhyung Kim @ 2022-03-21 23:46 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Jiri Olsa
Cc: Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen, Ian Rogers,
Stephane Eranian, Changbin Du
Sometimes we want to see nano-second granularity.
$ sudo perf ftrace latency -T dput -a sleep 1
# DURATION | COUNT | GRAPH |
0 - 1 us | 2098375 | ############################# |
1 - 2 us | 61 | |
2 - 4 us | 33 | |
4 - 8 us | 13 | |
8 - 16 us | 124 | |
16 - 32 us | 123 | |
32 - 64 us | 1 | |
64 - 128 us | 0 | |
128 - 256 us | 1 | |
256 - 512 us | 0 | |
512 - 1024 us | 0 | |
1 - 2 ms | 0 | |
2 - 4 ms | 0 | |
4 - 8 ms | 0 | |
8 - 16 ms | 0 | |
16 - 32 ms | 0 | |
32 - 64 ms | 0 | |
64 - 128 ms | 0 | |
128 - 256 ms | 0 | |
256 - 512 ms | 0 | |
512 - 1024 ms | 0 | |
1 - ... s | 0 | |
$ sudo perf ftrace latency -T dput -a -n sleep 1
# DURATION | COUNT | GRAPH |
0 - 1 us | 0 | |
1 - 2 ns | 0 | |
2 - 4 ns | 0 | |
4 - 8 ns | 0 | |
8 - 16 ns | 0 | |
16 - 32 ns | 0 | |
32 - 64 ns | 0 | |
64 - 128 ns | 1163434 | ############## |
128 - 256 ns | 914102 | ############# |
256 - 512 ns | 884 | |
512 - 1024 ns | 613 | |
1 - 2 us | 31 | |
2 - 4 us | 17 | |
4 - 8 us | 7 | |
8 - 16 us | 123 | |
16 - 32 us | 83 | |
32 - 64 us | 0 | |
64 - 128 us | 0 | |
128 - 256 us | 0 | |
256 - 512 us | 0 | |
512 - 1024 us | 0 | |
1 - ... ms | 0 | |
Cc: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/builtin-ftrace.c | 24 +++++++++++++--------
tools/perf/util/bpf_ftrace.c | 2 ++
tools/perf/util/bpf_skel/func_latency.bpf.c | 6 ++++--
tools/perf/util/ftrace.h | 1 +
4 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index a8785dec5ca6..ad9ce1bfffa1 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -680,7 +680,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
return (done && !workload_exec_errno) ? 0 : -1;
}
-static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
+static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
+ bool use_nsec)
{
char *p, *q;
char *unit;
@@ -727,6 +728,9 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
if (!unit || strncmp(unit, " us", 3))
goto next;
+ if (use_nsec)
+ num *= 1000;
+
i = log2(num);
if (i < 0)
i = 0;
@@ -744,7 +748,7 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
strcat(linebuf, p);
}
-static void display_histogram(int buckets[])
+static void display_histogram(int buckets[], bool use_nsec)
{
int i;
int total = 0;
@@ -770,12 +774,12 @@ static void display_histogram(int buckets[])
for (i = 1; i < NUM_BUCKET - 1; i++) {
int start = (1 << (i - 1));
int stop = 1 << i;
- const char *unit = "us";
+ const char *unit = use_nsec ? "ns" : "us";
if (start >= 1024) {
start >>= 10;
stop >>= 10;
- unit = "ms";
+ unit = use_nsec ? "us" : "ms";
}
bar_len = buckets[i] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
@@ -785,8 +789,8 @@ static void display_histogram(int buckets[])
bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
- 1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
- bar_total - bar_len, "");
+ 1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
+ bar_len, bar, bar_total - bar_len, "");
}
@@ -913,7 +917,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
if (n < 0)
break;
- make_histogram(buckets, buf, n, line);
+ make_histogram(buckets, buf, n, line, ftrace->use_nsec);
}
}
@@ -930,12 +934,12 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
int n = read(trace_fd, buf, sizeof(buf) - 1);
if (n <= 0)
break;
- make_histogram(buckets, buf, n, line);
+ make_histogram(buckets, buf, n, line, ftrace->use_nsec);
}
read_func_latency(ftrace, buckets);
- display_histogram(buckets);
+ display_histogram(buckets, ftrace->use_nsec);
out:
close(trace_fd);
@@ -1171,6 +1175,8 @@ int cmd_ftrace(int argc, const char **argv)
OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
"Use BPF to measure function latency"),
#endif
+ OPT_BOOLEAN('n', "--use-nsec", &ftrace.use_nsec,
+ "Use nano-second histogram"),
OPT_PARENT(common_options),
};
const struct option *options = ftrace_options;
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index d756cc66eef3..4f4d3aaff37c 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -81,6 +81,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
}
}
+ skel->bss->use_nsec = ftrace->use_nsec;
+
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
false, func->name);
if (IS_ERR(skel->links.func_begin)) {
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index ea94187fe443..9d01e3af7479 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -39,6 +39,7 @@ struct {
int enabled = 0;
int has_cpu = 0;
int has_task = 0;
+int use_nsec = 0;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -80,6 +81,7 @@ int BPF_PROG(func_end)
{
__u64 tid;
__u64 *start;
+ __u64 cmp_base = use_nsec ? 1 : 1000;
if (!enabled)
return 0;
@@ -97,9 +99,9 @@ int BPF_PROG(func_end)
if (delta < 0)
return 0;
- // calculate index using delta in usec
+ // calculate index using delta
for (key = 0; key < (NUM_BUCKET - 1); key++) {
- if (delta < ((1000UL) << key))
+ if (delta < (cmp_base << key))
break;
}
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 887f68a185f7..a34cd15733b8 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -17,6 +17,7 @@ struct perf_ftrace {
struct list_head nograph_funcs;
unsigned long percpu_buffer_size;
bool inherit;
+ bool use_nsec;
int graph_depth;
int func_stack_trace;
int func_irq_info;
--
2.35.1.894.gb6a874cedc-goog
^ permalink raw reply related [flat|nested] 3+ messages in thread* [PATCH 2/2] perf ftrace: Update documentation of ftrace command
2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
@ 2022-03-21 23:46 ` Namhyung Kim
2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo
1 sibling, 0 replies; 3+ messages in thread
From: Namhyung Kim @ 2022-03-21 23:46 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Jiri Olsa
Cc: Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen, Ian Rogers,
Stephane Eranian, Changbin Du
Add description of perf ftrace latency subcommand.
Cc: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/Documentation/perf-ftrace.txt | 75 ++++++++++++++++--------
1 file changed, 52 insertions(+), 23 deletions(-)
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 6e82b7cc0bf0..df4595563801 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -9,32 +9,24 @@ perf-ftrace - simple wrapper for kernel's ftrace functionality
SYNOPSIS
--------
[verse]
-'perf ftrace' <command>
+'perf ftrace' {trace|latency} <command>
DESCRIPTION
-----------
-The 'perf ftrace' command is a simple wrapper of kernel's ftrace
-functionality. It only supports single thread tracing currently and
-just reads trace_pipe in text and then write it to stdout.
+The 'perf ftrace' command provides a collection of subcommands which use
+kernel's ftrace infrastructure.
-The following options apply to perf ftrace.
+ 'perf ftrace trace' is a simple wrapper of the ftrace. It only supports
+ single thread tracing currently and just reads trace_pipe in text and then
+ write it to stdout.
-OPTIONS
--------
+ 'perf ftrace latency' calculates execution latency of a given function
+ (optionally with BPF) and display it as a histogram.
--t::
---tracer=::
- Tracer to use when neither -G nor -F option is not
- specified: function_graph or function.
+The following options apply to perf ftrace.
--v::
---verbose::
- Increase the verbosity level.
-
--F::
---funcs::
- List available functions to trace. It accepts a pattern to
- only list interested functions.
+COMMON OPTIONS
+--------------
-p::
--pid=::
@@ -43,10 +35,6 @@ OPTIONS
--tid=::
Trace on existing thread id (comma separated list).
--D::
---delay::
- Time (ms) to wait before starting tracing after program start.
-
-a::
--all-cpus::
Force system-wide collection. Scripts run without a <command>
@@ -61,6 +49,28 @@ OPTIONS
Ranges of CPUs are specified with -: 0-2.
Default is to trace on all online CPUs.
+-v::
+--verbose::
+ Increase the verbosity level.
+
+
+OPTIONS for 'perf ftrace trace'
+-------------------------------
+
+-t::
+--tracer=::
+ Tracer to use when neither -G nor -F option is not
+ specified: function_graph or function.
+
+-F::
+--funcs::
+ List available functions to trace. It accepts a pattern to
+ only list interested functions.
+
+-D::
+--delay::
+ Time (ms) to wait before starting tracing after program start.
+
-m::
--buffer-size::
Set the size of per-cpu tracing buffer, <size> is expected to
@@ -114,6 +124,25 @@ OPTIONS
thresh=<n> - Setup trace duration threshold in microseconds.
depth=<n> - Set max depth for function graph tracer to follow.
+
+OPTIONS for 'perf ftrace latency'
+---------------------------------
+
+-T::
+--trace-funcs=::
+ Set the function name to get the histogram. Unlike perf ftrace trace,
+ it only allows single function to calculate the histogram.
+
+-b::
+--use-bpf::
+ Use BPF to measure function latency instead of using the ftrace (it
+ uses function_graph tracer internally).
+
+-n::
+--use-nsec::
+ Use nano-second instead of micro-second as a base unit of the histogram.
+
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-trace[1]
--
2.35.1.894.gb6a874cedc-goog
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency
2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
@ 2022-03-22 20:42 ` Arnaldo Carvalho de Melo
1 sibling, 0 replies; 3+ messages in thread
From: Arnaldo Carvalho de Melo @ 2022-03-22 20:42 UTC (permalink / raw)
To: Namhyung Kim
Cc: Jiri Olsa, Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen,
Ian Rogers, Stephane Eranian, Changbin Du
Em Mon, Mar 21, 2022 at 04:46:08PM -0700, Namhyung Kim escreveu:
> Sometimes we want to see nano-second granularity.
>
> $ sudo perf ftrace latency -T dput -a -n sleep 1
> # DURATION | COUNT | GRAPH |
> 0 - 1 us | 0 | |
> 1 - 2 ns | 0 | |
> 2 - 4 ns | 0 | |
> 4 - 8 ns | 0 | |
> 8 - 16 ns | 0 | |
> 16 - 32 ns | 0 | |
> 32 - 64 ns | 0 | |
> 64 - 128 ns | 1163434 | ############## |
> 128 - 256 ns | 914102 | ############# |
> 256 - 512 ns | 884 | |
> 512 - 1024 ns | 613 | |
> 1 - 2 us | 31 | |
> 2 - 4 us | 17 | |
> 4 - 8 us | 7 | |
> 8 - 16 us | 123 | |
Thanks, applied.
- Arnaldo
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2022-03-22 20:43 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.