public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency
@ 2022-03-21 23:46 Namhyung Kim
  2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
  2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo
  0 siblings, 2 replies; 3+ messages in thread
From: Namhyung Kim @ 2022-03-21 23:46 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Jiri Olsa
  Cc: Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen, Ian Rogers,
	Stephane Eranian, Changbin Du

Sometimes we want to see nano-second granularity.

  $ sudo perf ftrace latency -T dput -a sleep 1
  #   DURATION     |      COUNT | GRAPH                          |
       0 - 1    us |    2098375 | #############################  |
       1 - 2    us |         61 |                                |
       2 - 4    us |         33 |                                |
       4 - 8    us |         13 |                                |
       8 - 16   us |        124 |                                |
      16 - 32   us |        123 |                                |
      32 - 64   us |          1 |                                |
      64 - 128  us |          0 |                                |
     128 - 256  us |          1 |                                |
     256 - 512  us |          0 |                                |
     512 - 1024 us |          0 |                                |
       1 - 2    ms |          0 |                                |
       2 - 4    ms |          0 |                                |
       4 - 8    ms |          0 |                                |
       8 - 16   ms |          0 |                                |
      16 - 32   ms |          0 |                                |
      32 - 64   ms |          0 |                                |
      64 - 128  ms |          0 |                                |
     128 - 256  ms |          0 |                                |
     256 - 512  ms |          0 |                                |
     512 - 1024 ms |          0 |                                |
       1 - ...   s |          0 |                                |

  $ sudo perf ftrace latency -T dput -a -n sleep 1
  #   DURATION     |      COUNT | GRAPH                          |
       0 - 1    us |          0 |                                |
       1 - 2    ns |          0 |                                |
       2 - 4    ns |          0 |                                |
       4 - 8    ns |          0 |                                |
       8 - 16   ns |          0 |                                |
      16 - 32   ns |          0 |                                |
      32 - 64   ns |          0 |                                |
      64 - 128  ns |    1163434 | ##############                 |
     128 - 256  ns |     914102 | #############                  |
     256 - 512  ns |        884 |                                |
     512 - 1024 ns |        613 |                                |
       1 - 2    us |         31 |                                |
       2 - 4    us |         17 |                                |
       4 - 8    us |          7 |                                |
       8 - 16   us |        123 |                                |
      16 - 32   us |         83 |                                |
      32 - 64   us |          0 |                                |
      64 - 128  us |          0 |                                |
     128 - 256  us |          0 |                                |
     256 - 512  us |          0 |                                |
     512 - 1024 us |          0 |                                |
       1 - ...  ms |          0 |                                |

Cc: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-ftrace.c                 | 24 +++++++++++++--------
 tools/perf/util/bpf_ftrace.c                |  2 ++
 tools/perf/util/bpf_skel/func_latency.bpf.c |  6 ++++--
 tools/perf/util/ftrace.h                    |  1 +
 4 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index a8785dec5ca6..ad9ce1bfffa1 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -680,7 +680,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
 	return (done && !workload_exec_errno) ? 0 : -1;
 }
 
-static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
+static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
+			   bool use_nsec)
 {
 	char *p, *q;
 	char *unit;
@@ -727,6 +728,9 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
 		if (!unit || strncmp(unit, " us", 3))
 			goto next;
 
+		if (use_nsec)
+			num *= 1000;
+
 		i = log2(num);
 		if (i < 0)
 			i = 0;
@@ -744,7 +748,7 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
 	strcat(linebuf, p);
 }
 
-static void display_histogram(int buckets[])
+static void display_histogram(int buckets[], bool use_nsec)
 {
 	int i;
 	int total = 0;
@@ -770,12 +774,12 @@ static void display_histogram(int buckets[])
 	for (i = 1; i < NUM_BUCKET - 1; i++) {
 		int start = (1 << (i - 1));
 		int stop = 1 << i;
-		const char *unit = "us";
+		const char *unit = use_nsec ? "ns" : "us";
 
 		if (start >= 1024) {
 			start >>= 10;
 			stop >>= 10;
-			unit = "ms";
+			unit = use_nsec ? "us" : "ms";
 		}
 		bar_len = buckets[i] * bar_total / total;
 		printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
@@ -785,8 +789,8 @@ static void display_histogram(int buckets[])
 
 	bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
 	printf("  %4d - %-4s %s | %10d | %.*s%*s |\n",
-	       1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
-	       bar_total - bar_len, "");
+	       1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
+	       bar_len, bar, bar_total - bar_len, "");
 
 }
 
@@ -913,7 +917,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
 			if (n < 0)
 				break;
 
-			make_histogram(buckets, buf, n, line);
+			make_histogram(buckets, buf, n, line, ftrace->use_nsec);
 		}
 	}
 
@@ -930,12 +934,12 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
 		int n = read(trace_fd, buf, sizeof(buf) - 1);
 		if (n <= 0)
 			break;
-		make_histogram(buckets, buf, n, line);
+		make_histogram(buckets, buf, n, line, ftrace->use_nsec);
 	}
 
 	read_func_latency(ftrace, buckets);
 
-	display_histogram(buckets);
+	display_histogram(buckets, ftrace->use_nsec);
 
 out:
 	close(trace_fd);
@@ -1171,6 +1175,8 @@ int cmd_ftrace(int argc, const char **argv)
 	OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
 		    "Use BPF to measure function latency"),
 #endif
+	OPT_BOOLEAN('n', "--use-nsec", &ftrace.use_nsec,
+		    "Use nano-second histogram"),
 	OPT_PARENT(common_options),
 	};
 	const struct option *options = ftrace_options;
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index d756cc66eef3..4f4d3aaff37c 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -81,6 +81,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
 		}
 	}
 
+	skel->bss->use_nsec = ftrace->use_nsec;
+
 	skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
 							    false, func->name);
 	if (IS_ERR(skel->links.func_begin)) {
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index ea94187fe443..9d01e3af7479 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -39,6 +39,7 @@ struct {
 int enabled = 0;
 int has_cpu = 0;
 int has_task = 0;
+int use_nsec = 0;
 
 SEC("kprobe/func")
 int BPF_PROG(func_begin)
@@ -80,6 +81,7 @@ int BPF_PROG(func_end)
 {
 	__u64 tid;
 	__u64 *start;
+	__u64 cmp_base = use_nsec ? 1 : 1000;
 
 	if (!enabled)
 		return 0;
@@ -97,9 +99,9 @@ int BPF_PROG(func_end)
 		if (delta < 0)
 			return 0;
 
-		// calculate index using delta in usec
+		// calculate index using delta
 		for (key = 0; key < (NUM_BUCKET - 1); key++) {
-			if (delta < ((1000UL) << key))
+			if (delta < (cmp_base << key))
 				break;
 		}
 
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 887f68a185f7..a34cd15733b8 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -17,6 +17,7 @@ struct perf_ftrace {
 	struct list_head	nograph_funcs;
 	unsigned long		percpu_buffer_size;
 	bool			inherit;
+	bool			use_nsec;
 	int			graph_depth;
 	int			func_stack_trace;
 	int			func_irq_info;
-- 
2.35.1.894.gb6a874cedc-goog


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] perf ftrace: Update documentation of ftrace command
  2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
@ 2022-03-21 23:46 ` Namhyung Kim
  2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 3+ messages in thread
From: Namhyung Kim @ 2022-03-21 23:46 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Jiri Olsa
  Cc: Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen, Ian Rogers,
	Stephane Eranian, Changbin Du

Add description of perf ftrace latency subcommand.

Cc: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/Documentation/perf-ftrace.txt | 75 ++++++++++++++++--------
 1 file changed, 52 insertions(+), 23 deletions(-)

diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 6e82b7cc0bf0..df4595563801 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -9,32 +9,24 @@ perf-ftrace - simple wrapper for kernel's ftrace functionality
 SYNOPSIS
 --------
 [verse]
-'perf ftrace' <command>
+'perf ftrace' {trace|latency} <command>
 
 DESCRIPTION
 -----------
-The 'perf ftrace' command is a simple wrapper of kernel's ftrace
-functionality.  It only supports single thread tracing currently and
-just reads trace_pipe in text and then write it to stdout.
+The 'perf ftrace' command provides a collection of subcommands which use
+kernel's ftrace infrastructure.
 
-The following options apply to perf ftrace.
+  'perf ftrace trace' is a simple wrapper of the ftrace.  It only supports
+  single thread tracing currently and just reads trace_pipe in text and then
+  write it to stdout.
 
-OPTIONS
--------
+  'perf ftrace latency' calculates execution latency of a given function
+  (optionally with BPF) and display it as a histogram.
 
--t::
---tracer=::
-	Tracer to use when neither -G nor -F option is not
-	specified: function_graph or function.
+The following options apply to perf ftrace.
 
--v::
---verbose::
-        Increase the verbosity level.
-
--F::
---funcs::
-        List available functions to trace. It accepts a pattern to
-        only list interested functions.
+COMMON OPTIONS
+--------------
 
 -p::
 --pid=::
@@ -43,10 +35,6 @@ OPTIONS
 --tid=::
 	Trace on existing thread id (comma separated list).
 
--D::
---delay::
-	Time (ms) to wait before starting tracing after program start.
-
 -a::
 --all-cpus::
 	Force system-wide collection.  Scripts run without a <command>
@@ -61,6 +49,28 @@ OPTIONS
 	Ranges of CPUs are specified with -: 0-2.
 	Default is to trace on all online CPUs.
 
+-v::
+--verbose::
+        Increase the verbosity level.
+
+
+OPTIONS for 'perf ftrace trace'
+-------------------------------
+
+-t::
+--tracer=::
+	Tracer to use when neither -G nor -F option is not
+	specified: function_graph or function.
+
+-F::
+--funcs::
+        List available functions to trace. It accepts a pattern to
+        only list interested functions.
+
+-D::
+--delay::
+	Time (ms) to wait before starting tracing after program start.
+
 -m::
 --buffer-size::
 	Set the size of per-cpu tracing buffer, <size> is expected to
@@ -114,6 +124,25 @@ OPTIONS
 	  thresh=<n>   - Setup trace duration threshold in microseconds.
 	  depth=<n>    - Set max depth for function graph tracer to follow.
 
+
+OPTIONS for 'perf ftrace latency'
+---------------------------------
+
+-T::
+--trace-funcs=::
+	Set the function name to get the histogram.  Unlike perf ftrace trace,
+	it only allows single function to calculate the histogram.
+
+-b::
+--use-bpf::
+	Use BPF to measure function latency instead of using the ftrace (it
+	uses function_graph tracer internally).
+
+-n::
+--use-nsec::
+	Use nano-second instead of micro-second as a base unit of the histogram.
+
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-trace[1]
-- 
2.35.1.894.gb6a874cedc-goog


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency
  2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
  2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
@ 2022-03-22 20:42 ` Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 3+ messages in thread
From: Arnaldo Carvalho de Melo @ 2022-03-22 20:42 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Jiri Olsa, Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen,
	Ian Rogers, Stephane Eranian, Changbin Du

Em Mon, Mar 21, 2022 at 04:46:08PM -0700, Namhyung Kim escreveu:
> Sometimes we want to see nano-second granularity.
> 
>   $ sudo perf ftrace latency -T dput -a -n sleep 1
>   #   DURATION     |      COUNT | GRAPH                          |
>        0 - 1    us |          0 |                                |
>        1 - 2    ns |          0 |                                |
>        2 - 4    ns |          0 |                                |
>        4 - 8    ns |          0 |                                |
>        8 - 16   ns |          0 |                                |
>       16 - 32   ns |          0 |                                |
>       32 - 64   ns |          0 |                                |
>       64 - 128  ns |    1163434 | ##############                 |
>      128 - 256  ns |     914102 | #############                  |
>      256 - 512  ns |        884 |                                |
>      512 - 1024 ns |        613 |                                |
>        1 - 2    us |         31 |                                |
>        2 - 4    us |         17 |                                |
>        4 - 8    us |          7 |                                |
>        8 - 16   us |        123 |                                |


Thanks, applied.

- Arnaldo


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-03-22 20:43 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox