All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency
@ 2022-03-21 23:46 Namhyung Kim
  2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
  2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo
  0 siblings, 2 replies; 3+ messages in thread
From: Namhyung Kim @ 2022-03-21 23:46 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Jiri Olsa
  Cc: Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen, Ian Rogers,
	Stephane Eranian, Changbin Du

Sometimes we want to see nano-second granularity.

  $ sudo perf ftrace latency -T dput -a sleep 1
  #   DURATION     |      COUNT | GRAPH                          |
       0 - 1    us |    2098375 | #############################  |
       1 - 2    us |         61 |                                |
       2 - 4    us |         33 |                                |
       4 - 8    us |         13 |                                |
       8 - 16   us |        124 |                                |
      16 - 32   us |        123 |                                |
      32 - 64   us |          1 |                                |
      64 - 128  us |          0 |                                |
     128 - 256  us |          1 |                                |
     256 - 512  us |          0 |                                |
     512 - 1024 us |          0 |                                |
       1 - 2    ms |          0 |                                |
       2 - 4    ms |          0 |                                |
       4 - 8    ms |          0 |                                |
       8 - 16   ms |          0 |                                |
      16 - 32   ms |          0 |                                |
      32 - 64   ms |          0 |                                |
      64 - 128  ms |          0 |                                |
     128 - 256  ms |          0 |                                |
     256 - 512  ms |          0 |                                |
     512 - 1024 ms |          0 |                                |
       1 - ...   s |          0 |                                |

  $ sudo perf ftrace latency -T dput -a -n sleep 1
  #   DURATION     |      COUNT | GRAPH                          |
       0 - 1    us |          0 |                                |
       1 - 2    ns |          0 |                                |
       2 - 4    ns |          0 |                                |
       4 - 8    ns |          0 |                                |
       8 - 16   ns |          0 |                                |
      16 - 32   ns |          0 |                                |
      32 - 64   ns |          0 |                                |
      64 - 128  ns |    1163434 | ##############                 |
     128 - 256  ns |     914102 | #############                  |
     256 - 512  ns |        884 |                                |
     512 - 1024 ns |        613 |                                |
       1 - 2    us |         31 |                                |
       2 - 4    us |         17 |                                |
       4 - 8    us |          7 |                                |
       8 - 16   us |        123 |                                |
      16 - 32   us |         83 |                                |
      32 - 64   us |          0 |                                |
      64 - 128  us |          0 |                                |
     128 - 256  us |          0 |                                |
     256 - 512  us |          0 |                                |
     512 - 1024 us |          0 |                                |
       1 - ...  ms |          0 |                                |

Cc: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-ftrace.c                 | 24 +++++++++++++--------
 tools/perf/util/bpf_ftrace.c                |  2 ++
 tools/perf/util/bpf_skel/func_latency.bpf.c |  6 ++++--
 tools/perf/util/ftrace.h                    |  1 +
 4 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index a8785dec5ca6..ad9ce1bfffa1 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -680,7 +680,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
 	return (done && !workload_exec_errno) ? 0 : -1;
 }
 
-static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
+static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
+			   bool use_nsec)
 {
 	char *p, *q;
 	char *unit;
@@ -727,6 +728,9 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
 		if (!unit || strncmp(unit, " us", 3))
 			goto next;
 
+		if (use_nsec)
+			num *= 1000;
+
 		i = log2(num);
 		if (i < 0)
 			i = 0;
@@ -744,7 +748,7 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
 	strcat(linebuf, p);
 }
 
-static void display_histogram(int buckets[])
+static void display_histogram(int buckets[], bool use_nsec)
 {
 	int i;
 	int total = 0;
@@ -770,12 +774,12 @@ static void display_histogram(int buckets[])
 	for (i = 1; i < NUM_BUCKET - 1; i++) {
 		int start = (1 << (i - 1));
 		int stop = 1 << i;
-		const char *unit = "us";
+		const char *unit = use_nsec ? "ns" : "us";
 
 		if (start >= 1024) {
 			start >>= 10;
 			stop >>= 10;
-			unit = "ms";
+			unit = use_nsec ? "us" : "ms";
 		}
 		bar_len = buckets[i] * bar_total / total;
 		printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
@@ -785,8 +789,8 @@ static void display_histogram(int buckets[])
 
 	bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
 	printf("  %4d - %-4s %s | %10d | %.*s%*s |\n",
-	       1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
-	       bar_total - bar_len, "");
+	       1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
+	       bar_len, bar, bar_total - bar_len, "");
 
 }
 
@@ -913,7 +917,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
 			if (n < 0)
 				break;
 
-			make_histogram(buckets, buf, n, line);
+			make_histogram(buckets, buf, n, line, ftrace->use_nsec);
 		}
 	}
 
@@ -930,12 +934,12 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
 		int n = read(trace_fd, buf, sizeof(buf) - 1);
 		if (n <= 0)
 			break;
-		make_histogram(buckets, buf, n, line);
+		make_histogram(buckets, buf, n, line, ftrace->use_nsec);
 	}
 
 	read_func_latency(ftrace, buckets);
 
-	display_histogram(buckets);
+	display_histogram(buckets, ftrace->use_nsec);
 
 out:
 	close(trace_fd);
@@ -1171,6 +1175,8 @@ int cmd_ftrace(int argc, const char **argv)
 	OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
 		    "Use BPF to measure function latency"),
 #endif
+	OPT_BOOLEAN('n', "--use-nsec", &ftrace.use_nsec,
+		    "Use nano-second histogram"),
 	OPT_PARENT(common_options),
 	};
 	const struct option *options = ftrace_options;
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index d756cc66eef3..4f4d3aaff37c 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -81,6 +81,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
 		}
 	}
 
+	skel->bss->use_nsec = ftrace->use_nsec;
+
 	skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
 							    false, func->name);
 	if (IS_ERR(skel->links.func_begin)) {
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index ea94187fe443..9d01e3af7479 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -39,6 +39,7 @@ struct {
 int enabled = 0;
 int has_cpu = 0;
 int has_task = 0;
+int use_nsec = 0;
 
 SEC("kprobe/func")
 int BPF_PROG(func_begin)
@@ -80,6 +81,7 @@ int BPF_PROG(func_end)
 {
 	__u64 tid;
 	__u64 *start;
+	__u64 cmp_base = use_nsec ? 1 : 1000;
 
 	if (!enabled)
 		return 0;
@@ -97,9 +99,9 @@ int BPF_PROG(func_end)
 		if (delta < 0)
 			return 0;
 
-		// calculate index using delta in usec
+		// calculate index using delta
 		for (key = 0; key < (NUM_BUCKET - 1); key++) {
-			if (delta < ((1000UL) << key))
+			if (delta < (cmp_base << key))
 				break;
 		}
 
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 887f68a185f7..a34cd15733b8 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -17,6 +17,7 @@ struct perf_ftrace {
 	struct list_head	nograph_funcs;
 	unsigned long		percpu_buffer_size;
 	bool			inherit;
+	bool			use_nsec;
 	int			graph_depth;
 	int			func_stack_trace;
 	int			func_irq_info;
-- 
2.35.1.894.gb6a874cedc-goog


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] perf ftrace: Update documentation of ftrace command
  2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
@ 2022-03-21 23:46 ` Namhyung Kim
  2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 3+ messages in thread
From: Namhyung Kim @ 2022-03-21 23:46 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Jiri Olsa
  Cc: Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen, Ian Rogers,
	Stephane Eranian, Changbin Du

Add description of perf ftrace latency subcommand.

Cc: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/Documentation/perf-ftrace.txt | 75 ++++++++++++++++--------
 1 file changed, 52 insertions(+), 23 deletions(-)

diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 6e82b7cc0bf0..df4595563801 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -9,32 +9,24 @@ perf-ftrace - simple wrapper for kernel's ftrace functionality
 SYNOPSIS
 --------
 [verse]
-'perf ftrace' <command>
+'perf ftrace' {trace|latency} <command>
 
 DESCRIPTION
 -----------
-The 'perf ftrace' command is a simple wrapper of kernel's ftrace
-functionality.  It only supports single thread tracing currently and
-just reads trace_pipe in text and then write it to stdout.
+The 'perf ftrace' command provides a collection of subcommands which use
+kernel's ftrace infrastructure.
 
-The following options apply to perf ftrace.
+  'perf ftrace trace' is a simple wrapper of the ftrace.  It only supports
+  single thread tracing currently and just reads trace_pipe in text and then
+  writes it to stdout.
 
-OPTIONS
--------
+  'perf ftrace latency' calculates execution latency of a given function
+  (optionally with BPF) and displays it as a histogram.
 
--t::
---tracer=::
-	Tracer to use when neither -G nor -F option is not
-	specified: function_graph or function.
+The following options apply to perf ftrace.
 
--v::
---verbose::
-        Increase the verbosity level.
-
--F::
---funcs::
-        List available functions to trace. It accepts a pattern to
-        only list interested functions.
+COMMON OPTIONS
+--------------
 
 -p::
 --pid=::
@@ -43,10 +35,6 @@ OPTIONS
 --tid=::
 	Trace on existing thread id (comma separated list).
 
--D::
---delay::
-	Time (ms) to wait before starting tracing after program start.
-
 -a::
 --all-cpus::
 	Force system-wide collection.  Scripts run without a <command>
@@ -61,6 +49,28 @@ OPTIONS
 	Ranges of CPUs are specified with -: 0-2.
 	Default is to trace on all online CPUs.
 
+-v::
+--verbose::
+        Increase the verbosity level.
+
+
+OPTIONS for 'perf ftrace trace'
+-------------------------------
+
+-t::
+--tracer=::
+	Tracer to use when neither -G nor -F option is
+	specified: function_graph or function.
+
+-F::
+--funcs::
+        List available functions to trace. It accepts a pattern to
+        only list interested functions.
+
+-D::
+--delay::
+	Time (ms) to wait before starting tracing after program start.
+
 -m::
 --buffer-size::
 	Set the size of per-cpu tracing buffer, <size> is expected to
@@ -114,6 +124,25 @@ OPTIONS
 	  thresh=<n>   - Setup trace duration threshold in microseconds.
 	  depth=<n>    - Set max depth for function graph tracer to follow.
 
+
+OPTIONS for 'perf ftrace latency'
+---------------------------------
+
+-T::
+--trace-funcs=::
+	Set the function name to get the histogram.  Unlike perf ftrace trace,
+	it only allows a single function to calculate the histogram.
+
+-b::
+--use-bpf::
+	Use BPF to measure function latency instead of using the ftrace (it
+	uses function_graph tracer internally).
+
+-n::
+--use-nsec::
+	Use nanoseconds instead of microseconds as the base unit of the histogram.
+
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-trace[1]
-- 
2.35.1.894.gb6a874cedc-goog


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency
  2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
  2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
@ 2022-03-22 20:42 ` Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 3+ messages in thread
From: Arnaldo Carvalho de Melo @ 2022-03-22 20:42 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Jiri Olsa, Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen,
	Ian Rogers, Stephane Eranian, Changbin Du

Em Mon, Mar 21, 2022 at 04:46:08PM -0700, Namhyung Kim escreveu:
> Sometimes we want to see nano-second granularity.
> 
>   $ sudo perf ftrace latency -T dput -a -n sleep 1
>   #   DURATION     |      COUNT | GRAPH                          |
>        0 - 1    us |          0 |                                |
>        1 - 2    ns |          0 |                                |
>        2 - 4    ns |          0 |                                |
>        4 - 8    ns |          0 |                                |
>        8 - 16   ns |          0 |                                |
>       16 - 32   ns |          0 |                                |
>       32 - 64   ns |          0 |                                |
>       64 - 128  ns |    1163434 | ##############                 |
>      128 - 256  ns |     914102 | #############                  |
>      256 - 512  ns |        884 |                                |
>      512 - 1024 ns |        613 |                                |
>        1 - 2    us |         31 |                                |
>        2 - 4    us |         17 |                                |
>        4 - 8    us |          7 |                                |
>        8 - 16   us |        123 |                                |


Thanks, applied.

- Arnaldo


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-03-22 20:43 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.