All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency
@ 2022-03-21 23:46 Namhyung Kim
  2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
  2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo
  0 siblings, 2 replies; 3+ messages in thread
From: Namhyung Kim @ 2022-03-21 23:46 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Jiri Olsa
  Cc: Ingo Molnar, Peter Zijlstra, LKML, Andi Kleen, Ian Rogers,
	Stephane Eranian, Changbin Du

Sometimes we want to see the latency histogram with nanosecond granularity.

  $ sudo perf ftrace latency -T dput -a sleep 1
  #   DURATION     |      COUNT | GRAPH                          |
       0 - 1    us |    2098375 | #############################  |
       1 - 2    us |         61 |                                |
       2 - 4    us |         33 |                                |
       4 - 8    us |         13 |                                |
       8 - 16   us |        124 |                                |
      16 - 32   us |        123 |                                |
      32 - 64   us |          1 |                                |
      64 - 128  us |          0 |                                |
     128 - 256  us |          1 |                                |
     256 - 512  us |          0 |                                |
     512 - 1024 us |          0 |                                |
       1 - 2    ms |          0 |                                |
       2 - 4    ms |          0 |                                |
       4 - 8    ms |          0 |                                |
       8 - 16   ms |          0 |                                |
      16 - 32   ms |          0 |                                |
      32 - 64   ms |          0 |                                |
      64 - 128  ms |          0 |                                |
     128 - 256  ms |          0 |                                |
     256 - 512  ms |          0 |                                |
     512 - 1024 ms |          0 |                                |
       1 - ...   s |          0 |                                |

  $ sudo perf ftrace latency -T dput -a -n sleep 1
  #   DURATION     |      COUNT | GRAPH                          |
       0 - 1    us |          0 |                                |
       1 - 2    ns |          0 |                                |
       2 - 4    ns |          0 |                                |
       4 - 8    ns |          0 |                                |
       8 - 16   ns |          0 |                                |
      16 - 32   ns |          0 |                                |
      32 - 64   ns |          0 |                                |
      64 - 128  ns |    1163434 | ##############                 |
     128 - 256  ns |     914102 | #############                  |
     256 - 512  ns |        884 |                                |
     512 - 1024 ns |        613 |                                |
       1 - 2    us |         31 |                                |
       2 - 4    us |         17 |                                |
       4 - 8    us |          7 |                                |
       8 - 16   us |        123 |                                |
      16 - 32   us |         83 |                                |
      32 - 64   us |          0 |                                |
      64 - 128  us |          0 |                                |
     128 - 256  us |          0 |                                |
     256 - 512  us |          0 |                                |
     512 - 1024 us |          0 |                                |
       1 - ...  ms |          0 |                                |

Cc: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-ftrace.c                 | 24 +++++++++++++--------
 tools/perf/util/bpf_ftrace.c                |  2 ++
 tools/perf/util/bpf_skel/func_latency.bpf.c |  6 ++++--
 tools/perf/util/ftrace.h                    |  1 +
 4 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index a8785dec5ca6..ad9ce1bfffa1 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -680,7 +680,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
 	return (done && !workload_exec_errno) ? 0 : -1;
 }
 
-static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
+static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
+			   bool use_nsec)
 {
 	char *p, *q;
 	char *unit;
@@ -727,6 +728,9 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
 		if (!unit || strncmp(unit, " us", 3))
 			goto next;
 
+		if (use_nsec)
+			num *= 1000;
+
 		i = log2(num);
 		if (i < 0)
 			i = 0;
@@ -744,7 +748,7 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
 	strcat(linebuf, p);
 }
 
-static void display_histogram(int buckets[])
+static void display_histogram(int buckets[], bool use_nsec)
 {
 	int i;
 	int total = 0;
@@ -770,12 +774,12 @@ static void display_histogram(int buckets[])
 	for (i = 1; i < NUM_BUCKET - 1; i++) {
 		int start = (1 << (i - 1));
 		int stop = 1 << i;
-		const char *unit = "us";
+		const char *unit = use_nsec ? "ns" : "us";
 
 		if (start >= 1024) {
 			start >>= 10;
 			stop >>= 10;
-			unit = "ms";
+			unit = use_nsec ? "us" : "ms";
 		}
 		bar_len = buckets[i] * bar_total / total;
 		printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
@@ -785,8 +789,8 @@ static void display_histogram(int buckets[])
 
 	bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
 	printf("  %4d - %-4s %s | %10d | %.*s%*s |\n",
-	       1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
-	       bar_total - bar_len, "");
+	       1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
+	       bar_len, bar, bar_total - bar_len, "");
 
 }
 
@@ -913,7 +917,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
 			if (n < 0)
 				break;
 
-			make_histogram(buckets, buf, n, line);
+			make_histogram(buckets, buf, n, line, ftrace->use_nsec);
 		}
 	}
 
@@ -930,12 +934,12 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
 		int n = read(trace_fd, buf, sizeof(buf) - 1);
 		if (n <= 0)
 			break;
-		make_histogram(buckets, buf, n, line);
+		make_histogram(buckets, buf, n, line, ftrace->use_nsec);
 	}
 
 	read_func_latency(ftrace, buckets);
 
-	display_histogram(buckets);
+	display_histogram(buckets, ftrace->use_nsec);
 
 out:
 	close(trace_fd);
@@ -1171,6 +1175,8 @@ int cmd_ftrace(int argc, const char **argv)
 	OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
 		    "Use BPF to measure function latency"),
 #endif
+	OPT_BOOLEAN('n', "--use-nsec", &ftrace.use_nsec,
+		    "Use nano-second histogram"),
 	OPT_PARENT(common_options),
 	};
 	const struct option *options = ftrace_options;
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index d756cc66eef3..4f4d3aaff37c 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -81,6 +81,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
 		}
 	}
 
+	skel->bss->use_nsec = ftrace->use_nsec;
+
 	skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
 							    false, func->name);
 	if (IS_ERR(skel->links.func_begin)) {
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index ea94187fe443..9d01e3af7479 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -39,6 +39,7 @@ struct {
 int enabled = 0;
 int has_cpu = 0;
 int has_task = 0;
+int use_nsec = 0;
 
 SEC("kprobe/func")
 int BPF_PROG(func_begin)
@@ -80,6 +81,7 @@ int BPF_PROG(func_end)
 {
 	__u64 tid;
 	__u64 *start;
+	__u64 cmp_base = use_nsec ? 1 : 1000;
 
 	if (!enabled)
 		return 0;
@@ -97,9 +99,9 @@ int BPF_PROG(func_end)
 		if (delta < 0)
 			return 0;
 
-		// calculate index using delta in usec
+		// calculate index using delta
 		for (key = 0; key < (NUM_BUCKET - 1); key++) {
-			if (delta < ((1000UL) << key))
+			if (delta < (cmp_base << key))
 				break;
 		}
 
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 887f68a185f7..a34cd15733b8 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -17,6 +17,7 @@ struct perf_ftrace {
 	struct list_head	nograph_funcs;
 	unsigned long		percpu_buffer_size;
 	bool			inherit;
+	bool			use_nsec;
 	int			graph_depth;
 	int			func_stack_trace;
 	int			func_irq_info;
-- 
2.35.1.894.gb6a874cedc-goog


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-03-22 20:43 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-21 23:46 [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Namhyung Kim
2022-03-21 23:46 ` [PATCH 2/2] perf ftrace: Update documentation of ftrace command Namhyung Kim
2022-03-22 20:42 ` [PATCH 1/2] perf ftrace: Add -n/--use-nsec option for latency Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.