* [PATCH v2 1/3] perf ftrace: Display latency statistics at the end
@ 2025-01-07 22:43 Namhyung Kim
2025-01-07 22:43 ` [PATCH v2 2/3] perf ftrace: Add --graph-opts option to profile subcommand Namhyung Kim
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Namhyung Kim @ 2025-01-07 22:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, James Clark
Sometimes users also want to see the average latency, not just the
histogram. Display latency statistics such as avg, max and min at the
end.
$ sudo ./perf ftrace latency -ab -T synchronize_rcu -- ...
# DURATION | COUNT | GRAPH |
0 - 1 us | 0 | |
1 - 2 us | 0 | |
2 - 4 us | 0 | |
4 - 8 us | 0 | |
8 - 16 us | 0 | |
16 - 32 us | 0 | |
32 - 64 us | 0 | |
64 - 128 us | 0 | |
128 - 256 us | 0 | |
256 - 512 us | 0 | |
512 - 1024 us | 0 | |
1 - 2 ms | 0 | |
2 - 4 ms | 0 | |
4 - 8 ms | 0 | |
8 - 16 ms | 1 | ##### |
16 - 32 ms | 7 | ######################################## |
32 - 64 ms | 0 | |
64 - 128 ms | 0 | |
128 - 256 ms | 0 | |
256 - 512 ms | 0 | |
512 - 1024 ms | 0 | |
1 - ... s | 0 | |
# statistics (in usec)
total time: 171832
avg time: 21479
max time: 30906
min time: 15869
count: 8
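(For reference, the avg above is simply total / count: 171832 / 8 = 21479
usec, matching the 8 samples in the 8-16 ms and 16-32 ms buckets.)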
Reviewed-by: James Clark <james.clark@linaro.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/builtin-ftrace.c | 14 +++++++++++++-
tools/perf/util/bpf_ftrace.c | 12 +++++++++++-
tools/perf/util/bpf_skel/func_latency.bpf.c | 19 +++++++++++++++++++
tools/perf/util/ftrace.h | 6 ++++--
4 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 834074162a63bf2b..2a786047ba32bcb7 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -43,6 +43,8 @@
static volatile sig_atomic_t workload_exec_errno;
static volatile sig_atomic_t done;
+static struct stats latency_stats; /* for tracepoints */
+
static void sig_handler(int sig __maybe_unused)
{
done = true;
@@ -798,8 +800,10 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
if (i >= NUM_BUCKET || num >= max_latency - min_latency)
i = NUM_BUCKET - 1;
+ num += min_latency;
do_inc:
buckets[i]++;
+ update_stats(&latency_stats, num);
next:
/* empty the line buffer for the next output */
@@ -894,6 +898,12 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
bar_len, bar, bar_total - bar_len, "");
+ printf("\n# statistics (in %s)\n", ftrace->use_nsec ? "nsec" : "usec");
+ printf(" total time: %20.0f\n", latency_stats.mean * latency_stats.n);
+ printf(" avg time: %20.0f\n", latency_stats.mean);
+ printf(" max time: %20"PRIu64"\n", latency_stats.max);
+ printf(" min time: %20"PRIu64"\n", latency_stats.min);
+ printf(" count: %20.0f\n", latency_stats.n);
}
static int prepare_func_latency(struct perf_ftrace *ftrace)
@@ -932,6 +942,8 @@ static int prepare_func_latency(struct perf_ftrace *ftrace)
if (fd < 0)
pr_err("failed to open trace_pipe\n");
+ init_stats(&latency_stats);
+
put_tracing_file(trace_file);
return fd;
}
@@ -961,7 +973,7 @@ static int stop_func_latency(struct perf_ftrace *ftrace)
static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
{
if (ftrace->target.use_bpf)
- return perf_ftrace__latency_read_bpf(ftrace, buckets);
+ return perf_ftrace__latency_read_bpf(ftrace, buckets, &latency_stats);
return 0;
}
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index bc484e65fb8f69ca..25fc280e414ac5fe 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -11,6 +11,7 @@
#include "util/debug.h"
#include "util/evlist.h"
#include "util/bpf_counter.h"
+#include "util/stat.h"
#include "util/bpf_skel/func_latency.skel.h"
@@ -86,6 +87,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
}
}
+ skel->bss->min = INT64_MAX;
+
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
false, func->name);
if (IS_ERR(skel->links.func_begin)) {
@@ -122,7 +125,7 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
}
int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
- int buckets[])
+ int buckets[], struct stats *stats)
{
int i, fd, err;
u32 idx;
@@ -146,6 +149,13 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
buckets[idx] += hist[i];
}
+ if (skel->bss->count) {
+ stats->mean = skel->bss->total / skel->bss->count;
+ stats->n = skel->bss->count;
+ stats->max = skel->bss->max;
+ stats->min = skel->bss->min;
+ }
+
free(hist);
return 0;
}
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index 4df54e1b1411b10a..fb144811b34fc72a 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -38,6 +38,12 @@ struct {
int enabled = 0;
+// stats
+__s64 total;
+__s64 count;
+__s64 max;
+__s64 min;
+
const volatile int has_cpu = 0;
const volatile int has_task = 0;
const volatile int use_nsec = 0;
@@ -122,6 +128,8 @@ int BPF_PROG(func_end)
delta >= max_latency - min_latency)
key = NUM_BUCKET - 1;
}
+
+ delta += min_latency;
goto do_lookup;
}
// calculate index using delta
@@ -136,6 +144,17 @@ int BPF_PROG(func_end)
return 0;
*hist += 1;
+
+ if (bucket_range == 0)
+ delta /= cmp_base;
+
+ __sync_fetch_and_add(&total, delta);
+ __sync_fetch_and_add(&count, 1);
+
+ if (delta > max)
+ max = delta;
+ if (delta < min)
+ min = delta;
}
return 0;
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index f218703063f74786..5dee2caba0fe403f 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -7,6 +7,7 @@
struct evlist;
struct hashamp;
+struct stats;
struct perf_ftrace {
struct evlist *evlist;
@@ -46,7 +47,7 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
- int buckets[]);
+ int buckets[], struct stats *stats);
int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
#else /* !HAVE_BPF_SKEL */
@@ -71,7 +72,8 @@ perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
static inline int
perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
- int buckets[] __maybe_unused)
+ int buckets[] __maybe_unused,
+ struct stats *stats __maybe_unused)
{
return -1;
}
--
2.47.1.613.gc27f4b7a9f-goog
* [PATCH v2 2/3] perf ftrace: Add --graph-opts option to profile subcommand
2025-01-07 22:43 [PATCH v2 1/3] perf ftrace: Display latency statistics at the end Namhyung Kim
@ 2025-01-07 22:43 ` Namhyung Kim
2025-01-07 22:43 ` [PATCH v2 3/3] perf test: Update ftrace test to use --graph-opts Namhyung Kim
2025-01-08 14:56 ` [PATCH v2 1/3] perf ftrace: Display latency statistics at the end Arnaldo Carvalho de Melo
2 siblings, 0 replies; 5+ messages in thread
From: Namhyung Kim @ 2025-01-07 22:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, James Clark
Like the trace subcommand, the profile subcommand should be able to pass
options to control the behavior of the function_graph tracer. Some
options are restricted in order to preserve the subcommand's internal
behavior.
For example, it can limit the function call depth as shown below:
# perf ftrace profile --graph-opts depth=5 -- myprog
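Multiple options can be combined; assuming the same comma-separated
syntax as the trace subcommand's --graph-opts, something like this
should also work:
# perf ftrace profile --graph-opts noirqs,depth=3 -- myprog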
Reviewed-by: James Clark <james.clark@linaro.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/Documentation/perf-ftrace.txt | 8 ++++++++
tools/perf/builtin-ftrace.c | 4 ++++
2 files changed, 12 insertions(+)
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index eccc0483f7faecad..b77f58c4d2fdcff9 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -201,6 +201,14 @@ OPTIONS for 'perf ftrace profile'
Sort the result by the given field. Available values are:
total, avg, max, count, name. Default is 'total'.
+--graph-opts::
+ List of options allowed to set:
+
+ - nosleep-time - Measure on-CPU time only for function_graph tracer.
+ - noirqs - Ignore functions that happen inside interrupt.
+ - thresh=<n> - Setup trace duration threshold in microseconds.
+ - depth=<n> - Set max depth for function graph tracer to follow.
+
SEE ALSO
--------
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 2a786047ba32bcb7..08c1cc429b27e169 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -1064,6 +1064,7 @@ static int prepare_func_profile(struct perf_ftrace *ftrace)
{
ftrace->tracer = "function_graph";
ftrace->graph_tail = 1;
+ ftrace->graph_verbose = 0;
ftrace->profile_hash = hashmap__new(profile_hash, profile_equal, NULL);
if (ftrace->profile_hash == NULL)
@@ -1650,6 +1651,9 @@ int cmd_ftrace(int argc, const char **argv)
OPT_CALLBACK('s', "sort", &profile_sort, "key",
"Sort result by key: total (default), avg, max, count, name.",
parse_sort_key),
+ OPT_CALLBACK(0, "graph-opts", &ftrace, "options",
+ "Graph tracer options, available options: nosleep-time,noirqs,thresh=<n>,depth=<n>",
+ parse_graph_tracer_opts),
OPT_PARENT(common_options),
};
const struct option *options = ftrace_options;
--
2.47.1.613.gc27f4b7a9f-goog
* [PATCH v2 3/3] perf test: Update ftrace test to use --graph-opts
2025-01-07 22:43 [PATCH v2 1/3] perf ftrace: Display latency statistics at the end Namhyung Kim
2025-01-07 22:43 ` [PATCH v2 2/3] perf ftrace: Add --graph-opts option to profile subcommand Namhyung Kim
@ 2025-01-07 22:43 ` Namhyung Kim
2025-01-08 19:52 ` Arnaldo Carvalho de Melo
2025-01-08 14:56 ` [PATCH v2 1/3] perf ftrace: Display latency statistics at the end Arnaldo Carvalho de Melo
2 siblings, 1 reply; 5+ messages in thread
From: Namhyung Kim @ 2025-01-07 22:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
linux-perf-users, Thomas Richter, James Clark
I found that the test failed on machines with limited memory because the
16MB per-cpu buffer is too big. The option was added in order not to
miss any tracing data, but we can limit the amount of data by reducing
the function call depth instead of increasing the buffer size to hold
all of it.
Since test_ftrace_trace() already uses the same option and was able to
find the sleep function, it should work with the profile subcommand too.
Get rid of the other grep commands which might be affected by the depth
change.
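Concretely, the test now runs 'perf ftrace profile --graph-opts depth=5
sleep 0.1' instead of 'perf ftrace profile -m 16M sleep 0.1'.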
Cc: Thomas Richter <tmricht@linux.ibm.com>
Reviewed-by: James Clark <james.clark@linaro.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/tests/shell/ftrace.sh | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/tools/perf/tests/shell/ftrace.sh b/tools/perf/tests/shell/ftrace.sh
index 2df05052c324a21c..c243731d2fbf67aa 100755
--- a/tools/perf/tests/shell/ftrace.sh
+++ b/tools/perf/tests/shell/ftrace.sh
@@ -67,11 +67,8 @@ test_ftrace_latency() {
test_ftrace_profile() {
echo "perf ftrace profile test"
- perf ftrace profile -m 16M sleep 0.1 > "${output}"
+ perf ftrace profile --graph-opts depth=5 sleep 0.1 > "${output}"
grep ^# "${output}"
- grep sleep "${output}"
- grep schedule "${output}"
- grep execve "${output}"
time_re="[[:space:]]+1[[:digit:]]{5}\.[[:digit:]]{3}"
# 100283.000 100283.000 100283.000 1 __x64_sys_clock_nanosleep
# Check for one *clock_nanosleep line with a Count of just 1 that takes a bit more than 0.1 seconds
--
2.47.1.613.gc27f4b7a9f-goog
* Re: [PATCH v2 1/3] perf ftrace: Display latency statistics at the end
2025-01-07 22:43 [PATCH v2 1/3] perf ftrace: Display latency statistics at the end Namhyung Kim
2025-01-07 22:43 ` [PATCH v2 2/3] perf ftrace: Add --graph-opts option to profile subcommand Namhyung Kim
2025-01-07 22:43 ` [PATCH v2 3/3] perf test: Update ftrace test to use --graph-opts Namhyung Kim
@ 2025-01-08 14:56 ` Arnaldo Carvalho de Melo
2 siblings, 0 replies; 5+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-01-08 14:56 UTC (permalink / raw)
To: Namhyung Kim
Cc: Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
Ingo Molnar, LKML, linux-perf-users, James Clark, Gabriele Monaco,
Clark Williams
On Tue, Jan 07, 2025 at 02:43:50PM -0800, Namhyung Kim wrote:
> Sometimes users also want to see the average latency, not just the
> histogram. Display latency statistics such as avg, max and min at the
> end.
Thanks, applied to perf-tools-next, also CCing Gabriele, since he did
work on this tool recently.
- Arnaldo
* Re: [PATCH v2 3/3] perf test: Update ftrace test to use --graph-opts
2025-01-07 22:43 ` [PATCH v2 3/3] perf test: Update ftrace test to use --graph-opts Namhyung Kim
@ 2025-01-08 19:52 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 5+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-01-08 19:52 UTC (permalink / raw)
To: Namhyung Kim
Cc: Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
Ingo Molnar, LKML, linux-perf-users, Thomas Richter, James Clark,
Alexander Gordeev, Heiko Carstens, Sumanth Korikkar,
Vasily Gorbik
On Tue, Jan 07, 2025 at 02:43:52PM -0800, Namhyung Kim wrote:
> I found that the test failed on machines with limited memory because the
> 16MB per-cpu buffer is too big. The option was added in order not to
> miss any tracing data, but we can limit the amount of data by reducing
> the function call depth instead of increasing the buffer size to hold
> all of it.
>
> Since test_ftrace_trace() already uses the same option and was able to
> find the sleep function, it should work with the profile subcommand too.
>
> Get rid of the other grep commands which might be affected by the depth
> change.
>
> Cc: Thomas Richter <tmricht@linux.ibm.com>
Hey, I think we should convert the above Cc: to:
Reported-by: Thomas Richter <tmricht@linux.ibm.com>
As I think you became aware of this situation from a patch proposed by
Thomas?
I'm also CCing a few other folks that were CCed in Thomas patch, also
CCed here.
- Arnaldo