* [PATCH 1/2] perf ftrace latency: variable histogram buckets
@ 2025-02-07 8:04 Gabriele Monaco
2025-02-07 8:04 ` [PATCH 2/2] perf ftrace latency: allow to hide empty buckets Gabriele Monaco
2025-02-27 16:50 ` [PATCH 1/2] perf ftrace latency: variable histogram buckets Namhyung Kim
0 siblings, 2 replies; 3+ messages in thread
From: Gabriele Monaco @ 2025-02-07 8:04 UTC (permalink / raw)
To: linux-kernel, Arnaldo Carvalho de Melo, Namhyung Kim,
linux-perf-users, bpf
Cc: Gabriele Monaco
The max-latency value can make the histogram smaller, but not larger: we
have a maximum of 22 buckets, and specifying a max-latency that would
require more buckets has no effect.
Dynamically allocate the buckets and compute the bucket number from the
max latency as (max-min) / range + 2.
If the maximum is not specified, we still set the bucket number to 22
and compute the maximum accordingly.
Fail if the maximum is smaller than min+range; this way we make sure we
always have at least 3 buckets: those below min, those above max and one
in the middle.
Since max-latency is not available in log2 mode, always use 22 buckets.
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
---
tools/perf/builtin-ftrace.c | 57 +++++++++++++++------
tools/perf/util/bpf_ftrace.c | 6 ++-
tools/perf/util/bpf_skel/func_latency.bpf.c | 7 +--
tools/perf/util/ftrace.h | 1 +
4 files changed, 51 insertions(+), 20 deletions(-)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index cfd770ec72867..4f76094ea06d4 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -733,6 +733,7 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
{
int min_latency = ftrace->min_latency;
int max_latency = ftrace->max_latency;
+ unsigned int bucket_num = ftrace->bucket_num;
char *p, *q;
char *unit;
double num;
@@ -797,10 +798,10 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ]
i = num / ftrace->bucket_range + 1;
if (num >= max_latency - min_latency)
- i = NUM_BUCKET -1;
+ i = bucket_num -1;
}
- if (i >= NUM_BUCKET)
- i = NUM_BUCKET - 1;
+ if ((unsigned)i >= bucket_num)
+ i = bucket_num - 1;
num += min_latency;
do_inc:
@@ -820,13 +821,14 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
{
int min_latency = ftrace->min_latency;
bool use_nsec = ftrace->use_nsec;
- int i;
+ unsigned int bucket_num = ftrace->bucket_num;
+ unsigned int i;
int total = 0;
int bar_total = 46; /* to fit in 80 column */
char bar[] = "###############################################";
int bar_len;
- for (i = 0; i < NUM_BUCKET; i++)
+ for (i = 0; i < bucket_num; i++)
total += buckets[i];
if (total == 0) {
@@ -843,7 +845,7 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
0, min_latency ?: 1, use_nsec ? "ns" : "us",
buckets[0], bar_len, bar, bar_total - bar_len, "");
- for (i = 1; i < NUM_BUCKET - 1; i++) {
+ for (i = 1; i < bucket_num - 1; i++) {
unsigned int start, stop;
const char *unit = use_nsec ? "ns" : "us";
@@ -881,11 +883,11 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
bar_total - bar_len, "");
}
- bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
+ bar_len = buckets[bucket_num - 1] * bar_total / total;
if (!ftrace->bucket_range) {
printf(" %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
} else {
- unsigned int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range + min_latency;
+ unsigned int upper_outlier = (bucket_num - 2) * ftrace->bucket_range + min_latency;
if (upper_outlier > ftrace->max_latency)
upper_outlier = ftrace->max_latency;
@@ -897,7 +899,7 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
printf(" %4d - %4s %s", upper_outlier, "...", use_nsec ? "ns" : "us");
}
}
- printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
+ printf(" | %10d | %.*s%*s |\n", buckets[bucket_num - 1],
bar_len, bar, bar_total - bar_len, "");
printf("\n# statistics (in %s)\n", ftrace->use_nsec ? "nsec" : "usec");
@@ -997,7 +999,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
struct pollfd pollfd = {
.events = POLLIN,
};
- int buckets[NUM_BUCKET] = { };
+ int *buckets;
trace_fd = prepare_func_latency(ftrace);
if (trace_fd < 0)
@@ -1011,6 +1013,12 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
evlist__start_workload(ftrace->evlist);
+ buckets = calloc(ftrace->bucket_num, sizeof(*buckets));
+ if (buckets == NULL) {
+ pr_err("failed to allocate memory for the buckets\n");
+ goto out;
+ }
+
line[0] = '\0';
while (!done) {
if (poll(&pollfd, 1, -1) < 0)
@@ -1030,7 +1038,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
if (workload_exec_errno) {
const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
pr_err("workload failed: %s\n", emsg);
- goto out;
+ goto out_free_buckets;
}
/* read remaining buffer contents */
@@ -1045,6 +1053,8 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
display_histogram(ftrace, buckets);
+out_free_buckets:
+ free(buckets);
out:
close(trace_fd);
cleanup_func_latency(ftrace);
@@ -1634,7 +1644,7 @@ int cmd_ftrace(int argc, const char **argv)
OPT_UINTEGER(0, "min-latency", &ftrace.min_latency,
"Minimum latency (1st bucket). Works only with --bucket-range."),
OPT_UINTEGER(0, "max-latency", &ftrace.max_latency,
- "Maximum latency (last bucket). Works only with --bucket-range and total buckets less than 22."),
+ "Maximum latency (last bucket). Works only with --bucket-range."),
OPT_PARENT(common_options),
};
const struct option profile_options[] = {
@@ -1751,10 +1761,25 @@ int cmd_ftrace(int argc, const char **argv)
ret = -EINVAL;
goto out_delete_filters;
}
- if (ftrace.bucket_range && !ftrace.max_latency) {
- /* default max latency should depend on bucket range and num_buckets */
- ftrace.max_latency = (NUM_BUCKET - 2) * ftrace.bucket_range +
- ftrace.min_latency;
+ if (ftrace.bucket_range && ftrace.max_latency &&
+ ftrace.max_latency < ftrace.min_latency + ftrace.bucket_range) {
+ /* we need at least 1 bucket excluding min and max buckets */
+ pr_err("--max-latency must be larger than min-latency + bucket-range\n");
+ parse_options_usage(ftrace_usage, options,
+ "max-latency", /*short_opt=*/false);
+ ret = -EINVAL;
+ goto out_delete_filters;
+ }
+ /* set default unless max_latency is set and valid */
+ ftrace.bucket_num = NUM_BUCKET;
+ if (ftrace.bucket_range) {
+ if (ftrace.max_latency)
+ ftrace.bucket_num = (ftrace.max_latency - ftrace.min_latency) /
+ ftrace.bucket_range + 2;
+ else
+ /* default max latency should depend on bucket range and num_buckets */
+ ftrace.max_latency = (NUM_BUCKET - 2) * ftrace.bucket_range +
+ ftrace.min_latency;
}
cmd_func = __cmd_latency;
break;
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 25fc280e414ac..51f407a782d6c 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -39,6 +39,10 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
skel->rodata->bucket_range = ftrace->bucket_range;
skel->rodata->min_latency = ftrace->min_latency;
+ skel->rodata->bucket_num = ftrace->bucket_num;
+ if (ftrace->bucket_range && ftrace->bucket_num) {
+ bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num);
+ }
/* don't need to set cpu filter for system-wide mode */
if (ftrace->target.cpu_list) {
@@ -138,7 +142,7 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
if (hist == NULL)
return -ENOMEM;
- for (idx = 0; idx < NUM_BUCKET; idx++) {
+ for (idx = 0; idx < skel->rodata->bucket_num; idx++) {
err = bpf_map_lookup_elem(fd, &idx, hist);
if (err) {
buckets[idx] = 0;
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index fb144811b34fc..09e70d40a0f4d 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -50,6 +50,7 @@ const volatile int use_nsec = 0;
const volatile unsigned int bucket_range;
const volatile unsigned int min_latency;
const volatile unsigned int max_latency;
+const volatile unsigned int bucket_num = NUM_BUCKET;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -124,16 +125,16 @@ int BPF_PROG(func_end)
if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
// clang 12 doesn't like s64 / u32 division
key = (__u64)delta / bucket_range + 1;
- if (key >= NUM_BUCKET ||
+ if (key >= bucket_num ||
delta >= max_latency - min_latency)
- key = NUM_BUCKET - 1;
+ key = bucket_num - 1;
}
delta += min_latency;
goto do_lookup;
}
// calculate index using delta
- for (key = 0; key < (NUM_BUCKET - 1); key++) {
+ for (key = 0; key < (bucket_num - 1); key++) {
if (delta < (cmp_base << key))
break;
}
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 5dee2caba0fe4..395f97b203ead 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -24,6 +24,7 @@ struct perf_ftrace {
unsigned int bucket_range;
unsigned int min_latency;
unsigned int max_latency;
+ unsigned int bucket_num;
int graph_depth;
int func_stack_trace;
int func_irq_info;
base-commit: 92514ef226f511f2ca1fb1b8752966097518edc0
--
2.48.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] perf ftrace latency: allow to hide empty buckets
2025-02-07 8:04 [PATCH 1/2] perf ftrace latency: variable histogram buckets Gabriele Monaco
@ 2025-02-07 8:04 ` Gabriele Monaco
2025-02-27 16:50 ` [PATCH 1/2] perf ftrace latency: variable histogram buckets Namhyung Kim
1 sibling, 0 replies; 3+ messages in thread
From: Gabriele Monaco @ 2025-02-07 8:04 UTC (permalink / raw)
To: linux-kernel, Arnaldo Carvalho de Melo, Namhyung Kim,
linux-perf-users
Cc: Gabriele Monaco
Especially when using many buckets, it isn't uncommon for some of them
to be empty, which makes the histogram harder to read:
# perf ftrace latency -a -T mutex_lock --bucket-range 5 --max-latency 200
# DURATION | COUNT | GRAPH |
0 - 5 us | 14816 | ###################################### |
5 - 10 us | 1228 | ### |
10 - 15 us | 438 | # |
15 - 20 us | 106 | |
20 - 25 us | 21 | |
25 - 30 us | 11 | |
30 - 35 us | 1 | |
35 - 40 us | 2 | |
40 - 45 us | 4 | |
45 - 50 us | 0 | |
50 - 55 us | 1 | |
55 - 60 us | 0 | |
60 - 65 us | 1 | |
65 - 70 us | 1 | |
70 - 75 us | 1 | |
75 - 80 us | 2 | |
80 - 85 us | 0 | |
85 - 90 us | 1 | |
90 - 95 us | 0 | |
95 - 100 us | 1 | |
100 - 105 us | 0 | |
105 - 110 us | 0 | |
110 - 115 us | 0 | |
115 - 120 us | 0 | |
120 - 125 us | 1 | |
125 - 130 us | 0 | |
130 - 135 us | 0 | |
135 - 140 us | 1 | |
140 - 145 us | 0 | |
145 - 150 us | 0 | |
150 - 155 us | 0 | |
155 - 160 us | 0 | |
160 - 165 us | 0 | |
165 - 170 us | 0 | |
170 - 175 us | 0 | |
175 - 180 us | 0 | |
180 - 185 us | 0 | |
185 - 190 us | 0 | |
190 - 195 us | 0 | |
195 - 200 us | 0 | |
200 - ... us | 2 | |
Allow the optional flag --hide-empty to remove buckets with no elements
and produce a more compact graph. This feature could be misleading since
there is no clear indication of missing buckets; for this reason it's
disabled by default.
# perf ftrace latency -a -T mutex_lock --bucket-range 5 --max-latency 200 --hide-empty
# DURATION | COUNT | GRAPH |
0 - 5 us | 14816 | ###################################### |
5 - 10 us | 1228 | ### |
10 - 15 us | 438 | # |
15 - 20 us | 106 | |
20 - 25 us | 21 | |
25 - 30 us | 11 | |
30 - 35 us | 1 | |
35 - 40 us | 2 | |
40 - 45 us | 4 | |
50 - 55 us | 1 | |
60 - 65 us | 1 | |
65 - 70 us | 1 | |
70 - 75 us | 1 | |
75 - 80 us | 2 | |
85 - 90 us | 1 | |
95 - 100 us | 1 | |
120 - 125 us | 1 | |
135 - 140 us | 1 | |
200 - ... us | 2 | |
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
---
tools/perf/builtin-ftrace.c | 14 +++++++++++---
tools/perf/util/ftrace.h | 1 +
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 4f76094ea06d4..7caa18d5ffc38 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -841,14 +841,17 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
bar_len = buckets[0] * bar_total / total;
- printf(" %4d - %4d %s | %10d | %.*s%*s |\n",
- 0, min_latency ?: 1, use_nsec ? "ns" : "us",
- buckets[0], bar_len, bar, bar_total - bar_len, "");
+ if (!ftrace->hide_empty || buckets[0])
+ printf(" %4d - %4d %s | %10d | %.*s%*s |\n",
+ 0, min_latency ?: 1, use_nsec ? "ns" : "us",
+ buckets[0], bar_len, bar, bar_total - bar_len, "");
for (i = 1; i < bucket_num - 1; i++) {
unsigned int start, stop;
const char *unit = use_nsec ? "ns" : "us";
+ if (ftrace->hide_empty && !buckets[i])
+ continue;
if (!ftrace->bucket_range) {
start = (1 << (i - 1));
stop = 1 << i;
@@ -884,6 +887,8 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
}
bar_len = buckets[bucket_num - 1] * bar_total / total;
+ if (ftrace->hide_empty && !buckets[bucket_num - 1])
+ goto print_stats;
if (!ftrace->bucket_range) {
printf(" %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
} else {
@@ -902,6 +907,7 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
printf(" | %10d | %.*s%*s |\n", buckets[bucket_num - 1],
bar_len, bar, bar_total - bar_len, "");
+print_stats:
printf("\n# statistics (in %s)\n", ftrace->use_nsec ? "nsec" : "usec");
printf(" total time: %20.0f\n", latency_stats.mean * latency_stats.n);
printf(" avg time: %20.0f\n", latency_stats.mean);
@@ -1645,6 +1651,8 @@ int cmd_ftrace(int argc, const char **argv)
"Minimum latency (1st bucket). Works only with --bucket-range."),
OPT_UINTEGER(0, "max-latency", &ftrace.max_latency,
"Maximum latency (last bucket). Works only with --bucket-range."),
+ OPT_BOOLEAN(0, "hide-empty", &ftrace.hide_empty,
+ "Hide empty buckets in the histogram"),
OPT_PARENT(common_options),
};
const struct option profile_options[] = {
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index 395f97b203ead..a9bc47da83a56 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -25,6 +25,7 @@ struct perf_ftrace {
unsigned int min_latency;
unsigned int max_latency;
unsigned int bucket_num;
+ bool hide_empty;
int graph_depth;
int func_stack_trace;
int func_irq_info;
--
2.48.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH 1/2] perf ftrace latency: variable histogram buckets
2025-02-07 8:04 [PATCH 1/2] perf ftrace latency: variable histogram buckets Gabriele Monaco
2025-02-07 8:04 ` [PATCH 2/2] perf ftrace latency: allow to hide empty buckets Gabriele Monaco
@ 2025-02-27 16:50 ` Namhyung Kim
1 sibling, 0 replies; 3+ messages in thread
From: Namhyung Kim @ 2025-02-27 16:50 UTC (permalink / raw)
To: linux-kernel, Arnaldo Carvalho de Melo, linux-perf-users, bpf,
Gabriele Monaco
On Fri, 07 Feb 2025 09:04:44 +0100, Gabriele Monaco wrote:
> The max-latency value can make the histogram smaller, but not larger, we
> have a maximum of 22 buckets and specifying a max-latency that would
> require more buckets has no effect.
>
> Dynamically allocate the buckets and compute the bucket number from the
> max latency as (max-min) / range + 2
>
> [...]
Applied to perf-tools-next, thanks!
Best regards,
Namhyung
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-02-27 16:50 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-02-07 8:04 [PATCH 1/2] perf ftrace latency: variable histogram buckets Gabriele Monaco
2025-02-07 8:04 ` [PATCH 2/2] perf ftrace latency: allow to hide empty buckets Gabriele Monaco
2025-02-27 16:50 ` [PATCH 1/2] perf ftrace latency: variable histogram buckets Namhyung Kim
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).