From: Dmitry Vyukov <dvyukov@google.com>
To: namhyung@kernel.org, irogers@google.com
Cc: linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
Dmitry Vyukov <dvyukov@google.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>
Subject: [PATCH v3 6/7] perf report: Add --latency flag
Date: Mon, 27 Jan 2025 10:58:53 +0100
Message-ID: <70523ae7dd5d5c41d2d954324297d9d2cfad1b1f.1737971364.git.dvyukov@google.com>
In-Reply-To: <cover.1737971364.git.dvyukov@google.com>

Add a --latency flag to perf record/report that allows capturing and
showing latency-centric profiles rather than the default
CPU-consumption-centric profiles. For latency profiles, record captures
context switch events, and report shows Latency as the first column.
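
For example, a typical workflow could look like this (the workload
command is only a placeholder):

  # record samples together with context switch events
  perf record --latency -- ./my-workload
  # show the latency-centric profile (Latency as the first column)
  perf report --latency
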
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: linux-perf-users@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
tools/perf/builtin-record.c | 20 +++++++++++++++++
tools/perf/builtin-report.c | 32 +++++++++++++++++++++++----
tools/perf/ui/hist.c | 41 ++++++++++++++++++++++++++++-------
tools/perf/util/hist.h | 1 +
tools/perf/util/sort.c | 33 +++++++++++++++++++++++-----
tools/perf/util/sort.h | 2 +-
tools/perf/util/symbol_conf.h | 4 +++-
7 files changed, 113 insertions(+), 20 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 5db1aedf48df9..e219639ac401b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -161,6 +161,7 @@ struct record {
struct evlist *sb_evlist;
pthread_t thread_id;
int realtime_prio;
+ bool latency;
bool switch_output_event_set;
bool no_buildid;
bool no_buildid_set;
@@ -3371,6 +3372,9 @@ static struct option __record_options[] = {
parse_events_option),
OPT_CALLBACK(0, "filter", &record.evlist, "filter",
"event filter", parse_filter),
+ OPT_BOOLEAN(0, "latency", &record.latency,
+ "Enable data collection for latency profiling.\n"
+ "\t\t\t Use perf report --latency for latency-centric profile."),
OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
NULL, "don't record events from perf itself",
exclude_perf),
@@ -4017,6 +4021,22 @@ int cmd_record(int argc, const char **argv)
}
+ if (record.latency) {
+ /*
+ * There is no fundamental reason why latency profiling
+ * can't work for system-wide mode, but exact semantics
+ * and details are to be defined.
+ * See the following thread for details:
+ * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
+ */
+ if (record.opts.target.system_wide) {
+ pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
+ err = -EINVAL;
+ goto out_opts;
+ }
+ record.opts.record_switch_events = true;
+ }
+
if (rec->buildid_mmap) {
if (!perf_can_record_build_id()) {
pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2a19abdc869a1..69de6dbefecfa 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -112,6 +112,8 @@ struct report {
u64 nr_entries;
u64 queue_size;
u64 total_cycles;
+ u64 total_samples;
+ u64 singlethreaded_samples;
int socket_filter;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct branch_type_stat brtype_stat;
@@ -331,6 +333,10 @@ static int process_sample_event(const struct perf_tool *tool,
&rep->total_cycles, evsel);
}
+ rep->total_samples++;
+ if (al.parallelism == 1)
+ rep->singlethreaded_samples++;
+
ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
if (ret < 0)
pr_debug("problem adding hist entry, skipping event\n");
@@ -1079,6 +1085,11 @@ static int __cmd_report(struct report *rep)
return ret;
}
+ /* Don't show Latency column for non-parallel profiles by default. */
+ if (rep->singlethreaded_samples * 100 / rep->total_samples >= 99 &&
+ !symbol_conf.prefer_latency)
+ perf_hpp__cancel_latency();
+
evlist__check_mem_load_aux(session->evlist);
if (rep->stats_mode)
@@ -1468,6 +1479,10 @@ int cmd_report(int argc, const char **argv)
"Disable raw trace ordering"),
OPT_BOOLEAN(0, "skip-empty", &report.skip_empty,
"Do not display empty (or dummy) events in the output"),
+ OPT_BOOLEAN(0, "latency", &symbol_conf.prefer_latency,
+ "Show latency-centric profile rather than the default\n"
+ "\t\t\t CPU-consumption-centric profile\n"
+ "\t\t\t (requires perf record --latency flag)."),
OPT_END()
};
struct perf_data data = {
@@ -1722,16 +1737,25 @@ int cmd_report(int argc, const char **argv)
symbol_conf.annotate_data_sample = true;
}
+ symbol_conf.enable_latency = true;
if (report.disable_order || !perf_session__has_switch_events(session)) {
if (symbol_conf.parallelism_list_str ||
- (sort_order && strstr(sort_order, "parallelism")) ||
- (field_order && strstr(field_order, "parallelism"))) {
+ symbol_conf.prefer_latency ||
+ (sort_order && (strstr(sort_order, "latency") ||
+ strstr(sort_order, "parallelism"))) ||
+ (field_order && (strstr(field_order, "latency") ||
+ strstr(field_order, "parallelism")))) {
if (report.disable_order)
- ui__error("Use of parallelism is incompatible with --disable-order.\n");
+ ui__error("Use of latency profile or parallelism is incompatible with --disable-order.\n");
else
- ui__error("Use of parallelism requires --switch-events during record.\n");
+ ui__error("Use of latency profile or parallelism requires --latency flag during record.\n");
return -1;
}
+ /*
+ * If user did not ask for anything related to
+ * latency/parallelism explicitly, just don't show it.
+ */
+ symbol_conf.enable_latency = false;
}
if (sort_order && strstr(sort_order, "ipc")) {
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 22e31d835301e..d87046052b432 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -632,27 +632,36 @@ void perf_hpp__init(void)
return;
if (symbol_conf.cumulate_callchain) {
- hpp_dimension__add_output(PERF_HPP__OVERHEAD_ACC);
+ /* Use idempotent addition to avoid more complex logic. */
+ if (symbol_conf.prefer_latency)
+ hpp_dimension__add_output(PERF_HPP__LATENCY_ACC, true);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_ACC, true);
+ if (symbol_conf.enable_latency)
+ hpp_dimension__add_output(PERF_HPP__LATENCY_ACC, true);
perf_hpp__format[PERF_HPP__OVERHEAD].name = "Self";
}
- hpp_dimension__add_output(PERF_HPP__OVERHEAD);
+ if (symbol_conf.prefer_latency)
+ hpp_dimension__add_output(PERF_HPP__LATENCY, true);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD, true);
+ if (symbol_conf.enable_latency)
+ hpp_dimension__add_output(PERF_HPP__LATENCY, true);
if (symbol_conf.show_cpu_utilization) {
- hpp_dimension__add_output(PERF_HPP__OVERHEAD_SYS);
- hpp_dimension__add_output(PERF_HPP__OVERHEAD_US);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_SYS, false);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_US, false);
if (perf_guest) {
- hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_SYS);
- hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_US);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_SYS, false);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_US, false);
}
}
if (symbol_conf.show_nr_samples)
- hpp_dimension__add_output(PERF_HPP__SAMPLES);
+ hpp_dimension__add_output(PERF_HPP__SAMPLES, false);
if (symbol_conf.show_total_period)
- hpp_dimension__add_output(PERF_HPP__PERIOD);
+ hpp_dimension__add_output(PERF_HPP__PERIOD, false);
}
void perf_hpp_list__column_register(struct perf_hpp_list *list,
@@ -701,6 +710,22 @@ void perf_hpp__cancel_cumulate(void)
}
}
+void perf_hpp__cancel_latency(void)
+{
+ struct perf_hpp_fmt *fmt, *lat, *acc, *tmp;
+
+ if (is_strict_order(field_order) || is_strict_order(sort_order))
+ return;
+
+ lat = &perf_hpp__format[PERF_HPP__LATENCY];
+ acc = &perf_hpp__format[PERF_HPP__LATENCY_ACC];
+
+ perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) {
+ if (fmt_equal(lat, fmt) || fmt_equal(acc, fmt))
+ perf_hpp__column_unregister(fmt);
+ }
+}
+
void perf_hpp__setup_output_field(struct perf_hpp_list *list)
{
struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 91159f16c60b2..29d4c7a3d1747 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -582,6 +582,7 @@ enum {
void perf_hpp__init(void);
void perf_hpp__cancel_cumulate(void);
+void perf_hpp__cancel_latency(void);
void perf_hpp__setup_output_field(struct perf_hpp_list *list);
void perf_hpp__reset_output_field(struct perf_hpp_list *list);
void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index bc4c3acfe7552..2b6023de7a53a 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2622,6 +2622,7 @@ struct hpp_dimension {
const char *name;
struct perf_hpp_fmt *fmt;
int taken;
+ int was_taken;
};
#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
@@ -3513,6 +3514,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd,
return -1;
hd->taken = 1;
+ hd->was_taken = 1;
perf_hpp_list__register_sort_field(list, fmt);
return 0;
}
@@ -3547,10 +3549,15 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list,
return 0;
}
-int hpp_dimension__add_output(unsigned col)
+int hpp_dimension__add_output(unsigned col, bool implicit)
{
+ struct hpp_dimension *hd;
+
BUG_ON(col >= PERF_HPP__MAX_INDEX);
- return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
+ hd = &hpp_sort_dimensions[col];
+ if (implicit && !hd->was_taken)
+ return 0;
+ return __hpp_dimension__add_output(&perf_hpp_list, hd);
}
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
@@ -3809,10 +3816,24 @@ static char *setup_overhead(char *keys)
if (sort__mode == SORT_MODE__DIFF)
return keys;
- keys = prefix_if_not_in("overhead", keys);
-
- if (symbol_conf.cumulate_callchain)
- keys = prefix_if_not_in("overhead_children", keys);
+ if (symbol_conf.prefer_latency) {
+ keys = prefix_if_not_in("overhead", keys);
+ keys = prefix_if_not_in("latency", keys);
+ if (symbol_conf.cumulate_callchain) {
+ keys = prefix_if_not_in("overhead_children", keys);
+ keys = prefix_if_not_in("latency_children", keys);
+ }
+ } else if (!keys || (!strstr(keys, "overhead") &&
+ !strstr(keys, "latency"))) {
+ if (symbol_conf.enable_latency)
+ keys = prefix_if_not_in("latency", keys);
+ keys = prefix_if_not_in("overhead", keys);
+ if (symbol_conf.cumulate_callchain) {
+ if (symbol_conf.enable_latency)
+ keys = prefix_if_not_in("latency_children", keys);
+ keys = prefix_if_not_in("overhead_children", keys);
+ }
+ }
return keys;
}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 11fb15f914093..180d36a2bea35 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -141,7 +141,7 @@ int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, i
bool is_strict_order(const char *order);
-int hpp_dimension__add_output(unsigned col);
+int hpp_dimension__add_output(unsigned col, bool implicit);
void reset_dimensions(void);
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
struct evlist *evlist,
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index c5b2e56127e22..cd9aa82c7d5ad 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -49,7 +49,9 @@ struct symbol_conf {
keep_exited_threads,
annotate_data_member,
annotate_data_sample,
- skip_empty;
+ skip_empty,
+ enable_latency,
+ prefer_latency;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
--
2.48.1.262.g85cc9f2d1e-goog