* [RFC PATCH v2 01/14] perf stat: Introduce core generic print traversal engine and header stubs
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:38 ` Arnaldo Carvalho de Melo
2026-05-25 23:18 ` [RFC PATCH v2 02/14] perf stat: Implement standard console (STD) formatting callbacks Ian Rogers
` (13 subsequent siblings)
14 siblings, 1 reply; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch introduces the initial infrastructure for decoupling the
perf stat printing API. It declares the struct perf_stat_print_callbacks
interface and the core traversal driver perf_stat__print_cb() inside
the newly created util/stat-print.h and util/stat-print.c files.
The generic traversal driver perf_stat__print_cb() drives the traversal
of the event lists across all supported CPU aggregation modes (global,
none, socket, die, cluster, cache, core, thread, node). It implements
the clean display filtering checks (perf_stat__skip_metric_event(),
hybrid wildcard merges) and the basic metrics allowlist filter
(is_basic_shadow_metric()) to keep formatting callbacks decoupled.
This also introduces two format-agnostic shared helpers to centralize
aggregation prefix formatting:
- perf_stat__get_aggr_key(): resolves JSON key names.
- perf_stat__get_aggr_id_char(): formats unified aggregation identifiers.
Add empty format-specific stubs (perf_stat__print_std(),
perf_stat__print_csv(), perf_stat__print_json()) so that the new print
files compile and link cleanly under util/Build, without affecting the
legacy print path.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/builtin-stat.c | 261 ++++++++--------
tools/perf/util/Build | 4 +
tools/perf/util/stat-display.c | 28 +-
tools/perf/util/stat-print-csv.c | 13 +
tools/perf/util/stat-print-json.c | 13 +
tools/perf/util/stat-print-std.c | 13 +
tools/perf/util/stat-print.c | 490 ++++++++++++++++++++++++++++++
tools/perf/util/stat-print.h | 133 ++++++++
tools/perf/util/stat.h | 2 +
9 files changed, 815 insertions(+), 142 deletions(-)
create mode 100644 tools/perf/util/stat-print-csv.c
create mode 100644 tools/perf/util/stat-print-json.c
create mode 100644 tools/perf/util/stat-print-std.c
create mode 100644 tools/perf/util/stat-print.c
create mode 100644 tools/perf/util/stat-print.h
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 99d7db372b48..ef8f5da99c64 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -40,62 +40,64 @@
* Jaswinder Singh Rajput <jaswinder@kernel.org>
*/
+#include <errno.h>
+#include <inttypes.h>
+#include <locale.h>
+#include <math.h>
+#include <signal.h>
+#include <stdlib.h>
+
+#include <linux/ctype.h>
+#include <linux/err.h>
+#include <linux/list_sort.h>
+#include <linux/time64.h>
+#include <linux/zalloc.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <api/fs/fs.h>
+#include <internal/threadmap.h>
+#include <perf/evlist.h>
+#include <subcmd/parse-options.h>
+
+#include "asm/bug.h"
#include "builtin.h"
+#include "util/affinity.h"
+#include "util/bpf_counter.h"
#include "util/cgroup.h"
-#include <subcmd/parse-options.h>
-#include "util/parse-events.h"
-#include "util/pmus.h"
-#include "util/pmu.h"
-#include "util/tool_pmu.h"
+#include "util/color.h"
+#include "util/counts.h"
+#include "util/cpumap.h"
+#include "util/debug.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
-#include "util/debug.h"
-#include "util/color.h"
-#include "util/stat.h"
#include "util/header.h"
-#include "util/cpumap.h"
-#include "util/thread_map.h"
-#include "util/counts.h"
-#include "util/topdown.h"
+#include "util/intel-tpebs.h"
+#include "util/iostat.h"
+#include "util/metricgroup.h"
+#include "util/parse-events.h"
+#include "util/pfm.h"
+#include "util/pmu.h"
+#include "util/pmus.h"
#include "util/session.h"
-#include "util/tool.h"
+#include "util/stat-print.h"
+#include "util/stat.h"
#include "util/string2.h"
-#include "util/metricgroup.h"
#include "util/synthetic-events.h"
#include "util/target.h"
+#include "util/thread_map.h"
#include "util/time-utils.h"
+#include "util/tool.h"
+#include "util/tool_pmu.h"
#include "util/top.h"
-#include "util/affinity.h"
-#include "util/pfm.h"
-#include "util/bpf_counter.h"
-#include "util/iostat.h"
+#include "util/topdown.h"
#include "util/util.h"
-#include "util/intel-tpebs.h"
-#include "asm/bug.h"
-
-#include <linux/list_sort.h>
-#include <linux/time64.h>
-#include <linux/zalloc.h>
-#include <api/fs/fs.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <sys/prctl.h>
-#include <inttypes.h>
-#include <locale.h>
-#include <math.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <linux/err.h>
-
-#include <linux/ctype.h>
-#include <perf/evlist.h>
-#include <internal/threadmap.h>
#ifdef HAVE_BPF_SKEL
#include "util/bpf_skel/bperf_cgroup.h"
@@ -123,6 +125,7 @@ static struct target target;
static volatile sig_atomic_t child_pid = -1;
static int detailed_run = 0;
static bool transaction_run;
+static bool use_perf_stat_print;
static bool topdown_run = false;
static bool smi_cost = false;
static bool smi_reset = false;
@@ -1091,7 +1094,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
if (quiet)
return;
- evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv);
+ if (use_perf_stat_print)
+ perf_stat__print(evsel_list, &stat_config, &target, ts, argc, argv);
+ else
+ evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv);
}
static volatile sig_atomic_t signr = -1;
@@ -2455,155 +2461,152 @@ int cmd_stat(int argc, const char **argv)
bool affinity = true, affinity_set = false;
struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
- "hardware transaction statistics"),
+ "hardware transaction statistics"),
OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
- "event selector. use 'perf list' to list available events",
- parse_events_option),
- OPT_CALLBACK(0, "filter", &evsel_list, "filter",
- "event filter", parse_filter),
+ "event selector. use 'perf list' to list available events",
+ parse_events_option),
+ OPT_CALLBACK(0, "filter", &evsel_list, "filter", "event filter", parse_filter),
OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
- "child tasks do not inherit counters"),
- OPT_STRING('p', "pid", &target.pid, "pid",
- "stat events on existing process id"),
- OPT_STRING('t', "tid", &target.tid, "tid",
- "stat events on existing thread id"),
+ "child tasks do not inherit counters"),
+ OPT_STRING('p', "pid", &target.pid, "pid", "stat events on existing process id"),
+ OPT_STRING('t', "tid", &target.tid, "tid", "stat events on existing thread id"),
#ifdef HAVE_BPF_SKEL
OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
- "stat events on existing bpf program id"),
- OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf,
- "use bpf program to count events"),
+ "stat events on existing bpf program id"),
+ OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf, "use bpf program to count events"),
OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path",
- "path to perf_event_attr map"),
+ "path to perf_event_attr map"),
#endif
OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
- "system-wide collection from all CPUs"),
+ "system-wide collection from all CPUs"),
OPT_BOOLEAN(0, "scale", &stat_config.scale,
- "Use --no-scale to disable counter scaling for multiplexing"),
+ "Use --no-scale to disable counter scaling for multiplexing"),
OPT_INCR('v', "verbose", &verbose,
- "be more verbose (show counter open errors, etc)"),
+ "be more verbose (show counter open errors, etc)"),
OPT_INTEGER('r', "repeat", &stat_config.run_count,
- "repeat command and print average + stddev (max: 100, forever: 0)"),
+ "repeat command and print average + stddev (max: 100, forever: 0)"),
OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
- "display details about each run (only with -r option)"),
+ "display details about each run (only with -r option)"),
OPT_BOOLEAN('n', "null", &stat_config.null_run,
- "null run - dont start any counters"),
- OPT_INCR('d', "detailed", &detailed_run,
- "detailed run - start a lot of events"),
- OPT_BOOLEAN('S', "sync", &sync_run,
- "call sync() before starting a run"),
+ "null run - dont start any counters"),
+ OPT_INCR('d', "detailed", &detailed_run, "detailed run - start a lot of events"),
+ OPT_BOOLEAN('S', "sync", &sync_run, "call sync() before starting a run"),
OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
- "print large numbers with thousands\' separators",
- stat__set_big_num),
+ "print large numbers with thousands\' separators",
+ stat__set_big_num),
OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
- "list of cpus to monitor in system-wide"),
+ "list of cpus to monitor in system-wide"),
OPT_BOOLEAN('A', "no-aggr", &opt_mode.no_aggr,
- "disable aggregation across CPUs or PMUs"),
+ "disable aggregation across CPUs or PMUs"),
OPT_BOOLEAN(0, "no-merge", &opt_mode.no_aggr,
- "disable aggregation the same as -A or -no-aggr"),
+ "disable aggregation the same as -A or -no-aggr"),
OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
- "Merge identical named hybrid events"),
+ "Merge identical named hybrid events"),
OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
- "print counts with custom separator"),
+ "print counts with custom separator"),
OPT_BOOLEAN('j', "json-output", &stat_config.json_output,
- "print counts in JSON format"),
+ "print counts in JSON format"),
OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
- "monitor event in cgroup name only", parse_stat_cgroups),
+ "monitor event in cgroup name only", parse_stat_cgroups),
OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
- "expand events for each cgroup"),
+ "expand events for each cgroup"),
OPT_STRING('o', "output", &output_name, "file", "output file name"),
OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
- OPT_INTEGER(0, "log-fd", &output_fd,
- "log output to fd, instead of stderr"),
+ OPT_INTEGER(0, "log-fd", &output_fd, "log output to fd, instead of stderr"),
OPT_STRING(0, "pre", &pre_cmd, "command",
- "command to run prior to the measured command"),
+ "command to run prior to the measured command"),
OPT_STRING(0, "post", &post_cmd, "command",
- "command to run after to the measured command"),
+ "command to run after to the measured command"),
OPT_UINTEGER('I', "interval-print", &stat_config.interval,
- "print counts at regular interval in ms "
- "(overhead is possible for values <= 100ms)"),
+ "print counts at regular interval in ms "
+ "(overhead is possible for values <= 100ms)"),
OPT_INTEGER(0, "interval-count", &stat_config.times,
- "print counts for fixed number of times"),
+ "print counts for fixed number of times"),
OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
- "clear screen in between new interval"),
- OPT_UINTEGER(0, "timeout", &stat_config.timeout,
+ "clear screen in between new interval"),
+ OPT_UINTEGER(
+ 0, "timeout", &stat_config.timeout,
"stop workload and print counts after a timeout period in ms (>= 10ms)"),
OPT_BOOLEAN(0, "per-socket", &opt_mode.socket,
- "aggregate counts per processor socket"),
+ "aggregate counts per processor socket"),
OPT_BOOLEAN(0, "per-die", &opt_mode.die, "aggregate counts per processor die"),
OPT_BOOLEAN(0, "per-cluster", &opt_mode.cluster,
- "aggregate counts per processor cluster"),
- OPT_CALLBACK_OPTARG(0, "per-cache", &opt_mode.cache, &stat_config.aggr_level,
- "cache level", "aggregate count at this cache level (Default: LLC)",
- parse_cache_level),
+ "aggregate counts per processor cluster"),
+ OPT_CALLBACK_OPTARG(
+ 0, "per-cache", &opt_mode.cache, &stat_config.aggr_level, "cache level",
+ "aggregate count at this cache level (Default: LLC)", parse_cache_level),
OPT_BOOLEAN(0, "per-core", &opt_mode.core,
- "aggregate counts per physical processor core"),
+ "aggregate counts per physical processor core"),
OPT_BOOLEAN(0, "per-thread", &opt_mode.thread, "aggregate counts per thread"),
OPT_BOOLEAN(0, "per-node", &opt_mode.node, "aggregate counts per numa node"),
- OPT_INTEGER('D', "delay", &target.initial_delay,
+ OPT_INTEGER(
+ 'D', "delay", &target.initial_delay,
"ms to wait before starting measurement after program start (-1: start with events disabled)"),
OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
- "Only print computed metrics. No raw values", enable_metric_only),
+ "Only print computed metrics. No raw values",
+ enable_metric_only),
OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
- "don't group metric events, impacts multiplexing"),
+ "don't group metric events, impacts multiplexing"),
OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
- "don't try to share events between metrics in a group"),
+ "don't try to share events between metrics in a group"),
OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold,
- "disable adding events for the metric threshold calculation"),
- OPT_BOOLEAN(0, "topdown", &topdown_run,
- "measure top-down statistics"),
+ "disable adding events for the metric threshold calculation"),
+ OPT_BOOLEAN(0, "topdown", &topdown_run, "measure top-down statistics"),
#ifdef HAVE_ARCH_X86_64_SUPPORT
OPT_BOOLEAN(0, "record-tpebs", &tpebs_recording,
- "enable recording for tpebs when retire_latency required"),
+ "enable recording for tpebs when retire_latency required"),
OPT_CALLBACK(0, "tpebs-mode", &tpebs_mode, "tpebs-mode",
- "Mode of TPEBS recording: mean, min or max",
- parse_tpebs_mode),
+ "Mode of TPEBS recording: mean, min or max", parse_tpebs_mode),
#endif
OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
- "Set the metrics level for the top-down statistics (0: max level)"),
- OPT_BOOLEAN(0, "smi-cost", &smi_cost,
- "measure SMI cost"),
+ "Set the metrics level for the top-down statistics (0: max level)"),
+ OPT_BOOLEAN(0, "smi-cost", &smi_cost, "measure SMI cost"),
OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
- "monitor specified metrics or metric groups (separated by ,)",
- append_metric_groups),
+ "monitor specified metrics or metric groups (separated by ,)",
+ append_metric_groups),
OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
- "Configure all used events to run in kernel space.",
- PARSE_OPT_EXCLUSIVE),
+ "Configure all used events to run in kernel space.",
+ PARSE_OPT_EXCLUSIVE),
OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
- "Configure all used events to run in user space.",
- PARSE_OPT_EXCLUSIVE),
+ "Configure all used events to run in user space.",
+ PARSE_OPT_EXCLUSIVE),
OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
- "Use with 'percore' event qualifier to show the event "
- "counts of one hardware thread by sum up total hardware "
- "threads of same physical core"),
- OPT_BOOLEAN(0, "summary", &stat_config.summary,
- "print summary for interval mode"),
+ "Use with 'percore' event qualifier to show the event "
+ "counts of one hardware thread by sum up total hardware "
+ "threads of same physical core"),
+ OPT_BOOLEAN(0, "new", &use_perf_stat_print,
+ "use new clean API code for display output"),
+ OPT_BOOLEAN(0, "summary", &stat_config.summary, "print summary for interval mode"),
OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
- "don't print 'summary' for CSV summary output"),
+ "don't print 'summary' for CSV summary output"),
OPT_BOOLEAN(0, "quiet", &quiet,
- "don't print any output, messages or warnings (useful with record)"),
+ "don't print any output, messages or warnings (useful with record)"),
OPT_BOOLEAN_SET(0, "affinity", &affinity, &affinity_set,
- "enable (default) or disable affinity optimizations to reduce IPIs"),
+ "enable (default) or disable affinity optimizations to reduce IPIs"),
OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
- "Only enable events on applying cpu with this type "
- "for hybrid platform (e.g. core or atom)",
- parse_cputype),
- OPT_CALLBACK(0, "pmu-filter", &evsel_list, "pmu",
+ "Only enable events on applying cpu with this type "
+ "for hybrid platform (e.g. core or atom)",
+ parse_cputype),
+ OPT_CALLBACK(
+ 0, "pmu-filter", &evsel_list, "pmu",
"Only enable events on applying pmu with specified "
"for multiple pmus with same type(e.g. hisi_sicl2_cpa0 or hisi_sicl0_cpa0)",
parse_pmu_filter),
#ifdef HAVE_LIBPFM
OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
- "libpfm4 event selector. use 'perf list' to list available events",
- parse_libpfm_events_option),
+ "libpfm4 event selector. use 'perf list' to list available events",
+ parse_libpfm_events_option),
#endif
- OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
+ OPT_CALLBACK(
+ 0, "control", &stat_config,
+ "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
"Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
"\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
"\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
parse_control_option),
OPT_CALLBACK_OPTARG(0, "iostat", &evsel_list, &stat_config, "default",
- "measure I/O performance metrics provided by arch/platform",
- iostat_parse),
+ "measure I/O performance metrics provided by arch/platform",
+ iostat_parse),
OPT_END()
};
const char * const stat_usage[] = {
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 4bbc78b1f741..b03099e820d4 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -113,6 +113,10 @@ perf-util-y += counts.o
perf-util-y += stat.o
perf-util-y += stat-shadow.o
perf-util-y += stat-display.o
+perf-util-y += stat-print.o
+perf-util-y += stat-print-std.o
+perf-util-y += stat-print-csv.o
+perf-util-y += stat-print-json.o
perf-util-y += perf_api_probe.o
perf-util-y += record.o
perf-util-y += srcline.o
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 2b69d238858c..e5aed8d629e6 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -37,7 +37,7 @@
#define PID_LEN 7
#define CPUS_LEN 4
-static int aggr_header_lens[] = {
+const int aggr_header_lens[] = {
[AGGR_CORE] = 18,
[AGGR_CACHE] = 22,
[AGGR_CLUSTER] = 20,
@@ -49,7 +49,7 @@ static int aggr_header_lens[] = {
[AGGR_GLOBAL] = 0,
};
-static const char *aggr_header_csv[] = {
+const char *aggr_header_csv[] = {
[AGGR_CORE] = "core,ctrs,",
[AGGR_CACHE] = "cache,ctrs,",
[AGGR_CLUSTER] = "cluster,ctrs,",
@@ -61,7 +61,7 @@ static const char *aggr_header_csv[] = {
[AGGR_GLOBAL] = ""
};
-static const char *aggr_header_std[] = {
+const char *aggr_header_std[] = {
[AGGR_CORE] = "core",
[AGGR_CACHE] = "cache",
[AGGR_CLUSTER] = "cluster",
@@ -580,13 +580,16 @@ static void print_metricgroup_header_std(struct perf_stat_config *config,
const char *metricgroup_name)
{
struct outstate *os = ctx;
+ int n;
if (!metricgroup_name) {
__new_line_std(config, os);
return;
}
- fprintf(config->output, " %*s", config->metric_only_len, metricgroup_name);
+ n = fprintf(config->output, " %*s", EVNAME_LEN, metricgroup_name);
+
+ fprintf(config->output, "%*s", MGROUP_LEN + config->unit_width + 2 - n, "");
}
static void print_metric_only(struct perf_stat_config *config,
@@ -596,20 +599,19 @@ static void print_metric_only(struct perf_stat_config *config,
struct outstate *os = ctx;
FILE *out = os->fh;
char str[1024];
- unsigned mlen;
+ unsigned mlen = config->metric_only_len;
const char *color = metric_threshold_classify__color(thresh);
- int olen;
- if (!unit) {
- os->first = false;
- return;
- }
+ if (!unit)
+ unit = "";
+ if (mlen < strlen(unit))
+ mlen = strlen(unit) + 1;
- mlen = max_t(unsigned, strlen(unit), config->metric_only_len);
+ if (color)
+ mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
- olen = snprintf(str, sizeof(str), fmt ?: "", val);
color_snprintf(str, sizeof(str), color ?: "", fmt ?: "", val);
- fprintf(out, "%*s%s", max_t(int, mlen - olen, 1), "", str);
+ fprintf(out, "%*s ", mlen, str);
os->first = false;
}
diff --git a/tools/perf/util/stat-print-csv.c b/tools/perf/util/stat-print-csv.c
new file mode 100644
index 000000000000..e9d1e7c30c90
--- /dev/null
+++ b/tools/perf/util/stat-print-csv.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "stat-print.h"
+#include <linux/compiler.h>
+
+/*
+ * Placeholder for the CSV formatter: every argument is ignored and 0 is
+ * returned without producing any output.
+ */
+int perf_stat__print_csv(struct evlist *evlist __maybe_unused,
+ const struct perf_stat_config *config __maybe_unused,
+ const struct target *target __maybe_unused,
+ const struct timespec *ts __maybe_unused,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused)
+{
+ return 0;
+}
diff --git a/tools/perf/util/stat-print-json.c b/tools/perf/util/stat-print-json.c
new file mode 100644
index 000000000000..72df7a94095d
--- /dev/null
+++ b/tools/perf/util/stat-print-json.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "stat-print.h"
+#include <linux/compiler.h>
+
+/*
+ * Placeholder for the JSON formatter: every argument is ignored and 0 is
+ * returned without producing any output.
+ */
+int perf_stat__print_json(struct evlist *evlist __maybe_unused,
+ const struct perf_stat_config *config __maybe_unused,
+ const struct target *target __maybe_unused,
+ const struct timespec *ts __maybe_unused,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused)
+{
+ return 0;
+}
diff --git a/tools/perf/util/stat-print-std.c b/tools/perf/util/stat-print-std.c
new file mode 100644
index 000000000000..83987e97c889
--- /dev/null
+++ b/tools/perf/util/stat-print-std.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "stat-print.h"
+#include <linux/compiler.h>
+
+/*
+ * Placeholder for the standard console formatter: every argument is ignored
+ * and 0 is returned without producing any output.
+ */
+int perf_stat__print_std(struct evlist *evlist __maybe_unused,
+ const struct perf_stat_config *config __maybe_unused,
+ const struct target *target __maybe_unused,
+ const struct timespec *ts __maybe_unused,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused)
+{
+ return 0;
+}
diff --git a/tools/perf/util/stat-print.c b/tools/perf/util/stat-print.c
new file mode 100644
index 000000000000..92ff9f1fe31c
--- /dev/null
+++ b/tools/perf/util/stat-print.c
@@ -0,0 +1,490 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "stat-print.h"
+
+#include <errno.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/compiler.h>
+
+#include "cpumap.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "expr.h"
+#include "metricgroup.h"
+#include "stat.h"
+#include "thread_map.h"
+#include "tool_pmu.h"
+
+/*
+ * Unified Aggregation Helpers (Shared by STD, CSV, JSON Formats)
+ */
+
+/*
+ * perf_stat__get_aggr_key - Name of the aggregation key for the current mode.
+ * @config: Perf stat configuration; aggr_mode selects the key.
+ * @evsel: Event selector; only its percore flag is consulted, and only in
+ *         AGGR_NONE mode.
+ *
+ * Returns a string literal naming the key, or "" for modes that have none
+ * (global, unset, max and any unrecognised value).
+ */
+const char *perf_stat__get_aggr_key(const struct perf_stat_config *config,
+				    const struct evsel *evsel)
+{
+	const char *key = "";
+
+	switch (config->aggr_mode) {
+	case AGGR_NONE:
+		/* A percore event that is not shown per thread reports "core". */
+		key = (evsel->percore && !config->percore_show_thread) ? "core" : "cpu";
+		break;
+	case AGGR_THREAD:
+		key = "thread";
+		break;
+	case AGGR_NODE:
+		key = "node";
+		break;
+	case AGGR_SOCKET:
+		key = "socket";
+		break;
+	case AGGR_DIE:
+		key = "die";
+		break;
+	case AGGR_CLUSTER:
+		key = "cluster";
+		break;
+	case AGGR_CACHE:
+		key = "cache";
+		break;
+	case AGGR_CORE:
+		key = "core";
+		break;
+	case AGGR_GLOBAL:
+	case AGGR_UNSET:
+	case AGGR_MAX:
+	default:
+		break;
+	}
+
+	return key;
+}
+
+/**
+ * perf_stat__get_aggr_id_char - Format an aggregation identifier as text.
+ * @config: Perf stat configuration; aggr_mode selects the format.
+ * @evsel: Event selector; its percore flag is read in AGGR_NONE mode and its
+ *         thread map in AGGR_THREAD mode.
+ * @id: Aggregation identifier to format.
+ * @buf: Output buffer.
+ * @buf_size: Size of @buf in bytes.
+ *
+ * Returns the scnprintf() result when an identifier was written. When the
+ * mode has no identifier (global, unset, max, unknown, or AGGR_NONE with a
+ * negative CPU), @buf is set to the empty string if it has room for it and
+ * -1 is returned.
+ */
+int perf_stat__get_aggr_id_char(const struct perf_stat_config *config, struct evsel *evsel,
+				struct aggr_cpu_id id, char *buf, size_t buf_size)
+{
+	switch (config->aggr_mode) {
+	case AGGR_CORE:
+		return scnprintf(buf, buf_size, "S%d-D%d-C%d", id.socket, id.die, id.core);
+	case AGGR_CACHE:
+		return scnprintf(buf, buf_size, "S%d-D%d-L%d-ID%d", id.socket, id.die,
+				 id.cache_lvl, id.cache);
+	case AGGR_CLUSTER:
+		return scnprintf(buf, buf_size, "S%d-D%d-CLS%d", id.socket, id.die, id.cluster);
+	case AGGR_DIE:
+		return scnprintf(buf, buf_size, "S%d-D%d", id.socket, id.die);
+	case AGGR_SOCKET:
+		return scnprintf(buf, buf_size, "S%d", id.socket);
+	case AGGR_NODE:
+		return scnprintf(buf, buf_size, "N%d", id.node);
+	case AGGR_NONE:
+		if (evsel->percore && !config->percore_show_thread)
+			return scnprintf(buf, buf_size, "S%d-D%d-C%d", id.socket, id.die,
+					 id.core);
+		if (id.cpu.cpu > -1)
+			return scnprintf(buf, buf_size, "%d", id.cpu.cpu);
+		break;
+	case AGGR_THREAD:
+		return scnprintf(buf, buf_size, "%s-%d",
+				 perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+				 perf_thread_map__pid(evsel->core.threads, id.thread_idx));
+	case AGGR_GLOBAL:
+	case AGGR_UNSET:
+	case AGGR_MAX:
+	default:
+		break;
+	}
+
+	/*
+	 * Nothing to format. Only terminate the buffer when it can hold the
+	 * terminator, so a zero-sized buffer is never written to.
+	 */
+	if (buf_size > 0)
+		buf[0] = '\0';
+	return -1;
+}
+
+/*
+ * Traversal Driver and Calculation Code
+ */
+
+/**
+ * tool_pmu__is_time_event - Check if event is a tool PMU time event.
+ * @config: Perf stat configuration, may be NULL.
+ * @evsel: Event to classify.
+ * @tool_aggr_idx: Written only when true is returned: the aggregation index
+ *                 the event's counts should be read from.
+ *
+ * An event qualifies when its tool event is duration_time, user_time or
+ * system_time. For such events the aggregation map is searched for the entry
+ * whose CPU is 0 and that entry's index is reported. Index 0 is reported
+ * when there is no config, no aggregation map, or no CPU 0 entry.
+ *
+ * Copied from stat-shadow.c to make stat-print.c self-contained.
+ */
+static bool tool_pmu__is_time_event(const struct perf_stat_config *config,
+				    const struct evsel *evsel, int *tool_aggr_idx)
+{
+	enum tool_pmu_event event = evsel__tool_event(evsel);
+	int aggr_idx;
+
+	if (event != TOOL_PMU__EVENT_DURATION_TIME && event != TOOL_PMU__EVENT_USER_TIME &&
+	    event != TOOL_PMU__EVENT_SYSTEM_TIME)
+		return false;
+
+	/*
+	 * perf_stat__print_cb() accepts a configuration without an
+	 * aggregation map, so the map must be checked before it is walked.
+	 */
+	if (config && config->aggr_map) {
+		cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) {
+			if (config->aggr_map->map[aggr_idx].cpu.cpu == 0) {
+				*tool_aggr_idx = aggr_idx;
+				return true;
+			}
+		}
+		pr_debug("Unexpected CPU0 missing in aggregation for tool event.\n");
+	}
+	*tool_aggr_idx = 0; /* Assume the first aggregation index works. */
+	return true;
+}
+
+/**
+ * prepare_metric - Collect event values required for a metric.
+ * @config: Perf stat configuration.
+ * @mexp: The metric expression.
+ * @evsel: The associated event selector.
+ * @pctx: Expr parse context to add ID/values to.
+ * @aggr_idx: Aggregation index to read values from.
+ *
+ * Iterates over the events required for the metric expression, reads their
+ * counts for the given aggregation index, and adds them to the expression
+ * parser context.
+ *
+ * Copied and refactored from stat-shadow.c.
+ *
+ * Return: the number of metric events processed on success, -ENOMEM if an
+ * event's metric ID cannot be duplicated, or the non-zero result of
+ * expr__add_ref() if registering a referenced metric fails.
+ */
+static int prepare_metric(const struct perf_stat_config *config, const struct metric_expr *mexp,
+ struct evsel *evsel, struct expr_parse_ctx *pctx, int aggr_idx)
+{
+ struct evsel *const *metric_events = mexp->metric_events;
+ struct metric_ref *metric_refs = mexp->metric_refs;
+ int i;
+
+ /* metric_events is a NULL-terminated array. */
+ for (i = 0; metric_events[i]; i++) {
+ int source_count = 0, tool_aggr_idx;
+ bool is_tool_time =
+ tool_pmu__is_time_event(config, metric_events[i], &tool_aggr_idx);
+ struct perf_stat_evsel *ps = metric_events[i]->stats;
+ char *n;
+ double val;
+
+ /*
+ * If there are multiple uncore PMUs and we're not reading the
+ * leader's stats, determine the stats for the appropriate
+ * uncore PMU.
+ */
+ if (evsel && evsel->metric_leader && evsel->pmu != evsel->metric_leader->pmu &&
+ mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
+ struct evsel *pos;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ if (pos->pmu != evsel->pmu)
+ continue;
+ if (pos->metric_leader != mexp->metric_events[i])
+ continue;
+ ps = pos->stats;
+ /* Non-zero, so the evsel__source_count() lookup below is skipped. */
+ source_count = 1;
+ break;
+ }
+ }
+ /* Time events are always on CPU0, the first aggregation index. */
+ if (!ps || !metric_events[i]->supported) {
+ /* No stats, or the event is unsupported: feed NaN to the expression. */
+ val = NAN;
+ source_count = 0;
+ } else {
+ /* Tool time events use the index found above instead of aggr_idx. */
+ struct perf_stat_aggr *aggr =
+ &ps->aggr[is_tool_time ? tool_aggr_idx : aggr_idx];
+
+ if (aggr->counts.run == 0) {
+ /* A zero run count is also reported as NaN. */
+ val = NAN;
+ source_count = 0;
+ } else {
+ val = aggr->counts.val;
+ if (is_tool_time) {
+ /* Convert time event nanoseconds to seconds. */
+ val *= 1e-9;
+ }
+ if (!source_count)
+ source_count = evsel__source_count(metric_events[i]);
+ }
+ }
+ /* NOTE(review): presumably pctx takes ownership of this copy - confirm. */
+ n = strdup(evsel__metric_id(metric_events[i]));
+ if (!n)
+ return -ENOMEM;
+
+ /*
+ * NOTE(review): the return value is ignored here; confirm against
+ * expr.c whether a failure can occur and who frees n in that case.
+ */
+ expr__add_id_val_source_count(pctx, n, val, source_count);
+ }
+
+ /* Register referenced metrics; the list ends at the first entry without a name. */
+ for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) {
+ int ret = expr__add_ref(pctx, &metric_refs[j]);
+
+ if (ret)
+ return ret;
+ }
+
+ return i;
+}
+
+/**
+ * calculate_and_print_metric - Compute and print a single metric.
+ * @config: Perf stat configuration.
+ * @cb: Output callbacks; nothing is done when cb->print_metric is NULL.
+ * @outer_ctx: Opaque context handed to cb->print_metric.
+ * @mexp: Metric expression to evaluate.
+ * @evsel: Event the metric is attached to.
+ * @aggr_idx: Aggregation index to read counts from.
+ *
+ * Parses the metric expression, computes the ratio, and calls the print_metric
+ * callback directly with clean parameters. The callback is not invoked when
+ * the expression fails to parse. A metric with both a unit and a name has its
+ * ratio multiplied by the unit's scale; if the scale cannot be converted the
+ * ratio is left unscaled and a NULL unit is passed.
+ *
+ * Returns the return value of the print_metric callback (0 on success, or
+ * error), -ENOMEM if the parse context cannot be allocated, or the negative
+ * error from prepare_metric().
+ */
+static int calculate_and_print_metric(const struct perf_stat_config *config,
+				      const struct perf_stat_print_callbacks *cb, void *outer_ctx,
+				      struct metric_expr *mexp, struct evsel *evsel, int aggr_idx)
+{
+	const char *metric_name = mexp->metric_name;
+	const char *metric_expr = mexp->metric_expr;
+	const char *metric_threshold = mexp->metric_threshold;
+	const char *metric_unit = mexp->metric_unit;
+	struct evsel *const *metric_events = mexp->metric_events;
+	int runtime = mexp->runtime;
+	struct expr_parse_ctx *pctx;
+	double ratio, scale, threshold;
+	int i;
+	enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN;
+	int ret = 0;
+
+	if (!cb->print_metric)
+		return 0;
+
+	pctx = expr__ctx_new();
+	if (!pctx)
+		return -ENOMEM;
+
+	if (config->user_requested_cpu_list)
+		pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
+	pctx->sctx.runtime = runtime;
+	pctx->sctx.system_wide = config->system_wide;
+
+	i = prepare_metric(config, mexp, evsel, pctx, aggr_idx);
+	if (i < 0) {
+		expr__ctx_free(pctx);
+		return i;
+	}
+
+	if (!metric_events[i] && expr__parse(&ratio, pctx, metric_expr) == 0) {
+		/*
+		 * Must start out NULL: perf_pmu__convert_scale() may fail
+		 * without storing anything, and the value is handed to the
+		 * callback either way.
+		 */
+		char *unit = NULL;
+
+		/* A threshold that evaluates to zero is good, anything else is bad. */
+		if (metric_threshold &&
+		    expr__parse(&threshold, pctx, metric_threshold) == 0 &&
+		    !isnan(threshold)) {
+			thresh = fpclassify(threshold) == FP_ZERO ? METRIC_THRESHOLD_GOOD :
+								    METRIC_THRESHOLD_BAD;
+		}
+
+		if (metric_unit && metric_name) {
+			if (perf_pmu__convert_scale(metric_unit, &unit, &scale) >= 0)
+				ratio *= scale;
+			ret = cb->print_metric(outer_ctx, config, evsel, aggr_idx,
+					       metric_name, unit, ratio, thresh);
+		} else {
+			/* Unnamed metrics are labelled with the event name, if any. */
+			ret = cb->print_metric(outer_ctx, config, evsel, aggr_idx,
+					       metric_name ?: (evsel->name ?: ""), NULL,
+					       ratio, thresh);
+		}
+	}
+
+	expr__ctx_free(pctx);
+	return ret;
+}
+
+/**
+ * is_basic_shadow_metric - Check whether a metric is on the basic allowlist.
+ * @name: Metric name to look up, may be NULL.
+ *
+ * Returns true when @name exactly matches one of the allowlisted metric
+ * names, and false otherwise, including for a NULL @name.
+ */
+static bool is_basic_shadow_metric(const char *name)
+{
+	static const char *const basic_metrics[] = {
+		"insn_per_cycle",   "branch_miss_rate",       "branch_frequency",
+		"cycles_frequency", "page_faults_per_second", "migrations_per_second",
+		"cs_per_second",    "CPUs_utilized",
+	};
+
+	/*
+	 * A metric expression may have no name (calculate_and_print_metric()
+	 * copes with that), and strcmp() must not be given a NULL pointer.
+	 */
+	if (!name)
+		return false;
+
+	for (size_t i = 0; i < ARRAY_SIZE(basic_metrics); i++) {
+		if (!strcmp(basic_metrics[i], name))
+			return true;
+	}
+	return false;
+}
+
+/**
+ * perf_stat_print_metricgroup - Traverse metrics for an event.
+ *
+ * Returns 0 on success, or a negative error code on failure.
+ */
+
+static int perf_stat_print_metricgroup(const struct perf_stat_config *config,
+				       const struct perf_stat_print_callbacks *cb, void *outer_ctx,
+				       struct evsel *evsel, int aggr_idx)
+{
+	struct metric_event *me =
+		metricgroup__lookup(&evsel->evlist->metric_events, evsel, false);
+	struct metric_expr *mexp;
+
+	/* No metrics are attached to this event. */
+	if (!me)
+		return 0;
+
+	list_for_each_entry(mexp, &me->head, nd) {
+		int err;
+
+		/*
+		 * Outside metric-only mode, an event that is not a default
+		 * metric group (or is one that also shows its events) only
+		 * gets the allowlisted basic metrics.
+		 */
+		if (!config->metric_only &&
+		    (!evsel->default_metricgroup || evsel->default_show_events) &&
+		    !is_basic_shadow_metric(mexp->metric_name))
+			continue;
+
+		err = calculate_and_print_metric(config, cb, outer_ctx, mexp, evsel, aggr_idx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * perf_stat_print_metrics - Entry point for metric calculation & printing.
+ * @config: Perf stat configuration.
+ * @cb: Output callbacks.
+ * @outer_ctx: Opaque context passed through to the callbacks.
+ * @evsel: Event whose metrics are printed.
+ * @aggr_idx: Aggregation index to read counts from.
+ *
+ * Does nothing for iostat runs; otherwise defers to
+ * perf_stat_print_metricgroup().
+ *
+ * Returns 0 on success, or a negative error code on failure.
+ */
+static int perf_stat_print_metrics(const struct perf_stat_config *config,
+				   const struct perf_stat_print_callbacks *cb, void *outer_ctx,
+				   struct evsel *evsel, int aggr_idx)
+{
+	/* IOSTAT metrics not supported yet in new API */
+	return config->iostat_run ?
+		       0 :
+		       perf_stat_print_metricgroup(config, cb, outer_ctx, evsel, aggr_idx);
+}
+
+/*
+ * Emit one counter for one aggregation index: skip it if it has been merged
+ * into another event or is only a metric input, otherwise report its counts
+ * through cb->print_event (when set) and then print its metrics.
+ *
+ * Returns 0 on success or when the counter is skipped, otherwise the first
+ * non-zero result from a callback or from perf_stat_print_metrics().
+ */
+static int perf_stat__print_counter(const struct perf_stat_config *config,
+				    const struct perf_stat_print_callbacks *cb, void *ctx,
+				    struct evsel *counter, int aggr_idx)
+{
+	struct perf_stat_evsel *ps = counter->stats;
+	u64 val = 0, ena = 0, run = 0;
+	int ret;
+
+	/* Counts stay zero when the event has no stats allocated. */
+	if (ps && ps->aggr) {
+		val = ps->aggr[aggr_idx].counts.val;
+		ena = ps->aggr[aggr_idx].counts.ena;
+		run = ps->aggr[aggr_idx].counts.run;
+	}
+
+	/* Skip already merged uncore/hybrid events */
+	if (config->aggr_mode != AGGR_NONE) {
+		if (evsel__is_hybrid(counter)) {
+			if (config->hybrid_merge && counter->first_wildcard_match != NULL)
+				return 0;
+		} else if (counter->first_wildcard_match != NULL) {
+			return 0;
+		}
+	}
+
+	if (perf_stat__skip_metric_event(counter))
+		return 0;
+
+	if (cb->print_event) {
+		double stdev_pct = 0.0;
+
+		/* A deviation is only computed when res_stats holds more than one sample. */
+		if (ps && ps->res_stats.n > 1)
+			stdev_pct = rel_stddev_stats(stddev_stats(&ps->res_stats), val);
+
+		ret = cb->print_event(ctx, config, counter, aggr_idx, val, ena, run, stdev_pct);
+		if (ret)
+			return ret;
+	}
+
+	return perf_stat_print_metrics(config, cb, ctx, counter, aggr_idx);
+}
+
+/**
+ * perf_stat__print_cb - Walk every counter and feed it to the output callbacks.
+ * @evlist: Events to print; their names are uniquified first.
+ * @config: Perf stat configuration.
+ * @target: Unused.
+ * @ts: Unused.
+ * @argc: Unused.
+ * @argv: Unused.
+ * @cb: Output callbacks; print_start, print_event, print_metric and print_end
+ *      are each optional and skipped when NULL.
+ * @ctx: Opaque context passed through to every callback.
+ *
+ * Calls cb->print_start(), then visits every counter in @evlist for each
+ * index of config->aggr_map (or for index 0 only when there is no map), and
+ * finally calls cb->print_end(). print_end() still runs after a counter
+ * failed, but not after print_start() failed. For an unsupported aggregation
+ * mode a message is written to config->output and no counter is visited.
+ *
+ * Returns 0 on success, otherwise the first non-zero result encountered.
+ */
+int perf_stat__print_cb(struct evlist *evlist, const struct perf_stat_config *config,
+			const struct target *target __maybe_unused,
+			const struct timespec *ts __maybe_unused, int argc __maybe_unused,
+			const char **argv __maybe_unused,
+			const struct perf_stat_print_callbacks *cb, void *ctx)
+{
+	struct evsel *counter;
+	int aggr_idx;
+	int ret = 0;
+
+	evlist__uniquify_evsel_names(evlist, config);
+
+	if (cb->print_start) {
+		ret = cb->print_start(ctx, config);
+		if (ret)
+			return ret;
+	}
+
+	switch (config->aggr_mode) {
+	case AGGR_GLOBAL:
+	case AGGR_NONE:
+	case AGGR_SOCKET:
+	case AGGR_DIE:
+	case AGGR_CLUSTER:
+	case AGGR_CACHE:
+	case AGGR_CORE:
+	case AGGR_THREAD:
+	case AGGR_NODE:
+		if (config->aggr_map) {
+			cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) {
+				evlist__for_each_entry(evlist, counter) {
+					ret = perf_stat__print_counter(config, cb, ctx, counter,
+								       aggr_idx);
+					if (ret)
+						goto out;
+				}
+			}
+		} else {
+			/* Without an aggregation map only index 0 is reported. */
+			evlist__for_each_entry(evlist, counter) {
+				ret = perf_stat__print_counter(config, cb, ctx, counter, 0);
+				if (ret)
+					goto out;
+			}
+		}
+		break;
+	case AGGR_UNSET:
+	case AGGR_MAX:
+	default:
+		fprintf(config->output, "Aggregation mode %d not supported in new API yet\n",
+			config->aggr_mode);
+		break;
+	}
+
+out:
+	/* Always give the formatter its end hook; keep the earlier error if any. */
+	if (cb->print_end) {
+		int err = cb->print_end(ctx, config);
+
+		if (!ret)
+			ret = err;
+	}
+
+	return ret;
+}
+
+/*
+ * perf_stat__print - Hand the print request to the formatter selected by
+ * @config: CSV when csv_output is set, otherwise JSON when json_output is
+ * set, otherwise the standard console formatter. All arguments are forwarded
+ * unchanged and the formatter's return value is returned.
+ */
+int perf_stat__print(struct evlist *evlist, const struct perf_stat_config *config,
+		     const struct target *target, const struct timespec *ts, int argc,
+		     const char **argv)
+{
+	if (config->csv_output)
+		return perf_stat__print_csv(evlist, config, target, ts, argc, argv);
+
+	if (config->json_output)
+		return perf_stat__print_json(evlist, config, target, ts, argc, argv);
+
+	return perf_stat__print_std(evlist, config, target, ts, argc, argv);
+}
diff --git a/tools/perf/util/stat-print.h b/tools/perf/util/stat-print.h
new file mode 100644
index 000000000000..a86414f32584
--- /dev/null
+++ b/tools/perf/util/stat-print.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STAT_PRINT_H
+#define __PERF_STAT_PRINT_H
+
+#include <linux/types.h>
+
+#include "stat.h"
+
+#define CNTR_NOT_SUPPORTED "<not supported>"
+#define CNTR_NOT_COUNTED "<not counted>"
+
+struct evlist;
+struct perf_stat_config;
+struct target;
+struct timespec;
+struct evsel;
+struct aggr_cpu_id;
+
+extern const int aggr_header_lens[];
+extern const char *aggr_header_csv[];
+extern const char *aggr_header_std[];
+
+/**
+ * struct perf_stat_print_callbacks - Callbacks for rendering perf stat output.
+ *
+ * This structure defines the interface for different output formats (e.g.,
+ * Standard, CSV, JSON) to render the collected performance counter statistics.
+ * The core display logic traverses the events and metrics and calls these
+ * callbacks in a streaming fashion, which build an in-memory DOM tree. The
+ * final rendering and output formatting is executed entirely in print_end.
+ *
+ * print_event and print_end may be left NULL (the metric-only standard format
+ * leaves print_event NULL): the traversal driver tests these pointers before
+ * calling them.
+ */
+struct perf_stat_print_callbacks {
+ /**
+ * print_start - Called before any event or metric is printed.
+ * @ctx: Opaque context pointer passed to the print function.
+ * @config: Perf stat configuration.
+ */
+ int (*print_start)(void *ctx, const struct perf_stat_config *config);
+
+ /**
+ * print_end - Called after all events and metrics have been traversed.
+ * Executes the actual formatting and printing of the buffered tree.
+ * @ctx: Opaque context pointer.
+ * @config: Perf stat configuration.
+ *
+ * Called even when the traversal stopped on an error; its return value
+ * is reported to the caller only if the traversal itself succeeded.
+ * Returns 0 on success, or a negative error code (e.g., -ENOMEM) on failure.
+ */
+ int (*print_end)(void *ctx, const struct perf_stat_config *config);
+
+ /**
+ * print_event - Called to buffer an event (counter) value.
+ * @ctx: Opaque context pointer.
+ * @config: Perf stat configuration.
+ * @evsel: The event selector being printed (mutable for lazy initialization).
+ * @aggr_idx: Aggregation index in evsel->stats.
+ * @val: Raw counter value.
+ * @ena: Enabled time for the counter (for multiplexing).
+ * @run: Running time for the counter (for multiplexing).
+ * @stdev_pct: Standard deviation percentage across multiple repeated runs.
+ *
+ * Returns 0 on success, or a negative error code (e.g., -ENOMEM) on failure.
+ */
+ int (*print_event)(void *ctx, const struct perf_stat_config *config, struct evsel *evsel,
+ int aggr_idx, u64 val, u64 ena, u64 run, double stdev_pct);
+
+ /**
+ * print_metric - Called to buffer a metric value associated with an event.
+ * @ctx: Opaque context pointer.
+ * @config: Perf stat configuration.
+ * @evsel: The event selector associated with the metric (mutable).
+ * @aggr_idx: Aggregation index.
+ * @name: The display name of the metric.
+ * @unit: The unit of the metric (e.g., "%", "GHz", or NULL).
+ * @val: The calculated metric value.
+ * @thresh: Threshold classification (e.g., good, bad) for color coding.
+ *
+ * Returns 0 on success, or a negative error code (e.g., -ENOMEM) on failure.
+ */
+ int (*print_metric)(void *ctx, const struct perf_stat_config *config, struct evsel *evsel,
+ int aggr_idx, const char *name, const char *unit, double val,
+ enum metric_threshold_classify thresh);
+};
+
+/**
+ * perf_stat__get_aggr_key - Get the JSON key name for an aggregation mode.
+ */
+const char *perf_stat__get_aggr_key(const struct perf_stat_config *config,
+ const struct evsel *evsel);
+
+/**
+ * perf_stat__get_aggr_id_char - Get the unified aggregation ID string.
+ *
+ * Returns the formatted string size, or a negative error code on failure.
+ */
+int perf_stat__get_aggr_id_char(const struct perf_stat_config *config, struct evsel *evsel,
+ struct aggr_cpu_id id, char *buf, size_t buf_size);
+
+/**
+ * perf_stat__print_cb - Drive the traversal and call callbacks.
+ *
+ * Defined in stat-print.c. Called by format-specific entry points.
+ * Returns 0 on success, or a negative error code on failure.
+ */
+int perf_stat__print_cb(struct evlist *evlist, const struct perf_stat_config *config,
+ const struct target *target, const struct timespec *ts, int argc,
+ const char **argv, const struct perf_stat_print_callbacks *cb, void *ctx);
+
+/**
+ * perf_stat__print - Entry point for the decoupled print API.
+ *
+ * Defined in stat-print.c. Dispatches to format-specific entry points.
+ * Returns 0 on success, or a negative error code on failure.
+ */
+int perf_stat__print(struct evlist *evlist, const struct perf_stat_config *config,
+ const struct target *target, const struct timespec *ts, int argc,
+ const char **argv);
+
+/*
+ * Format-specific entry points, implemented in their respective files.
+ * All return 0 on success, or a negative error code on failure.
+ */
+
+int perf_stat__print_std(struct evlist *evlist, const struct perf_stat_config *config,
+ const struct target *target, const struct timespec *ts, int argc,
+ const char **argv);
+
+int perf_stat__print_csv(struct evlist *evlist, const struct perf_stat_config *config,
+ const struct target *target, const struct timespec *ts, int argc,
+ const char **argv);
+
+int perf_stat__print_json(struct evlist *evlist, const struct perf_stat_config *config,
+ const struct target *target, const struct timespec *ts, int argc,
+ const char **argv);
+
+#endif /* __PERF_STAT_PRINT_H */
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 4bced233d2fc..77873d51786c 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -106,6 +106,8 @@ struct perf_stat_config {
bool ctl_fd_close;
const char *cgroup_list;
unsigned int topdown_level;
+ bool headers_printed;
+ bool metric_only_headers_printed;
};
extern struct perf_stat_config stat_config;
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread
* Re: [RFC PATCH v2 01/14] perf stat: Introduce core generic print traversal engine and header stubs
2026-05-25 23:18 ` [RFC PATCH v2 01/14] perf stat: Introduce core generic print traversal engine and header stubs Ian Rogers
@ 2026-05-25 23:38 ` Arnaldo Carvalho de Melo
2026-05-25 23:48 ` Ian Rogers
0 siblings, 1 reply; 46+ messages in thread
From: Arnaldo Carvalho de Melo @ 2026-05-25 23:38 UTC (permalink / raw)
To: Ian Rogers
Cc: linux-perf-users, namhyung, adrian.hunter, james.clark, jolsa,
linux-kernel, mingo, peterz
On Mon, May 25, 2026 at 04:18:47PM -0700, Ian Rogers wrote:
> This patch introduces the initial infrastructure for decoupling the
> perf stat printing API. It declares the struct perf_stat_print_callbacks
> interface and the core traversal driver perf_stat__print_cb() inside
> the newly created util/stat-print.h and util/stat-print.c files.
Can we avoid these kinds of reflowings:
OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
- "Set the metrics level for the top-down statistics (0: max level)"),
- OPT_BOOLEAN(0, "smi-cost", &smi_cost,
- "measure SMI cost"),
+ "Set the metrics level for the top-down statistics (0: max level)"),
+ OPT_BOOLEAN(0, "smi-cost", &smi_cost, "measure SMI cost")
There is a lot to process and these things get in the way.
I'm trying to make sure this doesn't happen in my patches with or
without AI assistance :-)
We need to have this as a skill so that AI doesn't do this.
Thanks,
- Arnaldo
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [RFC PATCH v2 01/14] perf stat: Introduce core generic print traversal engine and header stubs
2026-05-25 23:38 ` Arnaldo Carvalho de Melo
@ 2026-05-25 23:48 ` Ian Rogers
2026-05-26 0:20 ` Arnaldo Carvalho de Melo
0 siblings, 1 reply; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:48 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: linux-perf-users, namhyung, adrian.hunter, james.clark, jolsa,
linux-kernel, mingo, peterz
On Mon, May 25, 2026 at 4:38 PM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> On Mon, May 25, 2026 at 04:18:47PM -0700, Ian Rogers wrote:
> > This patch introduces the initial infrastructure for decoupling the
> > perf stat printing API. It declares the struct perf_stat_print_callbacks
> > interface and the core traversal driver perf_stat__print_cb() inside
> > the newly created util/stat-print.h and util/stat-print.c files.
>
> Can we avoid these kinds of reflowings:
>
> OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
> - "Set the metrics level for the top-down statistics (0: max level)"),
> - OPT_BOOLEAN(0, "smi-cost", &smi_cost,
> - "measure SMI cost"),
> + "Set the metrics level for the top-down statistics (0: max level)"),
> + OPT_BOOLEAN(0, "smi-cost", &smi_cost, "measure SMI cost")
>
> There is a lot to process and these things gets in the way.
>
> I'm trying to make sure this doesnt' happen in my patches with or
> without AI assistance :-)
>
> We need to have this as a skill so that AI don't do this.
Actually, it was primarily from running "git clang-format" to clean up
the #includes, but also for the layout of the AI aided stuff. git
clang-format likes to think that if you edited a function then tidying
up any style issues within it is fair game. I can resend with a fix.
Thanks,
Ian
> Thanks,
>
> - Arnaldo
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [RFC PATCH v2 01/14] perf stat: Introduce core generic print traversal engine and header stubs
2026-05-25 23:48 ` Ian Rogers
@ 2026-05-26 0:20 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 46+ messages in thread
From: Arnaldo Carvalho de Melo @ 2026-05-26 0:20 UTC (permalink / raw)
To: Ian Rogers
Cc: linux-perf-users, namhyung, adrian.hunter, james.clark, jolsa,
linux-kernel, mingo, peterz
On Mon, May 25, 2026 at 04:48:57PM -0700, Ian Rogers wrote:
> On Mon, May 25, 2026 at 4:38 PM Arnaldo Carvalho de Melo
> <acme@kernel.org> wrote:
> >
> > On Mon, May 25, 2026 at 04:18:47PM -0700, Ian Rogers wrote:
> > > This patch introduces the initial infrastructure for decoupling the
> > > perf stat printing API. It declares the struct perf_stat_print_callbacks
> > > interface and the core traversal driver perf_stat__print_cb() inside
> > > the newly created util/stat-print.h and util/stat-print.c files.
> >
> > Can we avoid these kinds of reflowings:
> >
> > OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
> > - "Set the metrics level for the top-down statistics (0: max level)"),
> > - OPT_BOOLEAN(0, "smi-cost", &smi_cost,
> > - "measure SMI cost"),
> > + "Set the metrics level for the top-down statistics (0: max level)"),
> > + OPT_BOOLEAN(0, "smi-cost", &smi_cost, "measure SMI cost")
> >
> > There is a lot to process and these things gets in the way.
> >
> > I'm trying to make sure this doesnt' happen in my patches with or
> > without AI assistance :-)
> >
> > We need to have this as a skill so that AI don't do this.
>
> Actually, it was primarily from running "git clang-format" to clean up
> the #includes, but also for the layout of the AI aided stuff. git
> clang-format likes to think that if you edited a function then tidying
> up any style issues within it is fair game. I can resend with a fix.
If the patch is just about reflowing to standardize on some format, then
we expect that to be what the patch is about, but then it still doesn't
add value at all, it's just something we have to look at to see if it
isn't introducing a problem even when obviously it probably will not add
any.
- Arnaldo
^ permalink raw reply [flat|nested] 46+ messages in thread
* [RFC PATCH v2 02/14] perf stat: Implement standard console (STD) formatting callbacks
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 01/14] perf stat: Introduce core generic print traversal engine and header stubs Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:49 ` Arnaldo Carvalho de Melo
2026-05-25 23:53 ` sashiko-bot
2026-05-25 23:18 ` [RFC PATCH v2 03/14] perf stat: Extend STD output linter to test basic New API checks Ian Rogers
` (12 subsequent siblings)
14 siblings, 2 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch implements standard console formatting callbacks inside
util/stat-print-std.c, replacing the empty stubs introduced in Commit 1.
Introduces the format-private `struct queued_event` and `struct queued_metric`
DOM nodes to buffer traversal streams, and fully encapsulates DOM state
initialization and queue cleanups inside std_print_start() and std_print_end().
Utilizes the newly centralized unified aggregation helpers to resolve CPU and
thread prefixes cleanly, and incorporates full interval-mode timestamp
printing support across all rows.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/util/stat-print-std.c | 776 ++++++++++++++++++++++++++++++-
1 file changed, 768 insertions(+), 8 deletions(-)
diff --git a/tools/perf/util/stat-print-std.c b/tools/perf/util/stat-print-std.c
index 83987e97c889..aa4a083bb85a 100644
--- a/tools/perf/util/stat-print-std.c
+++ b/tools/perf/util/stat-print-std.c
@@ -1,13 +1,773 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include "stat-print.h"
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+
+#include "color.h"
+#include "cpumap.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "metricgroup.h"
+#include "stat-print.h"
+#include "stat.h"
+#include "target.h"
+#include "thread_map.h"
+#include "tool_pmu.h"
+
+#define COUNTS_LEN 18
+#define EVNAME_LEN 32
+#define COMM_LEN 16
+#define PID_LEN 7
+#define MGROUP_LEN 50
+#define METRIC_LEN 38
+
+
+
+/**
+ * struct queued_metric - In-memory record of a buffered metric.
+ * @list: Linked list node for queueing.
+ * @name: The display name of the metric.
+ * @unit: The metric's unit (e.g., "%", "GHz", or NULL).
+ * @val: The calculated ratio/metric value.
+ * @thresh: Threshold classification for color coding.
+ * @aggr_idx: Aggregation index in evsel stats.
+ */
+struct queued_metric {
+ struct list_head list;
+ char *name;
+ char *unit;
+ double val;
+ enum metric_threshold_classify thresh;
+ int aggr_idx;
+};
+
+/**
+ * struct queued_event - In-memory record of a buffered counter event.
+ * @list: Linked list node for queueing.
+ * @evsel: The associated performance event selector.
+ * @name: The uniquely formatted/resolved event name.
+ * @val: Raw aggregated counter value.
+ * @ena: Enabled time for multiplexing percentage.
+ * @run: Running time for multiplexing percentage.
+ * @stdev_pct: Standard deviation percentage across repeated runs.
+ * @aggr_idx: Aggregation index.
+ * @is_metricgroup: Whether this represents a unified metricgroup header.
+ * @metrics_list: Linked list head containing nested queued_metric structures.
+ */
+struct queued_event {
+ struct list_head list;
+ struct evsel *evsel;
+ char *name;
+ u64 val, ena, run;
+ double stdev_pct;
+ int aggr_idx;
+ bool is_metricgroup;
+ struct list_head metrics_list;
+};
+
+/**
+ * struct std_print_state - Print state context for Standard console output.
+ * @fp: Output stream (FILE *) to print to.
+ * @timestamp: Formatted interval timestamp; an empty string when no interval
+ * timestamp applies.
+ * @events_list: Linked list head containing queued_event nodes.
+ * @current_event: Pointer to the currently active event being printed.
+ * Serves as a temporary bridge to associate streaming metrics back to
+ * their parent event node during list buffering. This relies on a
+ * strict temporal coupling in the traversal driver: the driver always
+ * invokes print_metric() callbacks for a counter synchronously and
+ * immediately after its print_event() callback, prior to advancing
+ * to the next event or aggregation node. This pointer is completely
+ * private to standard printing, keeping the traversal driver decoupled
+ * and preserving strict encapsulation.
+ * @target: target query parameters for header printout.
+ * @argc: Command argument count.
+ * @argv: Command argument values.
+ */
+struct std_print_state {
+ FILE *fp;
+ char timestamp[64];
+ struct list_head events_list;
+ struct queued_event *current_event;
+ const struct target *target;
+ int argc;
+ const char **argv;
+};
+
+/**
+ * struct std_metric_only_print_state - Metric-only print state context for Standard console output.
+ * @fp: Output stream (FILE *) to print to.
+ * @queued_metrics: Linked list head containing queued_metric nodes.
+ * @timestamp: Formatted interval timestamp; an empty string when no interval
+ * timestamp applies.
+ * @target: target query parameters.
+ * @argc: Command argument count.
+ * @argv: Command argument values.
+ * @evlist: Event list being printed; its first evsel supplies the
+ * aggregation prefix data when a value row is rendered.
+ */
+struct std_metric_only_print_state {
+ FILE *fp;
+ struct list_head queued_metrics;
+ char timestamp[64];
+ const struct target *target;
+ int argc;
+ const char **argv;
+ struct evlist *evlist;
+};
+
+/**
+ * print_aggr_id_std - Print the aggregation prefix for STD format.
+ *
+ * Uses the unified perf_stat__get_aggr_id_char helper to format the base
+ * aggregation string, and pads it dynamically using aggr_header_lens.
+ */
+static void print_aggr_id_std(const struct perf_stat_config *config, FILE *output,
+ struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr)
+{
+ char buf[128];
+
+ if (perf_stat__get_aggr_id_char(config, evsel, id, buf, sizeof(buf)) < 0)
+ return;
+
+ if (config->aggr_mode == AGGR_NONE) {
+ if (evsel->percore && !config->percore_show_thread) {
+ fprintf(output, "%-*s ", aggr_header_lens[AGGR_CORE], buf);
+ } else if (id.cpu.cpu > -1) {
+ /* For CPU none mode, prepend "CPU" during console print */
+ char cpu_buf[160];
+ snprintf(cpu_buf, sizeof(cpu_buf), "CPU%s", buf);
+ fprintf(output, "%-*s ", aggr_header_lens[AGGR_NONE], cpu_buf);
+ }
+ return;
+ }
+
+ if (config->aggr_mode == AGGR_THREAD) {
+ fprintf(output, "%-*s ", aggr_header_lens[AGGR_THREAD], buf);
+ return;
+ }
+
+ /* Socket/Die/Node/Cache/Cluster modes print base ID and aggr count */
+ fprintf(output, "%-s %*d ", buf, 4, aggr_nr);
+}
+
+/**
+ * should_skip_zero_counter - Check if a zero-valued counter should be skipped.
+ * @config: Perf stat configuration (aggregation mode, map and id callback).
+ * @counter: The event whose value is zero.
+ * @aggr_idx: Index into config->aggr_map of the row being considered.
+ *
+ * Implemented locally for standard console formatting. The STD event
+ * callback only consults this when the counter value is 0.
+ *
+ * Returns true when the row should be dropped from the output.
+ */
+static bool should_skip_zero_counter(const struct perf_stat_config *config, struct evsel *counter,
+ int aggr_idx)
+{
+ struct perf_cpu cpu;
+ unsigned int idx;
+ struct aggr_cpu_id id;
+
+ /* Unsupported events flagged skippable are hidden unless running verbose. */
+ if (verbose == 0 && counter->skippable && !counter->supported)
+ return true;
+
+ /* Metric-only output keeps the row. */
+ if (config->metric_only)
+ return false;
+
+ /* Per-thread aggregation in system-wide mode: drop zero rows. */
+ if (config->aggr_mode == AGGR_THREAD && config->system_wide)
+ return true;
+
+ /* No way to resolve which aggregation id owns the counter: keep the row. */
+ if (aggr_idx < 0 || !config->aggr_map || !config->aggr_get_id)
+ return false;
+
+ id = config->aggr_map->map[aggr_idx];
+
+ /* Tool events are kept only for the aggregation id that CPU 0 maps to. */
+ if (evsel__is_tool(counter)) {
+ struct aggr_cpu_id own_id = config->aggr_get_id((struct perf_stat_config *)config,
+ (struct perf_cpu){ .cpu = 0 });
+
+ return !aggr_cpu_id__equal(&id, &own_id);
+ }
+
+ /* Keep the row if any CPU of the counter maps to this aggregation id. */
+ perf_cpu_map__for_each_cpu(cpu, idx, counter->core.cpus) {
+ struct aggr_cpu_id own_id =
+ config->aggr_get_id((struct perf_stat_config *)config, cpu);
+
+ if (aggr_cpu_id__equal(&id, &own_id))
+ return false;
+ }
+ return true;
+}
+
+/*
+ * Standard (STD) Output Callbacks - Normal Mode
+ */
+
+static int std_print_start(void *ctx, const struct perf_stat_config *config __maybe_unused)
+{
+ struct std_print_state *ps = ctx;
-int perf_stat__print_std(struct evlist *evlist __maybe_unused,
- const struct perf_stat_config *config __maybe_unused,
- const struct target *target __maybe_unused,
- const struct timespec *ts __maybe_unused,
- int argc __maybe_unused,
- const char **argv __maybe_unused)
+ INIT_LIST_HEAD(&ps->events_list);
+ ps->current_event = NULL;
+ return 0;
+}
+
+/**
+ * std_print_event - Queue one counter value for deferred STD rendering.
+ * @ctx: struct std_print_state owning the event queue.
+ * @config: Perf stat configuration.
+ * @evsel: Event the value belongs to.
+ * @aggr_idx: Aggregation index of the value.
+ * @val: Raw counter value.
+ * @ena: Enabled time of the counter.
+ * @run: Running time of the counter.
+ * @stdev_pct: Relative standard deviation across repeated runs.
+ *
+ * Zero counters that qualify for skipping are not queued; in that case
+ * current_event is cleared so that the metrics that follow are dropped too.
+ *
+ * Returns 0 on success or -ENOMEM if the node cannot be allocated.
+ */
+static int std_print_event(void *ctx, const struct perf_stat_config *config, struct evsel *evsel,
+			   int aggr_idx, u64 val, u64 ena, u64 run, double stdev_pct)
 {
+	struct std_print_state *ps = ctx;
+	struct queued_event *ev;
+
+	/* Skip zero counters locally in STD callbacks if they qualify */
+	if (val == 0 && should_skip_zero_counter(config, evsel, aggr_idx)) {
+		ps->current_event = NULL;
+		return 0;
+	}
+
+	/* calloc() so that no member (e.g. is_metricgroup) is left indeterminate */
+	ev = calloc(1, sizeof(*ev));
+	if (!ev)
+		return -ENOMEM;
+
+	ev->name = strdup(evsel__name(evsel));
+	if (!ev->name) {
+		free(ev);
+		return -ENOMEM;
+	}
+
+	ev->evsel = evsel;
+	ev->val = val;
+	ev->ena = ena;
+	ev->run = run;
+	ev->stdev_pct = stdev_pct;
+	ev->aggr_idx = aggr_idx;
+	INIT_LIST_HEAD(&ev->metrics_list);
+
+	/* Metrics reported next are attached to this node via current_event */
+	list_add_tail(&ev->list, &ps->events_list);
+	ps->current_event = ev;
+
 return 0;
 }
+
+/**
+ * std_print_metric - Queue a metric under the most recently queued event.
+ * @ctx: struct std_print_state owning the event queue.
+ * @config: Perf stat configuration (unused).
+ * @evsel: Event the metric is associated with; must match the current event.
+ * @aggr_idx: Aggregation index, recorded in the queued node.
+ * @name: Display name of the metric (copied).
+ * @unit: Unit of the metric; NULL or "" is stored as NULL (copied otherwise).
+ * @val: Metric value.
+ * @thresh: Threshold classification used for coloring.
+ *
+ * Returns 0 on success (including when the parent event was skipped),
+ * -EINVAL if @evsel is not the current event's evsel, or -ENOMEM.
+ */
+static int std_print_metric(void *ctx, const struct perf_stat_config *config __maybe_unused,
+			    struct evsel *evsel, int aggr_idx,
+			    const char *name, const char *unit, double val,
+			    enum metric_threshold_classify thresh)
+{
+	struct std_print_state *ps = ctx;
+	struct queued_metric *b;
+
+	/* No current event (e.g. it was skipped): drop its metrics as well */
+	if (!ps->current_event)
+		return 0;
+
+	if (evsel != ps->current_event->evsel) {
+		pr_err("decoupled print engine: temporal coupling violation: evsel mismatch!\n");
+		return -EINVAL;
+	}
+
+	/* calloc() leaves ->unit NULL and every other member defined */
+	b = calloc(1, sizeof(*b));
+	if (!b)
+		return -ENOMEM;
+
+	b->name = strdup(name);
+	if (!b->name)
+		goto err_free;
+
+	if (unit && unit[0]) {
+		b->unit = strdup(unit);
+		if (!b->unit)
+			goto err_free;
+	}
+
+	b->val = val;
+	b->thresh = thresh;
+	b->aggr_idx = aggr_idx;
+	list_add_tail(&b->list, &ps->current_event->metrics_list);
+
+	return 0;
+
+err_free:
+	free(b->name);
+	free(b);
+	return -ENOMEM;
+}
+
+#define USEC_PER_SEC 1000000ULL
+#define NSEC_PER_SEC 1000000000ULL
+
+/* Convert a struct timeval to seconds; read-only, so callers need no cast. */
+static double timeval2double(const struct timeval *t)
+{
+	return t->tv_sec + (double)t->tv_usec / USEC_PER_SEC;
+}
+
+/**
+ * print_footer_std - Print the elapsed-time summary for STD output.
+ * @config: Perf stat configuration.
+ *
+ * Prints nothing in interval mode. Otherwise prints the average wall-clock
+ * time; for a single run it optionally adds user/sys times (ru_display),
+ * for repeated runs it adds the standard deviation.
+ */
+static void print_footer_std(const struct perf_stat_config *config)
+{
+	double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
+	FILE *output = config->output;
+
+	if (config->interval)
+		return;
+
+	if (!config->null_run)
+		fprintf(output, "\n");
+
+	if (config->run_count == 1) {
+		fprintf(output, " %17.9f seconds time elapsed", avg);
+
+		if (config->ru_display) {
+			double ru_utime = timeval2double(&config->ru_data.ru_utime);
+			double ru_stime = timeval2double(&config->ru_data.ru_stime);
+
+			fprintf(output, "\n\n");
+			fprintf(output, " %17.9f seconds user\n", ru_utime);
+			fprintf(output, " %17.9f seconds sys\n", ru_stime);
+		}
+	} else {
+		double sd = stddev_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
+
+		fprintf(output, " %17.9f +- %-17.9f seconds time elapsed", avg, sd);
+	}
+	fprintf(output, "\n");
+}
+
+/**
+ * print_header_std - Print the header prefix matching old API.
+ *
+ * Copied and adapted from stat-display.c.
+ */
+static void print_header_std(const struct perf_stat_config *config, const struct target *target,
+ int argc, const char **argv)
+{
+ FILE *output = config->output;
+ int i;
+
+ fprintf(output, "\n");
+ fprintf(output, " Performance counter stats for ");
+ if (target->bpf_str)
+ fprintf(output, "\'BPF program(s) %s", target->bpf_str);
+ else if (target->system_wide)
+ fprintf(output, "\'system wide");
+ else if (target->cpu_list)
+ fprintf(output, "\'CPU(s) %s", target->cpu_list);
+ else if (!target__has_task(target)) {
+ fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+ for (i = 1; argv && (i < argc); i++)
+ fprintf(output, " %s", argv[i]);
+ } else if (target->pid)
+ fprintf(output, "process id \'%s", target->pid);
+ else
+ fprintf(output, "thread id \'%s", target->tid);
+
+ fprintf(output, "\'");
+ if (config->run_count > 1)
+ fprintf(output, " (%d runs)", config->run_count);
+ fprintf(output, ":\n\n");
+}
+
+static int std_print_end(void *ctx, const struct perf_stat_config *config)
+{
+ struct std_print_state *ps = ctx;
+ struct queued_event *ev, *tmp_ev;
+ struct queued_metric *met, *tmp_met;
+ FILE *out = ps->fp;
+ bool first;
+ const char *last_mg_name = NULL;
+ const struct perf_pmu *last_pmu = NULL;
+ int last_aggr_idx = -1;
+
+ /* Print the formatted header prefix (only in non-interval mode) */
+ if (!config->interval)
+ print_header_std(config, ps->target, ps->argc, ps->argv);
+
+ list_for_each_entry_safe(ev, tmp_ev, &ps->events_list, list) {
+ struct evsel *evsel = ev->evsel;
+ double sc = evsel->scale;
+ const char *fmt;
+ const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
+ struct metric_event *me =
+ metricgroup__lookup(&evsel->evlist->metric_events, evsel, false);
+ bool is_metricgroup = false;
+ bool skip_header = false;
+ char full_name[128] = "";
+
+ if (me && me->is_default && !evsel->default_show_events) {
+ struct metric_expr *mexp =
+ list_first_entry(&me->head, struct metric_expr, nd);
+ const char *mg_name = mexp->default_metricgroup_name;
+ bool need_full_name = perf_pmus__num_core_pmus() > 1;
+
+ if (need_full_name && evsel->pmu)
+ scnprintf(full_name, sizeof(full_name), "%s (%s)", mg_name,
+ evsel->pmu->name);
+ else
+ scnprintf(full_name, sizeof(full_name), "%s", mg_name);
+ is_metricgroup = true;
+
+ if (last_mg_name && !strcmp(last_mg_name, mg_name) &&
+ last_pmu == evsel->pmu && last_aggr_idx == ev->aggr_idx) {
+ skip_header = true;
+ }
+ last_mg_name = mg_name;
+ last_pmu = evsel->pmu;
+ last_aggr_idx = ev->aggr_idx;
+ }
+
+ /* Print interval timestamp if configured */
+ if (config->interval && ps->timestamp[0] && !skip_header)
+ fprintf(out, "%s", ps->timestamp);
+
+ /* 1. Print aggregation prefix first (if we don't skip header) */
+ if (!skip_header && config->aggr_map && ev->aggr_idx >= 0) {
+ struct aggr_cpu_id id = config->aggr_map->map[ev->aggr_idx];
+ int aggr_nr = 0;
+ if (evsel->stats && evsel->stats->aggr) {
+ aggr_nr = evsel->stats->aggr[ev->aggr_idx].nr;
+ }
+ print_aggr_id_std(config, out, evsel, id, aggr_nr);
+ }
+
+ /* 2. Print event value (scaled) or spaces if metricgroup */
+ if (is_metricgroup) {
+ if (!skip_header) {
+ int n = fprintf(out, " %*s", EVNAME_LEN, full_name);
+ fprintf(out, "%*s", MGROUP_LEN + config->unit_width + 2 - n, "");
+ }
+ } else {
+ if (config->big_num)
+ fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f ";
+ else
+ fmt = floor(sc) != sc ? "%*.2f " : "%*.0f ";
+
+ if (ev->run == 0 || ev->ena == 0) {
+ fprintf(out, "%*s ", COUNTS_LEN, bad_count);
+ } else {
+ double scaled = (double)ev->val;
+ double avg;
+ if (ev->ena < ev->run) {
+ scaled = (double)ev->val * ev->run / ev->ena;
+ }
+ avg = scaled * sc;
+ fprintf(out, fmt, COUNTS_LEN, avg);
+ }
+
+ /* 3. Print unit */
+ if (evsel->unit) {
+ fprintf(out, "%-*s ", config->unit_width, evsel->unit);
+ } else {
+ if (config->unit_width > 0)
+ fprintf(out, "%-*s ", config->unit_width, "");
+ }
+
+ /* 4. Print event name */
+ fprintf(out, "%-*s", EVNAME_LEN, evsel__name(evsel));
+
+ /* If there are no metrics, print noise and multiplexing percentage */
+ if (list_empty(&ev->metrics_list)) {
+ if (ev->stdev_pct)
+ fprintf(out, " ( +-%6.2f%% )", ev->stdev_pct);
+ if (ev->run != ev->ena)
+ fprintf(out, " (%.2f%%)", 100.0 * ev->run / ev->ena);
+ }
+ }
+
+ first = true;
+ list_for_each_entry_safe(met, tmp_met, &ev->metrics_list, list) {
+ const char *color = metric_threshold_classify__color(met->thresh);
+ char unit_name[128];
+ const char *m_fmt = (met->unit && met->unit[0]) ? "%8.1f" : "%8.2f";
+
+ if (met->unit && met->unit[0]) {
+ snprintf(unit_name, sizeof(unit_name), "%s %s", met->unit,
+ met->name);
+ } else {
+ snprintf(unit_name, sizeof(unit_name), "%s", met->name);
+ }
+
+ if (first) {
+ if (skip_header) {
+ if (config->interval && ps->timestamp[0])
+ fprintf(out, "%s", ps->timestamp);
+ if (config->aggr_map && ev->aggr_idx >= 0) {
+ struct aggr_cpu_id id =
+ config->aggr_map->map[ev->aggr_idx];
+ int aggr_nr = 0;
+ if (evsel->stats && evsel->stats->aggr) {
+ aggr_nr =
+ evsel->stats->aggr[ev->aggr_idx].nr;
+ }
+ print_aggr_id_std(config, out, evsel, id, aggr_nr);
+ }
+ fprintf(out, "%*s# ",
+ COUNTS_LEN + EVNAME_LEN + config->unit_width + 3,
+ "");
+ } else {
+ fprintf(out, " # ");
+ }
+ first = false;
+ } else {
+ /* Align subsequent metric lines */
+ fprintf(out, "\n");
+ if (config->interval && ps->timestamp[0])
+ fprintf(out, "%s", ps->timestamp);
+ if (config->aggr_map && ev->aggr_idx >= 0) {
+ struct aggr_cpu_id id = config->aggr_map->map[ev->aggr_idx];
+ int aggr_nr = 0;
+ if (evsel->stats && evsel->stats->aggr) {
+ aggr_nr = evsel->stats->aggr[ev->aggr_idx].nr;
+ }
+ print_aggr_id_std(config, out, evsel, id, aggr_nr);
+ }
+ fprintf(out, "%*s# ",
+ COUNTS_LEN + EVNAME_LEN + config->unit_width + 3, "");
+ }
+
+ if (color && color[0]) {
+ color_fprintf(out, color, m_fmt, met->val);
+ } else {
+ fprintf(out, m_fmt, met->val);
+ }
+ /* Print the metric unit and name left-aligned padded to METRIC_LEN - n - 1 = 26 */
+ fprintf(out, " %-26s", unit_name);
+
+ /* If this is the last metric in the list, print noise and multiplexing percentage */
+ if (list_is_last(&met->list, &ev->metrics_list)) {
+ if (ev->stdev_pct)
+ fprintf(out, " ( +-%6.2f%% )", ev->stdev_pct);
+ if (ev->run != ev->ena)
+ fprintf(out, " (%.2f%%)", 100.0 * ev->run / ev->ena);
+ }
+
+ list_del(&met->list);
+ free(met->name);
+ free(met->unit);
+ free(met);
+ }
+ fprintf(out, "\n");
+
+ list_del(&ev->list);
+ free(ev->name);
+ free(ev);
+ }
+ print_footer_std(config);
+ return 0;
+}
+
+static const struct perf_stat_print_callbacks std_print_callbacks = {
+ .print_start = std_print_start,
+ .print_end = std_print_end,
+ .print_event = std_print_event,
+ .print_metric = std_print_metric,
+};
+
+/*
+ * Standard (STD) Output Callbacks - Metric-Only Mode
+ */
+
+static int std_metric_only_print_start(void *ctx,
+ const struct perf_stat_config *config __maybe_unused)
+{
+ struct std_metric_only_print_state *ps = ctx;
+ INIT_LIST_HEAD(&ps->queued_metrics);
+ return 0;
+}
+
+static int std_metric_only_print_metric(void *ctx,
+ const struct perf_stat_config *config __maybe_unused,
+ struct evsel *evsel __maybe_unused, int aggr_idx,
+ const char *name, const char *unit, double val,
+ enum metric_threshold_classify thresh)
+{
+ struct std_metric_only_print_state *ps = ctx;
+ struct queued_metric *b = malloc(sizeof(*b));
+
+ if (!b)
+ return -ENOMEM;
+
+ b->name = strdup(name);
+ if (!b->name) {
+ free(b);
+ return -ENOMEM;
+ }
+
+ if (unit && unit[0]) {
+ b->unit = strdup(unit);
+ if (!b->unit) {
+ free(b->name);
+ free(b);
+ return -ENOMEM;
+ }
+ } else {
+ b->unit = NULL;
+ }
+
+ b->val = val;
+ b->thresh = thresh;
+ b->aggr_idx = aggr_idx;
+ list_add_tail(&b->list, &ps->queued_metrics);
+
+ return 0;
+}
+
+/* Unlink and free every queued_metric still linked on @head. */
+static void std_metric_only_free_queue(struct list_head *head)
+{
+	struct queued_metric *b, *tmp;
+
+	list_for_each_entry_safe(b, tmp, head, list) {
+		list_del(&b->list);
+		free(b->name);
+		free(b->unit);
+		free(b);
+	}
+}
+
+/**
+ * std_metric_only_print_end - Render the queued metrics as metric-only rows.
+ * @ctx: struct std_metric_only_print_state holding the queued metrics.
+ * @config: Perf stat configuration.
+ *
+ * Prints the column titles once (tracked in
+ * config->metric_only_headers_printed), then one row of values per
+ * aggregation index, then the footer. Every row, including the last one, is
+ * newline terminated: the footer prints nothing in interval mode, so the
+ * row must not rely on it for its line ending. The queue is emptied on
+ * every path that reaches the rendering code.
+ *
+ * Returns 0 on success or -ENOMEM if a formatted string cannot be allocated.
+ */
+static int std_metric_only_print_end(void *ctx, const struct perf_stat_config *config)
+{
+	struct std_metric_only_print_state *ps = ctx;
+	struct queued_metric *b;
+	FILE *out = ps->fp;
+	int first_aggr;
+	/* Initialize to -2 to distinguish from -1 (a valid index in AGGR_GLOBAL mode) */
+	int current_aggr = -2;
+	int ret = 0;
+
+	if (list_empty(&ps->queued_metrics))
+		return 0;
+
+	first_aggr = list_first_entry(&ps->queued_metrics, struct queued_metric, list)->aggr_idx;
+
+	if (!config->metric_only_headers_printed) {
+		/* Print the formatted header prefix */
+		if (!config->interval)
+			print_header_std(config, ps->target, ps->argc, ps->argv);
+
+		if (config->aggr_map && first_aggr >= 0) {
+			int len = aggr_header_lens[config->aggr_mode];
+
+			fprintf(out, "%*s", len + 1, "");
+		}
+
+		/* One column title per metric of the first aggregation row */
+		list_for_each_entry(b, &ps->queued_metrics, list) {
+			char *header_name;
+			int err;
+
+			if (b->aggr_idx != first_aggr)
+				continue;
+
+			if (b->unit && b->unit[0]) {
+				err = asprintf(&header_name, "%s %s", b->unit, b->name);
+			} else {
+				header_name = strdup(b->name);
+				err = header_name ? 0 : -1;
+			}
+			if (err < 0) {
+				ret = -ENOMEM;
+				goto out_free;
+			}
+			fprintf(out, "%*s ", config->metric_only_len, header_name);
+			free(header_name);
+		}
+		fprintf(out, "\n\n");
+		((struct perf_stat_config *)config)->metric_only_headers_printed = true;
+	}
+
+	/* Print values */
+	list_for_each_entry(b, &ps->queued_metrics, list) {
+		const char *color;
+		char *str;
+		int mlen = config->metric_only_len;
+		int err;
+
+		/* A change of aggregation index starts a new row */
+		if (b->aggr_idx != current_aggr) {
+			if (current_aggr != -2)
+				fprintf(out, "\n");
+			current_aggr = b->aggr_idx;
+			if (config->interval && ps->timestamp[0])
+				fprintf(out, "%s", ps->timestamp);
+			if (config->aggr_map && current_aggr >= 0) {
+				struct aggr_cpu_id id = config->aggr_map->map[current_aggr];
+				/* The first evsel of the list stands in for the row's event */
+				struct evsel *mock_evsel =
+					list_first_entry(&ps->evlist->core.entries,
+							 struct evsel, core.node);
+				int aggr_nr = 0;
+
+				if (mock_evsel->stats && mock_evsel->stats->aggr)
+					aggr_nr = mock_evsel->stats->aggr[current_aggr].nr;
+
+				print_aggr_id_std(config, out, mock_evsel, id, aggr_nr);
+			}
+		}
+
+		color = metric_threshold_classify__color(b->thresh);
+		if (color && color[0]) {
+			err = asprintf(&str, "%s%.1f%s", color, b->val, PERF_COLOR_RESET);
+			/* Escape sequences take no columns: widen the field for them */
+			mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
+		} else {
+			err = asprintf(&str, "%.1f", b->val);
+		}
+		if (err < 0) {
+			ret = -ENOMEM;
+			goto out_free;
+		}
+		fprintf(out, "%*s ", mlen, str);
+		free(str);
+	}
+
+	/* Terminate the last row; the footer is silent in interval mode */
+	fprintf(out, "\n");
+	print_footer_std(config);
+
+out_free:
+	std_metric_only_free_queue(&ps->queued_metrics);
+	return ret;
+}
+
+static const struct perf_stat_print_callbacks std_metric_only_print_callbacks = {
+ .print_start = std_metric_only_print_start,
+ .print_end = std_metric_only_print_end,
+ .print_event = NULL,
+ .print_metric = std_metric_only_print_metric,
+};
+
+/*
+ * Standard console entry point: build the format-private state, stamp it
+ * with the interval timestamp when one applies, and run the generic
+ * traversal with either the metric-only or the normal STD callbacks. In
+ * normal interval mode the column header line is emitted once, tracked by
+ * config->headers_printed.
+ */
+int perf_stat__print_std(struct evlist *evlist, const struct perf_stat_config *config,
+			 const struct target *target, const struct timespec *ts, int argc,
+			 const char **argv)
+{
+	struct std_print_state ps = {
+		.fp = config->output,
+		.target = target,
+		.argc = argc,
+		.argv = argv,
+	};
+	const bool stamp = config->interval && ts;
+
+	if (config->metric_only) {
+		struct std_metric_only_print_state mops = {
+			.fp = config->output,
+			.target = target,
+			.argc = argc,
+			.argv = argv,
+			.evlist = evlist,
+		};
+
+		if (stamp)
+			scnprintf(mops.timestamp, sizeof(mops.timestamp), "%6lu.%09lu ",
+				  (unsigned long)ts->tv_sec, ts->tv_nsec);
+		else
+			mops.timestamp[0] = '\0';
+
+		return perf_stat__print_cb(evlist, config, target, ts, argc, argv,
+					   &std_metric_only_print_callbacks, &mops);
+	}
+
+	/* Interval mode: print the column titles before the first interval only */
+	if (config->interval && !config->headers_printed) {
+		FILE *output = config->output;
+
+		if (config->aggr_mode == AGGR_GLOBAL)
+			fprintf(output, "#%*s %*s %*s events\n", 15 - 2, "time", 18, "counts",
+				config->unit_width, "unit");
+		else
+			fprintf(output, "#%*s %-*s ctrs %*s %*s events\n",
+				15 - 2, "time",
+				aggr_header_lens[config->aggr_mode],
+				aggr_header_std[config->aggr_mode],
+				18, "counts", config->unit_width, "unit");
+
+		((struct perf_stat_config *)config)->headers_printed = true;
+	}
+
+	if (stamp)
+		scnprintf(ps.timestamp, sizeof(ps.timestamp), "%6lu.%09lu ",
+			  (unsigned long)ts->tv_sec, ts->tv_nsec);
+	else
+		ps.timestamp[0] = '\0';
+
+	return perf_stat__print_cb(evlist, config, target, ts, argc, argv,
+				   &std_print_callbacks, &ps);
+}
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread
* Re: [RFC PATCH v2 02/14] perf stat: Implement standard console (STD) formatting callbacks
2026-05-25 23:18 ` [RFC PATCH v2 02/14] perf stat: Implement standard console (STD) formatting callbacks Ian Rogers
@ 2026-05-25 23:49 ` Arnaldo Carvalho de Melo
2026-05-26 0:09 ` Ian Rogers
2026-05-25 23:53 ` sashiko-bot
1 sibling, 1 reply; 46+ messages in thread
From: Arnaldo Carvalho de Melo @ 2026-05-25 23:49 UTC (permalink / raw)
To: Ian Rogers
Cc: linux-perf-users, namhyung, adrian.hunter, james.clark, jolsa,
linux-kernel, mingo, peterz
On Mon, May 25, 2026 at 04:18:48PM -0700, Ian Rogers wrote:
> This patch implements standard console formatting callbacks inside
> util/stat-print-std.c, replacing the empty stubs introduced in Commit 1.
>
> Introduces the format-private `struct queued_event` and `struct queued_metric`
> DOM nodes to buffer traversal streams, and fully encapsulates DOM state
> initialization and queue cleanups inside std_print_start() and std_print_end().
>
> Utilizes the newly centralized unified aggregation helpers to resolve CPU and
> thread prefixes cleanly, and incorporates full interval-mode timestamp
> printing support across all rows.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> Assisted-by: Antigravity:gemini-3.5-flash
> ---
> tools/perf/util/stat-print-std.c | 776 ++++++++++++++++++++++++++++++-
> 1 file changed, 768 insertions(+), 8 deletions(-)
>
> diff --git a/tools/perf/util/stat-print-std.c b/tools/perf/util/stat-print-std.c
> index 83987e97c889..aa4a083bb85a 100644
> --- a/tools/perf/util/stat-print-std.c
> +++ b/tools/perf/util/stat-print-std.c
> @@ -1,13 +1,773 @@
> -/* SPDX-License-Identifier: GPL-2.0 */
> -#include "stat-print.h"
> +// SPDX-License-Identifier: GPL-2.0
What is the value of switching from /* bla */ to // bla when both are
acceptable? Ends up being just noise.
Moving the first include after the SPDX line, because standardizing on some
sort of ordering enforced by tooling is considered better for some reason,
at least has some motivation, so, whatever, no problem.
> +#include <errno.h>
> +#include <math.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +
> #include <linux/compiler.h>
> +#include <linux/kernel.h>
> +#include <linux/list.h>
> +
> +#include "color.h"
> +#include "cpumap.h"
> +#include "debug.h"
> +#include "evlist.h"
> +#include "evsel.h"
> +#include "metricgroup.h"
> +#include "stat-print.h"
> +#include "stat.h"
> +#include "target.h"
> +#include "thread_map.h"
> +#include "tool_pmu.h"
> +
> +#define COUNTS_LEN 18
> +#define EVNAME_LEN 32
> +#define COMM_LEN 16
> +#define PID_LEN 7
> +#define MGROUP_LEN 50
> +#define METRIC_LEN 38
> +
> +
> +
> +/**
> + * struct queued_metric - In-memory record of a buffered metric.
Shouldn't we have a space between the struct header and its members?
> + * @list: Linked list node for queueing.
If this is a node, shouldn't we name it node instead of list? Naming it list
makes it look like we have a list here, not that this entry is part of a list.
> + * @name: The display name of the metric.
> + * @unit: The metric's unit (e.g., "%", "GHz", or NULL).
Can this be just:
+ * @unit: "%", "GHz", or NULL
As it's implied that, being a metric, it should be its unit?
> + * @val: The calculated ratio/metric value.
> + * @thresh: Threshold classification for color coding.
@thresh: classification for color coding.
> + * @aggr_idx: Aggregation index in evsel stats.
> + */
> +struct queued_metric {
> + struct list_head list;
> + char *name;
> + char *unit;
> + double val;
> + enum metric_threshold_classify thresh;
> + int aggr_idx;
> +};
Documenting has value; now it's a matter of making sure it stays in sync.
Ditto for all the lines in the next structs
> +/**
> + * struct queued_event - In-memory record of a buffered counter event.
> + * @list: Linked list node for queueing.
> + * @evsel: The associated performance event selector.
> + * @name: The uniquely formatted/resolved event name.
> + * @val: Raw aggregated counter value.
> + * @ena: Enabled time for multiplexing percentage.
> + * @run: Running time for multiplexing percentage.
> + * @stdev_pct: Standard deviation percentage across repeated runs.
> + * @aggr_idx: Aggregation index.
> + * @is_metricgroup: Whether this represents a unified metricgroup header.
> + * @metrics_list: Linked list head containing nested queued_metric structures.
> + */
> +struct queued_event {
> + struct list_head list;
> + struct evsel *evsel;
> + char *name;
> + u64 val, ena, run;
> + double stdev_pct;
> + int aggr_idx;
> + bool is_metricgroup;
> + struct list_head metrics_list;
> +};
> +
> +/**
> + * struct std_print_state - Print state context for Standard console output.
> + * @fp: File descriptor to output to.
> + * @timestamp: Formatted interval timestamp (optional).
> + * @events_list: Linked list head containing queued_event nodes.
> + * @current_event: Pointer to the currently active event being printed.
> + * Serves as a temporary bridge to associate streaming metrics back to
> + * their parent event node during list buffering. This relies on a
> + * strict temporal coupling in the traversal driver: the driver always
> + * invokes print_metric() callbacks for a counter synchronously and
> + * immediately after its print_event() callback, prior to advancing
> + * to the next event or aggregation node. This pointer is completely
> + * private to standard printing, keeping the traversal driver decoupled
> + * and preserving strict encapsulation.
> + * @target: target query parameters for header printout.
> + * @argc: Command argument count.
> + * @argv: Command argument values.
> + */
> +struct std_print_state {
> + FILE *fp;
> + char timestamp[64];
> + struct list_head events_list;
> + struct queued_event *current_event;
> + const struct target *target;
> + int argc;
> + const char **argv;
> +};
> +
> +/**
> + * struct std_metric_only_print_state - Metric-only print state context for Standard console output.
> + * @fp: File descriptor to output to.
> + * @queued_metrics: Linked list head containing queued_metric nodes.
> + * @timestamp: Formatted interval timestamp (optional).
> + * @target: target query parameters.
> + * @argc: Command argument count.
> + * @argv: Command argument values.
> + */
> +struct std_metric_only_print_state {
> + FILE *fp;
> + struct list_head queued_metrics;
> + char timestamp[64];
> + const struct target *target;
> + int argc;
> + const char **argv;
> + struct evlist *evlist;
> +};
> +
> +/**
> + * print_aggr_id_std - Print the aggregation prefix for STD format.
> + *
> + * Uses the unified perf_stat__get_aggr_id_char helper to format the base
> + * aggregation string, and pads it dynamically using aggr_header_lens.
> + */
> +static void print_aggr_id_std(const struct perf_stat_config *config, FILE *output,
> + struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr)
> +{
> + char buf[128];
> +
> + if (perf_stat__get_aggr_id_char(config, evsel, id, buf, sizeof(buf)) < 0)
> + return;
So the contract here is clear: if there is some problem with doing what
is expected, it should print nothing? I would expect that if this is
called, being a "print" function, something would be printed. Is the
above failure (< 0) a problem the user should be warned about?
> +
> + if (config->aggr_mode == AGGR_NONE) {
> + if (evsel->percore && !config->percore_show_thread) {
> + fprintf(output, "%-*s ", aggr_header_lens[AGGR_CORE], buf);
> + } else if (id.cpu.cpu > -1) {
> + /* For CPU none mode, prepend "CPU" during console print */
> + char cpu_buf[160];
> + snprintf(cpu_buf, sizeof(cpu_buf), "CPU%s", buf);
> + fprintf(output, "%-*s ", aggr_header_lens[AGGR_NONE], cpu_buf);
> + }
> + return;
> + }
> +
> + if (config->aggr_mode == AGGR_THREAD) {
> + fprintf(output, "%-*s ", aggr_header_lens[AGGR_THREAD], buf);
> + return;
> + }
> +
> + /* Socket/Die/Node/Cache/Cluster modes print base ID and aggr count */
> + fprintf(output, "%-s %*d ", buf, 4, aggr_nr);
So, only in the first case (< 0) this function doesn't print something.
> +}
> +
> +/**
> + * should_skip_zero_counter - Check if a zero-valued counter should be skipped.
> + *
> + * Implemented locally for standard console formatting.
> + */
> +static bool should_skip_zero_counter(const struct perf_stat_config *config, struct evsel *counter,
> + int aggr_idx)
> +{
> + struct perf_cpu cpu;
> + unsigned int idx;
> + struct aggr_cpu_id id;
> +
> + if (verbose == 0 && counter->skippable && !counter->supported)
> + return true;
Is this really && &&, or should it be || ||?
> +
> + if (config->metric_only)
> + return false;
> +
> + if (config->aggr_mode == AGGR_THREAD && config->system_wide)
> + return true;
> +
> + if (aggr_idx < 0 || !config->aggr_map || !config->aggr_get_id)
> + return false;
> +
> + id = config->aggr_map->map[aggr_idx];
> +
> + if (evsel__is_tool(counter)) {
> + struct aggr_cpu_id own_id = config->aggr_get_id((struct perf_stat_config *)config,
> + (struct perf_cpu){ .cpu = 0 });
> +
> + return !aggr_cpu_id__equal(&id, &own_id);
> + }
> +
> + perf_cpu_map__for_each_cpu(cpu, idx, counter->core.cpus) {
> + struct aggr_cpu_id own_id =
> + config->aggr_get_id((struct perf_stat_config *)config, cpu);
> +
> + if (aggr_cpu_id__equal(&id, &own_id))
> + return false;
> + }
> + return true;
> +}
> +
> +/*
> + * Standard (STD) Output Callbacks - Normal Mode
> + */
> +
> +static int std_print_start(void *ctx, const struct perf_stat_config *config __maybe_unused)
> +{
> + struct std_print_state *ps = ctx;
>
> -int perf_stat__print_std(struct evlist *evlist __maybe_unused,
> - const struct perf_stat_config *config __maybe_unused,
> - const struct target *target __maybe_unused,
> - const struct timespec *ts __maybe_unused,
> - int argc __maybe_unused,
> - const char **argv __maybe_unused)
> + INIT_LIST_HEAD(&ps->events_list);
> + ps->current_event = NULL;
> + return 0;
> +}
> +
> +static int std_print_event(void *ctx, const struct perf_stat_config *config, struct evsel *evsel,
> + int aggr_idx, u64 val, u64 ena, u64 run, double stdev_pct)
> {
> + struct std_print_state *ps = ctx;
> + struct queued_event *ev;
> +
> + /* Skip zero counters locally in STD callbacks if they qualify */
> + if (val == 0 && should_skip_zero_counter(config, evsel, aggr_idx)) {
> + ps->current_event = NULL;
> + return 0;
> + }
> +
> + ev = malloc(sizeof(*ev));
> + if (!ev)
> + return -ENOMEM;
> +
> + ev->name = strdup(evsel__name(evsel));
> + if (!ev->name) {
> + free(ev);
> + return -ENOMEM;
> + }
> +
> + ev->evsel = evsel;
> + ev->val = val;
> + ev->ena = ena;
> + ev->run = run;
> + ev->stdev_pct = stdev_pct;
> + ev->aggr_idx = aggr_idx;
> + INIT_LIST_HEAD(&ev->metrics_list);
> +
> + list_add_tail(&ev->list, &ps->events_list);
> + ps->current_event = ev;
> +
> return 0;
> }
> +
> +static int std_print_metric(void *ctx, const struct perf_stat_config *config __maybe_unused,
> + struct evsel *evsel __maybe_unused, int aggr_idx __maybe_unused,
> + const char *name, const char *unit, double val,
> + enum metric_threshold_classify thresh)
> +{
> + struct std_print_state *ps = ctx;
> + struct queued_metric *b;
> +
> + if (!ps->current_event)
> + return 0;
> +
> + if (evsel != ps->current_event->evsel) {
> + pr_err("decoupled print engine: temporal coupling violation: evsel mismatch!\n");
> + return -EINVAL;
> + }
> +
> + b = malloc(sizeof(*b));
> + if (!b)
> + return -ENOMEM;
> +
> + b->name = strdup(name);
> + if (!b->name) {
> + free(b);
> + return -ENOMEM;
> + }
> +
> + if (unit && unit[0]) {
> + b->unit = strdup(unit);
> + if (!b->unit) {
> + free(b->name);
> + free(b);
> + return -ENOMEM;
> + }
> + } else {
> + b->unit = NULL;
> + }
> +
> + b->val = val;
> + b->thresh = thresh;
> + list_add_tail(&b->list, &ps->current_event->metrics_list);
> +
> + return 0;
> +}
> +
> +#define USEC_PER_SEC 1000000ULL
> +#define NSEC_PER_SEC 1000000000ULL
> +
> +static double timeval2double(struct timeval *t)
> +{
> + return t->tv_sec + (double)t->tv_usec / USEC_PER_SEC;
> +}
> +
> +static void print_footer_std(const struct perf_stat_config *config)
> +{
> + double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
> + FILE *output = config->output;
> +
> + if (config->interval)
> + return;
> +
> + if (!config->null_run)
> + fprintf(output, "\n");
> +
> + if (config->run_count == 1) {
> + fprintf(output, " %17.9f seconds time elapsed", avg);
> +
> + if (config->ru_display) {
> + double ru_utime =
> + timeval2double((struct timeval *)&config->ru_data.ru_utime);
> + double ru_stime =
> + timeval2double((struct timeval *)&config->ru_data.ru_stime);
> +
> + fprintf(output, "\n\n");
> + fprintf(output, " %17.9f seconds user\n", ru_utime);
> + fprintf(output, " %17.9f seconds sys\n", ru_stime);
> + }
> + } else {
> + double sd = stddev_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
> + fprintf(output, " %17.9f +- %-17.9f seconds time elapsed", avg, sd);
> + }
> + fprintf(output, "\n");
> +}
> +
> +/**
> + * print_header_std - Print the header prefix matching old API.
> + *
> + * Copied and adapted from stat-display.c.
> + */
> +static void print_header_std(const struct perf_stat_config *config, const struct target *target,
> + int argc, const char **argv)
> +{
> + FILE *output = config->output;
> + int i;
> +
> + fprintf(output, "\n");
> + fprintf(output, " Performance counter stats for ");
> + if (target->bpf_str)
> + fprintf(output, "\'BPF program(s) %s", target->bpf_str);
> + else if (target->system_wide)
> + fprintf(output, "\'system wide");
> + else if (target->cpu_list)
> + fprintf(output, "\'CPU(s) %s", target->cpu_list);
> + else if (!target__has_task(target)) {
> + fprintf(output, "\'%s", argv ? argv[0] : "pipe");
> + for (i = 1; argv && (i < argc); i++)
> + fprintf(output, " %s", argv[i]);
> + } else if (target->pid)
> + fprintf(output, "process id \'%s", target->pid);
> + else
> + fprintf(output, "thread id \'%s", target->tid);
> +
> + fprintf(output, "\'");
> + if (config->run_count > 1)
> + fprintf(output, " (%d runs)", config->run_count);
> + fprintf(output, ":\n\n");
> +}
> +
> +static int std_print_end(void *ctx, const struct perf_stat_config *config)
> +{
> + struct std_print_state *ps = ctx;
> + struct queued_event *ev, *tmp_ev;
> + struct queued_metric *met, *tmp_met;
> + FILE *out = ps->fp;
> + bool first;
> + const char *last_mg_name = NULL;
> + const struct perf_pmu *last_pmu = NULL;
> + int last_aggr_idx = -1;
> +
> + /* Print the formatted header prefix (only in non-interval mode) */
> + if (!config->interval)
> + print_header_std(config, ps->target, ps->argc, ps->argv);
> +
> + list_for_each_entry_safe(ev, tmp_ev, &ps->events_list, list) {
> + struct evsel *evsel = ev->evsel;
> + double sc = evsel->scale;
> + const char *fmt;
> + const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
> + struct metric_event *me =
> + metricgroup__lookup(&evsel->evlist->metric_events, evsel, false);
> + bool is_metricgroup = false;
> + bool skip_header = false;
> + char full_name[128] = "";
> +
> + if (me && me->is_default && !evsel->default_show_events) {
> + struct metric_expr *mexp =
> + list_first_entry(&me->head, struct metric_expr, nd);
> + const char *mg_name = mexp->default_metricgroup_name;
> + bool need_full_name = perf_pmus__num_core_pmus() > 1;
> +
> + if (need_full_name && evsel->pmu)
> + scnprintf(full_name, sizeof(full_name), "%s (%s)", mg_name,
> + evsel->pmu->name);
> + else
> + scnprintf(full_name, sizeof(full_name), "%s", mg_name);
> + is_metricgroup = true;
> +
> + if (last_mg_name && !strcmp(last_mg_name, mg_name) &&
> + last_pmu == evsel->pmu && last_aggr_idx == ev->aggr_idx) {
> + skip_header = true;
> + }
> + last_mg_name = mg_name;
> + last_pmu = evsel->pmu;
> + last_aggr_idx = ev->aggr_idx;
> + }
> +
> + /* Print interval timestamp if configured */
> + if (config->interval && ps->timestamp[0] && !skip_header)
> + fprintf(out, "%s", ps->timestamp);
> +
> + /* 1. Print aggregation prefix first (if we don't skip header) */
> + if (!skip_header && config->aggr_map && ev->aggr_idx >= 0) {
> + struct aggr_cpu_id id = config->aggr_map->map[ev->aggr_idx];
> + int aggr_nr = 0;
> + if (evsel->stats && evsel->stats->aggr) {
> + aggr_nr = evsel->stats->aggr[ev->aggr_idx].nr;
> + }
> + print_aggr_id_std(config, out, evsel, id, aggr_nr);
> + }
> +
> + /* 2. Print event value (scaled) or spaces if metricgroup */
> + if (is_metricgroup) {
> + if (!skip_header) {
> + int n = fprintf(out, " %*s", EVNAME_LEN, full_name);
> + fprintf(out, "%*s", MGROUP_LEN + config->unit_width + 2 - n, "");
> + }
> + } else {
> + if (config->big_num)
> + fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f ";
> + else
> + fmt = floor(sc) != sc ? "%*.2f " : "%*.0f ";
> +
> + if (ev->run == 0 || ev->ena == 0) {
> + fprintf(out, "%*s ", COUNTS_LEN, bad_count);
> + } else {
> + double scaled = (double)ev->val;
> + double avg;
> + if (ev->ena < ev->run) {
> + scaled = (double)ev->val * ev->run / ev->ena;
> + }
> + avg = scaled * sc;
> + fprintf(out, fmt, COUNTS_LEN, avg);
> + }
> +
> + /* 3. Print unit */
> + if (evsel->unit) {
> + fprintf(out, "%-*s ", config->unit_width, evsel->unit);
> + } else {
> + if (config->unit_width > 0)
> + fprintf(out, "%-*s ", config->unit_width, "");
> + }
> +
> + /* 4. Print event name */
> + fprintf(out, "%-*s", EVNAME_LEN, evsel__name(evsel));
> +
> + /* If there are no metrics, print noise and multiplexing percentage */
> + if (list_empty(&ev->metrics_list)) {
> + if (ev->stdev_pct)
> + fprintf(out, " ( +-%6.2f%% )", ev->stdev_pct);
> + if (ev->run != ev->ena)
> + fprintf(out, " (%.2f%%)", 100.0 * ev->run / ev->ena);
> + }
> + }
> +
> + first = true;
> + list_for_each_entry_safe(met, tmp_met, &ev->metrics_list, list) {
> + const char *color = metric_threshold_classify__color(met->thresh);
> + char unit_name[128];
> + const char *m_fmt = (met->unit && met->unit[0]) ? "%8.1f" : "%8.2f";
> +
> + if (met->unit && met->unit[0]) {
> + snprintf(unit_name, sizeof(unit_name), "%s %s", met->unit,
> + met->name);
> + } else {
> + snprintf(unit_name, sizeof(unit_name), "%s", met->name);
> + }
> +
> + if (first) {
> + if (skip_header) {
> + if (config->interval && ps->timestamp[0])
> + fprintf(out, "%s", ps->timestamp);
> + if (config->aggr_map && ev->aggr_idx >= 0) {
> + struct aggr_cpu_id id =
> + config->aggr_map->map[ev->aggr_idx];
> + int aggr_nr = 0;
> + if (evsel->stats && evsel->stats->aggr) {
> + aggr_nr =
> + evsel->stats->aggr[ev->aggr_idx].nr;
> + }
> + print_aggr_id_std(config, out, evsel, id, aggr_nr);
> + }
> + fprintf(out, "%*s# ",
> + COUNTS_LEN + EVNAME_LEN + config->unit_width + 3,
> + "");
> + } else {
> + fprintf(out, " # ");
> + }
> + first = false;
> + } else {
> + /* Align subsequent metric lines */
> + fprintf(out, "\n");
> + if (config->interval && ps->timestamp[0])
> + fprintf(out, "%s", ps->timestamp);
> + if (config->aggr_map && ev->aggr_idx >= 0) {
> + struct aggr_cpu_id id = config->aggr_map->map[ev->aggr_idx];
> + int aggr_nr = 0;
> + if (evsel->stats && evsel->stats->aggr) {
> + aggr_nr = evsel->stats->aggr[ev->aggr_idx].nr;
> + }
> + print_aggr_id_std(config, out, evsel, id, aggr_nr);
> + }
> + fprintf(out, "%*s# ",
> + COUNTS_LEN + EVNAME_LEN + config->unit_width + 3, "");
> + }
> +
> + if (color && color[0]) {
> + color_fprintf(out, color, m_fmt, met->val);
> + } else {
> + fprintf(out, m_fmt, met->val);
> + }
> + /* Print the metric unit and name left-aligned padded to METRIC_LEN - n - 1 = 26 */
> + fprintf(out, " %-26s", unit_name);
> +
> + /* If this is the last metric in the list, print noise and multiplexing percentage */
> + if (list_is_last(&met->list, &ev->metrics_list)) {
> + if (ev->stdev_pct)
> + fprintf(out, " ( +-%6.2f%% )", ev->stdev_pct);
> + if (ev->run != ev->ena)
> + fprintf(out, " (%.2f%%)", 100.0 * ev->run / ev->ena);
> + }
> +
> + list_del(&met->list);
> + free(met->name);
> + free(met->unit);
> + free(met);
> + }
> + fprintf(out, "\n");
> +
> + list_del(&ev->list);
> + free(ev->name);
> + free(ev);
> + }
> + print_footer_std(config);
> + return 0;
> +}
> +
> +static const struct perf_stat_print_callbacks std_print_callbacks = {
> + .print_start = std_print_start,
> + .print_end = std_print_end,
> + .print_event = std_print_event,
> + .print_metric = std_print_metric,
> +};
> +
> +/*
> + * Standard (STD) Output Callbacks - Metric-Only Mode
> + */
> +
> +static int std_metric_only_print_start(void *ctx,
> + const struct perf_stat_config *config __maybe_unused)
> +{
> + struct std_metric_only_print_state *ps = ctx;
> + INIT_LIST_HEAD(&ps->queued_metrics);
> + return 0;
> +}
> +
> +static int std_metric_only_print_metric(void *ctx,
> + const struct perf_stat_config *config __maybe_unused,
> + struct evsel *evsel __maybe_unused, int aggr_idx,
> + const char *name, const char *unit, double val,
> + enum metric_threshold_classify thresh)
> +{
> + struct std_metric_only_print_state *ps = ctx;
> + struct queued_metric *b = malloc(sizeof(*b));
> +
> + if (!b)
> + return -ENOMEM;
> +
> + b->name = strdup(name);
> + if (!b->name) {
> + free(b);
> + return -ENOMEM;
> + }
> +
> + if (unit && unit[0]) {
> + b->unit = strdup(unit);
> + if (!b->unit) {
> + free(b->name);
> + free(b);
> + return -ENOMEM;
> + }
> + } else {
> + b->unit = NULL;
> + }
> +
> + b->val = val;
> + b->thresh = thresh;
> + b->aggr_idx = aggr_idx;
> + list_add_tail(&b->list, &ps->queued_metrics);
> +
> + return 0;
> +}
> +
> +static int std_metric_only_print_end(void *ctx, const struct perf_stat_config *config)
> +{
> + struct std_metric_only_print_state *ps = ctx;
> + struct queued_metric *b, *tmp;
> + FILE *out = ps->fp;
> + int first_aggr = -1;
> + /* Initialize to -2 to distinguish from -1 (a valid index in AGGR_GLOBAL mode) */
> + int current_aggr = -2;
> + const char *color;
> + char *str;
> + int mlen;
> + int ret = 0;
> + int err;
> +
> + if (list_empty(&ps->queued_metrics))
> + return 0;
> +
> + first_aggr = list_first_entry(&ps->queued_metrics, struct queued_metric, list)->aggr_idx;
> +
> + if (!config->metric_only_headers_printed) {
> + /* Print the formatted header prefix */
> + if (!config->interval)
> + print_header_std(config, ps->target, ps->argc, ps->argv);
> +
> + if (config->aggr_map && first_aggr >= 0) {
> + int len = aggr_header_lens[config->aggr_mode];
> +
> + fprintf(out, "%*s", len + 1, "");
> + }
> +
> + /* Print headers */
> + list_for_each_entry(b, &ps->queued_metrics, list) {
> + if (b->aggr_idx == first_aggr) {
> + char *header_name;
> +
> + if (b->unit && b->unit[0]) {
> + err = asprintf(&header_name, "%s %s", b->unit, b->name);
> + } else {
> + header_name = strdup(b->name);
> + err = header_name ? 0 : -1;
> + }
> + if (err < 0) {
> + ret = -ENOMEM;
> + goto cleanup;
> + }
> + fprintf(out, "%*s ", config->metric_only_len, header_name);
> + free(header_name);
> + }
> + }
> + fprintf(out, "\n\n");
> + ((struct perf_stat_config *)config)->metric_only_headers_printed = true;
> + }
> +
> + /* Print values */
> + list_for_each_entry_safe(b, tmp, &ps->queued_metrics, list) {
> + if (b->aggr_idx != current_aggr) {
> + if (current_aggr != -2)
> + fprintf(out, "\n");
> + current_aggr = b->aggr_idx;
> + if (config->interval && ps->timestamp[0])
> + fprintf(out, "%s", ps->timestamp);
> + if (config->aggr_map && current_aggr >= 0) {
> + struct aggr_cpu_id id = config->aggr_map->map[current_aggr];
> + struct evsel *mock_evsel = list_first_entry(&ps->evlist->core.entries, struct evsel, core.node);
> + int aggr_nr = 0;
> +
> + if (mock_evsel->stats && mock_evsel->stats->aggr)
> + aggr_nr = mock_evsel->stats->aggr[current_aggr].nr;
> +
> + print_aggr_id_std(config, out, mock_evsel, id, aggr_nr);
> + }
> + }
> + color = metric_threshold_classify__color(b->thresh);
> + mlen = config->metric_only_len;
> +
> + if (color && color[0]) {
> + err = asprintf(&str, "%s%.1f%s", color, b->val, PERF_COLOR_RESET);
> + mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
> + } else {
> + err = asprintf(&str, "%.1f", b->val);
> + }
> + if (err < 0) {
> + ret = -ENOMEM;
> + goto cleanup;
> + }
> + fprintf(out, "%*s ", mlen, str);
> + free(str);
> +
> + list_del(&b->list);
> + free(b->name);
> + free(b->unit);
> + free(b);
> + }
> + print_footer_std(config);
> + return 0;
> +
> +cleanup:
> + list_for_each_entry_safe(b, tmp, &ps->queued_metrics, list) {
> + list_del(&b->list);
> + free(b->name);
> + free(b->unit);
> + free(b);
> + }
> + return ret;
> +}
> +
> +static const struct perf_stat_print_callbacks std_metric_only_print_callbacks = {
> + .print_start = std_metric_only_print_start,
> + .print_end = std_metric_only_print_end,
> + .print_event = NULL,
> + .print_metric = std_metric_only_print_metric,
> +};
> +
> +int perf_stat__print_std(struct evlist *evlist, const struct perf_stat_config *config,
> + const struct target *target, const struct timespec *ts, int argc,
> + const char **argv)
> +{
> + struct std_print_state ps = {
> + .fp = config->output,
> + .target = target,
> + .argc = argc,
> + .argv = argv,
> + };
> +
> + if (config->metric_only) {
> + struct std_metric_only_print_state mops = {
> + .fp = config->output,
> + .target = target,
> + .argc = argc,
> + .argv = argv,
> + .evlist = evlist,
> + };
> + if (config->interval && ts) {
> + scnprintf(mops.timestamp, sizeof(mops.timestamp), "%6lu.%09lu ",
> + (unsigned long)ts->tv_sec, ts->tv_nsec);
> + } else {
> + mops.timestamp[0] = '\0';
> + }
> + return perf_stat__print_cb(evlist, config, target, ts, argc, argv,
> + &std_metric_only_print_callbacks, &mops);
> + } else {
> + if (config->interval && !config->headers_printed) {
> + FILE *output = config->output;
> +
> + if (config->aggr_mode == AGGR_GLOBAL) {
> + fprintf(output, "#%*s %*s %*s events\n", 15 - 2, "time", 18, "counts", config->unit_width, "unit");
> + } else {
> + fprintf(output, "#%*s %-*s ctrs %*s %*s events\n",
> + 15 - 2, "time",
> + aggr_header_lens[config->aggr_mode], aggr_header_std[config->aggr_mode],
> + 18, "counts", config->unit_width, "unit");
> + }
> + ((struct perf_stat_config *)config)->headers_printed = true;
> + }
> + if (config->interval && ts) {
> + scnprintf(ps.timestamp, sizeof(ps.timestamp), "%6lu.%09lu ",
> + (unsigned long)ts->tv_sec, ts->tv_nsec);
> + } else {
> + ps.timestamp[0] = '\0';
> + }
> + return perf_stat__print_cb(evlist, config, target, ts, argc, argv,
> + &std_print_callbacks, &ps);
> + }
> +}
> --
> 2.54.0.794.g4f17f83d09-goog
>
^ permalink raw reply [flat|nested] 46+ messages in thread* Re: [RFC PATCH v2 02/14] perf stat: Implement standard console (STD) formatting callbacks
2026-05-25 23:49 ` Arnaldo Carvalho de Melo
@ 2026-05-26 0:09 ` Ian Rogers
0 siblings, 0 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-26 0:09 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: linux-perf-users, namhyung, adrian.hunter, james.clark, jolsa,
linux-kernel, mingo, peterz
On Mon, May 25, 2026 at 4:49 PM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> On Mon, May 25, 2026 at 04:18:48PM -0700, Ian Rogers wrote:
> > This patch implements standard console formatting callbacks inside
> > util/stat-print-std.c, replacing the empty stubs introduced in Commit 1.
> >
> > Introduces the format-private `struct queued_event` and `struct queued_metric`
> > DOM nodes to buffer traversal streams, and fully encapsulates DOM state
> > initialization and queue cleanups inside std_print_start() and std_print_end().
> >
> > Utilizes the newly centralized unified aggregation helpers to resolve CPU and
> > thread prefixes cleanly, and incorporates full interval-mode timestamp
> > printing support across all rows.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > Assisted-by: Antigravity:gemini-3.5-flash
> > ---
> > tools/perf/util/stat-print-std.c | 776 ++++++++++++++++++++++++++++++-
> > 1 file changed, 768 insertions(+), 8 deletions(-)
> >
> > diff --git a/tools/perf/util/stat-print-std.c b/tools/perf/util/stat-print-std.c
> > index 83987e97c889..aa4a083bb85a 100644
> > --- a/tools/perf/util/stat-print-std.c
> > +++ b/tools/perf/util/stat-print-std.c
> > @@ -1,13 +1,773 @@
> > -/* SPDX-License-Identifier: GPL-2.0 */
> > -#include "stat-print.h"
> > +// SPDX-License-Identifier: GPL-2.0
>
> What is the value of switching from /* bla */ to // bla when both are
> acceptable? Ends up being just noise.
>
> Moving the first include after the SPDX because standardizing into some
> sort of ordering enforced by tooling and is considered considered better
> for some reason at least has some motivation, so, whatever, no problem.
Agreed. Will fix. I suspect the original SPDX was brought over from
something like stat.h and then checkpatch.pl warned about preferring
//. But given this is a new file there's no reason to modify it.
> > +#include <errno.h>
> > +#include <math.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +
> > #include <linux/compiler.h>
> > +#include <linux/kernel.h>
> > +#include <linux/list.h>
> > +
> > +#include "color.h"
> > +#include "cpumap.h"
> > +#include "debug.h"
> > +#include "evlist.h"
> > +#include "evsel.h"
> > +#include "metricgroup.h"
> > +#include "stat-print.h"
> > +#include "stat.h"
> > +#include "target.h"
> > +#include "thread_map.h"
> > +#include "tool_pmu.h"
> > +
> > +#define COUNTS_LEN 18
> > +#define EVNAME_LEN 32
> > +#define COMM_LEN 16
> > +#define PID_LEN 7
> > +#define MGROUP_LEN 50
> > +#define METRIC_LEN 38
> > +
> > +
> > +
> > +/**
> > + * struct queued_metric - In-memory record of a buffered metric.
>
> Shouldn't we have a space after the struct header and its members?
>
> > + * @list: Linked list node for queueing.
>
> If this is a node, shouldn't we name it node instead of list, list makes
> it look like we have a list here, not that this entry is part of a list.
>
> > + * @name: The display name of the metric.
> > + * @unit: The metric's unit (e.g., "%", "GHz", or NULL).
>
> Can this be just:
>
> + * @unit: "%", "GHz", or NULL
>
> As it's implied that being a metric it should be its unit?
>
> > + * @val: The calculated ratio/metric value.
> > + * @thresh: Threshold classification for color coding.
>
>
> @thresh: classification for color coding.
>
> > + * @aggr_idx: Aggregation index in evsel stats.
> > + */
> > +struct queued_metric {
> > + struct list_head list;
> > + char *name;
> > + char *unit;
> > + double val;
> > + enum metric_threshold_classify thresh;
> > + int aggr_idx;
> > +};
>
> Documenting has value, now it's a matter of making sure it stays in sync
>
> Ditto for all the lines in the next structs
I'll clean up the structs.
> > +/**
> > + * struct queued_event - In-memory record of a buffered counter event.
> > + * @list: Linked list node for queueing.
> > + * @evsel: The associated performance event selector.
> > + * @name: The uniquely formatted/resolved event name.
> > + * @val: Raw aggregated counter value.
> > + * @ena: Enabled time for multiplexing percentage.
> > + * @run: Running time for multiplexing percentage.
> > + * @stdev_pct: Standard deviation percentage across repeated runs.
> > + * @aggr_idx: Aggregation index.
> > + * @is_metricgroup: Whether this represents a unified metricgroup header.
> > + * @metrics_list: Linked list head containing nested queued_metric structures.
> > + */
> > +struct queued_event {
> > + struct list_head list;
> > + struct evsel *evsel;
> > + char *name;
> > + u64 val, ena, run;
> > + double stdev_pct;
> > + int aggr_idx;
> > + bool is_metricgroup;
> > + struct list_head metrics_list;
> > +};
> > +
> > +/**
> > + * struct std_print_state - Print state context for Standard console output.
> > + * @fp: File descriptor to output to.
> > + * @timestamp: Formatted interval timestamp (optional).
> > + * @events_list: Linked list head containing queued_event nodes.
> > + * @current_event: Pointer to the currently active event being printed.
> > + * Serves as a temporary bridge to associate streaming metrics back to
> > + * their parent event node during list buffering. This relies on a
> > + * strict temporal coupling in the traversal driver: the driver always
> > + * invokes print_metric() callbacks for a counter synchronously and
> > + * immediately after its print_event() callback, prior to advancing
> > + * to the next event or aggregation node. This pointer is completely
> > + * private to standard printing, keeping the traversal driver decoupled
> > + * and preserving strict encapsulation.
> > + * @target: target query parameters for header printout.
> > + * @argc: Command argument count.
> > + * @argv: Command argument values.
> > + */
> > +struct std_print_state {
> > + FILE *fp;
> > + char timestamp[64];
> > + struct list_head events_list;
> > + struct queued_event *current_event;
> > + const struct target *target;
> > + int argc;
> > + const char **argv;
> > +};
> > +
> > +/**
> > + * struct std_metric_only_print_state - Metric-only print state context for Standard console output.
> > + * @fp: File descriptor to output to.
> > + * @queued_metrics: Linked list head containing queued_metric nodes.
> > + * @timestamp: Formatted interval timestamp (optional).
> > + * @target: target query parameters.
> > + * @argc: Command argument count.
> > + * @argv: Command argument values.
> > + */
> > +struct std_metric_only_print_state {
> > + FILE *fp;
> > + struct list_head queued_metrics;
> > + char timestamp[64];
> > + const struct target *target;
> > + int argc;
> > + const char **argv;
> > + struct evlist *evlist;
> > +};
> > +
> > +/**
> > + * print_aggr_id_std - Print the aggregation prefix for STD format.
> > + *
> > + * Uses the unified perf_stat__get_aggr_id_char helper to format the base
> > + * aggregation string, and pads it dynamically using aggr_header_lens.
> > + */
> > +static void print_aggr_id_std(const struct perf_stat_config *config, FILE *output,
> > + struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr)
> > +{
> > + char buf[128];
> > +
> > + if (perf_stat__get_aggr_id_char(config, evsel, id, buf, sizeof(buf)) < 0)
> > + return;
>
>
> So the contract here is clear: if there is some problem with doing what
> is expected, it should print nothing? I would expect that if this is
> called, being a "print" function, something would be printed? Is the
> above failure (< 0) a problem the user should be warned about?
I'll follow up.
> > +
> > + if (config->aggr_mode == AGGR_NONE) {
> > + if (evsel->percore && !config->percore_show_thread) {
> > + fprintf(output, "%-*s ", aggr_header_lens[AGGR_CORE], buf);
> > + } else if (id.cpu.cpu > -1) {
> > + /* For CPU none mode, prepend "CPU" during console print */
> > + char cpu_buf[160];
> > + snprintf(cpu_buf, sizeof(cpu_buf), "CPU%s", buf);
> > + fprintf(output, "%-*s ", aggr_header_lens[AGGR_NONE], cpu_buf);
> > + }
> > + return;
> > + }
> > +
> > + if (config->aggr_mode == AGGR_THREAD) {
> > + fprintf(output, "%-*s ", aggr_header_lens[AGGR_THREAD], buf);
> > + return;
> > + }
> > +
> > + /* Socket/Die/Node/Cache/Cluster modes print base ID and aggr count */
> > + fprintf(output, "%-s %*d ", buf, 4, aggr_nr);
>
>
> So, only in the first case (< 0) this function doesn't print something.
>
> > +}
> > +
> > +/**
> > + * should_skip_zero_counter - Check if a zero-valued counter should be skipped.
> > + *
> > + * Implemented locally for standard console formatting.
> > + */
> > +static bool should_skip_zero_counter(const struct perf_stat_config *config, struct evsel *counter,
> > + int aggr_idx)
> > +{
> > + struct perf_cpu cpu;
> > + unsigned int idx;
> > + struct aggr_cpu_id id;
> > +
> > + if (verbose == 0 && counter->skippable && !counter->supported)
> > + return true;
>
> IS this really && && or should it be || ||?
This function is brought over from stat-display.c to try to make the
output as similar as possible. The && is deliberate, the code should
read: if we're not being verbose AND the counter can be skipped (i.e.,
it is a default created one) AND the counter isn't supported (open
failed) then skip outputting the counter if its value is zero.
This function has a long history of issues, so decoupling it here from
CSV and JSON output is an improvement. I think we can do better, but
my initial hope is that the new output code can be compatible with
existing tools that use perf output. Using the same code should be a
pretty strong guarantee of this.
I'm going to buffer up feedback before working on v3. Given Namhyung
and CT have both worked on this code, I think their feedback regarding
the clean up and direction would be very valuable.
Thanks,
Ian
> > +
> > + if (config->metric_only)
> > + return false;
> > +
> > + if (config->aggr_mode == AGGR_THREAD && config->system_wide)
> > + return true;
> > +
> > + if (aggr_idx < 0 || !config->aggr_map || !config->aggr_get_id)
> > + return false;
> > +
> > + id = config->aggr_map->map[aggr_idx];
> > +
> > + if (evsel__is_tool(counter)) {
> > + struct aggr_cpu_id own_id = config->aggr_get_id((struct perf_stat_config *)config,
> > + (struct perf_cpu){ .cpu = 0 });
> > +
> > + return !aggr_cpu_id__equal(&id, &own_id);
> > + }
> > +
> > + perf_cpu_map__for_each_cpu(cpu, idx, counter->core.cpus) {
> > + struct aggr_cpu_id own_id =
> > + config->aggr_get_id((struct perf_stat_config *)config, cpu);
> > +
> > + if (aggr_cpu_id__equal(&id, &own_id))
> > + return false;
> > + }
> > + return true;
> > +}
> > +
> > +/*
> > + * Standard (STD) Output Callbacks - Normal Mode
> > + */
> > +
> > +static int std_print_start(void *ctx, const struct perf_stat_config *config __maybe_unused)
> > +{
> > + struct std_print_state *ps = ctx;
> >
> > -int perf_stat__print_std(struct evlist *evlist __maybe_unused,
> > - const struct perf_stat_config *config __maybe_unused,
> > - const struct target *target __maybe_unused,
> > - const struct timespec *ts __maybe_unused,
> > - int argc __maybe_unused,
> > - const char **argv __maybe_unused)
> > + INIT_LIST_HEAD(&ps->events_list);
> > + ps->current_event = NULL;
> > + return 0;
> > +}
> > +
> > +static int std_print_event(void *ctx, const struct perf_stat_config *config, struct evsel *evsel,
> > + int aggr_idx, u64 val, u64 ena, u64 run, double stdev_pct)
> > {
> > + struct std_print_state *ps = ctx;
> > + struct queued_event *ev;
> > +
> > + /* Skip zero counters locally in STD callbacks if they qualify */
> > + if (val == 0 && should_skip_zero_counter(config, evsel, aggr_idx)) {
> > + ps->current_event = NULL;
> > + return 0;
> > + }
> > +
> > + ev = malloc(sizeof(*ev));
> > + if (!ev)
> > + return -ENOMEM;
> > +
> > + ev->name = strdup(evsel__name(evsel));
> > + if (!ev->name) {
> > + free(ev);
> > + return -ENOMEM;
> > + }
> > +
> > + ev->evsel = evsel;
> > + ev->val = val;
> > + ev->ena = ena;
> > + ev->run = run;
> > + ev->stdev_pct = stdev_pct;
> > + ev->aggr_idx = aggr_idx;
> > + INIT_LIST_HEAD(&ev->metrics_list);
> > +
> > + list_add_tail(&ev->list, &ps->events_list);
> > + ps->current_event = ev;
> > +
> > return 0;
> > }
> > +
> > +static int std_print_metric(void *ctx, const struct perf_stat_config *config __maybe_unused,
> > + struct evsel *evsel __maybe_unused, int aggr_idx __maybe_unused,
> > + const char *name, const char *unit, double val,
> > + enum metric_threshold_classify thresh)
> > +{
> > + struct std_print_state *ps = ctx;
> > + struct queued_metric *b;
> > +
> > + if (!ps->current_event)
> > + return 0;
> > +
> > + if (evsel != ps->current_event->evsel) {
> > + pr_err("decoupled print engine: temporal coupling violation: evsel mismatch!\n");
> > + return -EINVAL;
> > + }
> > +
> > + b = malloc(sizeof(*b));
> > + if (!b)
> > + return -ENOMEM;
> > +
> > + b->name = strdup(name);
> > + if (!b->name) {
> > + free(b);
> > + return -ENOMEM;
> > + }
> > +
> > + if (unit && unit[0]) {
> > + b->unit = strdup(unit);
> > + if (!b->unit) {
> > + free(b->name);
> > + free(b);
> > + return -ENOMEM;
> > + }
> > + } else {
> > + b->unit = NULL;
> > + }
> > +
> > + b->val = val;
> > + b->thresh = thresh;
> > + list_add_tail(&b->list, &ps->current_event->metrics_list);
> > +
> > + return 0;
> > +}
> > +
> > +#define USEC_PER_SEC 1000000ULL
> > +#define NSEC_PER_SEC 1000000000ULL
> > +
> > +static double timeval2double(struct timeval *t)
> > +{
> > + return t->tv_sec + (double)t->tv_usec / USEC_PER_SEC;
> > +}
> > +
> > +static void print_footer_std(const struct perf_stat_config *config)
> > +{
> > + double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
> > + FILE *output = config->output;
> > +
> > + if (config->interval)
> > + return;
> > +
> > + if (!config->null_run)
> > + fprintf(output, "\n");
> > +
> > + if (config->run_count == 1) {
> > + fprintf(output, " %17.9f seconds time elapsed", avg);
> > +
> > + if (config->ru_display) {
> > + double ru_utime =
> > + timeval2double((struct timeval *)&config->ru_data.ru_utime);
> > + double ru_stime =
> > + timeval2double((struct timeval *)&config->ru_data.ru_stime);
> > +
> > + fprintf(output, "\n\n");
> > + fprintf(output, " %17.9f seconds user\n", ru_utime);
> > + fprintf(output, " %17.9f seconds sys\n", ru_stime);
> > + }
> > + } else {
> > + double sd = stddev_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
> > + fprintf(output, " %17.9f +- %-17.9f seconds time elapsed", avg, sd);
> > + }
> > + fprintf(output, "\n");
> > +}
> > +
> > +/**
> > + * print_header_std - Print the header prefix matching old API.
> > + *
> > + * Copied and adapted from stat-display.c.
> > + */
> > +static void print_header_std(const struct perf_stat_config *config, const struct target *target,
> > + int argc, const char **argv)
> > +{
> > + FILE *output = config->output;
> > + int i;
> > +
> > + fprintf(output, "\n");
> > + fprintf(output, " Performance counter stats for ");
> > + if (target->bpf_str)
> > + fprintf(output, "\'BPF program(s) %s", target->bpf_str);
> > + else if (target->system_wide)
> > + fprintf(output, "\'system wide");
> > + else if (target->cpu_list)
> > + fprintf(output, "\'CPU(s) %s", target->cpu_list);
> > + else if (!target__has_task(target)) {
> > + fprintf(output, "\'%s", argv ? argv[0] : "pipe");
> > + for (i = 1; argv && (i < argc); i++)
> > + fprintf(output, " %s", argv[i]);
> > + } else if (target->pid)
> > + fprintf(output, "process id \'%s", target->pid);
> > + else
> > + fprintf(output, "thread id \'%s", target->tid);
> > +
> > + fprintf(output, "\'");
> > + if (config->run_count > 1)
> > + fprintf(output, " (%d runs)", config->run_count);
> > + fprintf(output, ":\n\n");
> > +}
> > +
> > +static int std_print_end(void *ctx, const struct perf_stat_config *config)
> > +{
> > + struct std_print_state *ps = ctx;
> > + struct queued_event *ev, *tmp_ev;
> > + struct queued_metric *met, *tmp_met;
> > + FILE *out = ps->fp;
> > + bool first;
> > + const char *last_mg_name = NULL;
> > + const struct perf_pmu *last_pmu = NULL;
> > + int last_aggr_idx = -1;
> > +
> > + /* Print the formatted header prefix (only in non-interval mode) */
> > + if (!config->interval)
> > + print_header_std(config, ps->target, ps->argc, ps->argv);
> > +
> > + list_for_each_entry_safe(ev, tmp_ev, &ps->events_list, list) {
> > + struct evsel *evsel = ev->evsel;
> > + double sc = evsel->scale;
> > + const char *fmt;
> > + const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
> > + struct metric_event *me =
> > + metricgroup__lookup(&evsel->evlist->metric_events, evsel, false);
> > + bool is_metricgroup = false;
> > + bool skip_header = false;
> > + char full_name[128] = "";
> > +
> > + if (me && me->is_default && !evsel->default_show_events) {
> > + struct metric_expr *mexp =
> > + list_first_entry(&me->head, struct metric_expr, nd);
> > + const char *mg_name = mexp->default_metricgroup_name;
> > + bool need_full_name = perf_pmus__num_core_pmus() > 1;
> > +
> > + if (need_full_name && evsel->pmu)
> > + scnprintf(full_name, sizeof(full_name), "%s (%s)", mg_name,
> > + evsel->pmu->name);
> > + else
> > + scnprintf(full_name, sizeof(full_name), "%s", mg_name);
> > + is_metricgroup = true;
> > +
> > + if (last_mg_name && !strcmp(last_mg_name, mg_name) &&
> > + last_pmu == evsel->pmu && last_aggr_idx == ev->aggr_idx) {
> > + skip_header = true;
> > + }
> > + last_mg_name = mg_name;
> > + last_pmu = evsel->pmu;
> > + last_aggr_idx = ev->aggr_idx;
> > + }
> > +
> > + /* Print interval timestamp if configured */
> > + if (config->interval && ps->timestamp[0] && !skip_header)
> > + fprintf(out, "%s", ps->timestamp);
> > +
> > + /* 1. Print aggregation prefix first (if we don't skip header) */
> > + if (!skip_header && config->aggr_map && ev->aggr_idx >= 0) {
> > + struct aggr_cpu_id id = config->aggr_map->map[ev->aggr_idx];
> > + int aggr_nr = 0;
> > + if (evsel->stats && evsel->stats->aggr) {
> > + aggr_nr = evsel->stats->aggr[ev->aggr_idx].nr;
> > + }
> > + print_aggr_id_std(config, out, evsel, id, aggr_nr);
> > + }
> > +
> > + /* 2. Print event value (scaled) or spaces if metricgroup */
> > + if (is_metricgroup) {
> > + if (!skip_header) {
> > + int n = fprintf(out, " %*s", EVNAME_LEN, full_name);
> > + fprintf(out, "%*s", MGROUP_LEN + config->unit_width + 2 - n, "");
> > + }
> > + } else {
> > + if (config->big_num)
> > + fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f ";
> > + else
> > + fmt = floor(sc) != sc ? "%*.2f " : "%*.0f ";
> > +
> > + if (ev->run == 0 || ev->ena == 0) {
> > + fprintf(out, "%*s ", COUNTS_LEN, bad_count);
> > + } else {
> > + double scaled = (double)ev->val;
> > + double avg;
> > + if (ev->ena < ev->run) {
> > + scaled = (double)ev->val * ev->run / ev->ena;
> > + }
> > + avg = scaled * sc;
> > + fprintf(out, fmt, COUNTS_LEN, avg);
> > + }
> > +
> > + /* 3. Print unit */
> > + if (evsel->unit) {
> > + fprintf(out, "%-*s ", config->unit_width, evsel->unit);
> > + } else {
> > + if (config->unit_width > 0)
> > + fprintf(out, "%-*s ", config->unit_width, "");
> > + }
> > +
> > + /* 4. Print event name */
> > + fprintf(out, "%-*s", EVNAME_LEN, evsel__name(evsel));
> > +
> > + /* If there are no metrics, print noise and multiplexing percentage */
> > + if (list_empty(&ev->metrics_list)) {
> > + if (ev->stdev_pct)
> > + fprintf(out, " ( +-%6.2f%% )", ev->stdev_pct);
> > + if (ev->run != ev->ena)
> > + fprintf(out, " (%.2f%%)", 100.0 * ev->run / ev->ena);
> > + }
> > + }
> > +
> > + first = true;
> > + list_for_each_entry_safe(met, tmp_met, &ev->metrics_list, list) {
> > + const char *color = metric_threshold_classify__color(met->thresh);
> > + char unit_name[128];
> > + const char *m_fmt = (met->unit && met->unit[0]) ? "%8.1f" : "%8.2f";
> > +
> > + if (met->unit && met->unit[0]) {
> > + snprintf(unit_name, sizeof(unit_name), "%s %s", met->unit,
> > + met->name);
> > + } else {
> > + snprintf(unit_name, sizeof(unit_name), "%s", met->name);
> > + }
> > +
> > + if (first) {
> > + if (skip_header) {
> > + if (config->interval && ps->timestamp[0])
> > + fprintf(out, "%s", ps->timestamp);
> > + if (config->aggr_map && ev->aggr_idx >= 0) {
> > + struct aggr_cpu_id id =
> > + config->aggr_map->map[ev->aggr_idx];
> > + int aggr_nr = 0;
> > + if (evsel->stats && evsel->stats->aggr) {
> > + aggr_nr =
> > + evsel->stats->aggr[ev->aggr_idx].nr;
> > + }
> > + print_aggr_id_std(config, out, evsel, id, aggr_nr);
> > + }
> > + fprintf(out, "%*s# ",
> > + COUNTS_LEN + EVNAME_LEN + config->unit_width + 3,
> > + "");
> > + } else {
> > + fprintf(out, " # ");
> > + }
> > + first = false;
> > + } else {
> > + /* Align subsequent metric lines */
> > + fprintf(out, "\n");
> > + if (config->interval && ps->timestamp[0])
> > + fprintf(out, "%s", ps->timestamp);
> > + if (config->aggr_map && ev->aggr_idx >= 0) {
> > + struct aggr_cpu_id id = config->aggr_map->map[ev->aggr_idx];
> > + int aggr_nr = 0;
> > + if (evsel->stats && evsel->stats->aggr) {
> > + aggr_nr = evsel->stats->aggr[ev->aggr_idx].nr;
> > + }
> > + print_aggr_id_std(config, out, evsel, id, aggr_nr);
> > + }
> > + fprintf(out, "%*s# ",
> > + COUNTS_LEN + EVNAME_LEN + config->unit_width + 3, "");
> > + }
> > +
> > + if (color && color[0]) {
> > + color_fprintf(out, color, m_fmt, met->val);
> > + } else {
> > + fprintf(out, m_fmt, met->val);
> > + }
> > + /* Print the metric unit and name left-aligned padded to METRIC_LEN - n - 1 = 26 */
> > + fprintf(out, " %-26s", unit_name);
> > +
> > + /* If this is the last metric in the list, print noise and multiplexing percentage */
> > + if (list_is_last(&met->list, &ev->metrics_list)) {
> > + if (ev->stdev_pct)
> > + fprintf(out, " ( +-%6.2f%% )", ev->stdev_pct);
> > + if (ev->run != ev->ena)
> > + fprintf(out, " (%.2f%%)", 100.0 * ev->run / ev->ena);
> > + }
> > +
> > + list_del(&met->list);
> > + free(met->name);
> > + free(met->unit);
> > + free(met);
> > + }
> > + fprintf(out, "\n");
> > +
> > + list_del(&ev->list);
> > + free(ev->name);
> > + free(ev);
> > + }
> > + print_footer_std(config);
> > + return 0;
> > +}
> > +
> > +static const struct perf_stat_print_callbacks std_print_callbacks = {
> > + .print_start = std_print_start,
> > + .print_end = std_print_end,
> > + .print_event = std_print_event,
> > + .print_metric = std_print_metric,
> > +};
> > +
> > +/*
> > + * Standard (STD) Output Callbacks - Metric-Only Mode
> > + */
> > +
> > +static int std_metric_only_print_start(void *ctx,
> > + const struct perf_stat_config *config __maybe_unused)
> > +{
> > + struct std_metric_only_print_state *ps = ctx;
> > + INIT_LIST_HEAD(&ps->queued_metrics);
> > + return 0;
> > +}
> > +
> > +static int std_metric_only_print_metric(void *ctx,
> > + const struct perf_stat_config *config __maybe_unused,
> > + struct evsel *evsel __maybe_unused, int aggr_idx,
> > + const char *name, const char *unit, double val,
> > + enum metric_threshold_classify thresh)
> > +{
> > + struct std_metric_only_print_state *ps = ctx;
> > + struct queued_metric *b = malloc(sizeof(*b));
> > +
> > + if (!b)
> > + return -ENOMEM;
> > +
> > + b->name = strdup(name);
> > + if (!b->name) {
> > + free(b);
> > + return -ENOMEM;
> > + }
> > +
> > + if (unit && unit[0]) {
> > + b->unit = strdup(unit);
> > + if (!b->unit) {
> > + free(b->name);
> > + free(b);
> > + return -ENOMEM;
> > + }
> > + } else {
> > + b->unit = NULL;
> > + }
> > +
> > + b->val = val;
> > + b->thresh = thresh;
> > + b->aggr_idx = aggr_idx;
> > + list_add_tail(&b->list, &ps->queued_metrics);
> > +
> > + return 0;
> > +}
> > +
> > +static int std_metric_only_print_end(void *ctx, const struct perf_stat_config *config)
> > +{
> > + struct std_metric_only_print_state *ps = ctx;
> > + struct queued_metric *b, *tmp;
> > + FILE *out = ps->fp;
> > + int first_aggr = -1;
> > + /* Initialize to -2 to distinguish from -1 (a valid index in AGGR_GLOBAL mode) */
> > + int current_aggr = -2;
> > + const char *color;
> > + char *str;
> > + int mlen;
> > + int ret = 0;
> > + int err;
> > +
> > + if (list_empty(&ps->queued_metrics))
> > + return 0;
> > +
> > + first_aggr = list_first_entry(&ps->queued_metrics, struct queued_metric, list)->aggr_idx;
> > +
> > + if (!config->metric_only_headers_printed) {
> > + /* Print the formatted header prefix */
> > + if (!config->interval)
> > + print_header_std(config, ps->target, ps->argc, ps->argv);
> > +
> > + if (config->aggr_map && first_aggr >= 0) {
> > + int len = aggr_header_lens[config->aggr_mode];
> > +
> > + fprintf(out, "%*s", len + 1, "");
> > + }
> > +
> > + /* Print headers */
> > + list_for_each_entry(b, &ps->queued_metrics, list) {
> > + if (b->aggr_idx == first_aggr) {
> > + char *header_name;
> > +
> > + if (b->unit && b->unit[0]) {
> > + err = asprintf(&header_name, "%s %s", b->unit, b->name);
> > + } else {
> > + header_name = strdup(b->name);
> > + err = header_name ? 0 : -1;
> > + }
> > + if (err < 0) {
> > + ret = -ENOMEM;
> > + goto cleanup;
> > + }
> > + fprintf(out, "%*s ", config->metric_only_len, header_name);
> > + free(header_name);
> > + }
> > + }
> > + fprintf(out, "\n\n");
> > + ((struct perf_stat_config *)config)->metric_only_headers_printed = true;
> > + }
> > +
> > + /* Print values */
> > + list_for_each_entry_safe(b, tmp, &ps->queued_metrics, list) {
> > + if (b->aggr_idx != current_aggr) {
> > + if (current_aggr != -2)
> > + fprintf(out, "\n");
> > + current_aggr = b->aggr_idx;
> > + if (config->interval && ps->timestamp[0])
> > + fprintf(out, "%s", ps->timestamp);
> > + if (config->aggr_map && current_aggr >= 0) {
> > + struct aggr_cpu_id id = config->aggr_map->map[current_aggr];
> > + struct evsel *mock_evsel = list_first_entry(&ps->evlist->core.entries, struct evsel, core.node);
> > + int aggr_nr = 0;
> > +
> > + if (mock_evsel->stats && mock_evsel->stats->aggr)
> > + aggr_nr = mock_evsel->stats->aggr[current_aggr].nr;
> > +
> > + print_aggr_id_std(config, out, mock_evsel, id, aggr_nr);
> > + }
> > + }
> > + color = metric_threshold_classify__color(b->thresh);
> > + mlen = config->metric_only_len;
> > +
> > + if (color && color[0]) {
> > + err = asprintf(&str, "%s%.1f%s", color, b->val, PERF_COLOR_RESET);
> > + mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
> > + } else {
> > + err = asprintf(&str, "%.1f", b->val);
> > + }
> > + if (err < 0) {
> > + ret = -ENOMEM;
> > + goto cleanup;
> > + }
> > + fprintf(out, "%*s ", mlen, str);
> > + free(str);
> > +
> > + list_del(&b->list);
> > + free(b->name);
> > + free(b->unit);
> > + free(b);
> > + }
> > + print_footer_std(config);
> > + return 0;
> > +
> > +cleanup:
> > + list_for_each_entry_safe(b, tmp, &ps->queued_metrics, list) {
> > + list_del(&b->list);
> > + free(b->name);
> > + free(b->unit);
> > + free(b);
> > + }
> > + return ret;
> > +}
> > +
> > +static const struct perf_stat_print_callbacks std_metric_only_print_callbacks = {
> > + .print_start = std_metric_only_print_start,
> > + .print_end = std_metric_only_print_end,
> > + .print_event = NULL,
> > + .print_metric = std_metric_only_print_metric,
> > +};
> > +
> > +int perf_stat__print_std(struct evlist *evlist, const struct perf_stat_config *config,
> > + const struct target *target, const struct timespec *ts, int argc,
> > + const char **argv)
> > +{
> > + struct std_print_state ps = {
> > + .fp = config->output,
> > + .target = target,
> > + .argc = argc,
> > + .argv = argv,
> > + };
> > +
> > + if (config->metric_only) {
> > + struct std_metric_only_print_state mops = {
> > + .fp = config->output,
> > + .target = target,
> > + .argc = argc,
> > + .argv = argv,
> > + .evlist = evlist,
> > + };
> > + if (config->interval && ts) {
> > + scnprintf(mops.timestamp, sizeof(mops.timestamp), "%6lu.%09lu ",
> > + (unsigned long)ts->tv_sec, ts->tv_nsec);
> > + } else {
> > + mops.timestamp[0] = '\0';
> > + }
> > + return perf_stat__print_cb(evlist, config, target, ts, argc, argv,
> > + &std_metric_only_print_callbacks, &mops);
> > + } else {
> > + if (config->interval && !config->headers_printed) {
> > + FILE *output = config->output;
> > +
> > + if (config->aggr_mode == AGGR_GLOBAL) {
> > + fprintf(output, "#%*s %*s %*s events\n", 15 - 2, "time", 18, "counts", config->unit_width, "unit");
> > + } else {
> > + fprintf(output, "#%*s %-*s ctrs %*s %*s events\n",
> > + 15 - 2, "time",
> > + aggr_header_lens[config->aggr_mode], aggr_header_std[config->aggr_mode],
> > + 18, "counts", config->unit_width, "unit");
> > + }
> > + ((struct perf_stat_config *)config)->headers_printed = true;
> > + }
> > + if (config->interval && ts) {
> > + scnprintf(ps.timestamp, sizeof(ps.timestamp), "%6lu.%09lu ",
> > + (unsigned long)ts->tv_sec, ts->tv_nsec);
> > + } else {
> > + ps.timestamp[0] = '\0';
> > + }
> > + return perf_stat__print_cb(evlist, config, target, ts, argc, argv,
> > + &std_print_callbacks, &ps);
> > + }
> > +}
> > --
> > 2.54.0.794.g4f17f83d09-goog
> >
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [RFC PATCH v2 02/14] perf stat: Implement standard console (STD) formatting callbacks
2026-05-25 23:18 ` [RFC PATCH v2 02/14] perf stat: Implement standard console (STD) formatting callbacks Ian Rogers
2026-05-25 23:49 ` Arnaldo Carvalho de Melo
@ 2026-05-25 23:53 ` sashiko-bot
1 sibling, 0 replies; 46+ messages in thread
From: sashiko-bot @ 2026-05-25 23:53 UTC (permalink / raw)
To: Ian Rogers; +Cc: linux-perf-users
Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [Low] Missing trailing newline for interval output in metric-only mode.
--
commit 7c6854720d44b02ec256a0c56e408fbe8b0ead8f
Author: Ian Rogers <irogers@google.com>
perf stat: Implement standard console (STD) formatting callbacks
This commit implements standard console formatting callbacks inside
util/stat-print-std.c. It replaces the empty stubs by buffering
traversal streams using format-private DOM nodes and unified aggregation
helpers.
> diff --git a/tools/perf/util/stat-print-std.c b/tools/perf/util/stat-print-std.c
> index 83987e97c8895..aa4a083bb85ab 100644
> --- a/tools/perf/util/stat-print-std.c
> +++ b/tools/perf/util/stat-print-std.c
[ ... ]
> +static void print_footer_std(const struct perf_stat_config *config)
> +{
> + double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
> + FILE *output = config->output;
> +
> + if (config->interval)
> + return;
[ ... ]
> +static int std_metric_only_print_end(void *ctx, const struct perf_stat_config *config)
> +{
[ ... ]
> + /* Print values */
> + list_for_each_entry_safe(b, tmp, &ps->queued_metrics, list) {
> + if (b->aggr_idx != current_aggr) {
> + if (current_aggr != -2)
> + fprintf(out, "\n");
[ ... ]
> + fprintf(out, "%*s ", mlen, str);
> + free(str);
> +
> + list_del(&b->list);
> + free(b->name);
> + free(b->unit);
> + free(b);
> + }
> + print_footer_std(config);
[Severity: Low]
Will this omit a trailing newline when running perf stat in interval mode?
It looks like the traversal over queued_metrics prints newlines between
aggregation blocks, but relies on print_footer_std() to close out the
final line.
However, as shown above, print_footer_std() returns early if
config->interval is true:
static void print_footer_std(const struct perf_stat_config *config)
{
...
if (config->interval)
return;
Could this cause the output for successive intervals to be concatenated
directly onto the same line without separation?
> + return 0;
> +
> +cleanup:
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260525231900.3527228-1-irogers@google.com?part=2
^ permalink raw reply [flat|nested] 46+ messages in thread
* [RFC PATCH v2 03/14] perf stat: Extend STD output linter to test basic New API checks
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 01/14] perf stat: Introduce core generic print traversal engine and header stubs Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 02/14] perf stat: Implement standard console (STD) formatting callbacks Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:39 ` Arnaldo Carvalho de Melo
2026-05-25 23:18 ` [RFC PATCH v2 04/14] perf stat: Extend STD output linter to test core aggregation checks Ian Rogers
` (11 subsequent siblings)
14 siblings, 1 reply; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch extends the standard console (STD) output linter script
tools/perf/tests/shell/stat+std_output.sh to run the basic no-argument
check a second time using the --new print flag:
perf_cmd="--new -o ${stat_output}"
check_no_args "STD (New API)" "$perf_cmd"
This ensures that standard console outputs produced by the decoupled
printing callbacks are formally validated by the test suite.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/tests/shell/stat+std_output.sh | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tools/perf/tests/shell/stat+std_output.sh b/tools/perf/tests/shell/stat+std_output.sh
index 9c4b92ecf448..233e0a50eb33 100755
--- a/tools/perf/tests/shell/stat+std_output.sh
+++ b/tools/perf/tests/shell/stat+std_output.sh
@@ -118,5 +118,9 @@ then
else
echo "[Skip] Skipping tests for system_wide_no_aggr, per_core, per_die and per_socket since socket id exposed via topology is invalid"
fi
+# New API basic checks
+perf_cmd="--new -o ${stat_output}"
+check_no_args "STD (New API)" "$perf_cmd"
+
cleanup
exit 0
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread
* Re: [RFC PATCH v2 03/14] perf stat: Extend STD output linter to test basic New API checks
2026-05-25 23:18 ` [RFC PATCH v2 03/14] perf stat: Extend STD output linter to test basic New API checks Ian Rogers
@ 2026-05-25 23:39 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 46+ messages in thread
From: Arnaldo Carvalho de Melo @ 2026-05-25 23:39 UTC (permalink / raw)
To: Ian Rogers
Cc: linux-perf-users, namhyung, adrian.hunter, james.clark, jolsa,
linux-kernel, mingo, peterz
On Mon, May 25, 2026 at 04:18:49PM -0700, Ian Rogers wrote:
> This patch extends the standard console (STD) output linter script
> tools/perf/tests/shell/stat+std_output.sh to run the basic no-argument
> check a second time using the --new print flag:
>
> perf_cmd="--new -o ${stat_output}"
> check_no_args "STD (New API)" "$perf_cmd"
>
> This ensures that standard console outputs produced by the decoupled
> printing callbacks are formally validated by the test suite.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> Assisted-by: Antigravity:gemini-3.5-flash
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> ---
> tools/perf/tests/shell/stat+std_output.sh | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/tools/perf/tests/shell/stat+std_output.sh b/tools/perf/tests/shell/stat+std_output.sh
> index 9c4b92ecf448..233e0a50eb33 100755
> --- a/tools/perf/tests/shell/stat+std_output.sh
> +++ b/tools/perf/tests/shell/stat+std_output.sh
> @@ -118,5 +118,9 @@ then
> else
> echo "[Skip] Skipping tests for system_wide_no_aggr, per_core, per_die and per_socket since socket id exposed via topology is invalid"
> fi
> +# New API basic checks
> +perf_cmd="--new -o ${stat_output}"
> +check_no_args "STD (New API)" "$perf_cmd"
> +
> cleanup
> exit 0
> --
> 2.54.0.794.g4f17f83d09-goog
>
^ permalink raw reply [flat|nested] 46+ messages in thread
* [RFC PATCH v2 04/14] perf stat: Extend STD output linter to test core aggregation checks
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (2 preceding siblings ...)
2026-05-25 23:18 ` [RFC PATCH v2 03/14] perf stat: Extend STD output linter to test basic New API checks Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 05/14] perf stat: Extend STD output linter to test advanced PMU checks Ian Rogers
` (10 subsequent siblings)
14 siblings, 0 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch extends tools/perf/tests/shell/stat+std_output.sh to run all
standard CPU and thread aggregation checks under the --new print flag:
- check_system_wide
- check_system_wide_no_aggr
- check_interval
- check_per_thread
- check_per_node
- check_per_core
- check_per_socket
- check_per_die
This guarantees that standard console outputs produced by the decoupled
printing engine are verified across all core CPU-aggregation modes.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/tests/shell/stat+std_output.sh | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/tools/perf/tests/shell/stat+std_output.sh b/tools/perf/tests/shell/stat+std_output.sh
index 233e0a50eb33..8dee005a7281 100755
--- a/tools/perf/tests/shell/stat+std_output.sh
+++ b/tools/perf/tests/shell/stat+std_output.sh
@@ -121,6 +121,17 @@ fi
# New API basic checks
perf_cmd="--new -o ${stat_output}"
check_no_args "STD (New API)" "$perf_cmd"
+check_system_wide "STD (New API)" "$perf_cmd"
+check_interval "STD (New API)" "$perf_cmd"
+check_per_thread "STD (New API)" "$perf_cmd"
+check_per_node "STD (New API)" "$perf_cmd"
+if [ $skip_test -ne 1 ]
+then
+ check_system_wide_no_aggr "STD (New API)" "$perf_cmd"
+ check_per_core "STD (New API)" "$perf_cmd"
+ check_per_die "STD (New API)" "$perf_cmd"
+ check_per_socket "STD (New API)" "$perf_cmd"
+fi
cleanup
exit 0
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [RFC PATCH v2 05/14] perf stat: Extend STD output linter to test advanced PMU checks
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (3 preceding siblings ...)
2026-05-25 23:18 ` [RFC PATCH v2 04/14] perf stat: Extend STD output linter to test core aggregation checks Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 06/14] perf stat: Extend STD output linter to test metric-only checks Ian Rogers
` (9 subsequent siblings)
14 siblings, 0 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch extends tools/perf/tests/shell/stat+std_output.sh to run
the advanced hardware PMU and topology checks under the --new print flag:
- check_per_cache_instance
- check_per_cluster
This ensures that standard console outputs are verified under advanced
topology-aware aggregation environments.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/tests/shell/stat+std_output.sh | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tools/perf/tests/shell/stat+std_output.sh b/tools/perf/tests/shell/stat+std_output.sh
index 8dee005a7281..0d38a1283967 100755
--- a/tools/perf/tests/shell/stat+std_output.sh
+++ b/tools/perf/tests/shell/stat+std_output.sh
@@ -131,6 +131,8 @@ then
check_per_core "STD (New API)" "$perf_cmd"
check_per_die "STD (New API)" "$perf_cmd"
check_per_socket "STD (New API)" "$perf_cmd"
+ check_per_cache_instance "STD (New API)" "$perf_cmd"
+ check_per_cluster "STD (New API)" "$perf_cmd"
fi
cleanup
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [RFC PATCH v2 06/14] perf stat: Extend STD output linter to test metric-only checks
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (4 preceding siblings ...)
2026-05-25 23:18 ` [RFC PATCH v2 05/14] perf stat: Extend STD output linter to test advanced PMU checks Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 07/14] perf stat: Implement CSV formatting callbacks Ian Rogers
` (8 subsequent siblings)
14 siblings, 0 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch extends tools/perf/tests/shell/stat+std_output.sh to run
the metric-only check under the --new print flag:
- check_metric_only
This guarantees that standard console metric-only outputs produced by
the decoupled printing callbacks are formally validated by the test linter.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/tests/shell/stat+std_output.sh | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/perf/tests/shell/stat+std_output.sh b/tools/perf/tests/shell/stat+std_output.sh
index 0d38a1283967..69720b19b908 100755
--- a/tools/perf/tests/shell/stat+std_output.sh
+++ b/tools/perf/tests/shell/stat+std_output.sh
@@ -134,6 +134,7 @@ then
check_per_cache_instance "STD (New API)" "$perf_cmd"
check_per_cluster "STD (New API)" "$perf_cmd"
fi
+check_metric_only "STD (New API)" "$perf_cmd"
cleanup
exit 0
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [RFC PATCH v2 07/14] perf stat: Implement CSV formatting callbacks
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (5 preceding siblings ...)
2026-05-25 23:18 ` [RFC PATCH v2 06/14] perf stat: Extend STD output linter to test metric-only checks Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 08/14] perf stat: Extend CSV output linter to test core aggregation checks Ian Rogers
` (7 subsequent siblings)
14 siblings, 0 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch implements CSV output formatting callbacks inside
util/stat-print-csv.c, replacing the empty stubs introduced in Commit 1.
Defines the format-private `struct queued_event` and `struct queued_metric`
DOM nodes that buffer the traversal stream, and fully encapsulates the lifecycle
and deallocation of the CSV queued lists inside csv_print_start() and csv_print_end().
Utilizes the newly centralized unified aggregation helpers to format CPU
and thread column prefixes cleanly, fixes metrics separators padding,
and incorporates full interval-mode timestamp printing support.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/util/stat-print-csv.c | 537 ++++++++++++++++++++++++++++++-
1 file changed, 529 insertions(+), 8 deletions(-)
diff --git a/tools/perf/util/stat-print-csv.c b/tools/perf/util/stat-print-csv.c
index e9d1e7c30c90..35cd4505c6c1 100644
--- a/tools/perf/util/stat-print-csv.c
+++ b/tools/perf/util/stat-print-csv.c
@@ -1,13 +1,534 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include "stat-print.h"
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
#include <linux/compiler.h>
+#include <linux/list.h>
+
+#include "cpumap.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "stat-print.h"
+#include "stat.h"
+#include "thread_map.h"
+#include "debug.h"
+
+#define COMM_LEN 16
+#define PID_LEN 7
+
+/**
+ * struct queued_metric - In-memory record of a buffered CSV metric value.
+ * @list: Linked list node for queueing.
+ * @name: Heap-allocated copy of the metric name.
+ * @unit: Heap-allocated copy of the metric unit, or NULL when the unit passed
+ *        to the callback was NULL or empty.
+ * @val: Metric value.
+ * @aggr_idx: Aggregation index. Assigned by csv_metric_only_print_metric();
+ *            csv_print_metric() does not set it.
+ */
+struct queued_metric {
+ struct list_head list;
+ char *name;
+ char *unit;
+ double val;
+ int aggr_idx;
+};
+
+/**
+ * struct queued_event - In-memory record of a buffered CSV counter event.
+ * @list: Linked list node for queueing.
+ * @evsel: The associated performance event selector (borrowed, not freed here).
+ * @name: Heap-allocated copy of evsel__name() for the event.
+ * @unit: Heap-allocated copy of the event's unit (e.g. "msec", "cycles"), or
+ *        NULL when the evsel has no unit.
+ * @val: Raw aggregated counter value.
+ * @ena: Enabled time for multiplexing percentage.
+ * @run: Running time for multiplexing percentage.
+ * @scale: Event scale factor, copied from the evsel when the event is queued.
+ * @supported: Event hardware support indicator, copied from the evsel.
+ * @aggr_idx: Aggregation index.
+ * @metrics_list: Linked list head containing nested queued_metric structures.
+ */
+struct queued_event {
+ struct list_head list;
+ struct evsel *evsel;
+ char *name;
+ char *unit;
+ u64 val, ena, run;
+ double scale;
+ bool supported;
+ int aggr_idx;
+ struct list_head metrics_list;
+};
+
+/**
+ * struct csv_print_state - Print state context for CSV output.
+ * @fp: Output stream (stdio FILE pointer) to write to.
+ * @sep: CSV column separator character/string.
+ * @timestamp: Formatted interval timestamp including its trailing separator;
+ * empty string when no timestamp is to be printed.
+ * @events_list: Linked list head containing queued_event nodes.
+ * @current_event: Pointer to the currently active event being printed.
+ * Serves as a temporary bridge to associate streaming metrics back to
+ * their parent event node during list buffering. This relies on a
+ * strict temporal coupling in the traversal driver: the driver always
+ * invokes print_metric() callbacks for a counter synchronously and
+ * immediately after its print_event() callback, prior to advancing
+ * to the next event or aggregation node. This pointer is completely
+ * private to CSV printing, keeping the traversal driver decoupled
+ * and preserving strict encapsulation.
+ */
+struct csv_print_state {
+ FILE *fp;
+ const char *sep;
+ char timestamp[64];
+ struct list_head events_list;
+ struct queued_event *current_event;
+};
+
+/**
+ * struct csv_metric_only_print_state - Metric-only print state context for CSV output.
+ * @fp: Output stream (stdio FILE pointer) to write to.
+ * @sep: CSV column separator.
+ * @timestamp: Formatted interval timestamp including its trailing separator;
+ * empty string when no timestamp is to be printed.
+ * @evlist: Evlist to query entries from.
+ * @queued_metrics: Linked list head containing queued_metric nodes.
+ */
+struct csv_metric_only_print_state {
+ FILE *fp;
+ const char *sep;
+ char timestamp[64];
+ struct evlist *evlist;
+ struct list_head queued_metrics;
+};
+
+/**
+ * print_aggr_id_csv - Print the aggregation prefix for CSV format.
+ * @config: Stat configuration; supplies the aggregation mode and CSV separator.
+ * @output: Stream the prefix is written to.
+ * @evsel: Event selector; consulted only in the AGGR_NONE and AGGR_THREAD cases.
+ * @id: Aggregation identifier whose fields are formatted.
+ * @aggr_nr: Count printed as a second column in the core, cache, cluster, die,
+ * socket and node modes; not printed for AGGR_NONE and AGGR_THREAD.
+ *
+ * Every emitted column is followed by the separator. Nothing is printed for
+ * AGGR_GLOBAL, AGGR_UNSET, AGGR_MAX or any unrecognised mode.
+ *
+ * Copied and adapted from stat-display.c.
+ */
+static void print_aggr_id_csv(const struct perf_stat_config *config, FILE *output,
+ struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr)
+{
+ const char *sep = config->csv_sep;
+
+ switch (config->aggr_mode) {
+ case AGGR_CORE:
+ fprintf(output, "S%d-D%d-C%d%s%d%s", id.socket, id.die, id.core, sep, aggr_nr, sep);
+ break;
+ case AGGR_CACHE:
+ fprintf(output, "S%d-D%d-L%d-ID%d%s%d%s", id.socket, id.die, id.cache_lvl, id.cache,
+ sep, aggr_nr, sep);
+ break;
+ case AGGR_CLUSTER:
+ fprintf(output, "S%d-D%d-CLS%d%s%d%s", id.socket, id.die, id.cluster, sep, aggr_nr,
+ sep);
+ break;
+ case AGGR_DIE:
+ fprintf(output, "S%d-D%d%s%d%s", id.socket, id.die, sep, aggr_nr, sep);
+ break;
+ case AGGR_SOCKET:
+ fprintf(output, "S%d%s%d%s", id.socket, sep, aggr_nr, sep);
+ break;
+ case AGGR_NODE:
+ fprintf(output, "N%d%s%d%s", id.node, sep, aggr_nr, sep);
+ break;
+ case AGGR_NONE:
+ /* Per-core events print the core id; otherwise the CPU number if it is valid. */
+ if (evsel->percore && !config->percore_show_thread)
+ fprintf(output, "S%d-D%d-C%d%s", id.socket, id.die, id.core, sep);
+ else if (id.cpu.cpu > -1)
+ fprintf(output, "CPU%d%s", id.cpu.cpu, sep);
+ break;
+ case AGGR_THREAD:
+ /* "<comm>-<pid>" looked up in the evsel's thread map by thread index. */
+ fprintf(output, "%s-%d%s",
+ perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+ perf_thread_map__pid(evsel->core.threads, id.thread_idx), sep);
+ break;
+ case AGGR_GLOBAL:
+ case AGGR_UNSET:
+ case AGGR_MAX:
+ default:
+ break;
+ }
+}
+
+/*
+ * CSV Output Callbacks - Normal Mode
+ */
+
+/*
+ * Reset the normal-mode CSV state: start with an empty event queue and no
+ * current event. Always returns 0.
+ */
+static int csv_print_start(void *ctx, const struct perf_stat_config *config __maybe_unused)
+{
+ struct csv_print_state *ps = ctx;
+
+ INIT_LIST_HEAD(&ps->events_list);
+ ps->current_event = NULL;
+ return 0;
+}
+
+/*
+ * Queue one counter reading for later printing by csv_print_end().
+ *
+ * Copies the event name and unit (if any) onto the heap, snapshots the scale
+ * and supported flag from @evsel, appends the node to ps->events_list and
+ * records it as ps->current_event so that following csv_print_metric() calls
+ * attach to it.
+ *
+ * Returns 0 on success, or -ENOMEM if an allocation fails (in which case
+ * nothing is queued and ps->current_event is left unchanged).
+ */
+static int csv_print_event(void *ctx, const struct perf_stat_config *config __maybe_unused,
+ struct evsel *evsel, int aggr_idx, u64 val, u64 ena, u64 run,
+ double stdev_pct __maybe_unused)
+{
+ struct csv_print_state *ps = ctx;
+ struct queued_event *ev = malloc(sizeof(*ev));
+
+ if (!ev)
+ return -ENOMEM;
+
+ ev->name = strdup(evsel__name(evsel));
+ if (!ev->name) {
+ free(ev);
+ return -ENOMEM;
+ }
+
+ if (evsel->unit) {
+ ev->unit = strdup(evsel->unit);
+ if (!ev->unit) {
+ free(ev->name);
+ free(ev);
+ return -ENOMEM;
+ }
+ } else {
+ ev->unit = NULL;
+ }
+
+ ev->evsel = evsel;
+ ev->val = val;
+ ev->ena = ena;
+ ev->run = run;
+ ev->scale = evsel->scale;
+ ev->supported = evsel->supported;
+ ev->aggr_idx = aggr_idx;
+ INIT_LIST_HEAD(&ev->metrics_list);
+
+ /* Only publish the node once it is fully initialised. */
+ list_add_tail(&ev->list, &ps->events_list);
+ ps->current_event = ev;
+
+ return 0;
+}
+
+/*
+ * Queue one metric value under the event most recently queued by
+ * csv_print_event().
+ *
+ * The metric is silently dropped (return 0) when no event has been queued
+ * yet. If @evsel is not the evsel of the current event the callback ordering
+ * assumed by the CSV printer has been violated and -EINVAL is returned.
+ * The name and (non-empty) unit are copied onto the heap; the copies are
+ * released by csv_print_end().
+ *
+ * Returns 0 on success, -EINVAL on an evsel mismatch, -ENOMEM on allocation
+ * failure (nothing is queued in either error case).
+ */
+static int csv_print_metric(void *ctx, const struct perf_stat_config *config __maybe_unused,
+			    struct evsel *evsel, int aggr_idx,
+			    const char *name, const char *unit, double val,
+			    enum metric_threshold_classify thresh __maybe_unused)
+{
+	struct csv_print_state *ps = ctx;
+	struct queued_metric *b;
+
+	if (!ps->current_event)
+		return 0;
+
+	if (evsel != ps->current_event->evsel) {
+		pr_err("decoupled print engine: temporal coupling violation: evsel mismatch!\n");
+		return -EINVAL;
+	}
+
+	b = malloc(sizeof(*b));
+	if (!b)
+		return -ENOMEM;
+
+	b->name = strdup(name);
+	if (!b->name) {
+		free(b);
+		return -ENOMEM;
+	}
+
+	if (unit && unit[0]) {
+		b->unit = strdup(unit);
+		if (!b->unit) {
+			free(b->name);
+			free(b);
+			return -ENOMEM;
+		}
+	} else {
+		b->unit = NULL;
+	}
+
+	b->val = val;
+	/* Keep every field of the malloc()ed node initialised. */
+	b->aggr_idx = aggr_idx;
+	list_add_tail(&b->list, &ps->current_event->metrics_list);
+
+	return 0;
+}
+
+/*
+ * Print and release every event queued by csv_print_event(), together with
+ * the metrics queued under it.
+ *
+ * Each event produces one row:
+ *   [timestamp][aggregation prefix]value,unit,name,run,enabled%
+ * followed by its first metric (",value,name") on the same row, or by two
+ * bare separators when the event has no metrics. Every further metric gets
+ * its own row that repeats the timestamp/aggregation prefix and pads the
+ * event columns with separators. A metric's unit is not printed.
+ *
+ * All queued nodes are unlinked and freed as they are printed. Always
+ * returns 0.
+ */
+static int csv_print_end(void *ctx, const struct perf_stat_config *config)
+{
+ struct csv_print_state *ps = ctx;
+ struct queued_event *ev, *tmp_ev;
+ struct queued_metric *met, *tmp_met;
+ FILE *output = ps->fp;
+ const char *sep = ps->sep;
+ bool has_metrics;
+
+ list_for_each_entry_safe(ev, tmp_ev, &ps->events_list, list) {
+ struct evsel *evsel = ev->evsel;
+ /* A counter that never ran or was never enabled has no usable value. */
+ bool ok = (ev->run != 0 && ev->ena != 0);
+ const char *bad_count = ev->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
+ double enabled_percent = 100;
+
+ /* Print interval timestamp first if configured */
+ if (config->interval && ps->timestamp[0])
+ fprintf(output, "%s", ps->timestamp);
+
+ /* Print aggregation prefix first in CSV normal mode */
+ if (config->aggr_map && ev->aggr_idx >= 0) {
+ struct aggr_cpu_id id = config->aggr_map->map[ev->aggr_idx];
+ int aggr_nr = 0;
+
+ if (evsel->stats && evsel->stats->aggr)
+ aggr_nr = evsel->stats->aggr[ev->aggr_idx].nr;
+
+ print_aggr_id_csv(config, output, evsel, id, aggr_nr);
+ }
-int perf_stat__print_csv(struct evlist *evlist __maybe_unused,
- const struct perf_stat_config *config __maybe_unused,
- const struct target *target __maybe_unused,
- const struct timespec *ts __maybe_unused,
- int argc __maybe_unused,
- const char **argv __maybe_unused)
+ if (ok) {
+ double sc = ev->scale;
+ double avg = ev->val * sc;
+ /* Two decimals only when the scale factor is fractional. */
+ const char *fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s";
+
+ fprintf(output, fmt, avg, sep);
+ } else {
+ fprintf(output, "%s%s", bad_count, sep);
+ }
+
+ /* The unit column is always present, even when empty. */
+ if (ev->unit)
+ fprintf(output, "%s%s", ev->unit, sep);
+ else
+ fprintf(output, "%s", sep);
+
+ fprintf(output, "%s", ev->name);
+
+ /* NOTE(review): ena == 0 with run != 0 would divide by zero here - confirm it cannot occur. */
+ if (ev->run != ev->ena)
+ enabled_percent = 100.0 * ev->run / ev->ena;
+ fprintf(output, "%s%" PRIu64 "%s%.2f", sep, ev->run, sep, enabled_percent);
+
+ /* Print metrics */
+ has_metrics = false;
+ list_for_each_entry_safe(met, tmp_met, &ev->metrics_list, list) {
+ if (!has_metrics) {
+ /* First metric continues the event's own row. */
+ has_metrics = true;
+ } else {
+ fprintf(output, "\n");
+ if (config->interval && ps->timestamp[0])
+ fprintf(output, "%s", ps->timestamp);
+ if (config->aggr_map && ev->aggr_idx >= 0) {
+ struct aggr_cpu_id id = config->aggr_map->map[ev->aggr_idx];
+ int aggr_nr = 0;
+
+ if (evsel->stats && evsel->stats->aggr)
+ aggr_nr = evsel->stats->aggr[ev->aggr_idx].nr;
+
+ print_aggr_id_csv(config, output, evsel, id, aggr_nr);
+ }
+ /* Subsequent metrics have exactly 4 padding separators */
+ fprintf(output, "%s%s%s%s", sep, sep, sep, sep);
+ }
+ fprintf(output, "%s%.2f%s", sep, met->val, sep);
+ if (met->name && met->name[0])
+ fprintf(output, "%s", met->name);
+
+ /* The metric has been emitted; drop it from the queue. */
+ list_del(&met->list);
+ free(met->name);
+ free(met->unit);
+ free(met);
+ }
+ /* Keep the column count stable for events without metrics. */
+ if (!has_metrics)
+ fprintf(output, "%s%s", sep, sep);
+ fprintf(output, "\n");
+
+ list_del(&ev->list);
+ free(ev->name);
+ free(ev->unit);
+ free(ev);
+ }
+ return 0;
+}
+
+/* Callback table for normal (event plus metric) CSV output. */
+static const struct perf_stat_print_callbacks csv_print_callbacks = {
+ .print_start = csv_print_start,
+ .print_end = csv_print_end,
+ .print_event = csv_print_event,
+ .print_metric = csv_print_metric,
+};
+
+/*
+ * CSV Output Callbacks - Metric-Only Mode
+ */
+
+/* Reset the metric-only CSV state to an empty metric queue. Always returns 0. */
+static int csv_metric_only_print_start(void *ctx,
+ const struct perf_stat_config *config __maybe_unused)
+{
+ struct csv_metric_only_print_state *ps = ctx;
+
+ INIT_LIST_HEAD(&ps->queued_metrics);
+ return 0;
+}
+
+/*
+ * Queue one metric value for metric-only CSV output.
+ *
+ * Copies the name and (non-empty) unit onto the heap, records the value and
+ * aggregation index, and appends the node to ps->queued_metrics; the node is
+ * printed and freed by csv_metric_only_print_end().
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails (nothing is queued).
+ */
+static int csv_metric_only_print_metric(void *ctx,
+ const struct perf_stat_config *config __maybe_unused,
+ struct evsel *evsel __maybe_unused, int aggr_idx,
+ const char *name, const char *unit, double val,
+ enum metric_threshold_classify thresh __maybe_unused)
{
+ struct csv_metric_only_print_state *ps = ctx;
+ struct queued_metric *b = malloc(sizeof(*b));
+
+ if (!b)
+ return -ENOMEM;
+
+ b->name = strdup(name);
+ if (!b->name) {
+ free(b);
+ return -ENOMEM;
+ }
+
+ /* An empty unit is stored as NULL, the same as a missing one. */
+ if (unit && unit[0]) {
+ b->unit = strdup(unit);
+ if (!b->unit) {
+ free(b->name);
+ free(b);
+ return -ENOMEM;
+ }
+ } else {
+ b->unit = NULL;
+ }
+
+ b->val = val;
+ b->aggr_idx = aggr_idx;
+ list_add_tail(&b->list, &ps->queued_metrics);
+
return 0;
}
+
+/*
+ * Print and release the metrics queued by csv_metric_only_print_metric().
+ *
+ * On the first call (config->metric_only_headers_printed still false) a
+ * header row is written: an optional "time" column, the aggregation header
+ * for the current mode, then one "[unit ]name" column per queued metric that
+ * shares the aggregation index of the first queued metric. The value rows
+ * follow, one row per run of consecutive metrics with the same aggregation
+ * index, each value printed with one decimal and followed by the separator.
+ *
+ * Returns 0 on success (including an empty queue), or -ENOMEM if building a
+ * header column name fails; in that case the whole queue is freed.
+ */
+static int csv_metric_only_print_end(void *ctx, const struct perf_stat_config *config)
+{
+ struct csv_metric_only_print_state *ps = ctx;
+ FILE *output = ps->fp;
+ const char *sep = ps->sep;
+ struct queued_metric *b, *tmp;
+ int first_aggr = -1;
+ /* Initialize to -2 to distinguish from -1 (a valid index in AGGR_GLOBAL mode) */
+ int current_aggr = -2;
+ int ret = 0;
+ int err;
+
+ if (list_empty(&ps->queued_metrics))
+ return 0;
+
+ first_aggr = list_first_entry(&ps->queued_metrics, struct queued_metric, list)->aggr_idx;
+
+ if (!config->metric_only_headers_printed) {
+ /* Print interval timestamp header if configured */
+ if (config->interval)
+ fprintf(output, "time%s", sep);
+
+ /* Print static aggregation prefix header in CSV metric-only mode */
+ if (config->aggr_map && first_aggr >= 0) {
+ const char *p = aggr_header_csv[config->aggr_mode];
+
+ /* The table text uses ',' between columns; emit the configured separator instead. */
+ while (*p) {
+ if (*p == ',')
+ fputs(sep, output);
+ else
+ fputc(*p, output);
+ p++;
+ }
+ }
+
+ /* Print headers */
+ list_for_each_entry(b, &ps->queued_metrics, list) {
+ if (b->aggr_idx == first_aggr) {
+ char *header_name;
+
+ if (b->unit && b->unit[0]) {
+ err = asprintf(&header_name, "%s %s", b->unit, b->name);
+ } else {
+ header_name = strdup(b->name);
+ err = header_name ? 0 : -1;
+ }
+ if (err < 0) {
+ /* NOTE(review): part of the header row may already have been written here. */
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ fprintf(output, "%s%s", header_name, sep);
+ free(header_name);
+ }
+ }
+ fprintf(output, "\n");
+ /* NOTE(review): writes through a const-qualified config by casting the qualifier away. */
+ ((struct perf_stat_config *)config)->metric_only_headers_printed = true;
+ }
+
+ /* Print values */
+ list_for_each_entry_safe(b, tmp, &ps->queued_metrics, list) {
+ /* A change of aggregation index starts a new output row. */
+ if (b->aggr_idx != current_aggr) {
+ if (current_aggr != -2)
+ fprintf(output, "\n");
+ current_aggr = b->aggr_idx;
+ if (config->interval && ps->timestamp[0])
+ fprintf(output, "%s", ps->timestamp);
+ if (config->aggr_map && current_aggr >= 0) {
+ struct aggr_cpu_id id = config->aggr_map->map[current_aggr];
+ /*
+ * The first evsel of the evlist stands in for the row's evsel.
+ * NOTE(review): presumably the evlist is never empty here - confirm.
+ */
+ struct evsel *mock_evsel = list_first_entry(
+ &ps->evlist->core.entries, struct evsel, core.node);
+ int aggr_nr = 0;
+
+ if (mock_evsel->stats && mock_evsel->stats->aggr)
+ aggr_nr = mock_evsel->stats->aggr[current_aggr].nr;
+
+ print_aggr_id_csv(config, output, mock_evsel, id, aggr_nr);
+ }
+ }
+ fprintf(output, "%.1f%s", b->val, sep);
+
+ list_del(&b->list);
+ free(b->name);
+ free(b->unit);
+ free(b);
+ }
+ fprintf(output, "\n");
+ return 0;
+
+cleanup:
+ /* Error path: discard everything still queued without printing it. */
+ list_for_each_entry_safe(b, tmp, &ps->queued_metrics, list) {
+ list_del(&b->list);
+ free(b->name);
+ free(b->unit);
+ free(b);
+ }
+ return ret;
+}
+
+/* Callback table for metric-only CSV output; no per-event callback is installed. */
+static const struct perf_stat_print_callbacks csv_metric_only_print_callbacks = {
+ .print_start = csv_metric_only_print_start,
+ .print_end = csv_metric_only_print_end,
+ .print_event = NULL,
+ .print_metric = csv_metric_only_print_metric,
+};
+
+/*
+ * Fill @buf with the interval timestamp prefix ("<sec>.<nsec><sep>") when
+ * interval mode is active and a timestamp is available, otherwise with an
+ * empty string.
+ */
+static void csv_format_timestamp(char *buf, size_t size,
+				 const struct perf_stat_config *config,
+				 const struct timespec *ts)
+{
+	if (config->interval && ts) {
+		/* tv_nsec is a signed long; convert explicitly to match %lu. */
+		scnprintf(buf, size, "%lu.%09lu%s", (unsigned long)ts->tv_sec,
+			  (unsigned long)ts->tv_nsec, config->csv_sep);
+	} else {
+		buf[0] = '\0';
+	}
+}
+
+/**
+ * perf_stat__print_csv - Print counters and metrics in CSV format.
+ * @evlist: Events to print.
+ * @config: Stat configuration; selects metric-only vs. normal output and
+ *          supplies the output stream and column separator.
+ * @target: Passed through to perf_stat__print_cb().
+ * @ts: Interval timestamp, or NULL when none is available.
+ * @argc: Passed through to perf_stat__print_cb().
+ * @argv: Passed through to perf_stat__print_cb().
+ *
+ * Builds the format-private print state on the stack and hands it, together
+ * with the matching CSV callback table, to perf_stat__print_cb().
+ *
+ * Return: the value returned by perf_stat__print_cb().
+ */
+int perf_stat__print_csv(struct evlist *evlist, const struct perf_stat_config *config,
+			 const struct target *target, const struct timespec *ts, int argc,
+			 const char **argv)
+{
+	if (config->metric_only) {
+		struct csv_metric_only_print_state ps = {
+			.fp = config->output,
+			.sep = config->csv_sep,
+			.evlist = evlist,
+		};
+
+		csv_format_timestamp(ps.timestamp, sizeof(ps.timestamp), config, ts);
+		return perf_stat__print_cb(evlist, config, target, ts, argc, argv,
+					   &csv_metric_only_print_callbacks, &ps);
+	} else {
+		struct csv_print_state ps = {
+			.fp = config->output,
+			.sep = config->csv_sep,
+		};
+
+		csv_format_timestamp(ps.timestamp, sizeof(ps.timestamp), config, ts);
+		return perf_stat__print_cb(evlist, config, target, ts, argc, argv,
+					   &csv_print_callbacks, &ps);
+	}
+}
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [RFC PATCH v2 08/14] perf stat: Extend CSV output linter to test core aggregation checks
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (6 preceding siblings ...)
2026-05-25 23:18 ` [RFC PATCH v2 07/14] perf stat: Implement CSV formatting callbacks Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 09/14] perf stat: Extend CSV output linter to test advanced PMU and metric-only checks Ian Rogers
` (6 subsequent siblings)
14 siblings, 0 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch extends tools/perf/tests/shell/stat+csv_output.sh to run all
basic and core aggregation linter checks a second time under the --new
CSV print flag:
- check_no_args
- check_system_wide
- check_interval
- check_event
- check_per_thread
- check_per_node
- check_system_wide_no_aggr
- check_per_core
- check_per_socket
- check_per_die
This guarantees that CSV outputs produced by the decoupled printing engine
are formally verified and column-valid across standard and interval modes.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/tests/shell/stat+csv_output.sh | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh
index cd6fff597091..e4153a03d716 100755
--- a/tools/perf/tests/shell/stat+csv_output.sh
+++ b/tools/perf/tests/shell/stat+csv_output.sh
@@ -88,5 +88,21 @@ then
else
echo "[Skip] Skipping tests for system_wide_no_aggr, per_core, per_die and per_socket since socket id exposed via topology is invalid"
fi
+# New API CSV checks
+perf_cmd="--new -x$csv_sep -o ${stat_output}"
+check_no_args "CSV (New API)" "$perf_cmd"
+check_system_wide "CSV (New API)" "$perf_cmd"
+check_interval "CSV (New API)" "$perf_cmd"
+check_event "CSV (New API)" "$perf_cmd"
+check_per_thread "CSV (New API)" "$perf_cmd"
+check_per_node "CSV (New API)" "$perf_cmd"
+if [ $skip_test -ne 1 ]
+then
+ check_system_wide_no_aggr "CSV (New API)" "$perf_cmd"
+ check_per_core "CSV (New API)" "$perf_cmd"
+ check_per_die "CSV (New API)" "$perf_cmd"
+ check_per_socket "CSV (New API)" "$perf_cmd"
+fi
+
cleanup
exit 0
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [RFC PATCH v2 09/14] perf stat: Extend CSV output linter to test advanced PMU and metric-only checks
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (7 preceding siblings ...)
2026-05-25 23:18 ` [RFC PATCH v2 08/14] perf stat: Extend CSV output linter to test core aggregation checks Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 10/14] perf stat: Implement streaming JSON formatting callbacks Ian Rogers
` (5 subsequent siblings)
14 siblings, 0 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch extends tools/perf/tests/shell/stat+csv_output.sh to run
the advanced hardware PMU, topology-aware aggregation, and metric-only
checks a second time under the --new CSV print flag:
- check_per_cache_instance
- check_per_cluster
- check_metric_only
This guarantees that CSV outputs produced by the decoupled printing callbacks
are verified and column-valid under advanced aggregation modes and metric-only
CSV row-column layouts.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/tests/shell/stat+csv_output.sh | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh
index e4153a03d716..c48d9e2b6429 100755
--- a/tools/perf/tests/shell/stat+csv_output.sh
+++ b/tools/perf/tests/shell/stat+csv_output.sh
@@ -102,7 +102,10 @@ then
check_per_core "CSV (New API)" "$perf_cmd"
check_per_die "CSV (New API)" "$perf_cmd"
check_per_socket "CSV (New API)" "$perf_cmd"
+ check_per_cache_instance "CSV (New API)" "$perf_cmd"
+ check_per_cluster "CSV (New API)" "$perf_cmd"
fi
+check_metric_only "CSV (New API)" "$perf_cmd"
cleanup
exit 0
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread
* [RFC PATCH v2 10/14] perf stat: Implement streaming JSON formatting callbacks
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (8 preceding siblings ...)
2026-05-25 23:18 ` [RFC PATCH v2 09/14] perf stat: Extend CSV output linter to test advanced PMU and metric-only checks Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 11/14] perf stat: Extend JSON output linter to test core aggregation checks Ian Rogers
` (4 subsequent siblings)
14 siblings, 0 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch implements streaming JSON formatting callbacks inside
util/stat-print-json.c, replacing the empty stubs introduced in Commit 1.
Delivers a highly optimized, zero-allocation, and 100% streaming print engine
for JSON normal and metric-only modes. It bypasses dynamic queue events and
metric lists entirely, formatting and streaming JSON objects directly onto the
output file descriptor.
Utilizes the newly centralized unified aggregation helpers to format CPU and
thread keys inside the JSON objects, and incorporates full interval-mode
timestamp printing support.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/util/stat-print-json.c | 333 +++++++++++++++++++++++++++++-
1 file changed, 325 insertions(+), 8 deletions(-)
diff --git a/tools/perf/util/stat-print-json.c b/tools/perf/util/stat-print-json.c
index 72df7a94095d..f168dca70ee0 100644
--- a/tools/perf/util/stat-print-json.c
+++ b/tools/perf/util/stat-print-json.c
@@ -1,13 +1,330 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include "stat-print.h"
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
#include <linux/compiler.h>
+#include <linux/kernel.h>
+
+#include "cpumap.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "stat-print.h"
+#include "stat.h"
+#include "thread_map.h"
+
+static const char *metric_threshold_classify__str(enum metric_threshold_classify thresh)
+{
+ const char *const strs[] = {
+ "unknown", "bad", "nearly bad", "less good", "good",
+ };
+ _Static_assert(ARRAY_SIZE(strs) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value");
+ return strs[thresh];
+}
+
+/**
+ * struct json_print_state - Print state context for JSON output.
+ * @fp: Output stream (FILE *) to print to.
+ * @timestamp: Formatted interval timestamp (optional).
+ */
+struct json_print_state {
+ FILE *fp;
+ char timestamp[64];
+};
+
+/**
+ * struct json_metric_only_print_state - Metric-only print state context for JSON output.
+ * @fp: Output stream (FILE *) to print to.
+ * @timestamp: Formatted interval timestamp (optional).
+ * @evlist: Evlist to query entries from.
+ * @last_aggr_idx: The aggregation index of the last printed metric.
+ * @first_in_group: Whether the current metric is the first in its group.
+ */
+struct json_metric_only_print_state {
+ FILE *fp;
+ char timestamp[64];
+ struct evlist *evlist;
+ int last_aggr_idx;
+ bool first_in_group;
+};
+
+/**
+ * print_aggr_id_json - Print the aggregation prefix for JSON format.
+ *
+ * Copied and adapted from stat-display.c.
+ */
+static void print_aggr_id_json(const struct perf_stat_config *config, FILE *output,
+ struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr)
+{
+ switch (config->aggr_mode) {
+ case AGGR_CORE:
+ fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"counters\" : %d, ", id.socket,
+ id.die, id.core, aggr_nr);
+ break;
+ case AGGR_CACHE:
+ fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"counters\" : %d, ", id.socket,
+ id.die, id.cache_lvl, id.cache, aggr_nr);
+ break;
+ case AGGR_CLUSTER:
+ fprintf(output, "\"cluster\" : \"S%d-D%d-CLS%d\", \"counters\" : %d, ", id.socket,
+ id.die, id.cluster, aggr_nr);
+ break;
+ case AGGR_DIE:
+ fprintf(output, "\"die\" : \"S%d-D%d\", \"counters\" : %d, ", id.socket, id.die,
+ aggr_nr);
+ break;
+ case AGGR_SOCKET:
+ fprintf(output, "\"socket\" : \"S%d\", \"counters\" : %d, ", id.socket, aggr_nr);
+ break;
+ case AGGR_NODE:
+ fprintf(output, "\"node\" : \"N%d\", \"counters\" : %d, ", id.node, aggr_nr);
+ break;
+ case AGGR_NONE:
+ if (evsel->percore && !config->percore_show_thread)
+ fprintf(output, "\"core\" : \"S%d-D%d-C%d\", ", id.socket, id.die, id.core);
+ else if (id.cpu.cpu > -1)
+ fprintf(output, "\"cpu\" : \"%d\", ", id.cpu.cpu);
+ break;
+ case AGGR_THREAD:
+ fprintf(output, "\"thread\" : \"%s-%d\", ",
+ perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+ perf_thread_map__pid(evsel->core.threads, id.thread_idx));
+ break;
+ case AGGR_GLOBAL:
+ case AGGR_UNSET:
+ case AGGR_MAX:
+ default:
+ break;
+ }
+}
+
+/*
+ * JSON Output Callbacks - Normal Mode (100% Streaming & Zero-Allocation)
+ */
+
+static int json_print_start(void *ctx __maybe_unused,
+ const struct perf_stat_config *config __maybe_unused)
+{
+ return 0;
+}
+
+static int json_print_event(void *ctx, const struct perf_stat_config *config, struct evsel *evsel,
+ int aggr_idx, u64 val, u64 ena, u64 run,
+ double stdev_pct __maybe_unused)
+{
+ struct json_print_state *ps = ctx;
+ FILE *output = config->output;
+ bool ok = (run != 0 && ena != 0);
+ double enabled_percent = 100.0;
+
+ fprintf(output, "{");
+
+ /* Print interval timestamp first if configured */
+ if (config->interval && ps && ps->timestamp[0])
+ fprintf(output, "%s", ps->timestamp);
+
+ /* Print aggregation JSON fields if configured */
+ if (config->aggr_map && aggr_idx >= 0) {
+ struct aggr_cpu_id id = config->aggr_map->map[aggr_idx];
+ int aggr_nr = 0;
+
+ if (evsel->stats && evsel->stats->aggr)
+ aggr_nr = evsel->stats->aggr[aggr_idx].nr;
+
+ print_aggr_id_json(config, output, evsel, id, aggr_nr);
+ }
+
+ if (ok) {
+ double sc = evsel->scale;
+ double avg = val * sc;
+
+ fprintf(output, "\"counter-value\" : \"%f\"", avg);
+ } else {
+ const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
+
+ fprintf(output, "\"counter-value\" : \"%s\"", bad_count);
+ }
+
+ fprintf(output, ", \"unit\" : \"%s\"", evsel->unit ?: "");
+ /* Cast away const for legacy evsel__name */
+ fprintf(output, ", \"event\" : \"%s\"", evsel__name((struct evsel *)evsel));
+
+ if (run != ena)
+ enabled_percent = 100.0 * run / ena;
+ fprintf(output, ", \"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f", run,
+ enabled_percent);
+ fprintf(output, "}\n");
+
+ return 0;
+}
+
+static int json_print_metric(void *ctx, const struct perf_stat_config *config, struct evsel *evsel,
+ int aggr_idx, const char *name, const char *unit __maybe_unused,
+ double val, enum metric_threshold_classify thresh)
+{
+ struct json_print_state *ps = ctx;
+ FILE *output = config->output;
+ u64 run = 0, ena = 0;
+ double enabled_percent = 100.0;
+ struct perf_stat_evsel *ps_evsel = evsel->stats;
+
+ if (ps_evsel && ps_evsel->aggr) {
+ run = ps_evsel->aggr[aggr_idx].counts.run;
+ ena = ps_evsel->aggr[aggr_idx].counts.ena;
+ }
-int perf_stat__print_json(struct evlist *evlist __maybe_unused,
- const struct perf_stat_config *config __maybe_unused,
- const struct target *target __maybe_unused,
- const struct timespec *ts __maybe_unused,
- int argc __maybe_unused,
- const char **argv __maybe_unused)
+ fprintf(output, "{");
+
+ /* Print interval timestamp first if configured */
+ if (config->interval && ps && ps->timestamp[0])
+ fprintf(output, "%s", ps->timestamp);
+
+ /* Print aggregation JSON fields if configured */
+ if (config->aggr_map && aggr_idx >= 0) {
+ struct aggr_cpu_id id = config->aggr_map->map[aggr_idx];
+ int aggr_nr = 0;
+
+ if (evsel->stats && evsel->stats->aggr)
+ aggr_nr = evsel->stats->aggr[aggr_idx].nr;
+
+ print_aggr_id_json(config, output, evsel, id, aggr_nr);
+ }
+
+ if (run != ena)
+ enabled_percent = 100.0 * run / ena;
+ fprintf(output, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f", run,
+ enabled_percent);
+ fprintf(output, ", \"metric-value\" : \"%f\"", val);
+ if (name && name[0])
+ fprintf(output, ", \"metric-unit\" : \"%s\"", name);
+ if (thresh != METRIC_THRESHOLD_UNKNOWN) {
+ fprintf(output, ", \"metric-threshold\" : \"%s\"",
+ metric_threshold_classify__str(thresh));
+ }
+ fprintf(output, "}\n");
+
+ return 0;
+}
+
+static int json_print_end(void *ctx __maybe_unused,
+ const struct perf_stat_config *config __maybe_unused)
{
return 0;
}
+
+static const struct perf_stat_print_callbacks json_print_callbacks = {
+ .print_start = json_print_start,
+ .print_end = json_print_end,
+ .print_event = json_print_event,
+ .print_metric = json_print_metric,
+};
+
+/*
+ * JSON Output Callbacks - Metric-Only Mode (100% Streaming & Zero-Allocation)
+ */
+
+static int json_metric_only_print_start(void *ctx,
+ const struct perf_stat_config *config __maybe_unused)
+{
+ struct json_metric_only_print_state *ps = ctx;
+
+ /* Initialize to -2 to distinguish from -1 (a valid index in AGGR_GLOBAL mode) */
+ ps->last_aggr_idx = -2;
+ ps->first_in_group = true;
+ return 0;
+}
+
+static int json_metric_only_print_metric(void *ctx,
+ const struct perf_stat_config *config __maybe_unused,
+ struct evsel *evsel __maybe_unused, int aggr_idx,
+ const char *name, const char *unit, double val,
+ enum metric_threshold_classify thresh __maybe_unused)
+{
+ struct json_metric_only_print_state *ps = ctx;
+ FILE *output = ps->fp;
+
+
+ if (aggr_idx != ps->last_aggr_idx) {
+ if (ps->last_aggr_idx != -2)
+ fprintf(output, "}\n");
+ fprintf(output, "{");
+ if (config->interval && ps->timestamp[0])
+ fprintf(output, "%s", ps->timestamp);
+ if (config->aggr_map && aggr_idx >= 0) {
+ struct aggr_cpu_id id = config->aggr_map->map[aggr_idx];
+ struct evsel *mock_evsel = list_first_entry(&ps->evlist->core.entries,
+ struct evsel, core.node);
+ int aggr_nr = 0;
+
+ if (mock_evsel->stats && mock_evsel->stats->aggr)
+ aggr_nr = mock_evsel->stats->aggr[aggr_idx].nr;
+
+ print_aggr_id_json(config, output, mock_evsel, id, aggr_nr);
+ }
+ ps->last_aggr_idx = aggr_idx;
+ ps->first_in_group = true;
+ }
+
+ if (!ps->first_in_group)
+ fprintf(output, ", ");
+ ps->first_in_group = false;
+
+ if (unit && unit[0])
+ fprintf(output, "\"%s %s\" : \"%.1f\"", unit, name, val);
+ else
+ fprintf(output, "\"%s\" : \"%.1f\"", name, val);
+ return 0;
+}
+
+static int json_metric_only_print_end(void *ctx,
+ const struct perf_stat_config *config __maybe_unused)
+{
+ struct json_metric_only_print_state *ps = ctx;
+ FILE *output = ps->fp;
+
+ if (ps->last_aggr_idx != -2)
+ fprintf(output, "}\n");
+ return 0;
+}
+
+static const struct perf_stat_print_callbacks json_metric_only_print_callbacks = {
+ .print_start = json_metric_only_print_start,
+ .print_end = json_metric_only_print_end,
+ .print_event = NULL,
+ .print_metric = json_metric_only_print_metric,
+};
+
+int perf_stat__print_json(struct evlist *evlist, const struct perf_stat_config *config,
+ const struct target *target, const struct timespec *ts, int argc,
+ const char **argv)
+{
+ if (config->metric_only) {
+ struct json_metric_only_print_state ps = {
+ .fp = config->output,
+ .evlist = evlist,
+ };
+ if (config->interval && ts) {
+ scnprintf(ps.timestamp, sizeof(ps.timestamp), "\"interval\" : %lu.%09lu, ",
+ (unsigned long)ts->tv_sec, ts->tv_nsec);
+ } else {
+ ps.timestamp[0] = '\0';
+ }
+ return perf_stat__print_cb(evlist, config, target, ts, argc, argv,
+ &json_metric_only_print_callbacks, &ps);
+ } else {
+ struct json_print_state ps = {
+ .fp = config->output,
+ };
+ if (config->interval && ts) {
+ scnprintf(ps.timestamp, sizeof(ps.timestamp), "\"interval\" : %lu.%09lu, ",
+ (unsigned long)ts->tv_sec, ts->tv_nsec);
+ } else {
+ ps.timestamp[0] = '\0';
+ }
+ return perf_stat__print_cb(evlist, config, target, ts, argc, argv,
+ &json_print_callbacks, &ps);
+ }
+}
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v2 11/14] perf stat: Extend JSON output linter to test core aggregation checks
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (9 preceding siblings ...)
2026-05-25 23:18 ` [RFC PATCH v2 10/14] perf stat: Implement streaming JSON formatting callbacks Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 12/14] perf stat: Extend JSON output linter to test advanced PMU and metric-only checks Ian Rogers
` (3 subsequent siblings)
14 siblings, 0 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch extends tools/perf/tests/shell/stat+json_output.sh to run
all basic and standard CPU/thread aggregation checks a second time with
the --new print flag:
- check_no_args
- check_system_wide
- check_interval
- check_event
- check_per_thread
- check_per_node
- check_system_wide_no_aggr
- check_per_core
- check_per_socket
- check_per_die
This ensures that the JSON output produced by the decoupled, zero-allocation,
streaming print callbacks is validated by the linter in both standard and interval modes.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/tests/shell/stat+json_output.sh | 71 ++++++++++++++--------
1 file changed, 45 insertions(+), 26 deletions(-)
diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh
index 85d1ad7186c6..8168b861ade7 100755
--- a/tools/perf/tests/shell/stat+json_output.sh
+++ b/tools/perf/tests/shell/stat+json_output.sh
@@ -13,6 +13,7 @@ shelldir=$(dirname "$0")
. "${shelldir}"/lib/setup_python.sh
pythonchecker=$(dirname $0)/lib/perf_json_output_lint.py
+perf_new_opt=""
stat_output=$(mktemp /tmp/__perf_test.stat_output.json.XXXXX)
cleanup() {
@@ -35,42 +36,42 @@ function ParanoidAndNotRoot()
check_no_args()
{
- echo -n "Checking json output: no args "
- perf stat -j -o "${stat_output}" true
+ echo -n "Checking $api_label: no args "
+ perf stat -j $perf_new_opt -o "${stat_output}" true
$PYTHON $pythonchecker --no-args --file "${stat_output}"
echo "[Success]"
}
check_system_wide()
{
- echo -n "Checking json output: system wide "
+ echo -n "Checking $api_label: system wide "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
- perf stat -j -a -o "${stat_output}" true
+ perf stat -j $perf_new_opt -a -o "${stat_output}" true
$PYTHON $pythonchecker --system-wide --file "${stat_output}"
echo "[Success]"
}
check_system_wide_no_aggr()
{
- echo -n "Checking json output: system wide no aggregation "
+ echo -n "Checking $api_label: system wide no aggregation "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
- perf stat -j -A -a --no-merge -o "${stat_output}" true
+ perf stat -j $perf_new_opt -A -a --no-merge -o "${stat_output}" true
$PYTHON $pythonchecker --system-wide-no-aggr --file "${stat_output}"
echo "[Success]"
}
check_interval()
{
- echo -n "Checking json output: interval "
- perf stat -j -I 1000 -o "${stat_output}" true
+ echo -n "Checking $api_label: interval "
+ perf stat -j $perf_new_opt -I 1000 -o "${stat_output}" true
$PYTHON $pythonchecker --interval --file "${stat_output}"
echo "[Success]"
}
@@ -78,110 +79,110 @@ check_interval()
check_event()
{
- echo -n "Checking json output: event "
- perf stat -j -e cpu-clock -o "${stat_output}" true
+ echo -n "Checking $api_label: event "
+ perf stat -j $perf_new_opt -e cpu-clock -o "${stat_output}" true
$PYTHON $pythonchecker --event --file "${stat_output}"
echo "[Success]"
}
check_per_core()
{
- echo -n "Checking json output: per core "
+ echo -n "Checking $api_label: per core "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
- perf stat -j --per-core -a -o "${stat_output}" true
+ perf stat -j $perf_new_opt --per-core -a -o "${stat_output}" true
$PYTHON $pythonchecker --per-core --file "${stat_output}"
echo "[Success]"
}
check_per_thread()
{
- echo -n "Checking json output: per thread "
+ echo -n "Checking $api_label: per thread "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
- perf stat -j --per-thread -p $$ -o "${stat_output}" true
+ perf stat -j $perf_new_opt --per-thread -p $$ -o "${stat_output}" true
$PYTHON $pythonchecker --per-thread --file "${stat_output}"
echo "[Success]"
}
check_per_cache_instance()
{
- echo -n "Checking json output: per cache_instance "
+ echo -n "Checking $api_label: per cache_instance "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
- perf stat -j --per-cache -a true 2>&1 | $PYTHON $pythonchecker --per-cache
+ perf stat -j $perf_new_opt --per-cache -a true 2>&1 | $PYTHON $pythonchecker --per-cache
echo "[Success]"
}
check_per_cluster()
{
- echo -n "Checking json output: per cluster "
+ echo -n "Checking $api_label: per cluster "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
- perf stat -j --per-cluster -a true 2>&1 | $PYTHON $pythonchecker --per-cluster
+ perf stat -j $perf_new_opt --per-cluster -a true 2>&1 | $PYTHON $pythonchecker --per-cluster
echo "[Success]"
}
check_per_die()
{
- echo -n "Checking json output: per die "
+ echo -n "Checking $api_label: per die "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
- perf stat -j --per-die -a -o "${stat_output}" true
+ perf stat -j $perf_new_opt --per-die -a -o "${stat_output}" true
$PYTHON $pythonchecker --per-die --file "${stat_output}"
echo "[Success]"
}
check_per_node()
{
- echo -n "Checking json output: per node "
+ echo -n "Checking $api_label: per node "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
- perf stat -j --per-node -a -o "${stat_output}" true
+ perf stat -j $perf_new_opt --per-node -a -o "${stat_output}" true
$PYTHON $pythonchecker --per-node --file "${stat_output}"
echo "[Success]"
}
check_per_socket()
{
- echo -n "Checking json output: per socket "
+ echo -n "Checking $api_label: per socket "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
- perf stat -j --per-socket -a -o "${stat_output}" true
+ perf stat -j $perf_new_opt --per-socket -a -o "${stat_output}" true
$PYTHON $pythonchecker --per-socket --file "${stat_output}"
echo "[Success]"
}
check_metric_only()
{
- echo -n "Checking json output: metric only "
+ echo -n "Checking $api_label: metric only "
if [ "$(uname -m)" = "s390x" ] && ! grep '^facilities' /proc/cpuinfo | grep -qw 67
then
echo "[Skip] CPU-measurement counter facility not installed"
return
fi
- perf stat -j --metric-only -M page_faults_per_second -o "${stat_output}" true
+ perf stat -j $perf_new_opt --metric-only -M page_faults_per_second -o "${stat_output}" true
$PYTHON $pythonchecker --metric-only --file "${stat_output}"
echo "[Success]"
}
@@ -214,6 +215,7 @@ check_for_topology()
}
check_for_topology
+api_label="json output"
check_no_args
check_system_wide
check_interval
@@ -232,5 +234,22 @@ then
else
echo "[Skip] Skipping tests for system_wide_no_aggr, per_core, per_die and per_socket since socket id exposed via topology is invalid"
fi
+# Run New API JSON basic and standard aggregation checks
+perf_new_opt="--new"
+api_label="json (New API)"
+check_no_args
+check_system_wide
+check_interval
+check_event
+check_per_thread
+check_per_node
+if [ $skip_test -ne 1 ]
+then
+ check_system_wide_no_aggr
+ check_per_core
+ check_per_die
+ check_per_socket
+fi
+
cleanup
exit 0
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v2 12/14] perf stat: Extend JSON output linter to test advanced PMU and metric-only checks
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (10 preceding siblings ...)
2026-05-25 23:18 ` [RFC PATCH v2 11/14] perf stat: Extend JSON output linter to test core aggregation checks Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:18 ` [RFC PATCH v2 13/14] perf stat: Add --new support to PMU metrics Python validator Ian Rogers
` (2 subsequent siblings)
14 siblings, 0 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch extends tools/perf/tests/shell/stat+json_output.sh to run
the advanced hardware PMU, topology-aware aggregation, and metric-only
JSON checks a second time under the --new print flag:
- check_per_cache_instance
- check_per_cluster
- check_metric_only
This guarantees that JSON outputs produced by the decoupled streaming printing callbacks
are verified and structurally valid under advanced topology-aware modes and metric-only
JSON row-column layouts.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/tests/shell/stat+json_output.sh | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh
index 8168b861ade7..d18cac5dcff1 100755
--- a/tools/perf/tests/shell/stat+json_output.sh
+++ b/tools/perf/tests/shell/stat+json_output.sh
@@ -249,7 +249,10 @@ then
check_per_core
check_per_die
check_per_socket
+ check_per_cache_instance
+ check_per_cluster
fi
+check_metric_only
cleanup
exit 0
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v2 13/14] perf stat: Add --new support to PMU metrics Python validator
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (11 preceding siblings ...)
2026-05-25 23:18 ` [RFC PATCH v2 12/14] perf stat: Extend JSON output linter to test advanced PMU and metric-only checks Ian Rogers
@ 2026-05-25 23:18 ` Ian Rogers
2026-05-25 23:19 ` [RFC PATCH v2 14/14] perf stat: Extend PMU metrics value linter to validate --new outputs Ian Rogers
2026-06-05 18:02 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Chun-Tse Shao
14 siblings, 0 replies; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:18 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch extends the Python metric value validation script,
tools/perf/tests/shell/lib/perf_metric_validation.py, with a new
command-line argument, `-new`:
parser.add_argument("-new", help="Use new printing API (--new)", ...)
When set, the Validator class appends the `--new` option flag to its internally
spawned `perf stat` commands:
command = [tool, 'stat']
if self.new_print:
command.append('--new')
This enables validating the Intel PMU metric values generated
specifically by the decoupled, streaming JSON printing callbacks.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/tests/shell/lib/perf_metric_validation.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/tools/perf/tests/shell/lib/perf_metric_validation.py b/tools/perf/tests/shell/lib/perf_metric_validation.py
index dea8ef1977bf..f69cf0e0de57 100644
--- a/tools/perf/tests/shell/lib/perf_metric_validation.py
+++ b/tools/perf/tests/shell/lib/perf_metric_validation.py
@@ -36,7 +36,7 @@ class TestError:
class Validator:
def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='',
- workload='true', metrics='', cputype='cpu'):
+ workload='true', metrics='', cputype='cpu', new_print=False):
self.rulefname = rulefname
self.reportfname = reportfname
self.rules = None
@@ -68,6 +68,7 @@ class Validator:
self.datafname = datafname
self.debug = debug
self.fullrulefname = fullrulefname
+ self.new_print = new_print
def __set_metrics(self, metrics=''):
if metrics != '':
@@ -379,7 +380,10 @@ class Validator:
def _run_perf(self, metric, workload: str):
tool = 'perf'
- command = [tool, 'stat', '--cputype', self.cputype, '-j', '-M', f"{metric}", "-a"]
+ command = [tool, 'stat']
+ if self.new_print:
+ command.append('--new')
+ command.extend(['--cputype', self.cputype, '-j', '-M', f"{metric}", "-a"])
wl = workload.split()
command.extend(wl)
print(" ".join(command))
@@ -584,6 +588,8 @@ def main() -> None:
parser.add_argument("-m", help="Metric list to validate", default="")
parser.add_argument("-cputype", help="Only test metrics for the given CPU/PMU type",
default="cpu")
+ parser.add_argument("-new", help="Use new printing API (--new)",
+ action="store_true", default=False)
args = parser.parse_args()
outpath = Path(args.output_dir)
reportf = Path.joinpath(outpath, 'perf_report.json')
@@ -592,7 +598,7 @@ def main() -> None:
validator = Validator(args.rule, reportf, debug=args.debug,
datafname=datafile, fullrulefname=fullrule, workload=args.wl,
- metrics=args.m, cputype=args.cputype)
+ metrics=args.m, cputype=args.cputype, new_print=args.new)
ret = validator.test()
return ret
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread* [RFC PATCH v2 14/14] perf stat: Extend PMU metrics value linter to validate --new outputs
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (12 preceding siblings ...)
2026-05-25 23:18 ` [RFC PATCH v2 13/14] perf stat: Add --new support to PMU metrics Python validator Ian Rogers
@ 2026-05-25 23:19 ` Ian Rogers
2026-05-25 23:53 ` sashiko-bot
2026-06-05 18:02 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Chun-Tse Shao
14 siblings, 1 reply; 46+ messages in thread
From: Ian Rogers @ 2026-05-25 23:19 UTC (permalink / raw)
To: irogers, acme, linux-perf-users, namhyung
Cc: adrian.hunter, james.clark, jolsa, linux-kernel, mingo, peterz
This patch updates tools/perf/tests/shell/stat_metrics_values.sh to run
the metrics value validation linter twice: once for the legacy path,
and once under the --new print flag using the newly introduced python
validator `-new` argument:
$PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir ... -new
This verifies that the Intel PMU metric values produced by the
decoupled streaming JSON callbacks are parsed and validated cleanly
against the rules engine.
Signed-off-by: Ian Rogers <irogers@google.com>
Assisted-by: Antigravity:gemini-3.5-flash
---
tools/perf/tests/shell/stat_metrics_values.sh | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh
index 30566f0b5427..02c0bcf6e078 100755
--- a/tools/perf/tests/shell/stat_metrics_values.sh
+++ b/tools/perf/tests/shell/stat_metrics_values.sh
@@ -22,9 +22,20 @@ for cputype in /sys/bus/event_source/devices/cpu_*; do
$PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}" \
-cputype "${cputype}"
ret=$?
- rm -rf $tmpdir
if [ $ret -ne 0 ]; then
echo "Metric validation return with errors. Please check metrics reported with errors."
+ rm -rf $tmpdir
+ exit $ret
+ fi
+
+ echo "Testing metrics for: $cputype (New API)"
+ $PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}" \
+ -cputype "${cputype}" -new
+ ret=$?
+ rm -rf $tmpdir
+ if [ $ret -ne 0 ]; then
+ echo "Metric validation return with errors (New API). Please check metrics reported with errors."
+ exit $ret
fi
done
exit $ret
--
2.54.0.794.g4f17f83d09-goog
^ permalink raw reply related [flat|nested] 46+ messages in thread* Re: [RFC PATCH v2 14/14] perf stat: Extend PMU metrics value linter to validate --new outputs
2026-05-25 23:19 ` [RFC PATCH v2 14/14] perf stat: Extend PMU metrics value linter to validate --new outputs Ian Rogers
@ 2026-05-25 23:53 ` sashiko-bot
0 siblings, 0 replies; 46+ messages in thread
From: sashiko-bot @ 2026-05-25 23:53 UTC (permalink / raw)
To: Ian Rogers; +Cc: linux-perf-users
Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [Low] The New API validation run overwrites debug artifacts generated by the legacy validation run.
--
commit 40eff85b19a584a903ea2292617705adf84c2b46
Author: Ian Rogers <irogers@google.com>
perf stat: Extend PMU metrics value linter to validate --new outputs
This patch updates the stat_metrics_values.sh test to run the validation
linter a second time using the --new flag to ensure new Intel PMU metric
outputs are validated.
> diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh
> --- a/tools/perf/tests/shell/stat_metrics_values.sh
> +++ b/tools/perf/tests/shell/stat_metrics_values.sh
> @@ -22,9 +22,20 @@ for cputype in /sys/bus/event_source/devices/cpu_*; do
> $PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}" \
> -cputype "${cputype}"
> ret=$?
> - rm -rf $tmpdir
> if [ $ret -ne 0 ]; then
> echo "Metric validation return with errors. Please check metrics reported with errors."
> + rm -rf $tmpdir
> + exit $ret
> + fi
> +
> + echo "Testing metrics for: $cputype (New API)"
> + $PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}" \
> + -cputype "${cputype}" -new
[Severity: Low]
Does reusing the same $tmpdir here overwrite the debug artifacts from the
legacy validation run?
If a developer manually adds the -debug flag for troubleshooting, it appears
the second python execution will silently overwrite the artifacts (such as
perf_report.json and perf_data.json) generated by the first execution.
Would writing to separate subdirectories help preserve both sets of debug
artifacts?
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260525231900.3527228-1-irogers@google.com?part=14
^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API
2026-05-25 23:18 ` [RFC PATCH v2 00/14] perf stat: Decouple and modularize metrics/events output printing API Ian Rogers
` (13 preceding siblings ...)
2026-05-25 23:19 ` [RFC PATCH v2 14/14] perf stat: Extend PMU metrics value linter to validate --new outputs Ian Rogers
@ 2026-06-05 18:02 ` Chun-Tse Shao
14 siblings, 0 replies; 46+ messages in thread
From: Chun-Tse Shao @ 2026-06-05 18:02 UTC (permalink / raw)
To: Ian Rogers
Cc: acme, linux-perf-users, namhyung, adrian.hunter, james.clark,
jolsa, linux-kernel, mingo, peterz
Hi Ian,
On Mon, May 25, 2026 at 4:19 PM Ian Rogers <irogers@google.com> wrote:
>
> This RFC patch series introduces a complete architectural refactoring
> to decouple and modularize the event and metric output printing
> engine inside 'perf stat'.
I really like this change. Historically, fixing printing format issues
in perf stat has been painful due to the tight coupling of printing
logic with aggregation and math in util/stat-display.c. Decoupling
these concerns makes the codebase much easier to maintain and simplifies
future changes to the print format.
Acked-by: Chun-Tse Shao <ctshao@google.com>
>
>
> ======================
> Background and Motivation
> ======================
> Historically, 'perf stat' output printing was tightly coupled with
> data collection, aggregation math, and shadow metrics calculation.
> Formatting logic (Standard Console, CSV, and JSON) was scattered
> across util/stat-display.c, featuring massive, complex switch-cases,
> temporal adjacency assumptions, and duplicated layout logic. Adding
> new metrics, uncore PMUs, or topology-aware CPU aggregation modes
> frequently resulted in accidental layout regressions, broken field
> counts in CSV linters, or parsing crashes.
>
> This patch series decouples the data-traversal and shadow-metric
> calculations from the visual layout rendering, introducing a highly
> optimized, modular, and type-safe callback-driven print
> architecture.
>
> ======================
> Decoupled Printing Strategy
> ======================
> 1. Format-Agnostic Traversal Driver (util/stat-print.c)
> The core display logic is abstracted into a generic traversal
> driver, perf_stat__print_cb(). This driver manages the complex
> CPU/thread/topology aggregation loops, resolves hybrid wildcard
> merges, filters default skipped uncore metrics, and calculates
> raw shadow metrics. Once the data points are prepared, the driver
> streams them cleanly to formatting callbacks.
> - Safety: The core `calculate_and_print_metric` traversal is
> fully protected with early-exit checks if formatting callbacks
> choose to leave `print_metric` unpopulated.
>
> 2. Type-Safe Callbacks Interface (struct perf_stat_print_callbacks)
> Output formats communicate with the driver using a clean
> streaming interface:
> - print_start(): Initializes format-private DOM states.
> - print_event(): Buffers or prints raw counter event details.
> - print_metric(): Buffers or prints calculated shadow metrics.
> - print_end(): Finalizes rendering and cleans up structures.
>
> 3. Format-Specific Rendering Engines:
> - Standard Console (util/stat-print-std.c):
> Buffers events and metrics into standard-private DOM lists.
> It resolves default metric-group skipped headers, prepends
> formatted interval timestamps, aligns rows dynamically using
> aggr_header_lens, and prints them cleanly in print_end().
> - Refinement: Cleanly resolves `aggr_idx == -1` global indices
> by tracking bounds with a `-2` initialization indicator,
> protecting all lookups from out-of-bounds array reads.
> It safely resets the active event pointer if a zero counter
> is skipped locally, avoiding temporal violation false-positives.
> - CSV Printing (util/stat-print-csv.c):
> Buffers events and metrics into format-private queues,
> formatting rows separated by config->csv_sep. Corrects
> metrics continuation padding to print exactly 4 separators,
> ensuring column counts are strictly and visually valid.
> - Refinement: Decoupled CSV headers now output static
> structural labels (e.g. "cpu,", "die,") instead of live
> hardware IDs, and prevent redundant header rows in interval
> mode by persisting state tracking.
> - Streaming JSON Printing (util/stat-print-json.c):
> Implements a highly optimized, 100% streaming, zero-allocation
> print engine that bypasses dynamic queues and metrics buffering
> completely. JSON objects and interval keys are formatted and
> streamed directly onto the output file descriptor, maximizing
> speed and eliminating heap allocation overhead.
> - Refinement: Completely zero-allocation fast-path rendering
> inside `json_metric_only_print_metric` by streaming strings
> directly without dynamic `asprintf` or `strdup` overheads.
>
> 4. Centralized Aggregation Prefix Formatting
> Duplication in CPU/thread aggregation prefix rendering is
> completely eliminated by exposing arrays globally and introducing
> shared generic helpers in stat-print.c:
> - perf_stat__get_aggr_key(): Resolves the JSON key name.
> - perf_stat__get_aggr_id_char(): Resolves the unified prefix.
> Because every format calls the same helpers, the aggregation prefix
> stays structurally and visually consistent across all formats.
>
> 5. Temporal Coupling Sanity Checks
> A strict temporal coupling constraint (that the traversal driver
> always invokes print_metric() callbacks synchronously and
> consecutively for the same PMU/event node immediately after its
> print_event() callback) is enforced by adding a
> runtime evsel matching check inside both STD and CSV engines:
> if (evsel != ps->current_event->evsel) abort_print();
>
> ======================
> Verification and Testing
> ======================
> All automated shell linters (stat+std_output.sh, stat+csv_output.sh,
> stat+json_output.sh) have been extended to run their entire
> aggregation suites a second time under the new printer flag
> (--new), passing with 100% success. The PMU metrics value Python
> validation script and stat_metrics_values.sh have also been
> extended with --new flag testing, ensuring complete mathematical
> correctness of calculated metric values.
> - Test Quality: JSON linter checks define dynamic `api_label`
> indicators to generate highly distinguishable and descriptive
> output logs between legacy and `--new` passes.
>
> ======================
> Changes since v1:
> ======================
> - calculate_and_print_metric: added safe print_metric NULL callback check.
> - should_skip_zero_counter: added safe aggr_idx bounds check to avoid
> out-of-bounds mapping array access when aggr_idx is negative.
> - std_print_event: reset ps->current_event pointer on skipped zero counters
> to avoid temporal coupling mismatch violations.
> - std_metric_only_print_end: only print metric headers once in
> interval mode, and print dynamic spacing padding to perfectly
> align columns.
> - csv_metric_only_print_end: only print CSV headers once in
> interval mode, print static aggregation labels instead of live
> hardware IDs, and fix column misalignment under AGGR_GLOBAL by
> initializing current_aggr to -2 sentinel.
> - json_metric_only_print_metric: completely zero-allocation fast-path
> rendering by streaming combined keys directly without dynamic heap string
> allocations, and resolve AGGR_GLOBAL indices by initializing
> last_aggr_idx to -2.
> - stat+json_output.sh: define dynamic api_label to generate highly
> distinguishable and descriptive output logs between legacy and
> --new passes.
> - merged duplicate skip_test block structures inside linter shell scripts.
> - documented -2 sentinel choices as C comments inside standard, CSV,
> and JSON print engines.
>
> We would highly appreciate reviews, comments, and feedback on this
> decoupled output printing strategy.
>
> Assisted-by: Antigravity:gemini-3.5-flash
>
> ***
>
> Ian Rogers (14):
> perf stat: Introduce core generic print traversal engine and header
> stubs
> perf stat: Implement standard console (STD) formatting callbacks
> perf stat: Extend STD output linter to test basic New API checks
> perf stat: Extend STD output linter to test core aggregation checks
> perf stat: Extend STD output linter to test advanced PMU checks
> perf stat: Extend STD output linter to test metric-only checks
> perf stat: Implement CSV formatting callbacks
> perf stat: Extend CSV output linter to test core aggregation checks
> perf stat: Extend CSV output linter to test advanced PMU and
> metric-only checks
> perf stat: Implement streaming JSON formatting callbacks
> perf stat: Extend JSON output linter to test core aggregation checks
> perf stat: Extend JSON output linter to test advanced PMU and
> metric-only checks
> perf stat: Add --new support to PMU metrics Python validator
> perf stat: Extend PMU metrics value linter to validate --new outputs
>
> tools/perf/builtin-stat.c | 261 +++---
> .../tests/shell/lib/perf_metric_validation.py | 12 +-
> tools/perf/tests/shell/stat+csv_output.sh | 19 +
> tools/perf/tests/shell/stat+json_output.sh | 74 +-
> tools/perf/tests/shell/stat+std_output.sh | 18 +
> tools/perf/tests/shell/stat_metrics_values.sh | 13 +-
> tools/perf/util/Build | 4 +
> tools/perf/util/stat-display.c | 28 +-
> tools/perf/util/stat-print-csv.c | 534 ++++++++++++
> tools/perf/util/stat-print-json.c | 330 ++++++++
> tools/perf/util/stat-print-std.c | 773 ++++++++++++++++++
> tools/perf/util/stat-print.c | 490 +++++++++++
> tools/perf/util/stat-print.h | 133 +++
> tools/perf/util/stat.h | 2 +
> 14 files changed, 2519 insertions(+), 172 deletions(-)
> create mode 100644 tools/perf/util/stat-print-csv.c
> create mode 100644 tools/perf/util/stat-print-json.c
> create mode 100644 tools/perf/util/stat-print-std.c
> create mode 100644 tools/perf/util/stat-print.c
> create mode 100644 tools/perf/util/stat-print.h
>
> --
> 2.54.0.794.g4f17f83d09-goog
>
>
^ permalink raw reply [flat|nested] 46+ messages in thread