[PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling

linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Ian Rogers <irogers@google.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	 Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	 Mark Rutland <mark.rutland@arm.com>,
	 Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@kernel.org>,  Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	 Kan Liang <kan.liang@linux.intel.com>,
	linux-perf-users@vger.kernel.org,  linux-kernel@vger.kernel.org,
	Andi Kleen <ak@linux.intel.com>,
	 Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Subject: [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling
Date: Sat, 20 Jul 2024 00:45:52 -0700	[thread overview]
Message-ID: <20240720074552.1915993-2-irogers@google.com> (raw)
In-Reply-To: <20240720074552.1915993-1-irogers@google.com>

Andi Kleen reported a regression where `perf script -F +metric` would
crash. With this change the output is:

```
$ perf record -a -e '{cycles,instructions}:S' perf bench mem memcpy

      21.229620 GB/sec

      15.751008 GB/sec

      16.009221 GB/sec
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 1.945 MB perf.data (294 samples) ]
$ perf --no-pager script -F +metric
            perf 1912464 [000] 814503.473101:       6325       cycles:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
            perf 1912464 [000] 814503.473101:   metric:    0.06  insn per cycle
            perf 1912464 [000] 814503.473101:        351 instructions:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
            perf 1912464 [000] 814503.473101:   metric:    0.03  insn per cycle
...
```

The change fixes perf script to update counts, and thereby aggregate
values, which then get consumed by unchanged metric logic in the shadow
stat output. Note, it would be preferable to switch to json metrics.

Reported-by: Andi Kleen <ak@linux.intel.com>
Closes: https://lore.kernel.org/linux-perf-users/20240713155443.1665378-1-ak@linux.intel.com/
Fixes: 37cc8ad77cf8 ("perf metric: Directly use counts rather than saved_value")
Signed-off-by: Ian Rogers <irogers@google.com>
---
The code isn't well tested, nor does it support non-leader sampling
reading of counts based on periods that seemed to be present in the
previous code. Sending out for the sake of discussion. Andi's changes
added a test and that should certainly be added.
---
 tools/perf/builtin-script.c | 114 +++++++++++++++++++++++++++++-------
 1 file changed, 93 insertions(+), 21 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c16224b1fef3..752d6219fb08 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -63,6 +63,7 @@
 #include "util/util.h"
 #include "util/cgroup.h"
 #include "perf.h"
+#include <internal/threadmap.h>
 
 #include <linux/ctype.h>
 #ifdef HAVE_LIBTRACEEVENT
@@ -334,16 +335,8 @@ struct evsel_script {
        char *filename;
        FILE *fp;
        u64  samples;
-       /* For metric output */
-       u64  val;
-       int  gnum;
 };
 
-static inline struct evsel_script *evsel_script(struct evsel *evsel)
-{
-	return (struct evsel_script *)evsel->priv;
-}
-
 static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data)
 {
 	struct evsel_script *es = zalloc(sizeof(*es));
@@ -2107,6 +2100,12 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused,
 	fputs("\tmetric: ", mctx->fp);
 }
 
+static struct aggr_cpu_id perf_script__get_cpu(struct perf_stat_config *config __maybe_unused,
+					struct perf_cpu cpu)
+{
+	return aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+}
+
 static void perf_sample__fprint_metric(struct perf_script *script,
 				       struct thread *thread,
 				       struct evsel *evsel,
@@ -2126,23 +2125,96 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 		.force_header = false,
 	};
 	struct evsel *ev2;
-	u64 val;
+	struct perf_cpu sample_cpu = { .cpu = sample->cpu, };
+	int thread_idx, cpu_map_idx;
+	u64 read_format = evsel->core.attr.read_format;
+	int aggr_idx;
 
+	/* Only support leader sampling with a group of read events. */
+	if ((read_format & PERF_FORMAT_GROUP) == 0)
+		return;
+
+	/* Lazy initialization of stats values. */
 	if (!evsel->stats)
 		evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false);
-	if (evsel_script(leader)->gnum++ == 0)
-		perf_stat__reset_shadow_stats();
-	val = sample->period * evsel->scale;
-	evsel_script(evsel)->val = val;
-	if (evsel_script(leader)->gnum == leader->core.nr_members) {
-		for_each_group_member (ev2, leader) {
-			perf_stat__print_shadow_stats(&stat_config, ev2,
-						      evsel_script(ev2)->val,
-						      sample->cpu,
-						      &ctx,
-						      NULL);
+	if (!stat_config.aggr_map) {
+		int nr_aggr;
+
+		stat_config.aggr_get_id = perf_script__get_cpu;
+		stat_config.aggr_map =
+			cpu_aggr_map__new(evsel->evlist->core.user_requested_cpus,
+					aggr_cpu_id__cpu,
+					/*data=*/NULL,
+					/*needs_sort=*/false);
+		if (!stat_config.aggr_map) {
+			pr_err("cannot allocate aggr map\n");
+			return;
+		}
+		nr_aggr = stat_config.aggr_map->nr;
+		if (evlist__alloc_aggr_stats(evsel->evlist, nr_aggr) < 0) {
+			pr_err("cannot allocate aggr counts\n");
+			return;
 		}
-		evsel_script(leader)->gnum = 0;
+	}
+
+	/* Add group counts from sample into appropriate evsel counts by id. */
+	for_each_group_evsel(ev2, leader) {
+		struct perf_thread_map *threads = perf_evsel__threads(&ev2->core);
+		struct perf_cpu_map *cpus = evsel__cpus(ev2);
+		int id_num = 0;
+		bool match = false;
+
+		perf_cpu_map__for_each_idx(cpu_map_idx, cpus) {
+			for (thread_idx = 0; thread_idx < threads->nr; thread_idx++) {
+				struct sample_read_value *value = sample->read.group.values;
+				u64 id = ev2->core.id[id_num++];
+
+				sample_read_group__for_each(value, sample->read.group.nr,
+							    read_format) {
+					struct perf_counts_values *counts;
+
+					if (value->id != id)
+						continue;
+
+					counts = perf_counts(ev2->counts, cpu_map_idx, thread_idx);
+					counts->val += value->value;
+					/*
+					 * Ensure the enabled/running time isn't
+					 * 0, which implies an error.
+					 */
+					counts->ena += sample->read.time_enabled ?: sample->period;
+					counts->run += sample->read.time_running ?: sample->period;
+					match = true;
+				}
+			}
+		}
+		if (match) {
+			/* Update the aggregate count in ev2. */
+			perf_stat_process_counter(&stat_config, ev2);
+		}
+	}
+
+	/* Find the appropriate indices for dumping of this sample. */
+	thread_idx = perf_thread_map__idx(perf_evsel__threads(&evsel->core),
+					thread__pid(thread));
+	cpu_map_idx = perf_cpu_map__idx(evsel__cpus(evsel), sample_cpu);
+	if (thread_idx == -1 || cpu_map_idx == -1)
+		return;
+
+	cpu_aggr_map__for_each_idx(aggr_idx, stat_config.aggr_map) {
+		if (stat_config.aggr_map->map[aggr_idx].cpu.cpu == sample_cpu.cpu)
+			break;
+	}
+	/* Iterate all events and the leader of the group, trying to print stats. */
+	for_each_group_evsel(ev2, leader) {
+		struct perf_counts_values *counts =
+			perf_counts(ev2->counts, cpu_map_idx, thread_idx);
+
+		if (!counts)
+			continue;
+
+		perf_stat__print_shadow_stats(&stat_config, ev2, counts->val * ev2->scale,
+					      aggr_idx, &ctx, NULL);
 	}
 }
 
-- 
2.45.2.1089.g2a221341d9-goog

next prev parent reply	other threads:[~2024-07-20  7:46 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-20  7:45 [PATCH v1 1/2] libperf threadmap: Add ability to find index from pid Ian Rogers
2024-07-20  7:45 ` Ian Rogers [this message]
2024-07-23 14:41   ` [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling James Clark
2024-07-23 14:57     ` Ian Rogers
2024-07-23 15:26       ` Andi Kleen
2024-07-23 15:26       ` James Clark
2024-07-23 15:39         ` Andi Kleen
2024-07-23 15:49           ` Ian Rogers
2024-07-26  0:05   ` Namhyung Kim
2024-07-26  3:32     ` Ian Rogers

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:c16224b1fef dfblob:752d6219fb0 )
 OR (
bs:"[PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240720074552.1915993-2-irogers@google.com \
    --to=irogers@google.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=atrajeev@linux.vnet.ibm.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).