[PATCH v1 1/2] libperf threadmap: Add ability to find index from pid

linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v1 1/2] libperf threadmap: Add ability to find index from pid
@ 2024-07-20  7:45 Ian Rogers
  2024-07-20  7:45 ` [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling Ian Rogers
  0 siblings, 1 reply; 10+ messages in thread
From: Ian Rogers @ 2024-07-20  7:45 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Kan Liang, linux-perf-users,
	linux-kernel, Andi Kleen, Athira Rajeev

It is useful to be able to determine the index of thread in a thread
map as the index is used in other situations like finding the perf
count values. Unlike with perf_cpu_map__idx a binary search can't be
performed as the array isn't ordered. Also -1 in the array matches any
pid.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/lib/perf/include/internal/threadmap.h | 1 +
 tools/lib/perf/threadmap.c                  | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/tools/lib/perf/include/internal/threadmap.h b/tools/lib/perf/include/internal/threadmap.h
index df748baf9eda..92889d81b6b1 100644
--- a/tools/lib/perf/include/internal/threadmap.h
+++ b/tools/lib/perf/include/internal/threadmap.h
@@ -19,5 +19,6 @@ struct perf_thread_map {
 };
 
 struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, int nr);
+int perf_thread_map__idx(const struct perf_thread_map *threads, pid_t pid);
 
 #endif /* __LIBPERF_INTERNAL_THREADMAP_H */
diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c
index 07968f3ea093..728683199a85 100644
--- a/tools/lib/perf/threadmap.c
+++ b/tools/lib/perf/threadmap.c
@@ -99,3 +99,12 @@ pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx)
 {
 	return map->map[idx].pid;
 }
+
+int perf_thread_map__idx(const struct perf_thread_map *threads, pid_t pid)
+{
+	for (int i = 0; i < threads->nr; i++) {
+		if (pid == threads->map[i].pid || threads->map[i].pid == -1)
+			return i;
+	}
+	return -1;
+}
-- 
2.45.2.1089.g2a221341d9-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling
  2024-07-20  7:45 [PATCH v1 1/2] libperf threadmap: Add ability to find index from pid Ian Rogers
@ 2024-07-20  7:45 ` Ian Rogers
  2024-07-23 14:41   ` James Clark
  2024-07-26  0:05   ` Namhyung Kim
  0 siblings, 2 replies; 10+ messages in thread
From: Ian Rogers @ 2024-07-20  7:45 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Kan Liang, linux-perf-users,
	linux-kernel, Andi Kleen, Athira Rajeev

Andi Kleen reported a regression where `perf script +F metric` would
crash. With this change the output is:

```
$ perf record -a -e '{cycles,instructions}:S' perf bench mem memcpy

      21.229620 GB/sec

      15.751008 GB/sec

      16.009221 GB/sec
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 1.945 MB perf.data (294 samples) ]
$ perf --no-pager script -F +metric
            perf 1912464 [000] 814503.473101:       6325       cycles:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
            perf 1912464 [000] 814503.473101:   metric:    0.06  insn per cycle
            perf 1912464 [000] 814503.473101:        351 instructions:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
            perf 1912464 [000] 814503.473101:   metric:    0.03  insn per cycle
...
```

The change fixes perf script to update counts and thereby aggregate
values which then get consumed by unchanged metric logic in the shadow
stat output. Note, it would be preferential to switch to json metrics.

Reported-by: Andi Kleen <ak@linux.intel.com>
Closes: https://lore.kernel.org/linux-perf-users/20240713155443.1665378-1-ak@linux.intel.com/
Fixes: 37cc8ad77cf8 ("perf metric: Directly use counts rather than saved_value")'
Signed-off-by: Ian Rogers <irogers@google.com>
---
The code isn't well tested nor does it support non-leader sampling
reading of counts based on periods that seemed to present in the
previous code. Sending out for the sake of discussion. Andi's changes
added a test and that should certainly be added.
---
 tools/perf/builtin-script.c | 114 +++++++++++++++++++++++++++++-------
 1 file changed, 93 insertions(+), 21 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c16224b1fef3..752d6219fb08 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -63,6 +63,7 @@
 #include "util/util.h"
 #include "util/cgroup.h"
 #include "perf.h"
+#include <internal/threadmap.h>
 
 #include <linux/ctype.h>
 #ifdef HAVE_LIBTRACEEVENT
@@ -334,16 +335,8 @@ struct evsel_script {
        char *filename;
        FILE *fp;
        u64  samples;
-       /* For metric output */
-       u64  val;
-       int  gnum;
 };
 
-static inline struct evsel_script *evsel_script(struct evsel *evsel)
-{
-	return (struct evsel_script *)evsel->priv;
-}
-
 static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data)
 {
 	struct evsel_script *es = zalloc(sizeof(*es));
@@ -2107,6 +2100,12 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused,
 	fputs("\tmetric: ", mctx->fp);
 }
 
+static struct aggr_cpu_id perf_script__get_cpu(struct perf_stat_config *config __maybe_unused,
+					struct perf_cpu cpu)
+{
+	return aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+}
+
 static void perf_sample__fprint_metric(struct perf_script *script,
 				       struct thread *thread,
 				       struct evsel *evsel,
@@ -2126,23 +2125,96 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 		.force_header = false,
 	};
 	struct evsel *ev2;
-	u64 val;
+	struct perf_cpu sample_cpu = { .cpu = sample->cpu, };
+	int thread_idx, cpu_map_idx;
+	u64 read_format = evsel->core.attr.read_format;
+	int aggr_idx;
 
+	/* Only support leader sampling with a group of read events. */
+	if ((read_format & PERF_FORMAT_GROUP) == 0)
+		return;
+
+	/* Lazy initialization of stats values. */
 	if (!evsel->stats)
 		evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false);
-	if (evsel_script(leader)->gnum++ == 0)
-		perf_stat__reset_shadow_stats();
-	val = sample->period * evsel->scale;
-	evsel_script(evsel)->val = val;
-	if (evsel_script(leader)->gnum == leader->core.nr_members) {
-		for_each_group_member (ev2, leader) {
-			perf_stat__print_shadow_stats(&stat_config, ev2,
-						      evsel_script(ev2)->val,
-						      sample->cpu,
-						      &ctx,
-						      NULL);
+	if (!stat_config.aggr_map) {
+		int nr_aggr;
+
+		stat_config.aggr_get_id = perf_script__get_cpu;
+		stat_config.aggr_map =
+			cpu_aggr_map__new(evsel->evlist->core.user_requested_cpus,
+					aggr_cpu_id__cpu,
+					/*data=*/NULL,
+					/*needs_sort=*/false);
+		if (!stat_config.aggr_map) {
+			pr_err("cannot allocate aggr map\n");
+			return;
+		}
+		nr_aggr = stat_config.aggr_map->nr;
+		if (evlist__alloc_aggr_stats(evsel->evlist, nr_aggr) < 0) {
+			pr_err("cannot allocate aggr counts\n");
+			return;
 		}
-		evsel_script(leader)->gnum = 0;
+	}
+
+	/* Add group counts from sample into appropriate evsel counts by id. */
+	for_each_group_evsel(ev2, leader) {
+		struct perf_thread_map *threads = perf_evsel__threads(&ev2->core);
+		struct perf_cpu_map *cpus = evsel__cpus(ev2);
+		int id_num = 0;
+		bool match = false;
+
+		perf_cpu_map__for_each_idx(cpu_map_idx, cpus) {
+			for (thread_idx = 0; thread_idx < threads->nr; thread_idx++) {
+				struct sample_read_value *value = sample->read.group.values;
+				u64 id = ev2->core.id[id_num++];
+
+				sample_read_group__for_each(value, sample->read.group.nr,
+							    read_format) {
+					struct perf_counts_values *counts;
+
+					if (value->id != id)
+						continue;
+
+					counts = perf_counts(ev2->counts, cpu_map_idx, thread_idx);
+					counts->val += value->value;
+					/*
+					 * Ensure the enabled/running time isn't
+					 * 0, which implies an error.
+					 */
+					counts->ena += sample->read.time_enabled ?: sample->period;
+					counts->run += sample->read.time_running ?: sample->period;
+					match = true;
+				}
+			}
+		}
+		if (match) {
+			/* Update the aggreate count in ev2. */
+			perf_stat_process_counter(&stat_config, ev2);
+		}
+	}
+
+	/* Find the appropriate indices for dumping of this sample. */
+	thread_idx = perf_thread_map__idx(perf_evsel__threads(&evsel->core),
+					thread__pid(thread));
+	cpu_map_idx = perf_cpu_map__idx(evsel__cpus(evsel), sample_cpu);
+	if (thread_idx == -1 || cpu_map_idx == -1)
+		return;
+
+	cpu_aggr_map__for_each_idx(aggr_idx, stat_config.aggr_map) {
+		if (stat_config.aggr_map->map[aggr_idx].cpu.cpu == sample_cpu.cpu)
+			break;
+	}
+	/* Iterate all events and the leader of the group, trying to print stats. */
+	for_each_group_evsel(ev2, leader) {
+		struct perf_counts_values *counts =
+			perf_counts(ev2->counts, cpu_map_idx, thread_idx);
+
+		if (!counts)
+			continue;
+
+		perf_stat__print_shadow_stats(&stat_config, ev2, counts->val * ev2->scale,
+					      aggr_idx, &ctx, NULL);
 	}
 }
 
-- 
2.45.2.1089.g2a221341d9-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling
  2024-07-20  7:45 ` [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling Ian Rogers
@ 2024-07-23 14:41   ` James Clark
  2024-07-23 14:57     ` Ian Rogers
  2024-07-26  0:05   ` Namhyung Kim
  1 sibling, 1 reply; 10+ messages in thread
From: James Clark @ 2024-07-23 14:41 UTC (permalink / raw)
  To: Ian Rogers, Andi Kleen
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Kan Liang, linux-perf-users, linux-kernel,
	Athira Rajeev



On 20/07/2024 8:45 am, Ian Rogers wrote:
> Andi Kleen reported a regression where `perf script +F metric` would
> crash. With this change the output is:
> 
> ```
> $ perf record -a -e '{cycles,instructions}:S' perf bench mem memcpy
> 
>        21.229620 GB/sec
> 
>        15.751008 GB/sec
> 
>        16.009221 GB/sec
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 1.945 MB perf.data (294 samples) ]
> $ perf --no-pager script -F +metric
>              perf 1912464 [000] 814503.473101:       6325       cycles:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
>              perf 1912464 [000] 814503.473101:   metric:    0.06  insn per cycle
>              perf 1912464 [000] 814503.473101:        351 instructions:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
>              perf 1912464 [000] 814503.473101:   metric:    0.03  insn per cycle
> ...
> ```

For some reason I only get the metric: lines when I record with -a. I 
noticed this because Andi's test doesn't use -a so it fails.

I'm not sure if that's expected or it's related to your disclaimer below?

> 
> The change fixes perf script to update counts and thereby aggregate
> values which then get consumed by unchanged metric logic in the shadow
> stat output. Note, it would be preferential to switch to json metrics.
> 
> Reported-by: Andi Kleen <ak@linux.intel.com>
> Closes: https://lore.kernel.org/linux-perf-users/20240713155443.1665378-1-ak@linux.intel.com/
> Fixes: 37cc8ad77cf8 ("perf metric: Directly use counts rather than saved_value")'
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> The code isn't well tested nor does it support non-leader sampling
> reading of counts based on periods that seemed to present in the
> previous code. Sending out for the sake of discussion. Andi's changes
> added a test and that should certainly be added.
> ---
>   tools/perf/builtin-script.c | 114 +++++++++++++++++++++++++++++-------
>   1 file changed, 93 insertions(+), 21 deletions(-)
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling
  2024-07-23 14:41   ` James Clark
@ 2024-07-23 14:57     ` Ian Rogers
  2024-07-23 15:26       ` Andi Kleen
  2024-07-23 15:26       ` James Clark
  0 siblings, 2 replies; 10+ messages in thread
From: Ian Rogers @ 2024-07-23 14:57 UTC (permalink / raw)
  To: James Clark
  Cc: Andi Kleen, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Kan Liang, linux-perf-users, linux-kernel,
	Athira Rajeev

On Tue, Jul 23, 2024 at 7:41 AM James Clark <james.clark@linaro.org> wrote:
>
>
>
> On 20/07/2024 8:45 am, Ian Rogers wrote:
> > Andi Kleen reported a regression where `perf script +F metric` would
> > crash. With this change the output is:
> >
> > ```
> > $ perf record -a -e '{cycles,instructions}:S' perf bench mem memcpy
> >
> >        21.229620 GB/sec
> >
> >        15.751008 GB/sec
> >
> >        16.009221 GB/sec
> > [ perf record: Woken up 1 times to write data ]
> > [ perf record: Captured and wrote 1.945 MB perf.data (294 samples) ]
> > $ perf --no-pager script -F +metric
> >              perf 1912464 [000] 814503.473101:       6325       cycles:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
> >              perf 1912464 [000] 814503.473101:   metric:    0.06  insn per cycle
> >              perf 1912464 [000] 814503.473101:        351 instructions:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
> >              perf 1912464 [000] 814503.473101:   metric:    0.03  insn per cycle
> > ...
> > ```
>
> For some reason I only get the metric: lines when I record with -a. I
> noticed this because Andi's test doesn't use -a so it fails.
>
> I'm not sure if that's expected or it's related to your disclaimer below?

It is. When you don't do -a the cpu map just contains -1 and for some
reason it is busted. The whole indirections to arrays of arrays,
counts, stats, aggregations, with indices into various other arrays
and a lack of helpers. The code works for perf stat, but there is a
lot of complexity that I don't fully grok in that. Here I've tried to
kind of break down what the code is trying to do in the comments, but
the old code never did sample_read_group__for_each so was is broken
with leader sampling? Is the leader sampling pretending the read
counts are periods and calling process sample multiple times. Andi
likely knows this code better than me so I was hoping he could fix it
up. We may want to take the patches anyway in order to not have a
segv.

Thanks,
Ian

> >
> > The change fixes perf script to update counts and thereby aggregate
> > values which then get consumed by unchanged metric logic in the shadow
> > stat output. Note, it would be preferential to switch to json metrics.
> >
> > Reported-by: Andi Kleen <ak@linux.intel.com>
> > Closes: https://lore.kernel.org/linux-perf-users/20240713155443.1665378-1-ak@linux.intel.com/
> > Fixes: 37cc8ad77cf8 ("perf metric: Directly use counts rather than saved_value")'
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > The code isn't well tested nor does it support non-leader sampling
> > reading of counts based on periods that seemed to present in the
> > previous code. Sending out for the sake of discussion. Andi's changes
> > added a test and that should certainly be added.
> > ---
> >   tools/perf/builtin-script.c | 114 +++++++++++++++++++++++++++++-------
> >   1 file changed, 93 insertions(+), 21 deletions(-)
> >

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling
  2024-07-23 14:57     ` Ian Rogers
@ 2024-07-23 15:26       ` Andi Kleen
  2024-07-23 15:26       ` James Clark
  1 sibling, 0 replies; 10+ messages in thread
From: Andi Kleen @ 2024-07-23 15:26 UTC (permalink / raw)
  To: Ian Rogers
  Cc: James Clark, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Kan Liang,
	linux-perf-users, linux-kernel, Athira Rajeev

> > For some reason I only get the metric: lines when I record with -a. I
> > noticed this because Andi's test doesn't use -a so it fails.
> >
> > I'm not sure if that's expected or it's related to your disclaimer below?
> 
> It is. When you don't do -a the cpu map just contains -1 and for some

The intention was to make it work without -a

> reason it is busted. The whole indirections to arrays of arrays,

That's what I tried to fix earlier with my patch that remapped to index 0,
but it ran into the problem that it conflicted with the perf stat report setup.

The metrics code doesn't really need a map, all it wants to do is to
process the last contiguous group. 

That is at some point perhaps it would be nice to extend it to multiple
groups, but I wasn't trying to support that here.

> counts, stats, aggregations, with indices into various other arrays
> and a lack of helpers. The code works for perf stat, but there is a
> lot of complexity that I don't fully grok in that. Here I've tried to

I don't really know how we ended up with such convoluted code, but that's
a different issue.

-Andi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling
  2024-07-23 14:57     ` Ian Rogers
  2024-07-23 15:26       ` Andi Kleen
@ 2024-07-23 15:26       ` James Clark
  2024-07-23 15:39         ` Andi Kleen
  1 sibling, 1 reply; 10+ messages in thread
From: James Clark @ 2024-07-23 15:26 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Andi Kleen, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Kan Liang, linux-perf-users, linux-kernel,
	Athira Rajeev



On 23/07/2024 3:57 pm, Ian Rogers wrote:
> On Tue, Jul 23, 2024 at 7:41 AM James Clark <james.clark@linaro.org> wrote:
>>
>>
>>
>> On 20/07/2024 8:45 am, Ian Rogers wrote:
>>> Andi Kleen reported a regression where `perf script +F metric` would
>>> crash. With this change the output is:
>>>
>>> ```
>>> $ perf record -a -e '{cycles,instructions}:S' perf bench mem memcpy
>>>
>>>         21.229620 GB/sec
>>>
>>>         15.751008 GB/sec
>>>
>>>         16.009221 GB/sec
>>> [ perf record: Woken up 1 times to write data ]
>>> [ perf record: Captured and wrote 1.945 MB perf.data (294 samples) ]
>>> $ perf --no-pager script -F +metric
>>>               perf 1912464 [000] 814503.473101:       6325       cycles:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
>>>               perf 1912464 [000] 814503.473101:   metric:    0.06  insn per cycle
>>>               perf 1912464 [000] 814503.473101:        351 instructions:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
>>>               perf 1912464 [000] 814503.473101:   metric:    0.03  insn per cycle
>>> ...
>>> ```
>>
>> For some reason I only get the metric: lines when I record with -a. I
>> noticed this because Andi's test doesn't use -a so it fails.
>>
>> I'm not sure if that's expected or it's related to your disclaimer below?
> 
> It is. When you don't do -a the cpu map just contains -1 and for some
> reason it is busted. The whole indirections to arrays of arrays,
> counts, stats, aggregations, with indices into various other arrays
> and a lack of helpers. The code works for perf stat, but there is a
> lot of complexity that I don't fully grok in that. Here I've tried to
> kind of break down what the code is trying to do in the comments, but
> the old code never did sample_read_group__for_each so was is broken
> with leader sampling? Is the leader sampling pretending the read
> counts are periods and calling process sample multiple times. Andi
> likely knows this code better than me so I was hoping he could fix it
> up. We may want to take the patches anyway in order to not have a
> segv.
> 
> Thanks,
> Ian
> 

Yeah I suppose it's strictly better now without the segfault. Could you 
pull in the test and update it to add -a? At least then that behavior 
will be locked down and we can extend it later without -a.

I also tested Andi's V5 and still got the segfault.

>>>
>>> The change fixes perf script to update counts and thereby aggregate
>>> values which then get consumed by unchanged metric logic in the shadow
>>> stat output. Note, it would be preferential to switch to json metrics.
>>>
>>> Reported-by: Andi Kleen <ak@linux.intel.com>
>>> Closes: https://lore.kernel.org/linux-perf-users/20240713155443.1665378-1-ak@linux.intel.com/
>>> Fixes: 37cc8ad77cf8 ("perf metric: Directly use counts rather than saved_value")'
>>> Signed-off-by: Ian Rogers <irogers@google.com>
>>> ---
>>> The code isn't well tested nor does it support non-leader sampling
>>> reading of counts based on periods that seemed to present in the
>>> previous code. Sending out for the sake of discussion. Andi's changes
>>> added a test and that should certainly be added.
>>> ---
>>>    tools/perf/builtin-script.c | 114 +++++++++++++++++++++++++++++-------
>>>    1 file changed, 93 insertions(+), 21 deletions(-)
>>>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling
  2024-07-23 15:26       ` James Clark
@ 2024-07-23 15:39         ` Andi Kleen
  2024-07-23 15:49           ` Ian Rogers
  0 siblings, 1 reply; 10+ messages in thread
From: Andi Kleen @ 2024-07-23 15:39 UTC (permalink / raw)
  To: James Clark
  Cc: Ian Rogers, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Kan Liang, linux-perf-users, linux-kernel,
	Athira Rajeev

> Yeah I suppose it's strictly better now without the segfault. Could you pull
> in the test and update it to add -a? At least then that behavior will be
> locked down and we can extend it later without -a.

Ian's patch really implements completely new functionality (supporting
metrics over multiple groups). It isn't a regression fix, but completely
redesigns the old "single group" feature.

> I also tested Andi's V5 and still got the segfault.

Yes the earlier versions worked, but they broke perf stat report.
I think the right short term fix is to use V3 or so, but with a
check that perf stat report isn't used.

Then perhaps the new functionality of multiple groups can be considered
over it.

-Andi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling
  2024-07-23 15:39         ` Andi Kleen
@ 2024-07-23 15:49           ` Ian Rogers
  0 siblings, 0 replies; 10+ messages in thread
From: Ian Rogers @ 2024-07-23 15:49 UTC (permalink / raw)
  To: Andi Kleen
  Cc: James Clark, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Kan Liang,
	linux-perf-users, linux-kernel, Athira Rajeev

On Tue, Jul 23, 2024 at 8:40 AM Andi Kleen <ak@linux.intel.com> wrote:
>
> > Yeah I suppose it's strictly better now without the segfault. Could you pull
> > in the test and update it to add -a? At least then that behavior will be
> > locked down and we can extend it later without -a.
>
> Ian's patch really implements completely new functionality (supporting
> metrics over multiple groups). It isn't a regression fix, but completely
> redesigns the old "single group" feature.

I don't understand this, I don't understand "single group". I couldn't
understand the old logic using period. It is true in this patch and
others we've been trying to remove duplicated count values, such as
"saved_value" in shadow stats. Having a single value has led to things
being more robust, for example, counts getting reset in perf stat's
interval mode. The stat-display is still a mess, the stat-shadow code
is also still a mess with bugs in things like column header printing.
It won't get any better by having something as simple as a count
duplicated in at least 3 places.

> > I also tested Andi's V5 and still got the segfault.
>
> Yes the earlier versions worked, but they broke perf stat report.
> I think the right short term fix is to use V3 or so, but with a
> check that perf stat report isn't used.
>
> Then perhaps the new functionality of multiple groups can be considered
> over it.

I look forward to seeing the changes. I was trying to do a service
here as you were randomly jamming 0 in as an aggregation index and
hoping for some magic to come from it. The code here shows how things
can be wired up and it works, but just not in the way the original
stuff did because I didn't understand how the original stuff was
expecting to work. Feel free to ignore it and not see it merged.

Thanks,
Ian

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling
  2024-07-20  7:45 ` [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling Ian Rogers
  2024-07-23 14:41   ` James Clark
@ 2024-07-26  0:05   ` Namhyung Kim
  2024-07-26  3:32     ` Ian Rogers
  1 sibling, 1 reply; 10+ messages in thread
From: Namhyung Kim @ 2024-07-26  0:05 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Adrian Hunter,
	Kan Liang, linux-perf-users, linux-kernel, Andi Kleen,
	Athira Rajeev

Hi Ian,

On Sat, Jul 20, 2024 at 12:45:52AM -0700, Ian Rogers wrote:
> Andi Kleen reported a regression where `perf script +F metric` would
> crash. With this change the output is:
> 
> ```
> $ perf record -a -e '{cycles,instructions}:S' perf bench mem memcpy
> 
>       21.229620 GB/sec
> 
>       15.751008 GB/sec
> 
>       16.009221 GB/sec
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 1.945 MB perf.data (294 samples) ]
> $ perf --no-pager script -F +metric
>             perf 1912464 [000] 814503.473101:       6325       cycles:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
>             perf 1912464 [000] 814503.473101:   metric:    0.06  insn per cycle
>             perf 1912464 [000] 814503.473101:        351 instructions:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
>             perf 1912464 [000] 814503.473101:   metric:    0.03  insn per cycle
> ...
> ```
> 
> The change fixes perf script to update counts and thereby aggregate
> values which then get consumed by unchanged metric logic in the shadow
> stat output. Note, it would be preferential to switch to json metrics.
> 
> Reported-by: Andi Kleen <ak@linux.intel.com>
> Closes: https://lore.kernel.org/linux-perf-users/20240713155443.1665378-1-ak@linux.intel.com/
> Fixes: 37cc8ad77cf8 ("perf metric: Directly use counts rather than saved_value")'
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> The code isn't well tested nor does it support non-leader sampling
> reading of counts based on periods that seemed to present in the
> previous code. Sending out for the sake of discussion. Andi's changes
> added a test and that should certainly be added.

You don't need to handle leader sampling specially since it's already
done by evlist__deliver_sample().  It will call tool->sample() for each
event (with sample->period is set) when leader sampling is enabled.

> ---
>  tools/perf/builtin-script.c | 114 +++++++++++++++++++++++++++++-------
>  1 file changed, 93 insertions(+), 21 deletions(-)
> 
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index c16224b1fef3..752d6219fb08 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -63,6 +63,7 @@
>  #include "util/util.h"
>  #include "util/cgroup.h"
>  #include "perf.h"
> +#include <internal/threadmap.h>
>  
>  #include <linux/ctype.h>
>  #ifdef HAVE_LIBTRACEEVENT
> @@ -334,16 +335,8 @@ struct evsel_script {
>         char *filename;
>         FILE *fp;
>         u64  samples;
> -       /* For metric output */
> -       u64  val;
> -       int  gnum;
>  };
>  
> -static inline struct evsel_script *evsel_script(struct evsel *evsel)
> -{
> -	return (struct evsel_script *)evsel->priv;
> -}
> -
>  static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data)
>  {
>  	struct evsel_script *es = zalloc(sizeof(*es));
> @@ -2107,6 +2100,12 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused,
>  	fputs("\tmetric: ", mctx->fp);
>  }
>  
> +static struct aggr_cpu_id perf_script__get_cpu(struct perf_stat_config *config __maybe_unused,
> +					struct perf_cpu cpu)
> +{
> +	return aggr_cpu_id__cpu(cpu, /*data=*/NULL);
> +}
> +
>  static void perf_sample__fprint_metric(struct perf_script *script,
>  				       struct thread *thread,
>  				       struct evsel *evsel,
> @@ -2126,23 +2125,96 @@ static void perf_sample__fprint_metric(struct perf_script *script,
>  		.force_header = false,
>  	};
>  	struct evsel *ev2;
> -	u64 val;
> +	struct perf_cpu sample_cpu = { .cpu = sample->cpu, };
> +	int thread_idx, cpu_map_idx;
> +	u64 read_format = evsel->core.attr.read_format;
> +	int aggr_idx;
>  
> +	/* Only support leader sampling with a group of read events. */
> +	if ((read_format & PERF_FORMAT_GROUP) == 0)
> +		return;
> +
> +	/* Lazy initialization of stats values. */
>  	if (!evsel->stats)
>  		evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false);
> -	if (evsel_script(leader)->gnum++ == 0)
> -		perf_stat__reset_shadow_stats();
> -	val = sample->period * evsel->scale;
> -	evsel_script(evsel)->val = val;
> -	if (evsel_script(leader)->gnum == leader->core.nr_members) {
> -		for_each_group_member (ev2, leader) {
> -			perf_stat__print_shadow_stats(&stat_config, ev2,
> -						      evsel_script(ev2)->val,
> -						      sample->cpu,
> -						      &ctx,
> -						      NULL);
> +	if (!stat_config.aggr_map) {
> +		int nr_aggr;
> +
> +		stat_config.aggr_get_id = perf_script__get_cpu;

To support per-thread mode, I think we should check thread_map of the
evlist if it has actual PID.  And use the thread map index as an aggr
idx.

Thanks,
Namhyung


> +		stat_config.aggr_map =
> +			cpu_aggr_map__new(evsel->evlist->core.user_requested_cpus,
> +					aggr_cpu_id__cpu,
> +					/*data=*/NULL,
> +					/*needs_sort=*/false);
> +		if (!stat_config.aggr_map) {
> +			pr_err("cannot allocate aggr map\n");
> +			return;
> +		}
> +		nr_aggr = stat_config.aggr_map->nrd;
> +		if (evlist__alloc_aggr_stats(evsel->evlist, nr_aggr) < 0) {
> +			pr_err("cannot allocate aggr counts\n");
> +			return;
>  		}
> -		evsel_script(leader)->gnum = 0;
> +	}
> +
> +	/* Add group counts from sample into appropriate evsel counts by id. */
> +	for_each_group_evsel(ev2, leader) {
> +		struct perf_thread_map *threads = perf_evsel__threads(&ev2->core);
> +		struct perf_cpu_map *cpus = evsel__cpus(ev2);
> +		int id_num = 0;
> +		bool match = false;
> +
> +		perf_cpu_map__for_each_idx(cpu_map_idx, cpus) {
> +			for (thread_idx = 0; thread_idx < threads->nr; thread_idx++) {
> +				struct sample_read_value *value = sample->read.group.values;
> +				u64 id = ev2->core.id[id_num++];
> +
> +				sample_read_group__for_each(value, sample->read.group.nr,
> +							    read_format) {
> +					struct perf_counts_values *counts;
> +
> +					if (value->id != id)
> +						continue;
> +
> +					counts = perf_counts(ev2->counts, cpu_map_idx, thread_idx);
> +					counts->val += value->value;
> +					/*
> +					 * Ensure the enabled/running time isn't
> +					 * 0, which implies an error.
> +					 */
> +					counts->ena += sample->read.time_enabled ?: sample->period;
> +					counts->run += sample->read.time_running ?: sample->period;
> +					match = true;
> +				}
> +			}
> +		}
> +		if (match) {
> +			/* Update the aggreate count in ev2. */
> +			perf_stat_process_counter(&stat_config, ev2);
> +		}
> +	}
> +
> +	/* Find the appropriate indices for dumping of this sample. */
> +	thread_idx = perf_thread_map__idx(perf_evsel__threads(&evsel->core),
> +					thread__pid(thread));
> +	cpu_map_idx = perf_cpu_map__idx(evsel__cpus(evsel), sample_cpu);
> +	if (thread_idx == -1 || cpu_map_idx == -1)
> +		return;
> +
> +	cpu_aggr_map__for_each_idx(aggr_idx, stat_config.aggr_map) {
> +		if (stat_config.aggr_map->map[aggr_idx].cpu.cpu == sample_cpu.cpu)
> +			break;
> +	}
> +	/* Iterate all events and the leader of the group, trying to print stats. */
> +	for_each_group_evsel(ev2, leader) {
> +		struct perf_counts_values *counts =
> +			perf_counts(ev2->counts, cpu_map_idx, thread_idx);
> +
> +		if (!counts)
> +			continue;
> +
> +		perf_stat__print_shadow_stats(&stat_config, ev2, counts->val * ev2->scale,
> +					      aggr_idx, &ctx, NULL);
>  	}
>  }
>  
> -- 
> 2.45.2.1089.g2a221341d9-goog
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling
  2024-07-26  0:05   ` Namhyung Kim
@ 2024-07-26  3:32     ` Ian Rogers
  0 siblings, 0 replies; 10+ messages in thread
From: Ian Rogers @ 2024-07-26  3:32 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Adrian Hunter,
	Kan Liang, linux-perf-users, linux-kernel, Andi Kleen,
	Athira Rajeev

On Thu, Jul 25, 2024 at 5:05 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Hi Ian,
>
> On Sat, Jul 20, 2024 at 12:45:52AM -0700, Ian Rogers wrote:
> > Andi Kleen reported a regression where `perf script +F metric` would
> > crash. With this change the output is:
> >
> > ```
> > $ perf record -a -e '{cycles,instructions}:S' perf bench mem memcpy
> >
> >       21.229620 GB/sec
> >
> >       15.751008 GB/sec
> >
> >       16.009221 GB/sec
> > [ perf record: Woken up 1 times to write data ]
> > [ perf record: Captured and wrote 1.945 MB perf.data (294 samples) ]
> > $ perf --no-pager script -F +metric
> >             perf 1912464 [000] 814503.473101:       6325       cycles:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
> >             perf 1912464 [000] 814503.473101:   metric:    0.06  insn per cycle
> >             perf 1912464 [000] 814503.473101:        351 instructions:  ffffffff8548d64a native_write_msr+0xa ([kernel.kallsyms])
> >             perf 1912464 [000] 814503.473101:   metric:    0.03  insn per cycle
> > ...
> > ```
> >
> > The change fixes perf script to update counts and thereby aggregate
> > values which then get consumed by unchanged metric logic in the shadow
> > stat output. Note, it would be preferential to switch to json metrics.
> >
> > Reported-by: Andi Kleen <ak@linux.intel.com>
> > Closes: https://lore.kernel.org/linux-perf-users/20240713155443.1665378-1-ak@linux.intel.com/
> > Fixes: 37cc8ad77cf8 ("perf metric: Directly use counts rather than saved_value")'
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > The code isn't well tested nor does it support non-leader sampling
> > reading of counts based on periods that seemed to present in the
> > previous code. Sending out for the sake of discussion. Andi's changes
> > added a test and that should certainly be added.
>
> You don't need to handle leader sampling specially since it's already
> done by evlist__deliver_sample().  It will call tool->sample() for each
> event (with sample->period is set) when leader sampling is enabled.

Right, but you need to synthesize a count value then rather than just
use the count from the group. Counts always increase while periods may
decrease. If you add a period to a count, what about overflow? Sounds
ripe for ubsan failures.

> > ---
> >  tools/perf/builtin-script.c | 114 +++++++++++++++++++++++++++++-------
> >  1 file changed, 93 insertions(+), 21 deletions(-)
> >
> > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> > index c16224b1fef3..752d6219fb08 100644
> > --- a/tools/perf/builtin-script.c
> > +++ b/tools/perf/builtin-script.c
> > @@ -63,6 +63,7 @@
> >  #include "util/util.h"
> >  #include "util/cgroup.h"
> >  #include "perf.h"
> > +#include <internal/threadmap.h>
> >
> >  #include <linux/ctype.h>
> >  #ifdef HAVE_LIBTRACEEVENT
> > @@ -334,16 +335,8 @@ struct evsel_script {
> >         char *filename;
> >         FILE *fp;
> >         u64  samples;
> > -       /* For metric output */
> > -       u64  val;
> > -       int  gnum;
> >  };
> >
> > -static inline struct evsel_script *evsel_script(struct evsel *evsel)
> > -{
> > -     return (struct evsel_script *)evsel->priv;
> > -}
> > -
> >  static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data)
> >  {
> >       struct evsel_script *es = zalloc(sizeof(*es));
> > @@ -2107,6 +2100,12 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused,
> >       fputs("\tmetric: ", mctx->fp);
> >  }
> >
> > +static struct aggr_cpu_id perf_script__get_cpu(struct perf_stat_config *config __maybe_unused,
> > +                                     struct perf_cpu cpu)
> > +{
> > +     return aggr_cpu_id__cpu(cpu, /*data=*/NULL);
> > +}
> > +
> >  static void perf_sample__fprint_metric(struct perf_script *script,
> >                                      struct thread *thread,
> >                                      struct evsel *evsel,
> > @@ -2126,23 +2125,96 @@ static void perf_sample__fprint_metric(struct perf_script *script,
> >               .force_header = false,
> >       };
> >       struct evsel *ev2;
> > -     u64 val;
> > +     struct perf_cpu sample_cpu = { .cpu = sample->cpu, };
> > +     int thread_idx, cpu_map_idx;
> > +     u64 read_format = evsel->core.attr.read_format;
> > +     int aggr_idx;
> >
> > +     /* Only support leader sampling with a group of read events. */
> > +     if ((read_format & PERF_FORMAT_GROUP) == 0)
> > +             return;
> > +
> > +     /* Lazy initialization of stats values. */
> >       if (!evsel->stats)
> >               evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false);
> > -     if (evsel_script(leader)->gnum++ == 0)
> > -             perf_stat__reset_shadow_stats();
> > -     val = sample->period * evsel->scale;
> > -     evsel_script(evsel)->val = val;
> > -     if (evsel_script(leader)->gnum == leader->core.nr_members) {
> > -             for_each_group_member (ev2, leader) {
> > -                     perf_stat__print_shadow_stats(&stat_config, ev2,
> > -                                                   evsel_script(ev2)->val,
> > -                                                   sample->cpu,
> > -                                                   &ctx,
> > -                                                   NULL);
> > +     if (!stat_config.aggr_map) {
> > +             int nr_aggr;
> > +
> > +             stat_config.aggr_get_id = perf_script__get_cpu;
>
> To support per-thread mode, I think we should check thread_map of the
> evlist if it has actual PID.  And use the thread map index as an aggr
> idx.

Right, I wanted to send something out that was somewhat sensibly using
the aggregation and using perf_stat_process_counter that updates it.
The blamed patch was part of a series of cleaning up aggregation (by
me and Namhyung) that got rid of in stat shadow the duplicate
saved_value aggregation and switched to use the common aggregation
code. Andi's code brute forces the aggregation code making seemingly
questionable assumptions all over the place. I wanted to show that
needn't be the case.

Fwiw, I don't intend to send a v2 of this patch series. I'm perfectly
happy with the idea of "-F metric" just being deleted from perf
script.

Thanks,
Ian

> Thanks,
> Namhyung
>
>
> > +             stat_config.aggr_map =
> > +                     cpu_aggr_map__new(evsel->evlist->core.user_requested_cpus,
> > +                                     aggr_cpu_id__cpu,
> > +                                     /*data=*/NULL,
> > +                                     /*needs_sort=*/false);
> > +             if (!stat_config.aggr_map) {
> > +                     pr_err("cannot allocate aggr map\n");
> > +                     return;
> > +             }
> > +             nr_aggr = stat_config.aggr_map->nrd;
> > +             if (evlist__alloc_aggr_stats(evsel->evlist, nr_aggr) < 0) {
> > +                     pr_err("cannot allocate aggr counts\n");
> > +                     return;
> >               }
> > -             evsel_script(leader)->gnum = 0;
> > +     }
> > +
> > +     /* Add group counts from sample into appropriate evsel counts by id. */
> > +     for_each_group_evsel(ev2, leader) {
> > +             struct perf_thread_map *threads = perf_evsel__threads(&ev2->core);
> > +             struct perf_cpu_map *cpus = evsel__cpus(ev2);
> > +             int id_num = 0;
> > +             bool match = false;
> > +
> > +             perf_cpu_map__for_each_idx(cpu_map_idx, cpus) {
> > +                     for (thread_idx = 0; thread_idx < threads->nr; thread_idx++) {
> > +                             struct sample_read_value *value = sample->read.group.values;
> > +                             u64 id = ev2->core.id[id_num++];
> > +
> > +                             sample_read_group__for_each(value, sample->read.group.nr,
> > +                                                         read_format) {
> > +                                     struct perf_counts_values *counts;
> > +
> > +                                     if (value->id != id)
> > +                                             continue;
> > +
> > +                                     counts = perf_counts(ev2->counts, cpu_map_idx, thread_idx);
> > +                                     counts->val += value->value;
> > +                                     /*
> > +                                      * Ensure the enabled/running time isn't
> > +                                      * 0, which implies an error.
> > +                                      */
> > +                                     counts->ena += sample->read.time_enabled ?: sample->period;
> > +                                     counts->run += sample->read.time_running ?: sample->period;
> > +                                     match = true;
> > +                             }
> > +                     }
> > +             }
> > +             if (match) {
> > +                     /* Update the aggreate count in ev2. */
> > +                     perf_stat_process_counter(&stat_config, ev2);
> > +             }
> > +     }
> > +
> > +     /* Find the appropriate indices for dumping of this sample. */
> > +     thread_idx = perf_thread_map__idx(perf_evsel__threads(&evsel->core),
> > +                                     thread__pid(thread));
> > +     cpu_map_idx = perf_cpu_map__idx(evsel__cpus(evsel), sample_cpu);
> > +     if (thread_idx == -1 || cpu_map_idx == -1)
> > +             return;
> > +
> > +     cpu_aggr_map__for_each_idx(aggr_idx, stat_config.aggr_map) {
> > +             if (stat_config.aggr_map->map[aggr_idx].cpu.cpu == sample_cpu.cpu)
> > +                     break;
> > +     }
> > +     /* Iterate all events and the leader of the group, trying to print stats. */
> > +     for_each_group_evsel(ev2, leader) {
> > +             struct perf_counts_values *counts =
> > +                     perf_counts(ev2->counts, cpu_map_idx, thread_idx);
> > +
> > +             if (!counts)
> > +                     continue;
> > +
> > +             perf_stat__print_shadow_stats(&stat_config, ev2, counts->val * ev2->scale,
> > +                                           aggr_idx, &ctx, NULL);
> >       }
> >  }
> >
> > --
> > 2.45.2.1089.g2a221341d9-goog
> >

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-07-26  3:32 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-20  7:45 [PATCH v1 1/2] libperf threadmap: Add ability to find index from pid Ian Rogers
2024-07-20  7:45 ` [PATCH v1 2/2] perf script: Fix for `perf script +F metric` with leader sampling Ian Rogers
2024-07-23 14:41   ` James Clark
2024-07-23 14:57     ` Ian Rogers
2024-07-23 15:26       ` Andi Kleen
2024-07-23 15:26       ` James Clark
2024-07-23 15:39         ` Andi Kleen
2024-07-23 15:49           ` Ian Rogers
2024-07-26  0:05   ` Namhyung Kim
2024-07-26  3:32     ` Ian Rogers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).