* [PATCH v3 1/5] perf cpumap: Don't decrement refcnt on args to merge
2022-04-08 3:56 [PATCH v3 0/5] Make evlist CPUs more accurate Ian Rogers
@ 2022-04-08 3:56 ` Ian Rogers
2022-04-08 3:56 ` [PATCH v3 2/5] perf tests: Additional cpumap merge tests Ian Rogers
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Ian Rogers @ 2022-04-08 3:56 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Mathieu Poirier, Suzuki K Poulose, Mike Leach, Leo Yan,
John Garry, Will Deacon, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Martin KaFai Lau, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Kajol Jain, James Clark, German Gomez,
Adrian Hunter, Riccardo Mancini, Andi Kleen, Alexey Bayduraev,
Alexander Antonov, linux-perf-users, linux-kernel, coresight,
linux-arm-kernel, netdev, bpf
Cc: Stephane Eranian, Ian Rogers
Having the reference count of one argument to the cpumap merge decremented
but not the other leads to an inconsistent API. Don't decrement the
reference count of either argument.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/lib/perf/cpumap.c | 11 +++--------
tools/lib/perf/evlist.c | 6 +++++-
tools/perf/tests/cpumap.c | 1 +
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index 384d5e076ee4..95c56e17241b 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -342,9 +342,7 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
/*
* Merge two cpumaps
*
- * orig either gets freed and replaced with a new map, or reused
- * with no reference count change (similar to "realloc")
- * other has its reference count increased.
+ * May reuse either orig or other bumping reference count accordingly.
*/
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
@@ -356,11 +354,9 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *merged;
if (perf_cpu_map__is_subset(orig, other))
- return orig;
- if (perf_cpu_map__is_subset(other, orig)) {
- perf_cpu_map__put(orig);
+ return perf_cpu_map__get(orig);
+ if (perf_cpu_map__is_subset(other, orig))
return perf_cpu_map__get(other);
- }
tmp_len = orig->nr + other->nr;
tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
@@ -387,6 +383,5 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
merged = cpu_map__trim_new(k, tmp_cpus);
free(tmp_cpus);
- perf_cpu_map__put(orig);
return merged;
}
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 1b15ba13c477..b783249a038b 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -35,6 +35,8 @@ void perf_evlist__init(struct perf_evlist *evlist)
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
struct perf_evsel *evsel)
{
+ struct perf_cpu_map *tmp;
+
/*
* We already have cpus for evsel (via PMU sysfs) so
* keep it, if there's no target cpu list defined.
@@ -52,7 +54,9 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
perf_thread_map__put(evsel->threads);
evsel->threads = perf_thread_map__get(evlist->threads);
- evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
+ tmp = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
+ perf_cpu_map__put(evlist->all_cpus);
+ evlist->all_cpus = tmp;
}
static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index f94929ebb54b..cf205ed6b158 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -133,6 +133,7 @@ static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subte
TEST_ASSERT_VAL("failed to merge map: bad nr", perf_cpu_map__nr(c) == 5);
cpu_map__snprint(c, buf, sizeof(buf));
TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, "1-2,4-5,7"));
+ perf_cpu_map__put(a);
perf_cpu_map__put(b);
perf_cpu_map__put(c);
return 0;
--
2.35.1.1178.g4f1659d476-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 2/5] perf tests: Additional cpumap merge tests
2022-04-08 3:56 [PATCH v3 0/5] Make evlist CPUs more accurate Ian Rogers
2022-04-08 3:56 ` [PATCH v3 1/5] perf cpumap: Don't decrement refcnt on args to merge Ian Rogers
@ 2022-04-08 3:56 ` Ian Rogers
2022-04-08 3:56 ` [PATCH v3 3/5] perf cpumap: Add intersect function Ian Rogers
` (2 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Ian Rogers @ 2022-04-08 3:56 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Mathieu Poirier, Suzuki K Poulose, Mike Leach, Leo Yan,
John Garry, Will Deacon, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Martin KaFai Lau, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Kajol Jain, James Clark, German Gomez,
Adrian Hunter, Riccardo Mancini, Andi Kleen, Alexey Bayduraev,
Alexander Antonov, linux-perf-users, linux-kernel, coresight,
linux-arm-kernel, netdev, bpf
Cc: Stephane Eranian, Ian Rogers
Cover cases where one cpu map is a subset of the other.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/tests/cpumap.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index cf205ed6b158..3b9fc549d30b 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -123,22 +123,36 @@ static int test__cpu_map_print(struct test_suite *test __maybe_unused, int subte
return 0;
}
-static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+static int __test__cpu_map_merge(const char *lhs, const char *rhs, int nr, const char *expected)
{
- struct perf_cpu_map *a = perf_cpu_map__new("4,2,1");
- struct perf_cpu_map *b = perf_cpu_map__new("4,5,7");
+ struct perf_cpu_map *a = perf_cpu_map__new(lhs);
+ struct perf_cpu_map *b = perf_cpu_map__new(rhs);
struct perf_cpu_map *c = perf_cpu_map__merge(a, b);
char buf[100];
- TEST_ASSERT_VAL("failed to merge map: bad nr", perf_cpu_map__nr(c) == 5);
+ TEST_ASSERT_EQUAL("failed to merge map: bad nr", perf_cpu_map__nr(c), nr);
cpu_map__snprint(c, buf, sizeof(buf));
- TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, "1-2,4-5,7"));
+ TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, expected));
perf_cpu_map__put(a);
perf_cpu_map__put(b);
perf_cpu_map__put(c);
return 0;
}
+static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+{
+ int ret;
+
+ ret = __test__cpu_map_merge("4,2,1", "4,5,7", 5, "1-2,4-5,7");
+ if (ret) return ret;
+ ret = __test__cpu_map_merge("4,2,1", "1", 3, "1-2,4");
+ if (ret) return ret;
+ ret = __test__cpu_map_merge("1", "4,2,1", 3, "1-2,4");
+ if (ret) return ret;
+ ret = __test__cpu_map_merge("1", "1", 1, "1");
+ return ret;
+}
+
DEFINE_SUITE("Synthesize cpu map", cpu_map_synthesize);
DEFINE_SUITE("Print cpu map", cpu_map_print);
DEFINE_SUITE("Merge cpu map", cpu_map_merge);
--
2.35.1.1178.g4f1659d476-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 3/5] perf cpumap: Add intersect function.
2022-04-08 3:56 [PATCH v3 0/5] Make evlist CPUs more accurate Ian Rogers
2022-04-08 3:56 ` [PATCH v3 1/5] perf cpumap: Don't decrement refcnt on args to merge Ian Rogers
2022-04-08 3:56 ` [PATCH v3 2/5] perf tests: Additional cpumap merge tests Ian Rogers
@ 2022-04-08 3:56 ` Ian Rogers
2022-04-08 3:56 ` [PATCH v3 4/5] perf evlist: Respect all_cpus when setting user_requested_cpus Ian Rogers
2022-04-08 3:56 ` [PATCH v3 5/5] perf test: Combine cpu map tests into 1 suite Ian Rogers
4 siblings, 0 replies; 9+ messages in thread
From: Ian Rogers @ 2022-04-08 3:56 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Mathieu Poirier, Suzuki K Poulose, Mike Leach, Leo Yan,
John Garry, Will Deacon, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Martin KaFai Lau, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Kajol Jain, James Clark, German Gomez,
Adrian Hunter, Riccardo Mancini, Andi Kleen, Alexey Bayduraev,
Alexander Antonov, linux-perf-users, linux-kernel, coresight,
linux-arm-kernel, netdev, bpf
Cc: Stephane Eranian, Ian Rogers
The merge function gives the union of two cpu maps. Add an intersect
function which will be used in the next change.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/lib/perf/cpumap.c | 35 ++++++++++++++++++++++++++++
tools/lib/perf/include/perf/cpumap.h | 2 ++
tools/perf/tests/builtin-test.c | 1 +
tools/perf/tests/cpumap.c | 35 ++++++++++++++++++++++++++++
tools/perf/tests/tests.h | 1 +
5 files changed, 74 insertions(+)
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index 95c56e17241b..66371135e742 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -385,3 +385,38 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
free(tmp_cpus);
return merged;
}
+
+struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other)
+{
+ struct perf_cpu *tmp_cpus;
+ int tmp_len;
+ int i, j, k;
+ struct perf_cpu_map *merged = NULL;
+
+ if (perf_cpu_map__is_subset(other, orig))
+ return perf_cpu_map__get(orig);
+ if (perf_cpu_map__is_subset(orig, other))
+ return perf_cpu_map__get(other);
+
+ tmp_len = max(orig->nr, other->nr);
+ tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
+ if (!tmp_cpus)
+ return NULL;
+
+ i = j = k = 0;
+ while (i < orig->nr && j < other->nr) {
+ if (orig->map[i].cpu < other->map[j].cpu)
+ i++;
+ else if (orig->map[i].cpu > other->map[j].cpu)
+ j++;
+ else {
+ j++;
+ tmp_cpus[k++] = orig->map[i++];
+ }
+ }
+ if (k)
+ merged = cpu_map__trim_new(k, tmp_cpus);
+ free(tmp_cpus);
+ return merged;
+}
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 4a2edbdb5e2b..a2a7216c0b78 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -19,6 +19,8 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index fac3717d9ba1..dffa41e7ee20 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -88,6 +88,7 @@ static struct test_suite *generic_tests[] = {
&suite__backward_ring_buffer,
&suite__cpu_map_print,
&suite__cpu_map_merge,
+ &suite__cpu_map_intersect,
&suite__sdt_event,
&suite__is_printable_array,
&suite__bitmap_print,
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 3b9fc549d30b..112331829414 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -153,6 +153,41 @@ static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subte
return ret;
}
+static int __test__cpu_map_intersect(const char *lhs, const char *rhs, int nr, const char *expected)
+{
+ struct perf_cpu_map *a = perf_cpu_map__new(lhs);
+ struct perf_cpu_map *b = perf_cpu_map__new(rhs);
+ struct perf_cpu_map *c = perf_cpu_map__intersect(a, b);
+ char buf[100];
+
+ TEST_ASSERT_EQUAL("failed to intersect map: bad nr", perf_cpu_map__nr(c), nr);
+ cpu_map__snprint(c, buf, sizeof(buf));
+ TEST_ASSERT_VAL("failed to intersect map: bad result", !strcmp(buf, expected));
+ perf_cpu_map__put(a);
+ perf_cpu_map__put(b);
+ perf_cpu_map__put(c);
+ return 0;
+}
+
+static int test__cpu_map_intersect(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+{
+ int ret;
+
+ ret = __test__cpu_map_intersect("4,2,1", "4,5,7", 1, "4");
+ if (ret) return ret;
+ ret = __test__cpu_map_intersect("1-8", "6-9", 3, "6-8");
+ if (ret) return ret;
+ ret = __test__cpu_map_intersect("1-8,12-20", "6-9,15", 4, "6-8,15");
+ if (ret) return ret;
+ ret = __test__cpu_map_intersect("4,2,1", "1", 1, "1");
+ if (ret) return ret;
+ ret = __test__cpu_map_intersect("1", "4,2,1", 1, "1");
+ if (ret) return ret;
+ ret = __test__cpu_map_intersect("1", "1", 1, "1");
+ return ret;
+}
+
DEFINE_SUITE("Synthesize cpu map", cpu_map_synthesize);
DEFINE_SUITE("Print cpu map", cpu_map_print);
DEFINE_SUITE("Merge cpu map", cpu_map_merge);
+DEFINE_SUITE("Intersect cpu map", cpu_map_intersect);
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 5bbb8f6a48fc..f2823c4859b8 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -127,6 +127,7 @@ DECLARE_SUITE(event_times);
DECLARE_SUITE(backward_ring_buffer);
DECLARE_SUITE(cpu_map_print);
DECLARE_SUITE(cpu_map_merge);
+DECLARE_SUITE(cpu_map_intersect);
DECLARE_SUITE(sdt_event);
DECLARE_SUITE(is_printable_array);
DECLARE_SUITE(bitmap_print);
--
2.35.1.1178.g4f1659d476-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 4/5] perf evlist: Respect all_cpus when setting user_requested_cpus
2022-04-08 3:56 [PATCH v3 0/5] Make evlist CPUs more accurate Ian Rogers
` (2 preceding siblings ...)
2022-04-08 3:56 ` [PATCH v3 3/5] perf cpumap: Add intersect function Ian Rogers
@ 2022-04-08 3:56 ` Ian Rogers
2022-04-28 20:15 ` Adrian Hunter
2022-04-08 3:56 ` [PATCH v3 5/5] perf test: Combine cpu map tests into 1 suite Ian Rogers
4 siblings, 1 reply; 9+ messages in thread
From: Ian Rogers @ 2022-04-08 3:56 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Mathieu Poirier, Suzuki K Poulose, Mike Leach, Leo Yan,
John Garry, Will Deacon, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Martin KaFai Lau, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Kajol Jain, James Clark, German Gomez,
Adrian Hunter, Riccardo Mancini, Andi Kleen, Alexey Bayduraev,
Alexander Antonov, linux-perf-users, linux-kernel, coresight,
linux-arm-kernel, netdev, bpf
Cc: Stephane Eranian, Ian Rogers
If all_cpus is calculated it represents the merge/union of all
evsel cpu maps. By default user_requested_cpus is computed to be
the online CPUs. For uncore events, it is often the case currently
that all_cpus is a subset of user_requested_cpus. Metrics printed
without aggregation and with metric-only, in print_no_aggr_metric,
iterate over user_requested_cpus assuming every CPU has a metric to
print. For each CPU the prefix is printed, but then if the
evsel's cpus doesn't contain anything you get an empty line like
the following on a 2 socket 36 core SkylakeX:
```
$ perf stat -A -M DRAM_BW_Use -a --metric-only -I 1000
1.000453137 CPU0 0.00
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137 CPU18 0.00
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
1.000453137
2.003717143 CPU0 0.00
...
```
While it is possible to be lazier in printing the prefix and
trailing newline, having user_requested_cpus be a subset of
all_cpus is preferable so that wasted work isn't done elsewhere
that user_requested_cpus is used. The change modifies user_requested_cpus
to be the intersection of user specified CPUs, or default all online
CPUs, with the CPUs computed through the merge of all evsel cpu maps.
New behavior:
```
$ perf stat -A -M DRAM_BW_Use -a --metric-only -I 1000
1.001086325 CPU0 0.00
1.001086325 CPU18 0.00
2.003671291 CPU0 0.00
2.003671291 CPU18 0.00
...
```
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/util/evlist.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 52ea004ba01e..196d57b905a0 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1036,6 +1036,13 @@ int evlist__create_maps(struct evlist *evlist, struct target *target)
if (!cpus)
goto out_delete_threads;
+ if (evlist->core.all_cpus) {
+ struct perf_cpu_map *tmp;
+
+ tmp = perf_cpu_map__intersect(cpus, evlist->core.all_cpus);
+ perf_cpu_map__put(cpus);
+ cpus = tmp;
+ }
evlist->core.has_user_cpus = !!target->cpu_list && !target->hybrid;
perf_evlist__set_maps(&evlist->core, cpus, threads);
--
2.35.1.1178.g4f1659d476-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v3 4/5] perf evlist: Respect all_cpus when setting user_requested_cpus
2022-04-08 3:56 ` [PATCH v3 4/5] perf evlist: Respect all_cpus when setting user_requested_cpus Ian Rogers
@ 2022-04-28 20:15 ` Adrian Hunter
[not found] ` <CAP-5=fVNuQDW+yge897RjaWfE3cfQTD4ufFws6PS2k99Qe05Uw@mail.gmail.com>
0 siblings, 1 reply; 9+ messages in thread
From: Adrian Hunter @ 2022-04-28 20:15 UTC (permalink / raw)
To: Ian Rogers
Cc: Stephane Eranian, Peter Zijlstra, Ingo Molnar,
Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin,
Jiri Olsa, Namhyung Kim, Mathieu Poirier, Suzuki K Poulose,
Mike Leach, Leo Yan, John Garry, Will Deacon, Alexei Starovoitov,
Daniel Borkmann, Andrii Nakryiko, Martin KaFai Lau, Song Liu,
Yonghong Song, John Fastabend, KP Singh, Kajol Jain, James Clark,
German Gomez, Riccardo Mancini, Andi Kleen, Alexey Bayduraev,
Alexander Antonov, linux-perf-users, linux-kernel, coresight,
linux-arm-kernel, netdev, bpf
On 8/04/22 06:56, Ian Rogers wrote:
> If all_cpus is calculated it represents the merge/union of all
> evsel cpu maps. By default user_requested_cpus is computed to be
> the online CPUs. For uncore events, it is often the case currently
> that all_cpus is a subset of user_requested_cpus. Metrics printed
> without aggregation and with metric-only, in print_no_aggr_metric,
> iterate over user_requested_cpus assuming every CPU has a metric to
> print. For each CPU the prefix is printed, but then if the
> evsel's cpus doesn't contain anything you get an empty line like
> the following on a 2 socket 36 core SkylakeX:
>
> ```
> $ perf stat -A -M DRAM_BW_Use -a --metric-only -I 1000
> 1.000453137 CPU0 0.00
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137 CPU18 0.00
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 1.000453137
> 2.003717143 CPU0 0.00
> ...
> ```
>
> While it is possible to be lazier in printing the prefix and
> trailing newline, having user_requested_cpus not be a subset of
> all_cpus is preferential so that wasted work isn't done elsewhere
> user_requested_cpus is used. The change modifies user_requested_cpus
> to be the intersection of user specified CPUs, or default all online
> CPUs, with the CPUs computed through the merge of all evsel cpu maps.
>
> New behavior:
> ```
> $ perf stat -A -M DRAM_BW_Use -a --metric-only -I 1000
> 1.001086325 CPU0 0.00
> 1.001086325 CPU18 0.00
> 2.003671291 CPU0 0.00
> 2.003671291 CPU18 0.00
> ...
> ```
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/util/evlist.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 52ea004ba01e..196d57b905a0 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -1036,6 +1036,13 @@ int evlist__create_maps(struct evlist *evlist, struct target *target)
> if (!cpus)
> goto out_delete_threads;
>
> + if (evlist->core.all_cpus) {
> + struct perf_cpu_map *tmp;
> +
> + tmp = perf_cpu_map__intersect(cpus, evlist->core.all_cpus);
Isn't an uncore PMU represented as being on CPU0 actually
collecting data that can be due to any CPU?
Or, for an uncore PMU represented as being on CPU0-CPU4 on a
4 core 8 hyperthread processor, isn't there actually 1 PMU per core?
So I am not sure intersection makes sense.
Also it is not obvious what happens with hybrid CPUs or
per thread recording.
> + perf_cpu_map__put(cpus);
> + cpus = tmp;
> + }
> evlist->core.has_user_cpus = !!target->cpu_list && !target->hybrid;
>
> perf_evlist__set_maps(&evlist->core, cpus, threads);
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v3 5/5] perf test: Combine cpu map tests into 1 suite
2022-04-08 3:56 [PATCH v3 0/5] Make evlist CPUs more accurate Ian Rogers
` (3 preceding siblings ...)
2022-04-08 3:56 ` [PATCH v3 4/5] perf evlist: Respect all_cpus when setting user_requested_cpus Ian Rogers
@ 2022-04-08 3:56 ` Ian Rogers
4 siblings, 0 replies; 9+ messages in thread
From: Ian Rogers @ 2022-04-08 3:56 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Mathieu Poirier, Suzuki K Poulose, Mike Leach, Leo Yan,
John Garry, Will Deacon, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Martin KaFai Lau, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Kajol Jain, James Clark, German Gomez,
Adrian Hunter, Riccardo Mancini, Andi Kleen, Alexey Bayduraev,
Alexander Antonov, linux-perf-users, linux-kernel, coresight,
linux-arm-kernel, netdev, bpf
Cc: Stephane Eranian, Ian Rogers
Combine related CPU map tests into 1 suite, reducing global state.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/tests/builtin-test.c | 5 +----
tools/perf/tests/cpumap.c | 16 ++++++++++++----
tools/perf/tests/tests.h | 5 +----
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index dffa41e7ee20..1941ae52e8b6 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -79,16 +79,13 @@ static struct test_suite *generic_tests[] = {
&suite__bpf,
&suite__thread_map_synthesize,
&suite__thread_map_remove,
- &suite__cpu_map_synthesize,
+ &suite__cpu_map,
&suite__synthesize_stat_config,
&suite__synthesize_stat,
&suite__synthesize_stat_round,
&suite__event_update,
&suite__event_times,
&suite__backward_ring_buffer,
- &suite__cpu_map_print,
- &suite__cpu_map_merge,
- &suite__cpu_map_intersect,
&suite__sdt_event,
&suite__is_printable_array,
&suite__bitmap_print,
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 112331829414..fc124757a082 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -187,7 +187,15 @@ static int test__cpu_map_intersect(struct test_suite *test __maybe_unused, int s
return ret;
}
-DEFINE_SUITE("Synthesize cpu map", cpu_map_synthesize);
-DEFINE_SUITE("Print cpu map", cpu_map_print);
-DEFINE_SUITE("Merge cpu map", cpu_map_merge);
-DEFINE_SUITE("Intersect cpu map", cpu_map_intersect);
+static struct test_case cpu_map_tests[] = {
+ TEST_CASE("Synthesize cpu map", cpu_map_synthesize),
+ TEST_CASE("Print cpu map", cpu_map_print),
+ TEST_CASE("Merge cpu map", cpu_map_merge),
+ TEST_CASE("Intersect cpu map", cpu_map_intersect),
+ { .name = NULL, }
+};
+
+struct test_suite suite__cpu_map = {
+ .desc = "CPU map",
+ .test_cases = cpu_map_tests,
+};
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index f2823c4859b8..895803fdedc4 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -118,16 +118,13 @@ DECLARE_SUITE(bpf);
DECLARE_SUITE(session_topology);
DECLARE_SUITE(thread_map_synthesize);
DECLARE_SUITE(thread_map_remove);
-DECLARE_SUITE(cpu_map_synthesize);
+DECLARE_SUITE(cpu_map);
DECLARE_SUITE(synthesize_stat_config);
DECLARE_SUITE(synthesize_stat);
DECLARE_SUITE(synthesize_stat_round);
DECLARE_SUITE(event_update);
DECLARE_SUITE(event_times);
DECLARE_SUITE(backward_ring_buffer);
-DECLARE_SUITE(cpu_map_print);
-DECLARE_SUITE(cpu_map_merge);
-DECLARE_SUITE(cpu_map_intersect);
DECLARE_SUITE(sdt_event);
DECLARE_SUITE(is_printable_array);
DECLARE_SUITE(bitmap_print);
--
2.35.1.1178.g4f1659d476-goog
^ permalink raw reply related [flat|nested] 9+ messages in thread