* [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes
@ 2023-05-23 4:44 Ian Rogers
2023-05-23 4:44 ` [PATCH v1 2/2] perf evsel: for_each_group fixes Ian Rogers
2023-05-23 13:00 ` [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes Adrian Hunter
0 siblings, 2 replies; 8+ messages in thread
From: Ian Rogers @ 2023-05-23 4:44 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Ian Rogers, Adrian Hunter, Kan Liang, Sandipan Das, James Clark,
Dmitrii Dolgov, Changbin Du, Rob Herring, Xing Zhengjun,
linux-perf-users, linux-kernel
Previously, evsel__group_pmu_name would iterate the evsel's group;
however, the list of evsels isn't yet sorted and so the loop may
terminate prematurely. It is also not desirable to iterate the list of
evsels during list_sort as the list may be broken. Precompute the
group_pmu_name for the evsel before sorting; as part of the
computation, and only if necessary, iterate the whole list looking for
group members so that the list doesn't need to be sorted first.
Move the group pmu name computation to parse-events.c given the closer
dependency on the behavior of
parse_events__sort_events_and_fix_groups.
Fixes: 7abf0bccaaec ("perf evsel: Add function to compute group PMU name")
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/util/evsel.c | 31 +++++-----------------
tools/perf/util/evsel.h | 2 +-
tools/perf/util/parse-events.c | 47 ++++++++++++++++++++++++++++++----
3 files changed, 50 insertions(+), 30 deletions(-)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 2f5910b31fa9..3247773f9e24 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -290,6 +290,7 @@ void evsel__init(struct evsel *evsel,
evsel->per_pkg_mask = NULL;
evsel->collect_stat = false;
evsel->pmu_name = NULL;
+ evsel->group_pmu_name = NULL;
evsel->skippable = false;
}
@@ -431,6 +432,11 @@ struct evsel *evsel__clone(struct evsel *orig)
if (evsel->pmu_name == NULL)
goto out_err;
}
+ if (orig->group_pmu_name) {
+ evsel->group_pmu_name = strdup(orig->group_pmu_name);
+ if (evsel->group_pmu_name == NULL)
+ goto out_err;
+ }
if (orig->filter) {
evsel->filter = strdup(orig->filter);
if (evsel->filter == NULL)
@@ -827,30 +833,6 @@ bool evsel__name_is(struct evsel *evsel, const char *name)
return !strcmp(evsel__name(evsel), name);
}
-const char *evsel__group_pmu_name(const struct evsel *evsel)
-{
- struct evsel *leader = evsel__leader(evsel);
- struct evsel *pos;
-
- /*
- * Software events may be in a group with other uncore PMU events. Use
- * the pmu_name of the first non-software event to avoid breaking the
- * software event out of the group.
- *
- * Aux event leaders, like intel_pt, expect a group with events from
- * other PMUs, so substitute the AUX event's PMU in this case.
- */
- if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
- /* Starting with the leader, find the first event with a named PMU. */
- for_each_group_evsel(pos, leader) {
- if (pos->pmu_name)
- return pos->pmu_name;
- }
- }
-
- return evsel->pmu_name ?: "cpu";
-}
-
const char *evsel__metric_id(const struct evsel *evsel)
{
if (evsel->metric_id)
@@ -1536,6 +1518,7 @@ void evsel__exit(struct evsel *evsel)
zfree(&evsel->group_name);
zfree(&evsel->name);
zfree(&evsel->pmu_name);
+ zfree(&evsel->group_pmu_name);
zfree(&evsel->unit);
zfree(&evsel->metric_id);
evsel__zero_per_pkg(evsel);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index df8928745fc6..820771a649b2 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -72,6 +72,7 @@ struct evsel {
char *name;
char *group_name;
const char *pmu_name;
+ const char *group_pmu_name;
#ifdef HAVE_LIBTRACEEVENT
struct tep_event *tp_format;
#endif
@@ -289,7 +290,6 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size);
int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
const char *evsel__name(struct evsel *evsel);
bool evsel__name_is(struct evsel *evsel, const char *name);
-const char *evsel__group_pmu_name(const struct evsel *evsel);
const char *evsel__metric_id(const struct evsel *evsel);
static inline bool evsel__is_tool(const struct evsel *evsel)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 34ba840ae19a..210e6f713c6f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2125,6 +2125,41 @@ static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state,
return ret;
}
+static void evsel__compute_group_pmu_name(struct evsel *evsel,
+ const struct list_head *head)
+{
+ struct evsel *leader = evsel__leader(evsel);
+ struct evsel *pos;
+
+ /*
+ * Software events may be in a group with other uncore PMU events. Use
+ * the pmu_name of the first non-software event to avoid breaking the
+ * software event out of the group.
+ *
+ * Aux event leaders, like intel_pt, expect a group with events from
+ * other PMUs, so substitute the AUX event's PMU in this case.
+ */
+ if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
+ /*
+ * Starting with the leader, find the first event with a named
+ * PMU. for_each_group_(member|evsel) isn't used as the list
+ * isn't yet sorted putting evsel's in the same group together.
+ */
+ if (leader->pmu_name) {
+ evsel->group_pmu_name = strdup(leader->pmu_name);
+ return;
+ }
+ list_for_each_entry(pos, head, core.node) {
+ if (evsel__leader(pos) == leader && pos->pmu_name) {
+ evsel->group_pmu_name = strdup(pos->pmu_name);
+ return;
+ }
+ }
+ }
+
+ evsel->group_pmu_name = strdup(evsel->pmu_name ?: "cpu");
+}
+
__weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
{
/* Order by insertion index. */
@@ -2160,8 +2195,8 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list
/* Group by PMU if there is a group. Groups can't span PMUs. */
if (lhs_has_group && rhs_has_group) {
- lhs_pmu_name = evsel__group_pmu_name(lhs);
- rhs_pmu_name = evsel__group_pmu_name(rhs);
+ lhs_pmu_name = lhs->group_pmu_name;
+ rhs_pmu_name = rhs->group_pmu_name;
ret = strcmp(lhs_pmu_name, rhs_pmu_name);
if (ret)
return ret;
@@ -2186,6 +2221,8 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
list_for_each_entry(pos, list, core.node) {
const struct evsel *pos_leader = evsel__leader(pos);
+ evsel__compute_group_pmu_name(pos, list);
+
if (pos == pos_leader)
orig_num_leaders++;
@@ -2210,7 +2247,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
idx = 0;
list_for_each_entry(pos, list, core.node) {
const struct evsel *pos_leader = evsel__leader(pos);
- const char *pos_pmu_name = evsel__group_pmu_name(pos);
+ const char *pos_pmu_name = pos->group_pmu_name;
const char *cur_leader_pmu_name, *pos_leader_pmu_name;
bool force_grouped = arch_evsel__must_be_in_group(pos);
@@ -2227,7 +2264,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
if (!cur_leader)
cur_leader = pos;
- cur_leader_pmu_name = evsel__group_pmu_name(cur_leader);
+ cur_leader_pmu_name = cur_leader->group_pmu_name;
if ((cur_leaders_grp != pos->core.leader && !force_grouped) ||
strcmp(cur_leader_pmu_name, pos_pmu_name)) {
/* Event is for a different group/PMU than last. */
@@ -2239,7 +2276,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
*/
cur_leaders_grp = pos->core.leader;
}
- pos_leader_pmu_name = evsel__group_pmu_name(pos_leader);
+ pos_leader_pmu_name = pos_leader->group_pmu_name;
if (strcmp(pos_leader_pmu_name, pos_pmu_name) || force_grouped) {
/*
* Event's PMU differs from its leader's. Groups can't
--
2.40.1.698.g37aff9b760-goog
* [PATCH v1 2/2] perf evsel: for_each_group fixes
2023-05-23 4:44 [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes Ian Rogers
@ 2023-05-23 4:44 ` Ian Rogers
2023-05-23 14:31 ` Adrian Hunter
2023-05-23 13:00 ` [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes Adrian Hunter
1 sibling, 1 reply; 8+ messages in thread
From: Ian Rogers @ 2023-05-23 4:44 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Ian Rogers, Adrian Hunter, Kan Liang, Sandipan Das, James Clark,
Dmitrii Dolgov, Changbin Du, Rob Herring, Xing Zhengjun,
linux-perf-users, linux-kernel
Address/memory sanitizer was reporting issues in evsel__group_pmu_name
because the for_each_group_evsel loop didn't terminate when the list
head was reached; the head would then be cast and accessed as an evsel,
leading to invalid memory accesses. Fix for_each_group_member and
for_each_group_evsel to terminate at the list head. Note,
evsel__group_pmu_name no longer iterates the group, but the problem is
present regardless.
Fixes: 717e263fc354 ("perf report: Show group description when event group is enabled")
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/util/evsel.h | 24 ++++++++++++++++--------
tools/perf/util/evsel_fprintf.c | 1 +
2 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 820771a649b2..6a64543c7612 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -462,16 +462,24 @@ static inline int evsel__group_idx(struct evsel *evsel)
}
/* Iterates group WITHOUT the leader. */
-#define for_each_group_member(_evsel, _leader) \
-for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \
- (_evsel) && (_evsel)->core.leader == (&_leader->core); \
- (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+#define for_each_group_member_head(_evsel, _leader, _head) \
+for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \
+ (_evsel) && (&(_evsel)->core.node != (_head)) && \
+ (_evsel)->core.leader == (&_leader->core); \
+ (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+
+#define for_each_group_member(_evsel, _leader) \
+ for_each_group_member_head(_evsel, _leader, &(_leader)->evlist->core.entries)
/* Iterates group WITH the leader. */
-#define for_each_group_evsel(_evsel, _leader) \
-for ((_evsel) = _leader; \
- (_evsel) && (_evsel)->core.leader == (&_leader->core); \
- (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+#define for_each_group_evsel_head(_evsel, _leader, _head) \
+for ((_evsel) = _leader; \
+ (_evsel) && (&(_evsel)->core.node != (_head)) && \
+ (_evsel)->core.leader == (&_leader->core); \
+ (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
+
+#define for_each_group_evsel(_evsel, _leader) \
+ for_each_group_evsel_head(_evsel, _leader, &(_leader)->evlist->core.entries)
static inline bool evsel__has_branch_callstack(const struct evsel *evsel)
{
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index cc80ec554c0a..036a2171dc1c 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -2,6 +2,7 @@
#include <inttypes.h>
#include <stdio.h>
#include <stdbool.h>
+#include "util/evlist.h"
#include "evsel.h"
#include "util/evsel_fprintf.h"
#include "util/event.h"
--
2.40.1.698.g37aff9b760-goog
* Re: [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes
2023-05-23 4:44 [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes Ian Rogers
2023-05-23 4:44 ` [PATCH v1 2/2] perf evsel: for_each_group fixes Ian Rogers
@ 2023-05-23 13:00 ` Adrian Hunter
2023-05-23 16:58 ` Ian Rogers
1 sibling, 1 reply; 8+ messages in thread
From: Adrian Hunter @ 2023-05-23 13:00 UTC (permalink / raw)
To: Ian Rogers, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Kan Liang, Sandipan Das, James Clark, Dmitrii Dolgov, Changbin Du,
Rob Herring, Xing Zhengjun, linux-perf-users, linux-kernel
On 23/05/23 07:44, Ian Rogers wrote:
> Previously, evsel__group_pmu_name would iterate the evsel's group;
> however, the list of evsels isn't yet sorted and so the loop may
> terminate prematurely. It is also not desirable to iterate the list of
> evsels during list_sort as the list may be broken. Precompute the
> group_pmu_name for the evsel before sorting; as part of the
> computation, and only if necessary, iterate the whole list looking for
> group members so that the list doesn't need to be sorted first.
>
> Move the group pmu name computation to parse-events.c given the closer
> dependency on the behavior of
> parse_events__sort_events_and_fix_groups.
>
> Fixes: 7abf0bccaaec ("perf evsel: Add function to compute group PMU name")
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/util/evsel.c | 31 +++++-----------------
> tools/perf/util/evsel.h | 2 +-
> tools/perf/util/parse-events.c | 47 ++++++++++++++++++++++++++++++----
> 3 files changed, 50 insertions(+), 30 deletions(-)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 2f5910b31fa9..3247773f9e24 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -290,6 +290,7 @@ void evsel__init(struct evsel *evsel,
> evsel->per_pkg_mask = NULL;
> evsel->collect_stat = false;
> evsel->pmu_name = NULL;
> + evsel->group_pmu_name = NULL;
> evsel->skippable = false;
> }
>
> @@ -431,6 +432,11 @@ struct evsel *evsel__clone(struct evsel *orig)
> if (evsel->pmu_name == NULL)
> goto out_err;
> }
> + if (orig->group_pmu_name) {
> + evsel->group_pmu_name = strdup(orig->group_pmu_name);
> + if (evsel->group_pmu_name == NULL)
> + goto out_err;
> + }
> if (orig->filter) {
> evsel->filter = strdup(orig->filter);
> if (evsel->filter == NULL)
> @@ -827,30 +833,6 @@ bool evsel__name_is(struct evsel *evsel, const char *name)
> return !strcmp(evsel__name(evsel), name);
> }
>
> -const char *evsel__group_pmu_name(const struct evsel *evsel)
> -{
> - struct evsel *leader = evsel__leader(evsel);
> - struct evsel *pos;
> -
> - /*
> - * Software events may be in a group with other uncore PMU events. Use
> - * the pmu_name of the first non-software event to avoid breaking the
> - * software event out of the group.
> - *
> - * Aux event leaders, like intel_pt, expect a group with events from
> - * other PMUs, so substitute the AUX event's PMU in this case.
> - */
> - if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
> - /* Starting with the leader, find the first event with a named PMU. */
> - for_each_group_evsel(pos, leader) {
> - if (pos->pmu_name)
> - return pos->pmu_name;
> - }
> - }
> -
> - return evsel->pmu_name ?: "cpu";
> -}
> -
> const char *evsel__metric_id(const struct evsel *evsel)
> {
> if (evsel->metric_id)
> @@ -1536,6 +1518,7 @@ void evsel__exit(struct evsel *evsel)
> zfree(&evsel->group_name);
> zfree(&evsel->name);
> zfree(&evsel->pmu_name);
> + zfree(&evsel->group_pmu_name);
> zfree(&evsel->unit);
> zfree(&evsel->metric_id);
> evsel__zero_per_pkg(evsel);
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index df8928745fc6..820771a649b2 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -72,6 +72,7 @@ struct evsel {
> char *name;
> char *group_name;
> const char *pmu_name;
> + const char *group_pmu_name;
Since it seems to be only used when sorting, do we really
need this on struct evsel?
> #ifdef HAVE_LIBTRACEEVENT
> struct tep_event *tp_format;
> #endif
> @@ -289,7 +290,6 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size);
> int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
> const char *evsel__name(struct evsel *evsel);
> bool evsel__name_is(struct evsel *evsel, const char *name);
> -const char *evsel__group_pmu_name(const struct evsel *evsel);
> const char *evsel__metric_id(const struct evsel *evsel);
>
> static inline bool evsel__is_tool(const struct evsel *evsel)
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index 34ba840ae19a..210e6f713c6f 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -2125,6 +2125,41 @@ static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state,
> return ret;
> }
>
> +static void evsel__compute_group_pmu_name(struct evsel *evsel,
> + const struct list_head *head)
> +{
> + struct evsel *leader = evsel__leader(evsel);
> + struct evsel *pos;
> +
> + /*
> + * Software events may be in a group with other uncore PMU events. Use
> + * the pmu_name of the first non-software event to avoid breaking the
> + * software event out of the group.
> + *
> + * Aux event leaders, like intel_pt, expect a group with events from
> + * other PMUs, so substitute the AUX event's PMU in this case.
> + */
> + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
> + /*
> + * Starting with the leader, find the first event with a named
> + * PMU. for_each_group_(member|evsel) isn't used as the list
> + * isn't yet sorted putting evsel's in the same group together.
> + */
> + if (leader->pmu_name) {
> + evsel->group_pmu_name = strdup(leader->pmu_name);
> + return;
> + }
> + list_for_each_entry(pos, head, core.node) {
> + if (evsel__leader(pos) == leader && pos->pmu_name) {
> + evsel->group_pmu_name = strdup(pos->pmu_name);
> + return;
> + }
> + }
> + }
> +
> + evsel->group_pmu_name = strdup(evsel->pmu_name ?: "cpu");
> +}
> +
> __weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
> {
> /* Order by insertion index. */
> @@ -2160,8 +2195,8 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list
>
> /* Group by PMU if there is a group. Groups can't span PMUs. */
> if (lhs_has_group && rhs_has_group) {
> - lhs_pmu_name = evsel__group_pmu_name(lhs);
> - rhs_pmu_name = evsel__group_pmu_name(rhs);
> + lhs_pmu_name = lhs->group_pmu_name;
> + rhs_pmu_name = rhs->group_pmu_name;
> ret = strcmp(lhs_pmu_name, rhs_pmu_name);
> if (ret)
> return ret;
> @@ -2186,6 +2221,8 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> list_for_each_entry(pos, list, core.node) {
> const struct evsel *pos_leader = evsel__leader(pos);
>
> + evsel__compute_group_pmu_name(pos, list);
Perhaps check for failing to allocate the string?
But alternatively, allocate an array of pointers to the
group pmu names. Then they don't need to be strdup'ed,
or stored on evsel. Would have to count the number of evsels
first though.
group_pmu_names = calloc(nr_evsel, sizeof(const char *));
group_pmu_names[pos->core.idx] = evsel__group_pmu_name(pos);
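To make that concrete, a rough, untested sketch of how it could slot into
parse_events__sort_events_and_fix_groups (keeping evsel__group_pmu_name
around, and assuming the core.idx values are 0-based for this list):

	const char **group_pmu_names;
	int nr_evsel = 0;

	list_for_each_entry(pos, list, core.node)
		nr_evsel++;

	group_pmu_names = calloc(nr_evsel, sizeof(const char *));
	if (!group_pmu_names)
		return false; /* or however allocation failure should be handled */

	list_for_each_entry(pos, list, core.node)
		group_pmu_names[pos->core.idx] = evsel__group_pmu_name(pos);

	/* ... sort/regroup, reading PMU names from group_pmu_names[pos->core.idx] ... */

	free(group_pmu_names);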
> +
> if (pos == pos_leader)
> orig_num_leaders++;
>
> @@ -2210,7 +2247,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> idx = 0;
> list_for_each_entry(pos, list, core.node) {
> const struct evsel *pos_leader = evsel__leader(pos);
> - const char *pos_pmu_name = evsel__group_pmu_name(pos);
> + const char *pos_pmu_name = pos->group_pmu_name;
> const char *cur_leader_pmu_name, *pos_leader_pmu_name;
> bool force_grouped = arch_evsel__must_be_in_group(pos);
>
> @@ -2227,7 +2264,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> if (!cur_leader)
> cur_leader = pos;
>
> - cur_leader_pmu_name = evsel__group_pmu_name(cur_leader);
> + cur_leader_pmu_name = cur_leader->group_pmu_name;
> if ((cur_leaders_grp != pos->core.leader && !force_grouped) ||
> strcmp(cur_leader_pmu_name, pos_pmu_name)) {
> /* Event is for a different group/PMU than last. */
> @@ -2239,7 +2276,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> */
> cur_leaders_grp = pos->core.leader;
> }
> - pos_leader_pmu_name = evsel__group_pmu_name(pos_leader);
> + pos_leader_pmu_name = pos_leader->group_pmu_name;
> if (strcmp(pos_leader_pmu_name, pos_pmu_name) || force_grouped) {
> /*
> * Event's PMU differs from its leader's. Groups can't
By the way, do we really need unsorted_idx?
For example what about this for evlist__cmp():
static int evlist__cmp(void *state __maybe_unused, const struct list_head *l, const struct list_head *r)
{
const struct evsel *lhs = container_of(l, struct evsel, core.node);
const struct evsel *rhs = container_of(r, struct evsel, core.node);
int lhs_leader_idx = lhs->core.leader->idx;
int rhs_leader_idx = rhs->core.leader->idx;
int ret;
if (lhs_leader_idx != rhs_leader_idx)
return lhs_leader_idx - rhs_leader_idx;
ret = strcmp(evsel__group_pmu_name(lhs), evsel__group_pmu_name(rhs));
if (ret)
return ret;
/* Architecture specific sorting. */
return arch_evlist__cmp(lhs, rhs);
}
* Re: [PATCH v1 2/2] perf evsel: for_each_group fixes
2023-05-23 4:44 ` [PATCH v1 2/2] perf evsel: for_each_group fixes Ian Rogers
@ 2023-05-23 14:31 ` Adrian Hunter
2023-05-23 17:04 ` Ian Rogers
0 siblings, 1 reply; 8+ messages in thread
From: Adrian Hunter @ 2023-05-23 14:31 UTC (permalink / raw)
To: Ian Rogers, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Kan Liang, Sandipan Das, James Clark, Dmitrii Dolgov, Changbin Du,
Rob Herring, Xing Zhengjun, linux-perf-users, linux-kernel
On 23/05/23 07:44, Ian Rogers wrote:
> Address/memory sanitizer was reporting issues in evsel__group_pmu_name
> because the for_each_group_evsel loop didn't terminate when the list
> head was reached; the head would then be cast and accessed as an evsel,
> leading to invalid memory accesses. Fix for_each_group_member and
> for_each_group_evsel to terminate at the list head. Note,
> evsel__group_pmu_name no longer iterates the group, but the problem is
> present regardless.
>
> Fixes: 717e263fc354 ("perf report: Show group description when event group is enabled")
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/util/evsel.h | 24 ++++++++++++++++--------
> tools/perf/util/evsel_fprintf.c | 1 +
> 2 files changed, 17 insertions(+), 8 deletions(-)
>
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 820771a649b2..6a64543c7612 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -462,16 +462,24 @@ static inline int evsel__group_idx(struct evsel *evsel)
> }
>
> /* Iterates group WITHOUT the leader. */
> -#define for_each_group_member(_evsel, _leader) \
> -for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \
> - (_evsel) && (_evsel)->core.leader == (&_leader->core); \
> - (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
> +#define for_each_group_member_head(_evsel, _leader, _head) \
> +for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \
> + (_evsel) && (&(_evsel)->core.node != (_head)) && \
Extra parentheses perhaps not needed e.g. just
&(_evsel)->core.node != (_head)
> + (_evsel)->core.leader == (&_leader->core); \
Parentheses look odd, maybe should be:
&(_leader)->core
> + (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
> +
> +#define for_each_group_member(_evsel, _leader) \
> + for_each_group_member_head(_evsel, _leader, &(_leader)->evlist->core.entries)
Did you consider using (_leader)->core.nr_members so that it is not
necessary to assume the evlist back pointer is populated?
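E.g. an untested, counter-based sketch (the _n name/argument is just for
illustration, and it assumes nr_members counts the leader itself):

#define for_each_group_member_n(_evsel, _leader, _n)				\
for ((_n) = 1, (_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \
     (_n) < (_leader)->core.nr_members;						\
     (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node), (_n)++)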
>
> /* Iterates group WITH the leader. */
> -#define for_each_group_evsel(_evsel, _leader) \
> -for ((_evsel) = _leader; \
> - (_evsel) && (_evsel)->core.leader == (&_leader->core); \
> - (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
> +#define for_each_group_evsel_head(_evsel, _leader, _head) \
> +for ((_evsel) = _leader; \
> + (_evsel) && (&(_evsel)->core.node != (_head)) && \
> + (_evsel)->core.leader == (&_leader->core); \
> + (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
> +
> +#define for_each_group_evsel(_evsel, _leader) \
> + for_each_group_evsel_head(_evsel, _leader, &(_leader)->evlist->core.entries)
>
> static inline bool evsel__has_branch_callstack(const struct evsel *evsel)
> {
> diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
> index cc80ec554c0a..036a2171dc1c 100644
> --- a/tools/perf/util/evsel_fprintf.c
> +++ b/tools/perf/util/evsel_fprintf.c
> @@ -2,6 +2,7 @@
> #include <inttypes.h>
> #include <stdio.h>
> #include <stdbool.h>
> +#include "util/evlist.h"
> #include "evsel.h"
> #include "util/evsel_fprintf.h"
> #include "util/event.h"
* Re: [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes
2023-05-23 13:00 ` [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes Adrian Hunter
@ 2023-05-23 16:58 ` Ian Rogers
2023-05-29 16:03 ` Adrian Hunter
0 siblings, 1 reply; 8+ messages in thread
From: Ian Rogers @ 2023-05-23 16:58 UTC (permalink / raw)
To: Adrian Hunter
Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Kan Liang, Sandipan Das, James Clark, Dmitrii Dolgov, Changbin Du,
Rob Herring, Xing Zhengjun, linux-perf-users, linux-kernel
On Tue, May 23, 2023 at 6:01 AM Adrian Hunter <adrian.hunter@intel.com> wrote:
>
> On 23/05/23 07:44, Ian Rogers wrote:
> > Previously, evsel__group_pmu_name would iterate the evsel's group;
> > however, the list of evsels isn't yet sorted and so the loop may
> > terminate prematurely. It is also not desirable to iterate the list of
> > evsels during list_sort as the list may be broken. Precompute the
> > group_pmu_name for the evsel before sorting; as part of the
> > computation, and only if necessary, iterate the whole list looking for
> > group members so that the list doesn't need to be sorted first.
> >
> > Move the group pmu name computation to parse-events.c given the closer
> > dependency on the behavior of
> > parse_events__sort_events_and_fix_groups.
> >
> > Fixes: 7abf0bccaaec ("perf evsel: Add function to compute group PMU name")
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > tools/perf/util/evsel.c | 31 +++++-----------------
> > tools/perf/util/evsel.h | 2 +-
> > tools/perf/util/parse-events.c | 47 ++++++++++++++++++++++++++++++----
> > 3 files changed, 50 insertions(+), 30 deletions(-)
> >
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index 2f5910b31fa9..3247773f9e24 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -290,6 +290,7 @@ void evsel__init(struct evsel *evsel,
> > evsel->per_pkg_mask = NULL;
> > evsel->collect_stat = false;
> > evsel->pmu_name = NULL;
> > + evsel->group_pmu_name = NULL;
> > evsel->skippable = false;
> > }
> >
> > @@ -431,6 +432,11 @@ struct evsel *evsel__clone(struct evsel *orig)
> > if (evsel->pmu_name == NULL)
> > goto out_err;
> > }
> > + if (orig->group_pmu_name) {
> > + evsel->group_pmu_name = strdup(orig->group_pmu_name);
> > + if (evsel->group_pmu_name == NULL)
> > + goto out_err;
> > + }
> > if (orig->filter) {
> > evsel->filter = strdup(orig->filter);
> > if (evsel->filter == NULL)
> > @@ -827,30 +833,6 @@ bool evsel__name_is(struct evsel *evsel, const char *name)
> > return !strcmp(evsel__name(evsel), name);
> > }
> >
> > -const char *evsel__group_pmu_name(const struct evsel *evsel)
> > -{
> > - struct evsel *leader = evsel__leader(evsel);
> > - struct evsel *pos;
> > -
> > - /*
> > - * Software events may be in a group with other uncore PMU events. Use
> > - * the pmu_name of the first non-software event to avoid breaking the
> > - * software event out of the group.
> > - *
> > - * Aux event leaders, like intel_pt, expect a group with events from
> > - * other PMUs, so substitute the AUX event's PMU in this case.
> > - */
> > - if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
> > - /* Starting with the leader, find the first event with a named PMU. */
> > - for_each_group_evsel(pos, leader) {
> > - if (pos->pmu_name)
> > - return pos->pmu_name;
> > - }
> > - }
> > -
> > - return evsel->pmu_name ?: "cpu";
> > -}
> > -
> > const char *evsel__metric_id(const struct evsel *evsel)
> > {
> > if (evsel->metric_id)
> > @@ -1536,6 +1518,7 @@ void evsel__exit(struct evsel *evsel)
> > zfree(&evsel->group_name);
> > zfree(&evsel->name);
> > zfree(&evsel->pmu_name);
> > + zfree(&evsel->group_pmu_name);
> > zfree(&evsel->unit);
> > zfree(&evsel->metric_id);
> > evsel__zero_per_pkg(evsel);
> > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> > index df8928745fc6..820771a649b2 100644
> > --- a/tools/perf/util/evsel.h
> > +++ b/tools/perf/util/evsel.h
> > @@ -72,6 +72,7 @@ struct evsel {
> > char *name;
> > char *group_name;
> > const char *pmu_name;
> > + const char *group_pmu_name;
>
> Since it seems to be only used when sorting, do we really
> need this on struct evsel?
Agreed. There is also redundancy between evsel->pmu_name and
evsel->pmu->name. For now having it here makes the coding easier and
it could be useful were we to do more with sorting, like sorting the
final evlist rather than each evlist post parsing, or for debug
output.
> > #ifdef HAVE_LIBTRACEEVENT
> > struct tep_event *tp_format;
> > #endif
> > @@ -289,7 +290,6 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size);
> > int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
> > const char *evsel__name(struct evsel *evsel);
> > bool evsel__name_is(struct evsel *evsel, const char *name);
> > -const char *evsel__group_pmu_name(const struct evsel *evsel);
> > const char *evsel__metric_id(const struct evsel *evsel);
> >
> > static inline bool evsel__is_tool(const struct evsel *evsel)
> > diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> > index 34ba840ae19a..210e6f713c6f 100644
> > --- a/tools/perf/util/parse-events.c
> > +++ b/tools/perf/util/parse-events.c
> > @@ -2125,6 +2125,41 @@ static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state,
> > return ret;
> > }
> >
> > +static void evsel__compute_group_pmu_name(struct evsel *evsel,
> > + const struct list_head *head)
> > +{
> > + struct evsel *leader = evsel__leader(evsel);
> > + struct evsel *pos;
> > +
> > + /*
> > + * Software events may be in a group with other uncore PMU events. Use
> > + * the pmu_name of the first non-software event to avoid breaking the
> > + * software event out of the group.
> > + *
> > + * Aux event leaders, like intel_pt, expect a group with events from
> > + * other PMUs, so substitute the AUX event's PMU in this case.
> > + */
> > + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
> > + /*
> > + * Starting with the leader, find the first event with a named
> > + * PMU. for_each_group_(member|evsel) isn't used as the list
> > + * isn't yet sorted putting evsel's in the same group together.
> > + */
> > + if (leader->pmu_name) {
> > + evsel->group_pmu_name = strdup(leader->pmu_name);
> > + return;
> > + }
> > + list_for_each_entry(pos, head, core.node) {
> > + if (evsel__leader(pos) == leader && pos->pmu_name) {
> > + evsel->group_pmu_name = strdup(pos->pmu_name);
> > + return;
> > + }
> > + }
> > + }
> > +
> > + evsel->group_pmu_name = strdup(evsel->pmu_name ?: "cpu");
> > +}
> > +
> > __weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
> > {
> > /* Order by insertion index. */
> > @@ -2160,8 +2195,8 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list
> >
> > /* Group by PMU if there is a group. Groups can't span PMUs. */
> > if (lhs_has_group && rhs_has_group) {
> > - lhs_pmu_name = evsel__group_pmu_name(lhs);
> > - rhs_pmu_name = evsel__group_pmu_name(rhs);
> > + lhs_pmu_name = lhs->group_pmu_name;
> > + rhs_pmu_name = rhs->group_pmu_name;
> > ret = strcmp(lhs_pmu_name, rhs_pmu_name);
> > if (ret)
> > return ret;
> > @@ -2186,6 +2221,8 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> > list_for_each_entry(pos, list, core.node) {
> > const struct evsel *pos_leader = evsel__leader(pos);
> >
> > + evsel__compute_group_pmu_name(pos, list);
>
> Perhaps check for failing to allocate the string?
Will fix in v2.
> But alternatively, allocate an array of pointers to the
> group pmu names. Then they don't need to be strdup'ed,
> or stored on evsel. Would have to count the number of evsels
> first though.
>
> group_pmu_names = calloc(nr_evsel, sizeof(const char *));
>
> group_pmu_names[pos->core.idx] = evsel__group_pmu_name(pos);
Possibly, I'd prefer to keep it as simple as possible until we think
we should optimize.
> > +
> > if (pos == pos_leader)
> > orig_num_leaders++;
> >
> > @@ -2210,7 +2247,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> > idx = 0;
> > list_for_each_entry(pos, list, core.node) {
> > const struct evsel *pos_leader = evsel__leader(pos);
> > - const char *pos_pmu_name = evsel__group_pmu_name(pos);
> > + const char *pos_pmu_name = pos->group_pmu_name;
> > const char *cur_leader_pmu_name, *pos_leader_pmu_name;
> > bool force_grouped = arch_evsel__must_be_in_group(pos);
> >
> > @@ -2227,7 +2264,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> > if (!cur_leader)
> > cur_leader = pos;
> >
> > - cur_leader_pmu_name = evsel__group_pmu_name(cur_leader);
> > + cur_leader_pmu_name = cur_leader->group_pmu_name;
> > if ((cur_leaders_grp != pos->core.leader && !force_grouped) ||
> > strcmp(cur_leader_pmu_name, pos_pmu_name)) {
> > /* Event is for a different group/PMU than last. */
> > @@ -2239,7 +2276,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> > */
> > cur_leaders_grp = pos->core.leader;
> > }
> > - pos_leader_pmu_name = evsel__group_pmu_name(pos_leader);
> > + pos_leader_pmu_name = pos_leader->group_pmu_name;
> > if (strcmp(pos_leader_pmu_name, pos_pmu_name) || force_grouped) {
> > /*
> > * Event's PMU differs from its leader's. Groups can't
>
> By the way, do we really need unsorted_idx?
Right, I've played around with this a bit. I'll have a go at
explaining the motivation below.
> For example what about this for evlist__cmp():
>
> static int evlist__cmp(void *state __maybe_unused, const struct list_head *l, const struct list_head *r)
> {
> const struct evsel *lhs = container_of(l, struct evsel, core.node);
> const struct evsel *rhs = container_of(r, struct evsel, core.node);
> int lhs_leader_idx = lhs->core.leader->idx;
> int rhs_leader_idx = rhs->core.leader->idx;
> int ret;
>
> if (lhs_leader_idx != rhs_leader_idx)
> return lhs_leader_idx - rhs_leader_idx;
So here any ungrouped evsels, or evsels in different groups will cause
evlist__cmp to terminate.
> ret = strcmp(evsel__group_pmu_name(lhs), evsel__group_pmu_name(rhs));
> if (ret)
> return ret;
This is redundant as the leader matches on both lhs and rhs given the
test above.
> /* Architecture specific sorting. */
> return arch_evlist__cmp(lhs, rhs);
We'll never reach here for cases like ungrouped topdown (perf metric)
events where we want to sort the topdown events to allow grouping. I
think the comment:
/*
* First sort by grouping/leader. Read the leader idx only if the evsel
* is part of a group, as -1 indicates no group.
*/
isn't clear. I'll tweak it in v2; I think it would be better as something like:
/*
* First sort by grouping/leader. Read the leader idx only if the evsel
* is part of a group; by default ungrouped events will be sorted
* relative to grouped events based on where the first ungrouped event
* occurs. If both events don't have a group we want to fall through to
* the arch specific sorting, which can reorder and fix things like
* Intel's topdown events.
*/
To go back to why the code became this way: by default we'll place a
metric's events into a weak group and, if the perf_event_open fails,
we'll break the group. We're smart enough to know, when breaking a
group of events, that the topdown events' group must not be broken.
However, there are cases where the perf_event_open succeeds but then
the counters yield "<not counted>" as they are part of a group. I've
asked for kernel fixes to fail the perf_event_open; maybe they'll
happen, but for now I need to be able to work on old kernels anyway.
Metrics with "<not counted>" events are now tagged to say the
events shouldn't be grouped, but this has the problem of breaking the
grouping for topdown events. We already had to sort events for cases
like "{imc/cas_count_read/,imc/cas_count_write/}" where we have
multiple imc PMUs and we need to group the cas_count_read and
cas_count_write events for each PMU - the eventual grouping looks like
"{uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/},{uncore_imc_1/cas_count_read/,uncore_imc_1/cas_count_write/}...".
The aim with the code was to have a single sorting mechanism for the
existing uncore case and the new ungrouped topdown events case.
Thanks,
Ian
> }
>
* Re: [PATCH v1 2/2] perf evsel: for_each_group fixes
2023-05-23 14:31 ` Adrian Hunter
@ 2023-05-23 17:04 ` Ian Rogers
0 siblings, 0 replies; 8+ messages in thread
From: Ian Rogers @ 2023-05-23 17:04 UTC (permalink / raw)
To: Adrian Hunter
Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Kan Liang, Sandipan Das, James Clark, Dmitrii Dolgov, Changbin Du,
Rob Herring, Xing Zhengjun, linux-perf-users, linux-kernel
On Tue, May 23, 2023 at 7:33 AM Adrian Hunter <adrian.hunter@intel.com> wrote:
>
> On 23/05/23 07:44, Ian Rogers wrote:
> > Address/memory sanitizer was reporting issues in evsel__group_pmu_name
> > because the for_each_group_evsel loop didn't terminate when the list
> > head was reached; the head would then be cast and accessed as an evsel,
> > leading to invalid memory accesses. Fix for_each_group_member and
> > for_each_group_evsel to terminate at the list head. Note,
> > evsel__group_pmu_name no longer iterates the group, but the problem is
> > present regardless.
> >
> > Fixes: 717e263fc354 ("perf report: Show group description when event group is enabled")
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > tools/perf/util/evsel.h | 24 ++++++++++++++++--------
> > tools/perf/util/evsel_fprintf.c | 1 +
> > 2 files changed, 17 insertions(+), 8 deletions(-)
> >
> > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> > index 820771a649b2..6a64543c7612 100644
> > --- a/tools/perf/util/evsel.h
> > +++ b/tools/perf/util/evsel.h
> > @@ -462,16 +462,24 @@ static inline int evsel__group_idx(struct evsel *evsel)
> > }
> >
> > /* Iterates group WITHOUT the leader. */
> > -#define for_each_group_member(_evsel, _leader) \
> > -for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \
> > - (_evsel) && (_evsel)->core.leader == (&_leader->core); \
> > - (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
> > +#define for_each_group_member_head(_evsel, _leader, _head) \
> > +for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \
> > + (_evsel) && (&(_evsel)->core.node != (_head)) && \
>
> Extra parentheses perhaps not needed e.g. just
>
> &(_evsel)->core.node != (_head)
>
> > + (_evsel)->core.leader == (&_leader->core); \
>
> Parentheses look odd, maybe should be:
>
> &(_leader)->core
>
> > + (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
> > +
> > +#define for_each_group_member(_evsel, _leader) \
> > + for_each_group_member_head(_evsel, _leader, &(_leader)->evlist->core.entries)
>
> Did you consider using (_leader)->core.nr_members so that it is not
> necessary to assume the evlist back pointer is populated?
Thanks! I'll address the other comments in v2. Wrt
nr_members/evsel->evlist, I think we can use it to avoid the whole
list scan in evsel__compute_group_pmu_name. We could use it here, but
in all the non-parse_events__sort_events_and_fix_groups cases we have
the evsel->evlist, and testing for the list head seems more consistent
with other list_for_each cases.
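E.g. something along these lines could bound the scan there (untested
sketch; it assumes the leader's core.nr_members is set by this point and
counts the leader itself):

	int nr = 0;

	list_for_each_entry(pos, head, core.node) {
		if (evsel__leader(pos) != leader)
			continue;
		if (pos->pmu_name) {
			evsel->group_pmu_name = strdup(pos->pmu_name);
			return;
		}
		/* Every member of the group has been seen, none had a named PMU. */
		if (++nr >= leader->core.nr_members)
			break;
	}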
Thanks,
Ian
> >
> > /* Iterates group WITH the leader. */
> > -#define for_each_group_evsel(_evsel, _leader) \
> > -for ((_evsel) = _leader; \
> > - (_evsel) && (_evsel)->core.leader == (&_leader->core); \
> > - (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
> > +#define for_each_group_evsel_head(_evsel, _leader, _head) \
> > +for ((_evsel) = _leader; \
> > + (_evsel) && (&(_evsel)->core.node != (_head)) && \
> > + (_evsel)->core.leader == (&_leader->core); \
> > + (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
> > +
> > +#define for_each_group_evsel(_evsel, _leader) \
> > + for_each_group_evsel_head(_evsel, _leader, &(_leader)->evlist->core.entries)
> >
> > static inline bool evsel__has_branch_callstack(const struct evsel *evsel)
> > {
> > diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
> > index cc80ec554c0a..036a2171dc1c 100644
> > --- a/tools/perf/util/evsel_fprintf.c
> > +++ b/tools/perf/util/evsel_fprintf.c
> > @@ -2,6 +2,7 @@
> > #include <inttypes.h>
> > #include <stdio.h>
> > #include <stdbool.h>
> > +#include "util/evlist.h"
> > #include "evsel.h"
> > #include "util/evsel_fprintf.h"
> > #include "util/event.h"
>
* Re: [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes
2023-05-23 16:58 ` Ian Rogers
@ 2023-05-29 16:03 ` Adrian Hunter
2023-05-29 16:16 ` Ian Rogers
0 siblings, 1 reply; 8+ messages in thread
From: Adrian Hunter @ 2023-05-29 16:03 UTC (permalink / raw)
To: Ian Rogers
Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Kan Liang, Sandipan Das, James Clark, Dmitrii Dolgov, Changbin Du,
Rob Herring, Xing Zhengjun, linux-perf-users, linux-kernel
On 23/05/23 19:58, Ian Rogers wrote:
> On Tue, May 23, 2023 at 6:01 AM Adrian Hunter <adrian.hunter@intel.com> wrote:
>>
>> On 23/05/23 07:44, Ian Rogers wrote:
>>> Previously, evsel__group_pmu_name would iterate the evsel's group;
>>> however, the list of evsels isn't yet sorted and so the loop may
>>> terminate prematurely. It is also not desirable to iterate the list of
>>> evsels during list_sort as the list may be broken. Precompute the
>>> group_pmu_name for the evsel before sorting; as part of the
>>> computation, and only if necessary, iterate the whole list looking for
>>> group members so that the list doesn't need to be sorted first.
>>>
>>> Move the group pmu name computation to parse-events.c given the closer
>>> dependency on the behavior of
>>> parse_events__sort_events_and_fix_groups.
>>>
>>> Fixes: 7abf0bccaaec ("perf evsel: Add function to compute group PMU name")
>>> Signed-off-by: Ian Rogers <irogers@google.com>
>>> ---
>>> tools/perf/util/evsel.c | 31 +++++-----------------
>>> tools/perf/util/evsel.h | 2 +-
>>> tools/perf/util/parse-events.c | 47 ++++++++++++++++++++++++++++++----
>>> 3 files changed, 50 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>>> index 2f5910b31fa9..3247773f9e24 100644
>>> --- a/tools/perf/util/evsel.c
>>> +++ b/tools/perf/util/evsel.c
>>> @@ -290,6 +290,7 @@ void evsel__init(struct evsel *evsel,
>>> evsel->per_pkg_mask = NULL;
>>> evsel->collect_stat = false;
>>> evsel->pmu_name = NULL;
>>> + evsel->group_pmu_name = NULL;
>>> evsel->skippable = false;
>>> }
>>>
>>> @@ -431,6 +432,11 @@ struct evsel *evsel__clone(struct evsel *orig)
>>> if (evsel->pmu_name == NULL)
>>> goto out_err;
>>> }
>>> + if (orig->group_pmu_name) {
>>> + evsel->group_pmu_name = strdup(orig->group_pmu_name);
>>> + if (evsel->group_pmu_name == NULL)
>>> + goto out_err;
>>> + }
>>> if (orig->filter) {
>>> evsel->filter = strdup(orig->filter);
>>> if (evsel->filter == NULL)
>>> @@ -827,30 +833,6 @@ bool evsel__name_is(struct evsel *evsel, const char *name)
>>> return !strcmp(evsel__name(evsel), name);
>>> }
>>>
>>> -const char *evsel__group_pmu_name(const struct evsel *evsel)
>>> -{
>>> - struct evsel *leader = evsel__leader(evsel);
>>> - struct evsel *pos;
>>> -
>>> - /*
>>> - * Software events may be in a group with other uncore PMU events. Use
>>> - * the pmu_name of the first non-software event to avoid breaking the
>>> - * software event out of the group.
>>> - *
>>> - * Aux event leaders, like intel_pt, expect a group with events from
>>> - * other PMUs, so substitute the AUX event's PMU in this case.
>>> - */
>>> - if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
>>> - /* Starting with the leader, find the first event with a named PMU. */
>>> - for_each_group_evsel(pos, leader) {
>>> - if (pos->pmu_name)
>>> - return pos->pmu_name;
>>> - }
>>> - }
>>> -
>>> - return evsel->pmu_name ?: "cpu";
>>> -}
>>> -
>>> const char *evsel__metric_id(const struct evsel *evsel)
>>> {
>>> if (evsel->metric_id)
>>> @@ -1536,6 +1518,7 @@ void evsel__exit(struct evsel *evsel)
>>> zfree(&evsel->group_name);
>>> zfree(&evsel->name);
>>> zfree(&evsel->pmu_name);
>>> + zfree(&evsel->group_pmu_name);
>>> zfree(&evsel->unit);
>>> zfree(&evsel->metric_id);
>>> evsel__zero_per_pkg(evsel);
>>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>>> index df8928745fc6..820771a649b2 100644
>>> --- a/tools/perf/util/evsel.h
>>> +++ b/tools/perf/util/evsel.h
>>> @@ -72,6 +72,7 @@ struct evsel {
>>> char *name;
>>> char *group_name;
>>> const char *pmu_name;
>>> + const char *group_pmu_name;
>>
>> Since it seems to be only used when sorting, do we really
>> need this on struct evsel?
>
> Agreed. There is also redundancy between evsel->pmu_name and
> evsel->pmu->name. For now having it here makes the coding easier and
> it could be useful were we to do more with sorting, like sorting the
> final evlist rather than each evlist post parsing, or for debug
> output.
>
>>> #ifdef HAVE_LIBTRACEEVENT
>>> struct tep_event *tp_format;
>>> #endif
>>> @@ -289,7 +290,6 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size);
>>> int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
>>> const char *evsel__name(struct evsel *evsel);
>>> bool evsel__name_is(struct evsel *evsel, const char *name);
>>> -const char *evsel__group_pmu_name(const struct evsel *evsel);
>>> const char *evsel__metric_id(const struct evsel *evsel);
>>>
>>> static inline bool evsel__is_tool(const struct evsel *evsel)
>>> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
>>> index 34ba840ae19a..210e6f713c6f 100644
>>> --- a/tools/perf/util/parse-events.c
>>> +++ b/tools/perf/util/parse-events.c
>>> @@ -2125,6 +2125,41 @@ static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state,
>>> return ret;
>>> }
>>>
>>> +static void evsel__compute_group_pmu_name(struct evsel *evsel,
>>> + const struct list_head *head)
>>> +{
>>> + struct evsel *leader = evsel__leader(evsel);
>>> + struct evsel *pos;
>>> +
>>> + /*
>>> + * Software events may be in a group with other uncore PMU events. Use
>>> + * the pmu_name of the first non-software event to avoid breaking the
>>> + * software event out of the group.
>>> + *
>>> + * Aux event leaders, like intel_pt, expect a group with events from
>>> + * other PMUs, so substitute the AUX event's PMU in this case.
>>> + */
>>> + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
>>> + /*
>>> + * Starting with the leader, find the first event with a named
>>> + * PMU. for_each_group_(member|evsel) isn't used as the list
>>> + * isn't yet sorted putting evsel's in the same group together.
>>> + */
>>> + if (leader->pmu_name) {
>>> + evsel->group_pmu_name = strdup(leader->pmu_name);
>>> + return;
>>> + }
>>> + list_for_each_entry(pos, head, core.node) {
>>> + if (evsel__leader(pos) == leader && pos->pmu_name) {
>>> + evsel->group_pmu_name = strdup(pos->pmu_name);
>>> + return;
>>> + }
>>> + }
>>> + }
>>> +
>>> + evsel->group_pmu_name = strdup(evsel->pmu_name ?: "cpu");
>>> +}
>>> +
>>> __weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
>>> {
>>> /* Order by insertion index. */
>>> @@ -2160,8 +2195,8 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list
>>>
>>> /* Group by PMU if there is a group. Groups can't span PMUs. */
>>> if (lhs_has_group && rhs_has_group) {
>>> - lhs_pmu_name = evsel__group_pmu_name(lhs);
>>> - rhs_pmu_name = evsel__group_pmu_name(rhs);
>>> + lhs_pmu_name = lhs->group_pmu_name;
>>> + rhs_pmu_name = rhs->group_pmu_name;
>>> ret = strcmp(lhs_pmu_name, rhs_pmu_name);
>>> if (ret)
>>> return ret;
>>> @@ -2186,6 +2221,8 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
>>> list_for_each_entry(pos, list, core.node) {
>>> const struct evsel *pos_leader = evsel__leader(pos);
>>>
>>> + evsel__compute_group_pmu_name(pos, list);
>>
>> Perhaps check for failing to allocate the string?
>
> Will fix in v2.
>
>> But alternatively, allocate an array of pointers to the
>> group pmu names. Then they don't need to be strdup'ed,
>> or stored on evsel. Would have to count the number of evsels
>> first though.
>>
>> group_pmu_names = calloc(nr_evsel, sizeof(const char *));
>>
>> group_pmu_names[pos->core.idx] = evsel__group_pmu_name(pos);
>
> Possibly, I'd prefer to keep it as simple as possible until we think
> we should optimize.
>
>>> +
>>> if (pos == pos_leader)
>>> orig_num_leaders++;
>>>
>>> @@ -2210,7 +2247,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
>>> idx = 0;
>>> list_for_each_entry(pos, list, core.node) {
>>> const struct evsel *pos_leader = evsel__leader(pos);
>>> - const char *pos_pmu_name = evsel__group_pmu_name(pos);
>>> + const char *pos_pmu_name = pos->group_pmu_name;
>>> const char *cur_leader_pmu_name, *pos_leader_pmu_name;
>>> bool force_grouped = arch_evsel__must_be_in_group(pos);
>>>
>>> @@ -2227,7 +2264,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
>>> if (!cur_leader)
>>> cur_leader = pos;
>>>
>>> - cur_leader_pmu_name = evsel__group_pmu_name(cur_leader);
>>> + cur_leader_pmu_name = cur_leader->group_pmu_name;
>>> if ((cur_leaders_grp != pos->core.leader && !force_grouped) ||
>>> strcmp(cur_leader_pmu_name, pos_pmu_name)) {
>>> /* Event is for a different group/PMU than last. */
>>> @@ -2239,7 +2276,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
>>> */
>>> cur_leaders_grp = pos->core.leader;
>>> }
>>> - pos_leader_pmu_name = evsel__group_pmu_name(pos_leader);
>>> + pos_leader_pmu_name = pos_leader->group_pmu_name;
>>> if (strcmp(pos_leader_pmu_name, pos_pmu_name) || force_grouped) {
>>> /*
>>> * Event's PMU differs from its leader's. Groups can't
>>
>> By the way, do we really need unsorted_idx?
>
> Right, I've played around with this a bit. I'll have a go at
> explaining the motivation below.
>
>> For example what about this for evlist__cmp():
>>
>> static int evlist__cmp(void *state __maybe_unused, const struct list_head *l, const struct list_head *r)
>> {
>> const struct evsel *lhs = container_of(l, struct evsel, core.node);
>> const struct evsel *rhs = container_of(r, struct evsel, core.node);
>> int lhs_leader_idx = lhs->core.leader->idx;
>> int rhs_leader_idx = rhs->core.leader->idx;
>> int ret;
>>
>> if (lhs_leader_idx != rhs_leader_idx)
>> return lhs_leader_idx - rhs_leader_idx;
>
> So here any ungrouped evsels, or evsels in different groups will cause
> evlist__cmp to terminate.
>
>> ret = strcmp(evsel__group_pmu_name(lhs), evsel__group_pmu_name(rhs));
>> if (ret)
>> return ret;
>
> This is redundant as the leader matches on both lhs and rhs given the
> test above.
>
>> /* Architecture specific sorting. */
>> return arch_evlist__cmp(lhs, rhs);
>
> We'll never reach here for cases like ungrouped topdown (perf metric)
> events where we want to sort the topdown events to allow grouping. I
> think the comment:
>
> /*
> * First sort by grouping/leader. Read the leader idx only if the evsel
> * is part of a group, as -1 indicates no group.
> */
>
> isn't clear. I'll tweak it in v2; I think it would be better as something like:
>
> /*
> * First sort by grouping/leader. Read the leader idx only if the evsel
> * is part of a group; by default ungrouped events will be sorted
> * relative to grouped events based on where the first ungrouped event
> * occurs. If both events don't have a group we want to fall through to
> * the arch specific sorting, which can reorder and fix things like
> * Intel's topdown events.
> */
>
> To go back to why the code became this way: by default we'll place a
> metric's events into a weak group and, if the perf_event_open fails,
> we'll break the group. We're smart enough to know, when breaking a
> group of events, that the topdown events' group must not be broken.
> However, there are cases where the perf_event_open succeeds but then
> the counters yield "<not counted>" as they are part of a group. I've
> asked for kernel fixes to fail the perf_event_open; maybe they'll
> happen, but for now I need to be able to work on old kernels anyway.
> Metrics with "<not counted>" events are now tagged to say the
> events shouldn't be grouped, but this has the problem of breaking the
> grouping for topdown events. We already had to sort events for cases
> like "{imc/cas_count_read/,imc/cas_count_write/}" where we have
> multiple imc PMUs and we need to group the cas_count_read and
> cas_count_write events for each PMU - the eventual grouping looks like
> "{uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/},{uncore_imc_1/cas_count_read/,uncore_imc_1/cas_count_write/}...".
> The aim with the code was to have a single sorting mechanism for the
> existing uncore case and the new ungrouped topdown events case.
Are there any tests for that?
* Re: [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes
2023-05-29 16:03 ` Adrian Hunter
@ 2023-05-29 16:16 ` Ian Rogers
0 siblings, 0 replies; 8+ messages in thread
From: Ian Rogers @ 2023-05-29 16:16 UTC (permalink / raw)
To: Adrian Hunter, Wang, Weilin
Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
Kan Liang, Sandipan Das, James Clark, Dmitrii Dolgov, Changbin Du,
Rob Herring, Xing Zhengjun, linux-perf-users, linux-kernel
On Mon, May 29, 2023 at 9:03 AM Adrian Hunter <adrian.hunter@intel.com> wrote:
>
> On 23/05/23 19:58, Ian Rogers wrote:
> > On Tue, May 23, 2023 at 6:01 AM Adrian Hunter <adrian.hunter@intel.com> wrote:
> >>
> >> On 23/05/23 07:44, Ian Rogers wrote:
> >>> Previously, evsel__group_pmu_name would iterate the evsel's group;
> >>> however, the list of evsels isn't yet sorted and so the loop may
> >>> terminate prematurely. It is also not desirable to iterate the list of
> >>> evsels during list_sort as the list may be broken. Precompute the
> >>> group_pmu_name for the evsel before sorting; as part of the
> >>> computation, and only if necessary, iterate the whole list looking for
> >>> group members so that the list doesn't need to be sorted first.
> >>>
> >>> Move the group pmu name computation to parse-events.c given the closer
> >>> dependency on the behavior of
> >>> parse_events__sort_events_and_fix_groups.
> >>>
> >>> Fixes: 7abf0bccaaec ("perf evsel: Add function to compute group PMU name")
> >>> Signed-off-by: Ian Rogers <irogers@google.com>
> >>> ---
> >>> tools/perf/util/evsel.c | 31 +++++-----------------
> >>> tools/perf/util/evsel.h | 2 +-
> >>> tools/perf/util/parse-events.c | 47 ++++++++++++++++++++++++++++++----
> >>> 3 files changed, 50 insertions(+), 30 deletions(-)
> >>>
> >>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> >>> index 2f5910b31fa9..3247773f9e24 100644
> >>> --- a/tools/perf/util/evsel.c
> >>> +++ b/tools/perf/util/evsel.c
> >>> @@ -290,6 +290,7 @@ void evsel__init(struct evsel *evsel,
> >>> evsel->per_pkg_mask = NULL;
> >>> evsel->collect_stat = false;
> >>> evsel->pmu_name = NULL;
> >>> + evsel->group_pmu_name = NULL;
> >>> evsel->skippable = false;
> >>> }
> >>>
> >>> @@ -431,6 +432,11 @@ struct evsel *evsel__clone(struct evsel *orig)
> >>> if (evsel->pmu_name == NULL)
> >>> goto out_err;
> >>> }
> >>> + if (orig->group_pmu_name) {
> >>> + evsel->group_pmu_name = strdup(orig->group_pmu_name);
> >>> + if (evsel->group_pmu_name == NULL)
> >>> + goto out_err;
> >>> + }
> >>> if (orig->filter) {
> >>> evsel->filter = strdup(orig->filter);
> >>> if (evsel->filter == NULL)
> >>> @@ -827,30 +833,6 @@ bool evsel__name_is(struct evsel *evsel, const char *name)
> >>> return !strcmp(evsel__name(evsel), name);
> >>> }
> >>>
> >>> -const char *evsel__group_pmu_name(const struct evsel *evsel)
> >>> -{
> >>> - struct evsel *leader = evsel__leader(evsel);
> >>> - struct evsel *pos;
> >>> -
> >>> - /*
> >>> - * Software events may be in a group with other uncore PMU events. Use
> >>> - * the pmu_name of the first non-software event to avoid breaking the
> >>> - * software event out of the group.
> >>> - *
> >>> - * Aux event leaders, like intel_pt, expect a group with events from
> >>> - * other PMUs, so substitute the AUX event's PMU in this case.
> >>> - */
> >>> - if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
> >>> - /* Starting with the leader, find the first event with a named PMU. */
> >>> - for_each_group_evsel(pos, leader) {
> >>> - if (pos->pmu_name)
> >>> - return pos->pmu_name;
> >>> - }
> >>> - }
> >>> -
> >>> - return evsel->pmu_name ?: "cpu";
> >>> -}
> >>> -
> >>> const char *evsel__metric_id(const struct evsel *evsel)
> >>> {
> >>> if (evsel->metric_id)
> >>> @@ -1536,6 +1518,7 @@ void evsel__exit(struct evsel *evsel)
> >>> zfree(&evsel->group_name);
> >>> zfree(&evsel->name);
> >>> zfree(&evsel->pmu_name);
> >>> + zfree(&evsel->group_pmu_name);
> >>> zfree(&evsel->unit);
> >>> zfree(&evsel->metric_id);
> >>> evsel__zero_per_pkg(evsel);
> >>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> >>> index df8928745fc6..820771a649b2 100644
> >>> --- a/tools/perf/util/evsel.h
> >>> +++ b/tools/perf/util/evsel.h
> >>> @@ -72,6 +72,7 @@ struct evsel {
> >>> char *name;
> >>> char *group_name;
> >>> const char *pmu_name;
> >>> + const char *group_pmu_name;
> >>
> >> Since it seems to be only used when sorting, do we really
> >> need this on struct evsel?
> >
> > Agreed. There is also redundancy between evsel->pmu_name and
> > evsel->pmu->name. For now having it here makes the coding easier and
> > it could be useful were we to do more with sorting, like sorting the
> > final evlist rather than each evlist post parsing, or for debug
> > output.
> >
> >>> #ifdef HAVE_LIBTRACEEVENT
> >>> struct tep_event *tp_format;
> >>> #endif
> >>> @@ -289,7 +290,6 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size);
> >>> int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
> >>> const char *evsel__name(struct evsel *evsel);
> >>> bool evsel__name_is(struct evsel *evsel, const char *name);
> >>> -const char *evsel__group_pmu_name(const struct evsel *evsel);
> >>> const char *evsel__metric_id(const struct evsel *evsel);
> >>>
> >>> static inline bool evsel__is_tool(const struct evsel *evsel)
> >>> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> >>> index 34ba840ae19a..210e6f713c6f 100644
> >>> --- a/tools/perf/util/parse-events.c
> >>> +++ b/tools/perf/util/parse-events.c
> >>> @@ -2125,6 +2125,41 @@ static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state,
> >>> return ret;
> >>> }
> >>>
> >>> +static void evsel__compute_group_pmu_name(struct evsel *evsel,
> >>> + const struct list_head *head)
> >>> +{
> >>> + struct evsel *leader = evsel__leader(evsel);
> >>> + struct evsel *pos;
> >>> +
> >>> + /*
> >>> + * Software events may be in a group with other uncore PMU events. Use
> >>> + * the pmu_name of the first non-software event to avoid breaking the
> >>> + * software event out of the group.
> >>> + *
> >>> + * Aux event leaders, like intel_pt, expect a group with events from
> >>> + * other PMUs, so substitute the AUX event's PMU in this case.
> >>> + */
> >>> + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
> >>> + /*
> >>> + * Starting with the leader, find the first event with a named
> >>> + * PMU. for_each_group_(member|evsel) isn't used as the list
> >>> + * isn't yet sorted putting evsel's in the same group together.
> >>> + */
> >>> + if (leader->pmu_name) {
> >>> + evsel->group_pmu_name = strdup(leader->pmu_name);
> >>> + return;
> >>> + }
> >>> + list_for_each_entry(pos, head, core.node) {
> >>> + if (evsel__leader(pos) == leader && pos->pmu_name) {
> >>> + evsel->group_pmu_name = strdup(pos->pmu_name);
> >>> + return;
> >>> + }
> >>> + }
> >>> + }
> >>> +
> >>> + evsel->group_pmu_name = strdup(evsel->pmu_name ?: "cpu");
> >>> +}
> >>> +
> >>> __weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
> >>> {
> >>> /* Order by insertion index. */
> >>> @@ -2160,8 +2195,8 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list
> >>>
> >>> /* Group by PMU if there is a group. Groups can't span PMUs. */
> >>> if (lhs_has_group && rhs_has_group) {
> >>> - lhs_pmu_name = evsel__group_pmu_name(lhs);
> >>> - rhs_pmu_name = evsel__group_pmu_name(rhs);
> >>> + lhs_pmu_name = lhs->group_pmu_name;
> >>> + rhs_pmu_name = rhs->group_pmu_name;
> >>> ret = strcmp(lhs_pmu_name, rhs_pmu_name);
> >>> if (ret)
> >>> return ret;
> >>> @@ -2186,6 +2221,8 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> >>> list_for_each_entry(pos, list, core.node) {
> >>> const struct evsel *pos_leader = evsel__leader(pos);
> >>>
> >>> + evsel__compute_group_pmu_name(pos, list);
> >>
> >> Perhaps check for failing to allocate the string?
> >
> > Will fix in v2.
> >
> >> But alternatively, allocate an array for pointers to the
> >> group pmu names. Then they don't need to be strdup'ed,
> >> or stored on evsel. Would have to count the number of evsels
> >> first though.
> >>
> >> group_pmu_names = calloc(nr_evsel, sizeof(const char *));
> >>
> >> group_pmu_names[pos->core.idx] = evsel__group_pmu_name(pos);
> >
> > Possibly, I'd prefer to keep it as simple as possible until we think
> > we should optimize.
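For reference, a minimal sketch of the array-based alternative Adrian
describes might look like the following. It is an illustration only:
compute_group_pmu_name() is a hypothetical helper standing in for the
patch's evsel__compute_group_pmu_name() (returning the name instead of
storing it on the evsel), and it assumes evsel->core.idx values are
dense within the list, as in Adrian's example.

/* Sketch only, not code from the patch; assumes the perf util headers. */
#include <errno.h>
#include <stdlib.h>
#include <linux/list.h>
#include "util/evsel.h"

/* Hypothetical helper, not part of the patch. */
static const char *compute_group_pmu_name(struct evsel *evsel,
					  const struct list_head *head);

static int precompute_group_pmu_names(struct list_head *list,
				      const char ***names_out)
{
	const char **group_pmu_names;
	struct evsel *pos;
	int nr_evsel = 0;

	/* First pass: count the evsels so the array can be sized. */
	list_for_each_entry(pos, list, core.node)
		nr_evsel++;

	group_pmu_names = calloc(nr_evsel, sizeof(const char *));
	if (!group_pmu_names)
		return -ENOMEM;

	/* Second pass: record each evsel's group PMU name by core.idx. */
	list_for_each_entry(pos, list, core.node)
		group_pmu_names[pos->core.idx] = compute_group_pmu_name(pos, list);

	*names_out = group_pmu_names;
	return 0;
}

The comparator could then look the names up through list_sort()'s state
pointer rather than dereferencing the evsel, at the cost of the extra
counting pass and a free() after sorting.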
> >
> >>> +
> >>> if (pos == pos_leader)
> >>> orig_num_leaders++;
> >>>
> >>> @@ -2210,7 +2247,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> >>> idx = 0;
> >>> list_for_each_entry(pos, list, core.node) {
> >>> const struct evsel *pos_leader = evsel__leader(pos);
> >>> - const char *pos_pmu_name = evsel__group_pmu_name(pos);
> >>> + const char *pos_pmu_name = pos->group_pmu_name;
> >>> const char *cur_leader_pmu_name, *pos_leader_pmu_name;
> >>> bool force_grouped = arch_evsel__must_be_in_group(pos);
> >>>
> >>> @@ -2227,7 +2264,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> >>> if (!cur_leader)
> >>> cur_leader = pos;
> >>>
> >>> - cur_leader_pmu_name = evsel__group_pmu_name(cur_leader);
> >>> + cur_leader_pmu_name = cur_leader->group_pmu_name;
> >>> if ((cur_leaders_grp != pos->core.leader && !force_grouped) ||
> >>> strcmp(cur_leader_pmu_name, pos_pmu_name)) {
> >>> /* Event is for a different group/PMU than last. */
> >>> @@ -2239,7 +2276,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list)
> >>> */
> >>> cur_leaders_grp = pos->core.leader;
> >>> }
> >>> - pos_leader_pmu_name = evsel__group_pmu_name(pos_leader);
> >>> + pos_leader_pmu_name = pos_leader->group_pmu_name;
> >>> if (strcmp(pos_leader_pmu_name, pos_pmu_name) || force_grouped) {
> >>> /*
> >>> * Event's PMU differs from its leader's. Groups can't
> >>
> >> By the way, do we really need unsorted_idx?
> >
> > Right, I've played around with this a bit. I'll have a go at
> > explaining the motivation below.
> >
> >> For example what about this for evlist__cmp():
> >>
> >> static int evlist__cmp(void *state __maybe_unused, const struct list_head *l, const struct list_head *r)
> >> {
> >> const struct evsel *lhs = container_of(l, struct evsel, core.node);
> >> const struct evsel *rhs = container_of(r, struct evsel, core.node);
> >> int lhs_leader_idx = lhs->core.leader->idx;
> >> int rhs_leader_idx = rhs->core.leader->idx;
> >> int ret;
> >>
> >> if (lhs_leader_idx != rhs_leader_idx)
> >> return lhs_leader_idx - rhs_leader_idx;
> >
> > So here any ungrouped evsels, or evsels in different groups, will cause
> > evlist__cmp to return here.
> >
> >> ret = strcmp(evsel__group_pmu_name(lhs), evsel__group_pmu_name(rhs));
> >> if (ret)
> >> return ret;
> >
> > This is redundant as the leader matches on both lhs and rhs given the
> > test above.
> >
> >> /* Architecture specific sorting. */
> >> return arch_evlist__cmp(lhs, rhs);
> >
> > We'll never reach here for cases like ungrouped topdown (perf metric)
> > events where we want to sort the topdown events to allow grouping. I
> > think the comment:
> >
> > /*
> > * First sort by grouping/leader. Read the leader idx only if the evsel
> > * is part of a group, as -1 indicates no group.
> > */
> >
> > isn't clear. I'll tweak it in v2; I think it would be better as something like:
> >
> > /*
> > * First sort by grouping/leader. Read the leader idx only if the evsel
> > * is part of a group; by default, ungrouped events are sorted relative
> > * to grouped events based on where the first ungrouped event occurs.
> > * If neither event has a group, fall through to the arch-specific
> > * sorting, which can reorder and fix things like Intel's topdown
> > * events.
> > */
> >
> > To go back to why the code became this way: by default we'll place a
> > metric's events into a weak group and, if the perf_event_open fails,
> > we'll break the group. When breaking a group of events we're smart
> > enough to know that the topdown events' group must not be broken.
> > However, there are cases where the perf_event_open succeeds but the
> > counters then yield "<not counted>" because they are part of a group.
> > I've asked for kernel fixes to fail the perf_event_open; maybe they'll
> > happen, but for now I need to be able to work on old kernels anyway.
> > Metrics with "<not counted>" events are now tagged to say their events
> > shouldn't be grouped, but this risks breaking the grouping for topdown
> > events. We already had to sort events for cases
> > like "{imc/cas_count_read/,imc/cas_count_write/}" where we have
> > multiple imc PMUs and we need to group cas_count_read and
> > cas_count_write events for each PMU - the eventual grouping looks like
> > "{uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/},{uncore_imc_1/cas_count_read/,uncore_imc_1/cas_count_write/}...".
> > The aim with the code was to have a single sorting mechanism for the
> > existing uncore case and the new ungrouped topdown events case.
>
> Are there any tests for that?
Not specifically. The parse-events tests don't test it as we don't yet
have a way to force the sysfs layout for a test. The attribute test
has some coverage of topdown events in the default case, because the
TopdownL1 metrics are parsed. The metric test checks that the metrics
work, which involves topdown and uncore event regrouping, but it
considers any output valid. Weilin is working to improve this.
Fwiw, the original issue that sparked this fix was the parse-events
tests, but only when built with clang at optimization level -O1.
Thanks,
Ian
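To make the coverage gap above concrete, a dedicated regrouping test
might look roughly like the sketch below. This is an assumption, not an
existing perf test: the test name is made up, the check relies on the
group_pmu_name field added by this patch, and it is only meaningful on a
machine whose sysfs exposes the imc uncore PMUs, which is exactly the
layout-forcing problem described above.

/* Sketch only, not an existing perf test; assumes imc PMUs are present. */
#include <errno.h>
#include <string.h>
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/parse-events.h"

static int test__imc_regrouping(void)
{
	struct evlist *evlist = evlist__new();
	struct parse_events_error err;
	struct evsel *pos;
	int ret;

	if (!evlist)
		return -ENOMEM;

	parse_events_error__init(&err);
	ret = parse_events(evlist, "{imc/cas_count_read/,imc/cas_count_write/}", &err);
	parse_events_error__exit(&err);
	if (ret)
		goto out;

	/* After sorting/regrouping, each event should share its leader's group PMU. */
	evlist__for_each_entry(evlist, pos) {
		struct evsel *leader = evsel__leader(pos);

		if (strcmp(pos->group_pmu_name, leader->group_pmu_name)) {
			ret = -1;
			break;
		}
	}
out:
	evlist__delete(evlist);
	return ret;
}

A stronger version would also count the resulting groups against the
number of uncore_imc PMUs in sysfs, but that again needs a way to force
or discover the sysfs layout from the test.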
Thread overview: 8+ messages
2023-05-23 4:44 [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes Ian Rogers
2023-05-23 4:44 ` [PATCH v1 2/2] perf evsel: for_each_group fixes Ian Rogers
2023-05-23 14:31 ` Adrian Hunter
2023-05-23 17:04 ` Ian Rogers
2023-05-23 13:00 ` [PATCH v1 1/2] perf evsel: evsel__group_pmu_name fixes Adrian Hunter
2023-05-23 16:58 ` Ian Rogers
2023-05-29 16:03 ` Adrian Hunter
2023-05-29 16:16 ` Ian Rogers