linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1 1/2] perf tools: Improve handling of hybrid PMUs in perf_event_attr__fprintf
@ 2025-03-05  8:37 Ian Rogers
  2025-03-05  8:37 ` [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting Ian Rogers
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Ian Rogers @ 2025-03-05  8:37 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Kan Liang, James Clark,
	Dominique Martinet, Andi Kleen, linux-perf-users, linux-kernel,
	Dapeng Mi, Thomas Falcon

Support the PMU name from the legacy hardware and hw_cache PMU
extended types.  Remove some macros and make variables more
intention-revealing, rather than just being called "value".

Before:
```
$ perf stat -vv -e instructions true
...
------------------------------------------------------------
perf_event_attr:
  type                             0 (PERF_TYPE_HARDWARE)
  size                             136
  config                           0xa00000001
  sample_type                      IDENTIFIER
  read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
  disabled                         1
  inherit                          1
  enable_on_exec                   1
  exclude_guest                    1
------------------------------------------------------------
sys_perf_event_open: pid 181636  cpu -1  group_fd -1  flags 0x8 = 5
------------------------------------------------------------
perf_event_attr:
  type                             0 (PERF_TYPE_HARDWARE)
  size                             136
  config                           0x400000001
  sample_type                      IDENTIFIER
  read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
  disabled                         1
  inherit                          1
  enable_on_exec                   1
  exclude_guest                    1
------------------------------------------------------------
sys_perf_event_open: pid 181636  cpu -1  group_fd -1  flags 0x8 = 6
...
```

After:
```
$ perf stat -vv -e instructions true
...
------------------------------------------------------------
perf_event_attr:
  type                             0 (PERF_TYPE_HARDWARE)
  size                             136
  config                           0xa00000001 (cpu_atom/PERF_COUNT_HW_INSTRUCTIONS/)
  sample_type                      IDENTIFIER
  read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
  disabled                         1
  inherit                          1
  enable_on_exec                   1
------------------------------------------------------------
sys_perf_event_open: pid 181724  cpu -1  group_fd -1  flags 0x8 = 5
------------------------------------------------------------
perf_event_attr:
  type                             0 (PERF_TYPE_HARDWARE)
  size                             136
  config                           0x400000001 (cpu_core/PERF_COUNT_HW_INSTRUCTIONS/)
  sample_type                      IDENTIFIER
  read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
  disabled                         1
  inherit                          1
  enable_on_exec                   1
------------------------------------------------------------
sys_perf_event_open: pid 181724  cpu -1  group_fd -1  flags 0x8 = 6
...
```

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/perf_event_attr_fprintf.c | 124 +++++++++++++---------
 1 file changed, 75 insertions(+), 49 deletions(-)

diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index c7f3543b9921..66b666d9ce64 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -79,24 +79,22 @@ static void __p_read_format(char *buf, size_t size, u64 value)
 #define ENUM_ID_TO_STR_CASE(x) case x: return (#x);
 static const char *stringify_perf_type_id(struct perf_pmu *pmu, u32 type)
 {
-	if (pmu)
-		return pmu->name;
-
 	switch (type) {
 	ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE)
 	ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE)
 	ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT)
 	ENUM_ID_TO_STR_CASE(PERF_TYPE_HW_CACHE)
-	ENUM_ID_TO_STR_CASE(PERF_TYPE_RAW)
 	ENUM_ID_TO_STR_CASE(PERF_TYPE_BREAKPOINT)
+	case PERF_TYPE_RAW:
+		return pmu ? pmu->name : "PERF_TYPE_RAW";
 	default:
-		return NULL;
+		return pmu ? pmu->name : NULL;
 	}
 }
 
 static const char *stringify_perf_hw_id(u64 value)
 {
-	switch (value) {
+	switch (value & PERF_HW_EVENT_MASK) {
 	ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CPU_CYCLES)
 	ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_INSTRUCTIONS)
 	ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_REFERENCES)
@@ -169,79 +167,100 @@ static const char *stringify_perf_sw_id(u64 value)
 }
 #undef ENUM_ID_TO_STR_CASE
 
-#define PRINT_ID(_s, _f)					\
-do {								\
-	const char *__s = _s;					\
-	if (__s == NULL)					\
-		snprintf(buf, size, _f, value);			\
-	else							\
-		snprintf(buf, size, _f" (%s)", value, __s);	\
-} while (0)
-#define print_id_unsigned(_s)	PRINT_ID(_s, "%"PRIu64)
-#define print_id_hex(_s)	PRINT_ID(_s, "%#"PRIx64)
+static void print_id_unsigned(char *buf, size_t size, u64 value, const char *s)
+{
+	if (s == NULL)
+		snprintf(buf, size, "%"PRIu64, value);
+	else
+		snprintf(buf, size, "%"PRIu64" (%s)", value, s);
+}
+
+static void print_id_hex(char *buf, size_t size, u64 value, const char *s)
+{
+	if (s == NULL)
+		snprintf(buf, size, "%#"PRIx64, value);
+	else
+		snprintf(buf, size, "%#"PRIx64" (%s)", value, s);
+}
 
-static void __p_type_id(struct perf_pmu *pmu, char *buf, size_t size, u64 value)
+static void __p_type_id(char *buf, size_t size, struct perf_pmu *pmu, u32 type)
 {
-	print_id_unsigned(stringify_perf_type_id(pmu, value));
+	print_id_unsigned(buf, size, type, stringify_perf_type_id(pmu, type));
 }
 
-static void __p_config_hw_id(char *buf, size_t size, u64 value)
+static void __p_config_hw_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config)
 {
-	print_id_hex(stringify_perf_hw_id(value));
+	const char *name = stringify_perf_hw_id(config);
+
+	if (name == NULL) {
+		if (pmu == NULL) {
+			snprintf(buf, size, "%#"PRIx64, config);
+		} else {
+			snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name,
+				 config);
+		}
+	} else {
+		if (pmu == NULL)
+			snprintf(buf, size, "%#"PRIx64" (%s)", config, name);
+		else
+			snprintf(buf, size, "%#"PRIx64" (%s/%s/)", config, pmu->name, name);
+	}
 }
 
-static void __p_config_sw_id(char *buf, size_t size, u64 value)
+static void __p_config_sw_id(char *buf, size_t size, u64 id)
 {
-	print_id_hex(stringify_perf_sw_id(value));
+	print_id_hex(buf, size, id, stringify_perf_sw_id(id));
 }
 
-static void __p_config_hw_cache_id(char *buf, size_t size, u64 value)
+static void __p_config_hw_cache_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config)
 {
-	const char *hw_cache_str = stringify_perf_hw_cache_id(value & 0xff);
+	const char *hw_cache_str = stringify_perf_hw_cache_id(config & 0xff);
 	const char *hw_cache_op_str =
-		stringify_perf_hw_cache_op_id((value & 0xff00) >> 8);
+		stringify_perf_hw_cache_op_id((config & 0xff00) >> 8);
 	const char *hw_cache_op_result_str =
-		stringify_perf_hw_cache_op_result_id((value & 0xff0000) >> 16);
-
-	if (hw_cache_str == NULL || hw_cache_op_str == NULL ||
-	    hw_cache_op_result_str == NULL) {
-		snprintf(buf, size, "%#"PRIx64, value);
+		stringify_perf_hw_cache_op_result_id((config & 0xff0000) >> 16);
+
+	if (hw_cache_str == NULL || hw_cache_op_str == NULL || hw_cache_op_result_str == NULL) {
+		if (pmu == NULL) {
+			snprintf(buf, size, "%#"PRIx64, config);
+		} else {
+			snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name,
+				 config);
+		}
 	} else {
-		snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", value,
-			 hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+		if (pmu == NULL) {
+			snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", config,
+				 hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+		} else {
+			snprintf(buf, size, "%#"PRIx64" (%s/%s | %s | %s/)", config, pmu->name,
+				 hw_cache_op_result_str, hw_cache_op_str, hw_cache_str);
+		}
 	}
 }
 
-static void __p_config_tracepoint_id(char *buf, size_t size, u64 value)
+static void __p_config_tracepoint_id(char *buf, size_t size, u64 id)
 {
-	char *str = tracepoint_id_to_name(value);
+	char *str = tracepoint_id_to_name(id);
 
-	print_id_hex(str);
+	print_id_hex(buf, size, id, str);
 	free(str);
 }
 
-static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 value)
+static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 config)
 {
-	const char *name = perf_pmu__name_from_config(pmu, value);
-
-	if (name) {
-		print_id_hex(name);
-		return;
-	}
 	switch (type) {
 	case PERF_TYPE_HARDWARE:
-		return __p_config_hw_id(buf, size, value);
+		return __p_config_hw_id(buf, size, pmu, config);
 	case PERF_TYPE_SOFTWARE:
-		return __p_config_sw_id(buf, size, value);
+		return __p_config_sw_id(buf, size, config);
 	case PERF_TYPE_HW_CACHE:
-		return __p_config_hw_cache_id(buf, size, value);
+		return __p_config_hw_cache_id(buf, size, pmu, config);
 	case PERF_TYPE_TRACEPOINT:
-		return __p_config_tracepoint_id(buf, size, value);
+		return __p_config_tracepoint_id(buf, size, config);
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_BREAKPOINT:
 	default:
-		snprintf(buf, size, "%#"PRIx64, value);
-		return;
+		return print_id_hex(buf, size, config, perf_pmu__name_from_config(pmu, config));
 	}
 }
 
@@ -253,7 +272,7 @@ static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type
 #define p_sample_type(val)	__p_sample_type(buf, BUF_SIZE, val)
 #define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
 #define p_read_format(val)	__p_read_format(buf, BUF_SIZE, val)
-#define p_type_id(val)		__p_type_id(pmu, buf, BUF_SIZE, val)
+#define p_type_id(val)		__p_type_id(buf, BUF_SIZE, pmu, val)
 #define p_config_id(val)	__p_config_id(pmu, buf, BUF_SIZE, attr->type, val)
 
 #define PRINT_ATTRn(_n, _f, _p, _a)			\
@@ -273,6 +292,13 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 	char buf[BUF_SIZE];
 	int ret = 0;
 
+	if (!pmu && (attr->type == PERF_TYPE_HARDWARE || attr->type == PERF_TYPE_HW_CACHE)) {
+		u32 extended_type = attr->config >> PERF_PMU_TYPE_SHIFT;
+
+		if (extended_type)
+			pmu = perf_pmus__find_by_type(extended_type);
+	}
+
 	PRINT_ATTRn("type", type, p_type_id, true);
 	PRINT_ATTRf(size, p_unsigned);
 	PRINT_ATTRn("config", config, p_config_id, true);
-- 
2.48.1.711.g2feabab25a-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting
  2025-03-05  8:37 [PATCH v1 1/2] perf tools: Improve handling of hybrid PMUs in perf_event_attr__fprintf Ian Rogers
@ 2025-03-05  8:37 ` Ian Rogers
  2025-03-05 13:44   ` James Clark
  2025-03-06  9:17   ` Mi, Dapeng
  2025-03-05 11:06 ` [PATCH v1 1/2] perf tools: Improve handling of hybrid PMUs in perf_event_attr__fprintf James Clark
  2025-03-05 21:39 ` Falcon, Thomas
  2 siblings, 2 replies; 9+ messages in thread
From: Ian Rogers @ 2025-03-05  8:37 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Kan Liang, James Clark,
	Dominique Martinet, Andi Kleen, linux-perf-users, linux-kernel,
	Dapeng Mi, Thomas Falcon

In the case of '{instructions,slots},faults,topdown-retiring' the
first event that must be grouped, slots, is ignored, causing the
topdown-retiring event not to be adjacent to the group it needs to be
inserted into. Don't ignore the group members when computing the
force_grouped_index.

Make the force_grouped_index be for the leader of the group it is
within, and always use it first, rather than a group leader index, so
that topdown events may be sorted from one group into another.

Reported-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Closes: https://lore.kernel.org/lkml/20250224083306.71813-2-dapeng1.mi@linux.intel.com/
Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/parse-events.c | 54 ++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 35e48fe56dfa..cf32abc496e9 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1983,31 +1983,30 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li
 	bool lhs_has_group, rhs_has_group;
 
 	/*
-	 * First sort by grouping/leader. Read the leader idx only if the evsel
-	 * is part of a group, by default ungrouped events will be sorted
-	 * relative to grouped events based on where the first ungrouped event
-	 * occurs. If both events don't have a group we want to fall-through to
-	 * the arch specific sorting, that can reorder and fix things like
-	 * Intel's topdown events.
+	 * Get the indexes of the 2 events to sort. If the events are
+	 * in groups then the leader's index is used otherwise the
+	 * event's index is used. Events in the same group will be
+	 * sorted by PMU name. An index may be forced for events that
+	 * must be in the same group, namely Intel topdown events.
+	 * When everything is identical arch specific sorting is used,
+	 * that can reorder and fix things like Intel's topdown
+	 * events.
 	 */
-	if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) {
-		lhs_has_group = true;
+	lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1;
+	if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs))
+		lhs_sort_idx = *force_grouped_idx;
+	else if (lhs_has_group)
 		lhs_sort_idx = lhs_core->leader->idx;
-	} else {
-		lhs_has_group = false;
-		lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)
-			? *force_grouped_idx
-			: lhs_core->idx;
-	}
-	if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) {
-		rhs_has_group = true;
+	else
+		lhs_sort_idx = lhs_core->idx;
+	rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1;
+
+	if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs))
+		rhs_sort_idx = *force_grouped_idx;
+	else if (rhs_has_group)
 		rhs_sort_idx = rhs_core->leader->idx;
-	} else {
-		rhs_has_group = false;
-		rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)
-			? *force_grouped_idx
-			: rhs_core->idx;
-	}
+	else
+		rhs_sort_idx = rhs_core->idx;
 
 	if (lhs_sort_idx != rhs_sort_idx)
 		return lhs_sort_idx - rhs_sort_idx;
@@ -2055,10 +2054,13 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
 		 */
 		pos->core.idx = idx++;
 
-		/* Remember an index to sort all forced grouped events together to. */
-		if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 &&
-		    arch_evsel__must_be_in_group(pos))
-			force_grouped_idx = pos->core.idx;
+		/*
+		 * Remember an index to sort all forced grouped events
+		 * together to. Use the group leader as some events
+		 * must appear first within the group.
+		 */
+		if (force_grouped_idx == -1 && arch_evsel__must_be_in_group(pos))
+			force_grouped_idx = pos_leader->core.idx;
 	}
 
 	/* Sort events. */
-- 
2.48.1.711.g2feabab25a-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/2] perf tools: Improve handling of hybrid PMUs in perf_event_attr__fprintf
  2025-03-05  8:37 [PATCH v1 1/2] perf tools: Improve handling of hybrid PMUs in perf_event_attr__fprintf Ian Rogers
  2025-03-05  8:37 ` [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting Ian Rogers
@ 2025-03-05 11:06 ` James Clark
  2025-03-05 21:39 ` Falcon, Thomas
  2 siblings, 0 replies; 9+ messages in thread
From: James Clark @ 2025-03-05 11:06 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Kan Liang, Dominique Martinet, Andi Kleen,
	linux-perf-users, linux-kernel, Dapeng Mi, Thomas Falcon



On 05/03/2025 8:37 am, Ian Rogers wrote:
> Support the PMU name from the legacy hardware and hw_cache PMU
> extended types.  Remove some macros and make variables more intention
> revealing, rather than just being called "value".
> 
> Before:
> ```
> $ perf stat -vv -e instructions true
> ...
> ------------------------------------------------------------
> perf_event_attr:
>    type                             0 (PERF_TYPE_HARDWARE)
>    size                             136
>    config                           0xa00000001
>    sample_type                      IDENTIFIER
>    read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
>    disabled                         1
>    inherit                          1
>    enable_on_exec                   1
>    exclude_guest                    1
> ------------------------------------------------------------
> sys_perf_event_open: pid 181636  cpu -1  group_fd -1  flags 0x8 = 5
> ------------------------------------------------------------
> perf_event_attr:
>    type                             0 (PERF_TYPE_HARDWARE)
>    size                             136
>    config                           0x400000001
>    sample_type                      IDENTIFIER
>    read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
>    disabled                         1
>    inherit                          1
>    enable_on_exec                   1
>    exclude_guest                    1
> ------------------------------------------------------------
> sys_perf_event_open: pid 181636  cpu -1  group_fd -1  flags 0x8 = 6
> ...
> ```
> 
> After:
> ```
> $ perf stat -vv -e instructions true
> ...
> ------------------------------------------------------------
> perf_event_attr:
>    type                             0 (PERF_TYPE_HARDWARE)
>    size                             136
>    config                           0xa00000001 (cpu_atom/PERF_COUNT_HW_INSTRUCTIONS/)

Seems like a good idea, I'm always decoding these by eye.

Reviewed-by: James Clark <james.clark@linaro.org>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting
  2025-03-05  8:37 ` [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting Ian Rogers
@ 2025-03-05 13:44   ` James Clark
  2025-03-05 14:06     ` Ian Rogers
  2025-03-06  9:17   ` Mi, Dapeng
  1 sibling, 1 reply; 9+ messages in thread
From: James Clark @ 2025-03-05 13:44 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Kan Liang, Dominique Martinet, Andi Kleen,
	linux-perf-users, linux-kernel, Dapeng Mi, Thomas Falcon



On 05/03/2025 8:37 am, Ian Rogers wrote:
> In the case of '{instructions,slots},faults,topdown-retiring' the
> first event that must be grouped, slots, is ignored causing the
> topdown-retiring event not to be adjacent to the group it needs to be
> inserted into. Don't ignore the group members when computing the
> force_grouped_index.
> 
> Make the force_grouped_index be for the leader of the group it is
> within and always use it first rather than a group leader index so
> that topdown events may be sorted from one group into another.
> 
> Reported-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> Closes: https://lore.kernel.org/lkml/20250224083306.71813-2-dapeng1.mi@linux.intel.com/
> Signed-off-by: Ian Rogers <irogers@google.com>

Testing on Arm seems ok, but presumably this doesn't change anything 
there because arch_evsel__must_be_in_group() is always false.

On x86 I ran into the topdown metrics not opening on cpu_core at all, so 
I'm not sure if I'm able to test that the original issue is fixed on my 
machine. From looking at the link the issue is that the ungrouped 
topdown event is "<not supported>", but I always see that regardless of 
grouping despite perf list saying it exists:

  $ perf list --unit cpu_core | grep -i topdown
   topdown-bad-spec OR cpu_core/topdown-bad-spec/     [Kernel PMU event]
   topdown-be-bound OR cpu_core/topdown-be-bound/     [Kernel PMU event]
   topdown-br-mispredict OR cpu_core/topdown-br-mispredict/[Kernel PMU 
event]
   topdown-fe-bound OR cpu_core/topdown-fe-bound/     [Kernel PMU event]
   topdown-fetch-lat OR cpu_core/topdown-fetch-lat/   [Kernel PMU event]
   topdown-heavy-ops OR cpu_core/topdown-heavy-ops/   [Kernel PMU event]
   topdown-mem-bound OR cpu_core/topdown-mem-bound/   [Kernel PMU event]
   topdown-retiring OR cpu_core/topdown-retiring/     [Kernel PMU event]
   topdown.backend_bound_slots
   topdown.bad_spec_slots
   topdown.br_mispredict_slots
   topdown.memory_bound_slots
        [TOPDOWN.MEMORY_BOUND_SLOTS. Unit: cpu_core]


  $ sudo perf stat -e topdown-retiring -- true
  Performance counter stats for 'true':
      <not counted>   cpu_atom/topdown-retiring/           (0.00%)
    <not supported>   cpu_core/topdown-retiring/


  $ sudo perf stat -e topdown-retiring -vvv -- true
Control descriptor is not initialized
Opening: topdown-retiring
------------------------------------------------------------
perf_event_attr:
   type                             10 (cpu_atom)
   size                             136
   config                           0xc2 (topdown-retiring)
   sample_type                      IDENTIFIER
   read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
   disabled                         1
   inherit                          1
   enable_on_exec                   1
------------------------------------------------------------
sys_perf_event_open: pid 151404  cpu -1  group_fd -1  flags 0x8 = 3
Opening: topdown-retiring
------------------------------------------------------------
perf_event_attr:
   type                             4 (cpu_core)
   size                             136
   config                           0x8000 (topdown-retiring)
   sample_type                      IDENTIFIER
   read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
   disabled                         1
   inherit                          1
   enable_on_exec                   1
------------------------------------------------------------
sys_perf_event_open: pid 151404  cpu -1  group_fd -1  flags 0x8
sys_perf_event_open failed, error -22
switching off exclude_guest for PMU cpu_core
Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit, 
falling back to no-inherit.
Warning:
topdown-retiring event is not supported by the kernel.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting
  2025-03-05 13:44   ` James Clark
@ 2025-03-05 14:06     ` Ian Rogers
  2025-03-05 15:59       ` James Clark
  0 siblings, 1 reply; 9+ messages in thread
From: Ian Rogers @ 2025-03-05 14:06 UTC (permalink / raw)
  To: James Clark
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Kan Liang, Dominique Martinet, Andi Kleen,
	linux-perf-users, linux-kernel, Dapeng Mi, Thomas Falcon

On Wed, Mar 5, 2025 at 5:44 AM James Clark <james.clark@linaro.org> wrote:
>
>
>
> On 05/03/2025 8:37 am, Ian Rogers wrote:
> > In the case of '{instructions,slots},faults,topdown-retiring' the
> > first event that must be grouped, slots, is ignored causing the
> > topdown-retiring event not to be adjacent to the group it needs to be
> > inserted into. Don't ignore the group members when computing the
> > force_grouped_index.
> >
> > Make the force_grouped_index be for the leader of the group it is
> > within and always use it first rather than a group leader index so
> > that topdown events may be sorted from one group into another.
> >
> > Reported-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> > Closes: https://lore.kernel.org/lkml/20250224083306.71813-2-dapeng1.mi@linux.intel.com/
> > Signed-off-by: Ian Rogers <irogers@google.com>
>
> Testing on Arm seems ok, but presumably this doesn't change anything
> there because arch_evsel__must_be_in_group() is always false.
>
> On x86 I ran into the topdown metrics not opening on cpu_core at all, so
> I'm not sure if I'm able to test that the original issue is fixed on my
> machine. From looking at the link the issue is that the ungrouped
> topdown event is "<not supported>", but I always see that regardless of
> grouping despite perf list saying it exists:
>
>   $ perf list --unit cpu_core | grep -i topdown
>    topdown-bad-spec OR cpu_core/topdown-bad-spec/     [Kernel PMU event]
>    topdown-be-bound OR cpu_core/topdown-be-bound/     [Kernel PMU event]
>    topdown-br-mispredict OR cpu_core/topdown-br-mispredict/[Kernel PMU
> event]
>    topdown-fe-bound OR cpu_core/topdown-fe-bound/     [Kernel PMU event]
>    topdown-fetch-lat OR cpu_core/topdown-fetch-lat/   [Kernel PMU event]
>    topdown-heavy-ops OR cpu_core/topdown-heavy-ops/   [Kernel PMU event]
>    topdown-mem-bound OR cpu_core/topdown-mem-bound/   [Kernel PMU event]
>    topdown-retiring OR cpu_core/topdown-retiring/     [Kernel PMU event]
>    topdown.backend_bound_slots
>    topdown.bad_spec_slots
>    topdown.br_mispredict_slots
>    topdown.memory_bound_slots
>         [TOPDOWN.MEMORY_BOUND_SLOTS. Unit: cpu_core]
>
>
>   $ sudo perf stat -e topdown-retiring -- true
>   Performance counter stats for 'true':
>       <not counted>   cpu_atom/topdown-retiring/           (0.00%)
>     <not supported>   cpu_core/topdown-retiring/
>
>
>   $ sudo perf stat -e topdown-retiring -vvv -- true
> Control descriptor is not initialized
> Opening: topdown-retiring
> ------------------------------------------------------------
> perf_event_attr:
>    type                             10 (cpu_atom)
>    size                             136
>    config                           0xc2 (topdown-retiring)
>    sample_type                      IDENTIFIER
>    read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
>    disabled                         1
>    inherit                          1
>    enable_on_exec                   1
> ------------------------------------------------------------
> sys_perf_event_open: pid 151404  cpu -1  group_fd -1  flags 0x8 = 3
> Opening: topdown-retiring
> ------------------------------------------------------------
> perf_event_attr:
>    type                             4 (cpu_core)
>    size                             136
>    config                           0x8000 (topdown-retiring)
>    sample_type                      IDENTIFIER
>    read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
>    disabled                         1
>    inherit                          1
>    enable_on_exec                   1
> ------------------------------------------------------------
> sys_perf_event_open: pid 151404  cpu -1  group_fd -1  flags 0x8
> sys_perf_event_open failed, error -22
> switching off exclude_guest for PMU cpu_core
> Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit,
> falling back to no-inherit.
> Warning:
> topdown-retiring event is not supported by the kernel.

Yep, unfortunately there is a requirement that a topdown event like
topdown-retiring is always programmed with slots on performance cores.
The slots event must be the group leader. You can see in metrics the
slots event as "+ 0 * slots":
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json?h=perf-tools-next#n754
```
        "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound +
topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0
* slots",
```
and making it the group leader is done by the sorting:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/arch/x86/util/evlist.c?h=perf-tools-next#n67

We could probably add something to
parse_events__sort_events_and_fix_groups to inject the slots event,
but this hasn't been done yet.

My main concern with this change is there is some sensitivity to the
event ordering when parsing them in scripts. There's some context in:
https://lore.kernel.org/all/20230719001836.198363-1-irogers@google.com/
This change makes the topdown events appear first in the group always,
but as you say you only see that if you use those events, otherwise
things are unchanged.

Thanks for testing!
Ian

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting
  2025-03-05 14:06     ` Ian Rogers
@ 2025-03-05 15:59       ` James Clark
  0 siblings, 0 replies; 9+ messages in thread
From: James Clark @ 2025-03-05 15:59 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Kan Liang, Dominique Martinet, Andi Kleen,
	linux-perf-users, linux-kernel, Dapeng Mi, Thomas Falcon



On 05/03/2025 2:06 pm, Ian Rogers wrote:
> On Wed, Mar 5, 2025 at 5:44 AM James Clark <james.clark@linaro.org> wrote:
>>
>>
>>
>> On 05/03/2025 8:37 am, Ian Rogers wrote:
>>> In the case of '{instructions,slots},faults,topdown-retiring' the
>>> first event that must be grouped, slots, is ignored causing the
>>> topdown-retiring event not to be adjacent to the group it needs to be
>>> inserted into. Don't ignore the group members when computing the
>>> force_grouped_index.
>>>
>>> Make the force_grouped_index be for the leader of the group it is
>>> within and always use it first rather than a group leader index so
>>> that topdown events may be sorted from one group into another.
>>>
>>> Reported-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>>> Closes: https://lore.kernel.org/lkml/20250224083306.71813-2-dapeng1.mi@linux.intel.com/
>>> Signed-off-by: Ian Rogers <irogers@google.com>
>>
>> Testing on Arm seems ok, but presumably this doesn't change anything
>> there because arch_evsel__must_be_in_group() is always false.
>>
>> On x86 I ran into the topdown metrics not opening on cpu_core at all, so
>> I'm not sure if I'm able to test that the original issue is fixed on my
>> machine. From looking at the link the issue is that the ungrouped
>> topdown event is "<not supported>", but I always see that regardless of
>> grouping despite perf list saying it exists:
>>
>>    $ perf list --unit cpu_core | grep -i topdown
>>     topdown-bad-spec OR cpu_core/topdown-bad-spec/     [Kernel PMU event]
>>     topdown-be-bound OR cpu_core/topdown-be-bound/     [Kernel PMU event]
>>     topdown-br-mispredict OR cpu_core/topdown-br-mispredict/[Kernel PMU
>> event]
>>     topdown-fe-bound OR cpu_core/topdown-fe-bound/     [Kernel PMU event]
>>     topdown-fetch-lat OR cpu_core/topdown-fetch-lat/   [Kernel PMU event]
>>     topdown-heavy-ops OR cpu_core/topdown-heavy-ops/   [Kernel PMU event]
>>     topdown-mem-bound OR cpu_core/topdown-mem-bound/   [Kernel PMU event]
>>     topdown-retiring OR cpu_core/topdown-retiring/     [Kernel PMU event]
>>     topdown.backend_bound_slots
>>     topdown.bad_spec_slots
>>     topdown.br_mispredict_slots
>>     topdown.memory_bound_slots
>>          [TOPDOWN.MEMORY_BOUND_SLOTS. Unit: cpu_core]
>>
>>
>>    $ sudo perf stat -e topdown-retiring -- true
>>    Performance counter stats for 'true':
>>        <not counted>   cpu_atom/topdown-retiring/           (0.00%)
>>      <not supported>   cpu_core/topdown-retiring/
>>
>>
>>    $ sudo perf stat -e topdown-retiring -vvv -- true
>> Control descriptor is not initialized
>> Opening: topdown-retiring
>> ------------------------------------------------------------
>> perf_event_attr:
>>     type                             10 (cpu_atom)
>>     size                             136
>>     config                           0xc2 (topdown-retiring)
>>     sample_type                      IDENTIFIER
>>     read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
>>     disabled                         1
>>     inherit                          1
>>     enable_on_exec                   1
>> ------------------------------------------------------------
>> sys_perf_event_open: pid 151404  cpu -1  group_fd -1  flags 0x8 = 3
>> Opening: topdown-retiring
>> ------------------------------------------------------------
>> perf_event_attr:
>>     type                             4 (cpu_core)
>>     size                             136
>>     config                           0x8000 (topdown-retiring)
>>     sample_type                      IDENTIFIER
>>     read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
>>     disabled                         1
>>     inherit                          1
>>     enable_on_exec                   1
>> ------------------------------------------------------------
>> sys_perf_event_open: pid 151404  cpu -1  group_fd -1  flags 0x8
>> sys_perf_event_open failed, error -22
>> switching off exclude_guest for PMU cpu_core
>> Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit,
>> falling back to no-inherit.
>> Warning:
>> topdown-retiring event is not supported by the kernel.
> 
> Yep, unfortunately there is a requirement that a topdown event like
> topdown-retiring is always programmed with slots on performance cores.
> The slots event must be the group leader. You can see in metrics the
> slots event as "+ 0 * slots":
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json?h=perf-tools-next#n754
> ```
>          "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound +
> topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0
> * slots",
> ```
> and making it the group leader is done by the sorting:
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/arch/x86/util/evlist.c?h=perf-tools-next#n67
> 
> We could probably add something to
> parse_events__sort_events_and_fix_groups to inject the slots event,
> but this hasn't been done yet.
> 
> My main concern with this change is there is some sensitivity to the
> event ordering when parsing them in scripts. There's some context in:
> https://lore.kernel.org/all/20230719001836.198363-1-irogers@google.com/
> This change makes the topdown events appear first in the group always,
> but as you say you only see that if you use those events, otherwise
> things are unchanged.
> 
> Thanks for testing!
> Ian

Ah ok got it. Yeah it works with slots in the group, and the topdown 
metrics work out of the box. I didn't realize there was that slots 
limitation.

Tested-by: James Clark <james.clark@linaro.org>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/2] perf tools: Improve handling of hybrid PMUs in perf_event_attr__fprintf
  2025-03-05  8:37 [PATCH v1 1/2] perf tools: Improve handling of hybrid PMUs in perf_event_attr__fprintf Ian Rogers
  2025-03-05  8:37 ` [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting Ian Rogers
  2025-03-05 11:06 ` [PATCH v1 1/2] perf tools: Improve handling of hybrid PMUs in perf_event_attr__fprintf James Clark
@ 2025-03-05 21:39 ` Falcon, Thomas
  2 siblings, 0 replies; 9+ messages in thread
From: Falcon, Thomas @ 2025-03-05 21:39 UTC (permalink / raw)
  To: alexander.shishkin@linux.intel.com, ak@linux.intel.com,
	Hunter, Adrian, linux-kernel@vger.kernel.org,
	asmadeus@codewreck.org, linux-perf-users@vger.kernel.org,
	irogers@google.com, mingo@redhat.com, james.clark@linaro.org,
	kan.liang@linux.intel.com, mark.rutland@arm.com,
	peterz@infradead.org, dapeng1.mi@linux.intel.com, acme@kernel.org,
	jolsa@kernel.org, namhyung@kernel.org

On Wed, 2025-03-05 at 00:37 -0800, Ian Rogers wrote:
> Support the PMU name from the legacy hardware and hw_cache PMU
> extended types.  Remove some macros and make variables more intention
> revealing, rather than just being called "value".
> 
> Before:
> ```
> $ perf stat -vv -e instructions true
> ...
> ------------------------------------------------------------
> perf_event_attr:
>   type                             0 (PERF_TYPE_HARDWARE)
>   size                             136
>   config                           0xa00000001
>   sample_type                      IDENTIFIER
>   read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
>   disabled                         1
>   inherit                          1
>   enable_on_exec                   1
>   exclude_guest                    1
> ------------------------------------------------------------
> sys_perf_event_open: pid 181636  cpu -1  group_fd -1  flags 0x8 = 5
> ------------------------------------------------------------
> perf_event_attr:
>   type                             0 (PERF_TYPE_HARDWARE)
>   size                             136
>   config                           0x400000001
>   sample_type                      IDENTIFIER
>   read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
>   disabled                         1
>   inherit                          1
>   enable_on_exec                   1
>   exclude_guest                    1
> ------------------------------------------------------------
> sys_perf_event_open: pid 181636  cpu -1  group_fd -1  flags 0x8 = 6
> ...
> ```
> 
> After:
> ```
> $ perf stat -vv -e instructions true
> ...
> ------------------------------------------------------------
> perf_event_attr:
>   type                             0 (PERF_TYPE_HARDWARE)
>   size                             136
>   config                           0xa00000001 (cpu_atom/PERF_COUNT_HW_INSTRUCTIONS/)
>   sample_type                      IDENTIFIER
>   read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
>   disabled                         1
>   inherit                          1
>   enable_on_exec                   1
> ------------------------------------------------------------
> sys_perf_event_open: pid 181724  cpu -1  group_fd -1  flags 0x8 = 5
> ------------------------------------------------------------
> perf_event_attr:
>   type                             0 (PERF_TYPE_HARDWARE)
>   size                             136
>   config                           0x400000001 (cpu_core/PERF_COUNT_HW_INSTRUCTIONS/)
>   sample_type                      IDENTIFIER
>   read_format                      TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
>   disabled                         1
>   inherit                          1
>   enable_on_exec                   1
> ------------------------------------------------------------
> sys_perf_event_open: pid 181724  cpu -1  group_fd -1  flags 0x8 = 6
> ...
> ```
> 
> Signed-off-by: Ian Rogers <irogers@google.com>

Tested on an Alder Lake.

Tested-by: Thomas Falcon <thomas.falcon@intel.com>

> ---
>  tools/perf/util/perf_event_attr_fprintf.c | 124 +++++++++++++-------
> --
>  1 file changed, 75 insertions(+), 49 deletions(-)
> 
> diff --git a/tools/perf/util/perf_event_attr_fprintf.c
> b/tools/perf/util/perf_event_attr_fprintf.c
> index c7f3543b9921..66b666d9ce64 100644
> --- a/tools/perf/util/perf_event_attr_fprintf.c
> +++ b/tools/perf/util/perf_event_attr_fprintf.c
> @@ -79,24 +79,22 @@ static void __p_read_format(char *buf, size_t
> size, u64 value)
>  #define ENUM_ID_TO_STR_CASE(x) case x: return (#x);
>  static const char *stringify_perf_type_id(struct perf_pmu *pmu, u32
> type)
>  {
> -	if (pmu)
> -		return pmu->name;
> -
>  	switch (type) {
>  	ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE)
>  	ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE)
>  	ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT)
>  	ENUM_ID_TO_STR_CASE(PERF_TYPE_HW_CACHE)
> -	ENUM_ID_TO_STR_CASE(PERF_TYPE_RAW)
>  	ENUM_ID_TO_STR_CASE(PERF_TYPE_BREAKPOINT)
> +	case PERF_TYPE_RAW:
> +		return pmu ? pmu->name : "PERF_TYPE_RAW";
>  	default:
> -		return NULL;
> +		return pmu ? pmu->name : NULL;
>  	}
>  }
>  
>  static const char *stringify_perf_hw_id(u64 value)
>  {
> -	switch (value) {
> +	switch (value & PERF_HW_EVENT_MASK) {
>  	ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CPU_CYCLES)
>  	ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_INSTRUCTIONS)
>  	ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_REFERENCES)
> @@ -169,79 +167,100 @@ static const char *stringify_perf_sw_id(u64
> value)
>  }
>  #undef ENUM_ID_TO_STR_CASE
>  
> -#define PRINT_ID(_s, _f)					\
> -do {								\
> -	const char *__s = _s;					\
> -	if (__s == NULL)					\
> -		snprintf(buf, size, _f,
> value);			\
> -	else							\
> -		snprintf(buf, size, _f" (%s)", value, __s);	\
> -} while (0)
> -#define print_id_unsigned(_s)	PRINT_ID(_s, "%"PRIu64)
> -#define print_id_hex(_s)	PRINT_ID(_s, "%#"PRIx64)
> +static void print_id_unsigned(char *buf, size_t size, u64 value,
> const char *s)
> +{
> +	if (s == NULL)
> +		snprintf(buf, size, "%"PRIu64, value);
> +	else
> +		snprintf(buf, size, "%"PRIu64" (%s)", value, s);
> +}
> +
> +static void print_id_hex(char *buf, size_t size, u64 value, const
> char *s)
> +{
> +	if (s == NULL)
> +		snprintf(buf, size, "%#"PRIx64, value);
> +	else
> +		snprintf(buf, size, "%#"PRIx64" (%s)", value, s);
> +}
>  
> -static void __p_type_id(struct perf_pmu *pmu, char *buf, size_t
> size, u64 value)
> +static void __p_type_id(char *buf, size_t size, struct perf_pmu
> *pmu, u32 type)
>  {
> -	print_id_unsigned(stringify_perf_type_id(pmu, value));
> +	print_id_unsigned(buf, size, type,
> stringify_perf_type_id(pmu, type));
>  }
>  
> -static void __p_config_hw_id(char *buf, size_t size, u64 value)
> +static void __p_config_hw_id(char *buf, size_t size, struct perf_pmu
> *pmu, u64 config)
>  {
> -	print_id_hex(stringify_perf_hw_id(value));
> +	const char *name = stringify_perf_hw_id(config);
> +
> +	if (name == NULL) {
> +		if (pmu == NULL) {
> +			snprintf(buf, size, "%#"PRIx64, config);
> +		} else {
> +			snprintf(buf, size, "%#"PRIx64"
> (%s/config=%#"PRIx64"/)", config, pmu->name,
> +				 config);
> +		}
> +	} else {
> +		if (pmu == NULL)
> +			snprintf(buf, size, "%#"PRIx64" (%s)",
> config, name);
> +		else
> +			snprintf(buf, size, "%#"PRIx64" (%s/%s/)",
> config, pmu->name, name);
> +	}
>  }
>  
> -static void __p_config_sw_id(char *buf, size_t size, u64 value)
> +static void __p_config_sw_id(char *buf, size_t size, u64 id)
>  {
> -	print_id_hex(stringify_perf_sw_id(value));
> +	print_id_hex(buf, size, id, stringify_perf_sw_id(id));
>  }
>  
> -static void __p_config_hw_cache_id(char *buf, size_t size, u64
> value)
> +static void __p_config_hw_cache_id(char *buf, size_t size, struct
> perf_pmu *pmu, u64 config)
>  {
> -	const char *hw_cache_str = stringify_perf_hw_cache_id(value
> & 0xff);
> +	const char *hw_cache_str = stringify_perf_hw_cache_id(config
> & 0xff);
>  	const char *hw_cache_op_str =
> -		stringify_perf_hw_cache_op_id((value & 0xff00) >>
> 8);
> +		stringify_perf_hw_cache_op_id((config & 0xff00) >>
> 8);
>  	const char *hw_cache_op_result_str =
> -		stringify_perf_hw_cache_op_result_id((value &
> 0xff0000) >> 16);
> -
> -	if (hw_cache_str == NULL || hw_cache_op_str == NULL ||
> -	    hw_cache_op_result_str == NULL) {
> -		snprintf(buf, size, "%#"PRIx64, value);
> +		stringify_perf_hw_cache_op_result_id((config &
> 0xff0000) >> 16);
> +
> +	if (hw_cache_str == NULL || hw_cache_op_str == NULL ||
> hw_cache_op_result_str == NULL) {
> +		if (pmu == NULL) {
> +			snprintf(buf, size, "%#"PRIx64, config);
> +		} else {
> +			snprintf(buf, size, "%#"PRIx64"
> (%s/config=%#"PRIx64"/)", config, pmu->name,
> +				 config);
> +		}
>  	} else {
> -		snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)",
> value,
> -			 hw_cache_op_result_str, hw_cache_op_str,
> hw_cache_str);
> +		if (pmu == NULL) {
> +			snprintf(buf, size, "%#"PRIx64" (%s | %s |
> %s)", config,
> +				 hw_cache_op_result_str,
> hw_cache_op_str, hw_cache_str);
> +		} else {
> +			snprintf(buf, size, "%#"PRIx64" (%s/%s | %s
> | %s/)", config, pmu->name,
> +				 hw_cache_op_result_str,
> hw_cache_op_str, hw_cache_str);
> +		}
>  	}
>  }
>  
> -static void __p_config_tracepoint_id(char *buf, size_t size, u64
> value)
> +static void __p_config_tracepoint_id(char *buf, size_t size, u64 id)
>  {
> -	char *str = tracepoint_id_to_name(value);
> +	char *str = tracepoint_id_to_name(id);
>  
> -	print_id_hex(str);
> +	print_id_hex(buf, size, id, str);
>  	free(str);
>  }
>  
> -static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t
> size, u32 type, u64 value)
> +static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t
> size, u32 type, u64 config)
>  {
> -	const char *name = perf_pmu__name_from_config(pmu, value);
> -
> -	if (name) {
> -		print_id_hex(name);
> -		return;
> -	}
>  	switch (type) {
>  	case PERF_TYPE_HARDWARE:
> -		return __p_config_hw_id(buf, size, value);
> +		return __p_config_hw_id(buf, size, pmu, config);
>  	case PERF_TYPE_SOFTWARE:
> -		return __p_config_sw_id(buf, size, value);
> +		return __p_config_sw_id(buf, size, config);
>  	case PERF_TYPE_HW_CACHE:
> -		return __p_config_hw_cache_id(buf, size, value);
> +		return __p_config_hw_cache_id(buf, size, pmu,
> config);
>  	case PERF_TYPE_TRACEPOINT:
> -		return __p_config_tracepoint_id(buf, size, value);
> +		return __p_config_tracepoint_id(buf, size, config);
>  	case PERF_TYPE_RAW:
>  	case PERF_TYPE_BREAKPOINT:
>  	default:
> -		snprintf(buf, size, "%#"PRIx64, value);
> -		return;
> +		return print_id_hex(buf, size, config,
> perf_pmu__name_from_config(pmu, config));
>  	}
>  }
>  
> @@ -253,7 +272,7 @@ static void __p_config_id(struct perf_pmu *pmu,
> char *buf, size_t size, u32 type
>  #define p_sample_type(val)	__p_sample_type(buf, BUF_SIZE, val)
>  #define p_branch_sample_type(val) __p_branch_sample_type(buf,
> BUF_SIZE, val)
>  #define p_read_format(val)	__p_read_format(buf, BUF_SIZE, val)
> -#define p_type_id(val)		__p_type_id(pmu, buf, BUF_SIZE, val)
> +#define p_type_id(val)		__p_type_id(buf, BUF_SIZE, pmu, val)
>  #define p_config_id(val)	__p_config_id(pmu, buf, BUF_SIZE,
> attr->type, val)
>  
>  #define PRINT_ATTRn(_n, _f, _p, _a)			\
> @@ -273,6 +292,13 @@ int perf_event_attr__fprintf(FILE *fp, struct
> perf_event_attr *attr,
>  	char buf[BUF_SIZE];
>  	int ret = 0;
>  
> +	if (!pmu && (attr->type == PERF_TYPE_HARDWARE || attr->type
> == PERF_TYPE_HW_CACHE)) {
> +		u32 extended_type = attr->config >>
> PERF_PMU_TYPE_SHIFT;
> +
> +		if (extended_type)
> +			pmu =
> perf_pmus__find_by_type(extended_type);
> +	}
> +
>  	PRINT_ATTRn("type", type, p_type_id, true);
>  	PRINT_ATTRf(size, p_unsigned);
>  	PRINT_ATTRn("config", config, p_config_id, true);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting
  2025-03-05  8:37 ` [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting Ian Rogers
  2025-03-05 13:44   ` James Clark
@ 2025-03-06  9:17   ` Mi, Dapeng
  2025-03-06 17:25     ` Ian Rogers
  1 sibling, 1 reply; 9+ messages in thread
From: Mi, Dapeng @ 2025-03-06  9:17 UTC (permalink / raw)
  To: Ian Rogers, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Kan Liang, James Clark, Dominique Martinet,
	Andi Kleen, linux-perf-users, linux-kernel, Thomas Falcon


On 3/5/2025 4:37 PM, Ian Rogers wrote:
> In the case of '{instructions,slots},faults,topdown-retiring' the
> first event that must be grouped, slots, is ignored causing the
> topdown-retiring event not to be adjacent to the group it needs to be
> inserted into. Don't ignore the group members when computing the
> force_grouped_index.
>
> Make the force_grouped_index be for the leader of the group it is
> within and always use it first rather than a group leader index so
> that topdown events may be sorted from one group into another.
>
> Reported-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> Closes: https://lore.kernel.org/lkml/20250224083306.71813-2-dapeng1.mi@linux.intel.com/
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/util/parse-events.c | 54 ++++++++++++++++++----------------
>  1 file changed, 28 insertions(+), 26 deletions(-)
>
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index 35e48fe56dfa..cf32abc496e9 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -1983,31 +1983,30 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li
>  	bool lhs_has_group, rhs_has_group;
>  
>  	/*
> -	 * First sort by grouping/leader. Read the leader idx only if the evsel
> -	 * is part of a group, by default ungrouped events will be sorted
> -	 * relative to grouped events based on where the first ungrouped event
> -	 * occurs. If both events don't have a group we want to fall-through to
> -	 * the arch specific sorting, that can reorder and fix things like
> -	 * Intel's topdown events.
> +	 * Get the indexes of the 2 events to sort. If the events are
> +	 * in groups then the leader's index is used otherwise the
> +	 * event's index is used. Events in the same group will be
> +	 * sorted by PMU name. An index may be forced for events that
> +	 * must be in the same group, namely Intel topdown events.
> +	 * When everything is identical arch specific sorting is used,
> +	 * that can reorder and fix things like Intel's topdown
> +	 * events.
>  	 */
> -	if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) {
> -		lhs_has_group = true;
> +	lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1;
> +	if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs))
> +		lhs_sort_idx = *force_grouped_idx;
> +	else if (lhs_has_group)
>  		lhs_sort_idx = lhs_core->leader->idx;
> -	} else {
> -		lhs_has_group = false;
> -		lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)
> -			? *force_grouped_idx
> -			: lhs_core->idx;
> -	}
> -	if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) {
> -		rhs_has_group = true;
> +	else
> +		lhs_sort_idx = lhs_core->idx;
> +	rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1;
> +
> +	if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs))
> +		rhs_sort_idx = *force_grouped_idx;
> +	else if (rhs_has_group)
>  		rhs_sort_idx = rhs_core->leader->idx;
> -	} else {
> -		rhs_has_group = false;
> -		rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)
> -			? *force_grouped_idx
> -			: rhs_core->idx;
> -	}
> +	else
> +		rhs_sort_idx = rhs_core->idx;
>  
>  	if (lhs_sort_idx != rhs_sort_idx)
>  		return lhs_sort_idx - rhs_sort_idx;
> @@ -2055,10 +2054,13 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
>  		 */
>  		pos->core.idx = idx++;
>  
> -		/* Remember an index to sort all forced grouped events together to. */
> -		if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 &&
> -		    arch_evsel__must_be_in_group(pos))
> -			force_grouped_idx = pos->core.idx;
> +		/*
> +		 * Remember an index to sort all forced grouped events
> +		 * together to. Use the group leader as some events
> +		 * must appear first within the group.
> +		 */
> +		if (force_grouped_idx == -1 && arch_evsel__must_be_in_group(pos))
> +			force_grouped_idx = pos_leader->core.idx;
>  	}
>  
>  	/* Sort events. */

Hi Ian,

With this fix, the topdown metrics sequence
"{instructions,slots},faults,topdown-retiring" indeed works on non-hybrid
platforms, like SPR, but it still fails on hybrid platforms.

Here is the result on Intel LNL platform.

./perf stat -e "{instructions,slots},faults,topdown-retiring" true
WARNING: events were regrouped to match PMUs

 Performance counter stats for 'true':

   *<not supported>*      cpu_core/topdown-retiring/u
           146,710      instructions:u
     <not counted>      cpu_core/slots/u                                      (0.00%)
     <not counted>      instructions:u                                        (0.00%)
                49      faults:u
           195,855      cpu_atom/topdown-retiring/u

       0.001367139 seconds time elapsed

       0.001402000 seconds user
       0.000000000 seconds sys

The "cpu_core/topdown-retiring/" event is incorrectly moved to the head and
becomes the group leader.

To thoroughly fix this issue on hybrid platform, we need an extra below
change.

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 91c2b2e2c6bd..1f7772d4db6e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2006,7 +2006,7 @@ static int evlist__cmp(void *_fg_idx, const struct
list_head *l, const struct li
                return lhs_sort_idx - rhs_sort_idx;

        /* Group by PMU if there is a group. Groups can't span PMUs. */
-       if (lhs_has_group && rhs_has_group) {
+       if (lhs_has_group || rhs_has_group) {
                lhs_pmu_name = lhs->group_pmu_name;
                rhs_pmu_name = rhs->group_pmu_name;
                ret = strcmp(lhs_pmu_name, rhs_pmu_name);

Besides, since we now support regrouping this new topdown event sequence, the
comments and tests need to be updated accordingly.

diff --git a/tools/perf/arch/x86/util/evlist.c
b/tools/perf/arch/x86/util/evlist.c
index 447a734e591c..8d7a7c4acd4b 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -39,28 +39,21 @@ int arch_evlist__cmp(const struct evsel *lhs, const
struct evsel *rhs)
         *         26,319,024      slots
         *          2,427,791      instructions
         *          2,683,508      topdown-retiring
-        *
-        * If slots event and topdown metrics events are not in same group, the
-        * topdown metrics events must be first event after the slots event
group,
-        * otherwise topdown metrics events can't be regrouped correctly, e.g.
-        *
-        * a. perf stat -e "{instructions,slots},cycles,topdown-retiring"
-C0 sleep 1
+        * e. slots event and metrics event are in a group and not adjacent
+        *    perf stat -e "{instructions,slots},cycles,topdown-retiring"
-C0 sleep 1
         *    WARNING: events were regrouped to match PMUs
-        *     Performance counter stats for 'CPU(s) 0':
-        *         17,923,134      slots
-        *          2,154,855      instructions
-        *          3,015,058      cycles
-        *    <not supported>      topdown-retiring
-        *
-        * If slots event and topdown metrics events are in two groups, the
group which
-        * has topdown metrics events must contain only the topdown metrics
event,
-        * otherwise topdown metrics event can't be regrouped correctly as
well, e.g.
-        *
-        * a. perf stat -e "{instructions,slots},{topdown-retiring,cycles}"
-C0 sleep 1
+        *    Performance counter stats for 'true':
+        *         78,452,058      slots
+        *         10,767,929      topdown-retiring
+        *          9,438,226      instructions
+        *         13,080,988      cycles
+        * f. slots event and metrics event are in two groups and not adjacent
+        *    perf stat -e "{instructions,slots},{cycles,topdown-retiring}"
-C0 sleep 1
         *    WARNING: events were regrouped to match PMUs
-        *    Error:
-        *    The sys_perf_event_open() syscall returned with 22 (Invalid
argument) for
-        *    event (topdown-retiring)
+        *         68,433,522      slots
+        *          8,856,102      topdown-retiring
+        *          7,791,494      instructions
+        *         11,469,513      cycles
         */
        if (topdown_sys_has_perf_metrics() &&
            (arch_evsel__must_be_in_group(lhs) ||
arch_evsel__must_be_in_group(rhs))) {
diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh
index 68323d636fb7..a1b847c16f07 100755
--- a/tools/perf/tests/shell/stat.sh
+++ b/tools/perf/tests/shell/stat.sh
@@ -97,6 +97,18 @@ test_topdown_groups() {
     err=1
     return
   fi
+  if perf stat -e '{instructions,slots},cycles,topdown-retiring' true 2>&1
| grep -E -q "<not supported>"
+  then
+    echo "Topdown event group test [Failed non-adjacent topdown metrics
group not move into slots group]"
+    err=1
+    return
+  fi
+  if perf stat -e '{instructions,slots},{cycles,topdown-retiring}' true
2>&1 | grep -E -q "<not supported>"
+  then
+    echo "Topdown event group test [Failed non-adjacent topdown metrics
group not merge into slots group]"
+    err=1
+    return
+  fi
   if perf stat -e '{instructions,r400,r8000}' true 2>&1 | grep -E -q "<not
supported>"
   then
     echo "Topdown event group test [Failed raw format slots not reordered
first]"

Thanks,

Dapeng Mi



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting
  2025-03-06  9:17   ` Mi, Dapeng
@ 2025-03-06 17:25     ` Ian Rogers
  0 siblings, 0 replies; 9+ messages in thread
From: Ian Rogers @ 2025-03-06 17:25 UTC (permalink / raw)
  To: Mi, Dapeng
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Kan Liang, James Clark, Dominique Martinet,
	Andi Kleen, linux-perf-users, linux-kernel, Thomas Falcon

On Thu, Mar 6, 2025 at 1:17 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>
>
> On 3/5/2025 4:37 PM, Ian Rogers wrote:
> > In the case of '{instructions,slots},faults,topdown-retiring' the
> > first event that must be grouped, slots, is ignored causing the
> > topdown-retiring event not to be adjacent to the group it needs to be
> > inserted into. Don't ignore the group members when computing the
> > force_grouped_index.
> >
> > Make the force_grouped_index be for the leader of the group it is
> > within and always use it first rather than a group leader index so
> > that topdown events may be sorted from one group into another.
> >
> > Reported-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> > Closes: https://lore.kernel.org/lkml/20250224083306.71813-2-dapeng1.mi@linux.intel.com/
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/util/parse-events.c | 54 ++++++++++++++++++----------------
> >  1 file changed, 28 insertions(+), 26 deletions(-)
> >
> > diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> > index 35e48fe56dfa..cf32abc496e9 100644
> > --- a/tools/perf/util/parse-events.c
> > +++ b/tools/perf/util/parse-events.c
> > @@ -1983,31 +1983,30 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li
> >       bool lhs_has_group, rhs_has_group;
> >
> >       /*
> > -      * First sort by grouping/leader. Read the leader idx only if the evsel
> > -      * is part of a group, by default ungrouped events will be sorted
> > -      * relative to grouped events based on where the first ungrouped event
> > -      * occurs. If both events don't have a group we want to fall-through to
> > -      * the arch specific sorting, that can reorder and fix things like
> > -      * Intel's topdown events.
> > +      * Get the indexes of the 2 events to sort. If the events are
> > +      * in groups then the leader's index is used otherwise the
> > +      * event's index is used. Events in the same group will be
> > +      * sorted by PMU name. An index may be forced for events that
> > +      * must be in the same group, namely Intel topdown events.
> > +      * When everything is identical arch specific sorting is used,
> > +      * that can reorder and fix things like Intel's topdown
> > +      * events.
> >        */
> > -     if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) {
> > -             lhs_has_group = true;
> > +     lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1;
> > +     if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs))
> > +             lhs_sort_idx = *force_grouped_idx;
> > +     else if (lhs_has_group)
> >               lhs_sort_idx = lhs_core->leader->idx;
> > -     } else {
> > -             lhs_has_group = false;
> > -             lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)
> > -                     ? *force_grouped_idx
> > -                     : lhs_core->idx;
> > -     }
> > -     if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) {
> > -             rhs_has_group = true;
> > +     else
> > +             lhs_sort_idx = lhs_core->idx;
> > +     rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1;
> > +
> > +     if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs))
> > +             rhs_sort_idx = *force_grouped_idx;
> > +     else if (rhs_has_group)
> >               rhs_sort_idx = rhs_core->leader->idx;
> > -     } else {
> > -             rhs_has_group = false;
> > -             rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)
> > -                     ? *force_grouped_idx
> > -                     : rhs_core->idx;
> > -     }
> > +     else
> > +             rhs_sort_idx = rhs_core->idx;
> >
> >       if (lhs_sort_idx != rhs_sort_idx)
> >               return lhs_sort_idx - rhs_sort_idx;
> > @@ -2055,10 +2054,13 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
> >                */
> >               pos->core.idx = idx++;
> >
> > -             /* Remember an index to sort all forced grouped events together to. */
> > -             if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 &&
> > -                 arch_evsel__must_be_in_group(pos))
> > -                     force_grouped_idx = pos->core.idx;
> > +             /*
> > +              * Remember an index to sort all forced grouped events
> > +              * together to. Use the group leader as some events
> > +              * must appear first within the group.
> > +              */
> > +             if (force_grouped_idx == -1 && arch_evsel__must_be_in_group(pos))
> > +                     force_grouped_idx = pos_leader->core.idx;
> >       }
> >
> >       /* Sort events. */
>
> Hi Ian,
>
> With this fix,  this topdown metrics sequence
> "{instructions,slots},faults,topdown-retiring" indeed works on non-hybrid
> platform, like SPR, but it still fails on hybrid platform.
>
> Here is the result on Intel LNL platform.
>
> ./perf stat -e "{instructions,slots},faults,topdown-retiring" true
> WARNING: events were regrouped to match PMUs
>
>  Performance counter stats for 'true':
>
>    *<not supported> *     cpu_core/topdown-retiring/u
>            146,710      instructions:u
>      <not counted>
> cpu_core/slots/u                                                        (0.00%)
>      <not counted>
> instructions:u                                                          (0.00%)
>                 49      faults:u
>            195,855      cpu_atom/topdown-retiring/u
>
>        0.001367139 seconds time elapsed
>
>        0.001402000 seconds user
>        0.000000000 seconds sys
>
> the "cpu_core/topdown-retiring/" event is incorrectly moved to the head and
> becomes the group leader.
>
> To thoroughly fix this issue on hybrid platform, we need an extra below
> change.
>
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index 91c2b2e2c6bd..1f7772d4db6e 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -2006,7 +2006,7 @@ static int evlist__cmp(void *_fg_idx, const struct
> list_head *l, const struct li
>                 return lhs_sort_idx - rhs_sort_idx;
>
>         /* Group by PMU if there is a group. Groups can't span PMUs. */
> -       if (lhs_has_group && rhs_has_group) {
> +       if (lhs_has_group || rhs_has_group) {
>                 lhs_pmu_name = lhs->group_pmu_name;
>                 rhs_pmu_name = rhs->group_pmu_name;
>                 ret = strcmp(lhs_pmu_name, rhs_pmu_name);
>
> Besides, since we now support regrouping this new topdown event
> sequence, the comments and tests need to be updated accordingly.

Thanks, I was able to reproduce the problem and agree with extending
the tests. I think this needs more comments so I'll update and repost
the patch with those.

Thanks,
Ian

> diff --git a/tools/perf/arch/x86/util/evlist.c
> b/tools/perf/arch/x86/util/evlist.c
> index 447a734e591c..8d7a7c4acd4b 100644
> --- a/tools/perf/arch/x86/util/evlist.c
> +++ b/tools/perf/arch/x86/util/evlist.c
> @@ -39,28 +39,21 @@ int arch_evlist__cmp(const struct evsel *lhs, const
> struct evsel *rhs)
>          *         26,319,024      slots
>          *          2,427,791      instructions
>          *          2,683,508      topdown-retiring
> -        *
> -        * If slots event and topdown metrics events are not in same group, the
> -        * topdown metrics events must be first event after the slots event
> group,
> -        * otherwise topdown metrics events can't be regrouped correctly, e.g.
> -        *
> -        * a. perf stat -e "{instructions,slots},cycles,topdown-retiring"
> -C0 sleep 1
> +        * e. slots event and metrics event are in a group and not adjacent
> +        *    perf stat -e "{instructions,slots},cycles,topdown-retiring"
> -C0 sleep 1
>          *    WARNING: events were regrouped to match PMUs
> -        *     Performance counter stats for 'CPU(s) 0':
> -        *         17,923,134      slots
> -        *          2,154,855      instructions
> -        *          3,015,058      cycles
> -        *    <not supported>      topdown-retiring
> -        *
> -        * If slots event and topdown metrics events are in two groups, the
> group which
> -        * has topdown metrics events must contain only the topdown metrics
> event,
> -        * otherwise topdown metrics event can't be regrouped correctly as
> well, e.g.
> -        *
> -        * a. perf stat -e "{instructions,slots},{topdown-retiring,cycles}"
> -C0 sleep 1
> +        *    Performance counter stats for 'true':
> +        *         78,452,058      slots
> +        *         10,767,929      topdown-retiring
> +        *          9,438,226      instructions
> +        *         13,080,988      cycles
> +        * f. slots event and metrics event are in two groups and not adjacent
> +        *    perf stat -e "{instructions,slots},{cycles,topdown-retiring}"
> -C0 sleep 1
>          *    WARNING: events were regrouped to match PMUs
> -        *    Error:
> -        *    The sys_perf_event_open() syscall returned with 22 (Invalid
> argument) for
> -        *    event (topdown-retiring)
> +        *         68,433,522      slots
> +        *          8,856,102      topdown-retiring
> +        *          7,791,494      instructions
> +        *         11,469,513      cycles
>          */
>         if (topdown_sys_has_perf_metrics() &&
>             (arch_evsel__must_be_in_group(lhs) ||
> arch_evsel__must_be_in_group(rhs))) {
> diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh
> index 68323d636fb7..a1b847c16f07 100755
> --- a/tools/perf/tests/shell/stat.sh
> +++ b/tools/perf/tests/shell/stat.sh
> @@ -97,6 +97,18 @@ test_topdown_groups() {
>      err=1
>      return
>    fi
> +  if perf stat -e '{instructions,slots},cycles,topdown-retiring' true 2>&1
> | grep -E -q "<not supported>"
> +  then
> +    echo "Topdown event group test [Failed non-adjacent topdown metrics
> group not move into slots group]"
> +    err=1
> +    return
> +  fi
> +  if perf stat -e '{instructions,slots},{cycles,topdown-retiring}' true
> 2>&1 | grep -E -q "<not supported>"
> +  then
> +    echo "Topdown event group test [Failed non-adjacent topdown metrics
> group not merge into slots group]"
> +    err=1
> +    return
> +  fi
>    if perf stat -e '{instructions,r400,r8000}' true 2>&1 | grep -E -q "<not
> supported>"
>    then
>      echo "Topdown event group test [Failed raw format slots not reordered
> first]"
>
> Thanks,
>
> Dapeng Mi
>
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2025-03-06 17:25 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-03-05  8:37 [PATCH v1 1/2] perf tools: Improve handling of hybrid PMUs in perf_event_attr__fprintf Ian Rogers
2025-03-05  8:37 ` [PATCH v1 2/2] perf parse-events: Corrections to topdown sorting Ian Rogers
2025-03-05 13:44   ` James Clark
2025-03-05 14:06     ` Ian Rogers
2025-03-05 15:59       ` James Clark
2025-03-06  9:17   ` Mi, Dapeng
2025-03-06 17:25     ` Ian Rogers
2025-03-05 11:06 ` [PATCH v1 1/2] perf tools: Improve handling of hybrid PMUs in perf_event_attr__fprintf James Clark
2025-03-05 21:39 ` Falcon, Thomas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).