linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [Patch v2 1/5] perf x86/topdown: Complete topdown slots/metrics events check
  2024-07-08 14:42 ` [Patch v2 1/5] perf x86/topdown: Complete topdown slots/metrics events check Dapeng Mi
@ 2024-07-08 13:28   ` Liang, Kan
  2024-07-09  1:58     ` Mi, Dapeng
  0 siblings, 1 reply; 16+ messages in thread
From: Liang, Kan @ 2024-07-08 13:28 UTC (permalink / raw)
  To: Dapeng Mi, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Ian Rogers, Adrian Hunter, Alexander Shishkin
  Cc: linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi



On 2024-07-08 10:42 a.m., Dapeng Mi wrote:
> It's not complete to check whether an event is a topdown slots or
> topdown metrics event by only comparing the event name since user
> may assign the event by RAW format, e.g.
> 
> perf stat -e '{instructions,cpu/r400/,cpu/r8300/}' sleep 1
> 
>  Performance counter stats for 'sleep 1':
> 
>      <not counted>      instructions
>      <not counted>      cpu/r400/
>    <not supported>      cpu/r8300/
> 
>        1.002917796 seconds time elapsed
> 
>        0.002955000 seconds user
>        0.000000000 seconds sys
> 
> The RAW format slots and topdown-be-bound events are not recognized and
> not regroup the events, and eventually cause error.
> 
> Thus add two helpers arch_is_topdown_slots()/arch_is_topdown_metrics()
> to detect whether an event is topdown slots/metrics event by comparing
> the event config directly, and use these two helpers to replace the
> original event name comparisons.
> 
> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> ---
>  tools/perf/arch/x86/util/evlist.c  |  8 +++---
>  tools/perf/arch/x86/util/evsel.c   |  3 ++-
>  tools/perf/arch/x86/util/topdown.c | 43 +++++++++++++++++++++++++++++-
>  tools/perf/arch/x86/util/topdown.h |  2 ++
>  4 files changed, 50 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
> index b1ce0c52d88d..332e8907f43e 100644
> --- a/tools/perf/arch/x86/util/evlist.c
> +++ b/tools/perf/arch/x86/util/evlist.c
> @@ -78,14 +78,14 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
>  	if (topdown_sys_has_perf_metrics() &&
>  	    (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) {
>  		/* Ensure the topdown slots comes first. */
> -		if (strcasestr(lhs->name, "slots") && !strcasestr(lhs->name, "uops_retired.slots"))
> +		if (arch_is_topdown_slots(lhs))
>  			return -1;
> -		if (strcasestr(rhs->name, "slots") && !strcasestr(rhs->name, "uops_retired.slots"))
> +		if (arch_is_topdown_slots(rhs))
>  			return 1;
>  		/* Followed by topdown events. */
> -		if (strcasestr(lhs->name, "topdown") && !strcasestr(rhs->name, "topdown"))
> +		if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs))
>  			return -1;
> -		if (!strcasestr(lhs->name, "topdown") && strcasestr(rhs->name, "topdown"))
> +		if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs))
>  			return 1;
>  	}
>  
> diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
> index 090d0f371891..181f2ba0bb2a 100644
> --- a/tools/perf/arch/x86/util/evsel.c
> +++ b/tools/perf/arch/x86/util/evsel.c
> @@ -6,6 +6,7 @@
>  #include "util/pmu.h"
>  #include "util/pmus.h"
>  #include "linux/string.h"
> +#include "topdown.h"
>  #include "evsel.h"
>  #include "util/debug.h"
>  #include "env.h"
> @@ -44,7 +45,7 @@ bool arch_evsel__must_be_in_group(const struct evsel *evsel)
>  	    strcasestr(evsel->name, "uops_retired.slots"))
>  		return false;
>  
> -	return strcasestr(evsel->name, "topdown") || strcasestr(evsel->name, "slots");
> +	return arch_is_topdown_metrics(evsel) || arch_is_topdown_slots(evsel);
>  }
>  
>  int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
> diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
> index 3f9a267d4501..e805065bb7e1 100644
> --- a/tools/perf/arch/x86/util/topdown.c
> +++ b/tools/perf/arch/x86/util/topdown.c
> @@ -32,6 +32,47 @@ bool topdown_sys_has_perf_metrics(void)
>  }
>  
>  #define TOPDOWN_SLOTS		0x0400
> +bool arch_is_topdown_slots(const struct evsel *evsel)
> +{
> +	if (evsel->core.attr.config == TOPDOWN_SLOTS)
> +		return true;
> +
> +	return false;
> +}
> +
> +static int compare_topdown_event(void *vstate, struct pmu_event_info *info)
> +{
> +	int *config = vstate;
> +	int event = 0;
> +	int umask = 0;
> +	char *str;
> +

The compare is only needed for the "topdown" event.
Check the name first before the sscanf and compare.

	if (!strcasestr(info->name, "topdown"))
		return 0;

> +	str = strcasestr(info->str, "event=");
> +	if (str)
> +		sscanf(str, "event=%x", &event);
> +
> +	str = strcasestr(info->str, "umask=");
> +	if (str)
> +		sscanf(str, "umask=%x", &umask);
> +
> +	if (strcasestr(info->name, "topdown") && event == 0 &&
> +	    *config == (event | umask << 8))
> +		return 1;
> +
> +	return 0;
> +}
> +
> +bool arch_is_topdown_metrics(const struct evsel *evsel)
> +{
> +	struct perf_pmu *pmu = evsel__find_pmu(evsel);
> +	int config = evsel->core.attr.config;
> +

The topdown events are only available for the core PMU.
You may want to return earlier for the !core PMUs.

	if (!pmu || !pmu->is_core)
		return false;

Thanks,
Kan
> +	if (pmu && perf_pmu__for_each_event(pmu, false, &config,
> +					    compare_topdown_event))
> +		return true;
> +
> +	return false;
> +}
>  
>  /*
>   * Check whether a topdown group supports sample-read.
> @@ -44,7 +85,7 @@ bool arch_topdown_sample_read(struct evsel *leader)
>  	if (!evsel__sys_has_perf_metrics(leader))
>  		return false;
>  
> -	if (leader->core.attr.config == TOPDOWN_SLOTS)
> +	if (arch_is_topdown_slots(leader))
>  		return true;
>  
>  	return false;
> diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h
> index 46bf9273e572..1bae9b1822d7 100644
> --- a/tools/perf/arch/x86/util/topdown.h
> +++ b/tools/perf/arch/x86/util/topdown.h
> @@ -3,5 +3,7 @@
>  #define _TOPDOWN_H 1
>  
>  bool topdown_sys_has_perf_metrics(void);
> +bool arch_is_topdown_slots(const struct evsel *evsel);
> +bool arch_is_topdown_metrics(const struct evsel *evsel);
>  
>  #endif

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Patch v2 5/5] perf tests: Add topdown events counting and sampling tests
  2024-07-08 14:42 ` [Patch v2 5/5] perf tests: Add topdown events counting and sampling tests Dapeng Mi
@ 2024-07-08 13:40   ` Liang, Kan
  2024-07-09  5:27     ` Mi, Dapeng
  0 siblings, 1 reply; 16+ messages in thread
From: Liang, Kan @ 2024-07-08 13:40 UTC (permalink / raw)
  To: Dapeng Mi, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Ian Rogers, Adrian Hunter, Alexander Shishkin
  Cc: linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi



On 2024-07-08 10:42 a.m., Dapeng Mi wrote:
> Add counting and leader sampling tests to verify topdown events including
> raw format can be reordered correctly.
> 
> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> ---
>  tools/perf/tests/shell/record.sh | 6 ++++++
>  tools/perf/tests/shell/stat.sh   | 6 ++++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
> index 8e3e66780fed..164f0cf9648e 100755
> --- a/tools/perf/tests/shell/record.sh
> +++ b/tools/perf/tests/shell/record.sh
> @@ -214,6 +214,12 @@ test_leader_sampling() {
>      index=$(($index+1))
>      prev_branches=$branches
>    done < $script_output

The topdown is a model specific feature and only be available for the
big core.

We need to check if topdown is supported before doing the test.

The same check in the test_topdown_groups() may be used here as well.

  if ! perf stat -e '{slots,topdown-retiring}' true > /dev/null 2>&1
  then
    echo "Topdown sampling read test [Skipped event parsing failed]"
    return
  fi

Thanks,
Kan

> +  if ! perf record -o "${perfdata}" -e "{instructions,slots,topdown-retiring}:S" true 2> /dev/null
> +  then
> +    echo "Leader sampling [Failed topdown events not reordered correctly]"
> +    err=1
> +    return
> +  fi
>    echo "Basic leader sampling test [Success]"
>  }
>  
> diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh
> index 3f1e67795490..092a7a2abcf8 100755
> --- a/tools/perf/tests/shell/stat.sh
> +++ b/tools/perf/tests/shell/stat.sh
> @@ -79,6 +79,12 @@ test_topdown_groups() {
>      err=1
>      return
>    fi
> +  if perf stat -e '{instructions,r400,r8000}' true 2>&1 | grep -E -q "<not supported>"
> +  then
> +    echo "Topdown event group test [Failed raw format slots not reordered first]"
> +    err=1
> +    return
> +  fi
>    echo "Topdown event group test [Success]"
>  }
>  

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Patch v2 0/5] Bug fixes on topdown events reordering
@ 2024-07-08 14:41 Dapeng Mi
  2024-07-08 14:42 ` [Patch v2 1/5] perf x86/topdown: Complete topdown slots/metrics events check Dapeng Mi
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: Dapeng Mi @ 2024-07-08 14:41 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Ian Rogers, Adrian Hunter, Alexander Shishkin,
	Kan Liang
  Cc: linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi, Dapeng Mi

Changes:
v1 -> v2"
  * Use event/umask code instead of event name to indentify if an event
    is a topdown slots/metric event (patch 1/5).
  * Add perf tests to validate topdown events reordering including raw
    format topdown events (patch 5/5).
  * Drop the v1 patch 3/4 which doesn't move slots event if no topdown
    metrics event in group.

Currently whether an event is a topdown slots/metric event is only
identified by comparing event name. It's inaccurate since topdown events
can be assigned by raw format and the event name is null in this case,
e.g.

perf stat -e '{instructions,cpu/r400/,cpu/r8300/}' sleep 1

 Performance counter stats for 'sleep 1':

     <not counted>      instructions
     <not counted>      cpu/r400/
   <not supported>      cpu/r8300/

       1.002917796 seconds time elapsed

       0.002955000 seconds user
       0.000000000 seconds sys

In this case slots and topdown-be-bound events are assigned by raw
format (slots:r400, topdown-be-bound:r8300) and they are not reordered
correctly.

The reason of dropping the patch "don't move slots event if no topdown
metric events in group" is that no any function issues but a warning is
introduced, and the cost of fixing this issue is expensive.

History:
  v1: https://lore.kernel.org/all/20240702224037.343958-1-dapeng1.mi@linux.intel.com/

Dapeng Mi (5):
  perf x86/topdown: Complete topdown slots/metrics events check
  perf x86/topdown: Correct leader selection with sample_read enabled
  perf x86/topdown: Don't move topdown metrics events when sorting
    events
  perf tests: Add leader sampling test in record tests
  perf tests: Add topdown events counting and sampling tests

 tools/perf/arch/x86/util/evlist.c  |  9 ++---
 tools/perf/arch/x86/util/evsel.c   |  3 +-
 tools/perf/arch/x86/util/topdown.c | 57 ++++++++++++++++++++++++++++--
 tools/perf/arch/x86/util/topdown.h |  2 ++
 tools/perf/tests/shell/record.sh   | 34 ++++++++++++++++++
 tools/perf/tests/shell/stat.sh     |  6 ++++
 6 files changed, 101 insertions(+), 10 deletions(-)


base-commit: 73e931504f8e0d42978bfcda37b323dbbd1afc08
-- 
2.40.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Patch v2 1/5] perf x86/topdown: Complete topdown slots/metrics events check
  2024-07-08 14:41 [Patch v2 0/5] Bug fixes on topdown events reordering Dapeng Mi
@ 2024-07-08 14:42 ` Dapeng Mi
  2024-07-08 13:28   ` Liang, Kan
  2024-07-08 14:42 ` [Patch v2 2/5] perf x86/topdown: Correct leader selection with sample_read enabled Dapeng Mi
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 16+ messages in thread
From: Dapeng Mi @ 2024-07-08 14:42 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Ian Rogers, Adrian Hunter, Alexander Shishkin,
	Kan Liang
  Cc: linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi, Dapeng Mi

It's not complete to check whether an event is a topdown slots or
topdown metrics event by only comparing the event name since user
may assign the event by RAW format, e.g.

perf stat -e '{instructions,cpu/r400/,cpu/r8300/}' sleep 1

 Performance counter stats for 'sleep 1':

     <not counted>      instructions
     <not counted>      cpu/r400/
   <not supported>      cpu/r8300/

       1.002917796 seconds time elapsed

       0.002955000 seconds user
       0.000000000 seconds sys

The RAW format slots and topdown-be-bound events are not recognized and
not regroup the events, and eventually cause error.

Thus add two helpers arch_is_topdown_slots()/arch_is_topdown_metrics()
to detect whether an event is topdown slots/metrics event by comparing
the event config directly, and use these two helpers to replace the
original event name comparisons.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 tools/perf/arch/x86/util/evlist.c  |  8 +++---
 tools/perf/arch/x86/util/evsel.c   |  3 ++-
 tools/perf/arch/x86/util/topdown.c | 43 +++++++++++++++++++++++++++++-
 tools/perf/arch/x86/util/topdown.h |  2 ++
 4 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index b1ce0c52d88d..332e8907f43e 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -78,14 +78,14 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
 	if (topdown_sys_has_perf_metrics() &&
 	    (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) {
 		/* Ensure the topdown slots comes first. */
-		if (strcasestr(lhs->name, "slots") && !strcasestr(lhs->name, "uops_retired.slots"))
+		if (arch_is_topdown_slots(lhs))
 			return -1;
-		if (strcasestr(rhs->name, "slots") && !strcasestr(rhs->name, "uops_retired.slots"))
+		if (arch_is_topdown_slots(rhs))
 			return 1;
 		/* Followed by topdown events. */
-		if (strcasestr(lhs->name, "topdown") && !strcasestr(rhs->name, "topdown"))
+		if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs))
 			return -1;
-		if (!strcasestr(lhs->name, "topdown") && strcasestr(rhs->name, "topdown"))
+		if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs))
 			return 1;
 	}
 
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index 090d0f371891..181f2ba0bb2a 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -6,6 +6,7 @@
 #include "util/pmu.h"
 #include "util/pmus.h"
 #include "linux/string.h"
+#include "topdown.h"
 #include "evsel.h"
 #include "util/debug.h"
 #include "env.h"
@@ -44,7 +45,7 @@ bool arch_evsel__must_be_in_group(const struct evsel *evsel)
 	    strcasestr(evsel->name, "uops_retired.slots"))
 		return false;
 
-	return strcasestr(evsel->name, "topdown") || strcasestr(evsel->name, "slots");
+	return arch_is_topdown_metrics(evsel) || arch_is_topdown_slots(evsel);
 }
 
 int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index 3f9a267d4501..e805065bb7e1 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -32,6 +32,47 @@ bool topdown_sys_has_perf_metrics(void)
 }
 
 #define TOPDOWN_SLOTS		0x0400
+bool arch_is_topdown_slots(const struct evsel *evsel)
+{
+	if (evsel->core.attr.config == TOPDOWN_SLOTS)
+		return true;
+
+	return false;
+}
+
+static int compare_topdown_event(void *vstate, struct pmu_event_info *info)
+{
+	int *config = vstate;
+	int event = 0;
+	int umask = 0;
+	char *str;
+
+	str = strcasestr(info->str, "event=");
+	if (str)
+		sscanf(str, "event=%x", &event);
+
+	str = strcasestr(info->str, "umask=");
+	if (str)
+		sscanf(str, "umask=%x", &umask);
+
+	if (strcasestr(info->name, "topdown") && event == 0 &&
+	    *config == (event | umask << 8))
+		return 1;
+
+	return 0;
+}
+
+bool arch_is_topdown_metrics(const struct evsel *evsel)
+{
+	struct perf_pmu *pmu = evsel__find_pmu(evsel);
+	int config = evsel->core.attr.config;
+
+	if (pmu && perf_pmu__for_each_event(pmu, false, &config,
+					    compare_topdown_event))
+		return true;
+
+	return false;
+}
 
 /*
  * Check whether a topdown group supports sample-read.
@@ -44,7 +85,7 @@ bool arch_topdown_sample_read(struct evsel *leader)
 	if (!evsel__sys_has_perf_metrics(leader))
 		return false;
 
-	if (leader->core.attr.config == TOPDOWN_SLOTS)
+	if (arch_is_topdown_slots(leader))
 		return true;
 
 	return false;
diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h
index 46bf9273e572..1bae9b1822d7 100644
--- a/tools/perf/arch/x86/util/topdown.h
+++ b/tools/perf/arch/x86/util/topdown.h
@@ -3,5 +3,7 @@
 #define _TOPDOWN_H 1
 
 bool topdown_sys_has_perf_metrics(void);
+bool arch_is_topdown_slots(const struct evsel *evsel);
+bool arch_is_topdown_metrics(const struct evsel *evsel);
 
 #endif
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [Patch v2 2/5] perf x86/topdown: Correct leader selection with sample_read enabled
  2024-07-08 14:41 [Patch v2 0/5] Bug fixes on topdown events reordering Dapeng Mi
  2024-07-08 14:42 ` [Patch v2 1/5] perf x86/topdown: Complete topdown slots/metrics events check Dapeng Mi
@ 2024-07-08 14:42 ` Dapeng Mi
  2024-07-08 14:42 ` [Patch v2 3/5] perf x86/topdown: Don't move topdown metrics events when sorting events Dapeng Mi
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: Dapeng Mi @ 2024-07-08 14:42 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Ian Rogers, Adrian Hunter, Alexander Shishkin,
	Kan Liang
  Cc: linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi, Dapeng Mi

Addresses an issue where, in the absence of a topdown metrics event
within a sampling group, the slots event was incorrectly bypassed as
the sampling leader when sample_read was enabled.

perf record -e '{slots,branches}:S' -c 10000 -vv sleep 1

In this case, the slots event should be sampled as leader but the
branches event is sampled in fact like the verbose output shows.

perf_event_attr:
  type                             4 (cpu)
  size                             168
  config                           0x400 (slots)
  sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
  read_format                      ID|GROUP|LOST
  disabled                         1
  sample_id_all                    1
  exclude_guest                    1
------------------------------------------------------------
sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
------------------------------------------------------------
perf_event_attr:
  type                             0 (PERF_TYPE_HARDWARE)
  size                             168
  config                           0x4 (PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
  { sample_period, sample_freq }   10000
  sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
  read_format                      ID|GROUP|LOST
  sample_id_all                    1
  exclude_guest                    1

The sample period of slots event instead of branches event is reset to
0.

This fix ensures the slots event remains the leader under these
conditions.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 tools/perf/arch/x86/util/topdown.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index e805065bb7e1..b9210f6486fd 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "api/fs/fs.h"
 #include "util/evsel.h"
+#include "util/evlist.h"
 #include "util/pmu.h"
 #include "util/pmus.h"
 #include "util/topdown.h"
@@ -82,11 +83,22 @@ bool arch_is_topdown_metrics(const struct evsel *evsel)
  */
 bool arch_topdown_sample_read(struct evsel *leader)
 {
+	struct evsel *evsel;
+
 	if (!evsel__sys_has_perf_metrics(leader))
 		return false;
 
-	if (arch_is_topdown_slots(leader))
-		return true;
+	if (!arch_is_topdown_slots(leader))
+		return false;
+
+	/*
+         * If slots event as leader event but no topdown metric events
+         * in group, slots event should still sample as leader.
+         */
+	evlist__for_each_entry(leader->evlist, evsel) {
+		if (evsel != leader && arch_is_topdown_metrics(evsel))
+			return true;
+	}
 
 	return false;
 }
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [Patch v2 3/5] perf x86/topdown: Don't move topdown metrics events when sorting events
  2024-07-08 14:41 [Patch v2 0/5] Bug fixes on topdown events reordering Dapeng Mi
  2024-07-08 14:42 ` [Patch v2 1/5] perf x86/topdown: Complete topdown slots/metrics events check Dapeng Mi
  2024-07-08 14:42 ` [Patch v2 2/5] perf x86/topdown: Correct leader selection with sample_read enabled Dapeng Mi
@ 2024-07-08 14:42 ` Dapeng Mi
  2024-07-08 15:08   ` Ian Rogers
  2024-07-08 14:42 ` [Patch v2 4/5] perf tests: Add leader sampling test in record tests Dapeng Mi
  2024-07-08 14:42 ` [Patch v2 5/5] perf tests: Add topdown events counting and sampling tests Dapeng Mi
  4 siblings, 1 reply; 16+ messages in thread
From: Dapeng Mi @ 2024-07-08 14:42 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Ian Rogers, Adrian Hunter, Alexander Shishkin,
	Kan Liang
  Cc: linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi, Dapeng Mi

when running below perf command, we say error is reported.

perf record -e "{slots,instructions,topdown-retiring}:S" -vv -C0 sleep 1

------------------------------------------------------------
perf_event_attr:
  type                             4 (cpu)
  size                             168
  config                           0x400 (slots)
  sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
  read_format                      ID|GROUP|LOST
  disabled                         1
  sample_id_all                    1
  exclude_guest                    1
------------------------------------------------------------
sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
------------------------------------------------------------
perf_event_attr:
  type                             4 (cpu)
  size                             168
  config                           0x8000 (topdown-retiring)
  { sample_period, sample_freq }   4000
  sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
  read_format                      ID|GROUP|LOST
  freq                             1
  sample_id_all                    1
  exclude_guest                    1
------------------------------------------------------------
sys_perf_event_open: pid -1  cpu 0  group_fd 5  flags 0x8
sys_perf_event_open failed, error -22

Error:
The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (topdown-retiring).

The reason of error is that the events are regrouped and
topdown-retiring event is moved to closely after the slots event and
topdown-retiring event needs to do the sampling, but Intel PMU driver
doesn't support to sample topdown metrics events.

For topdown metrics events, it just requires to be in a group which has
slots event as leader. It doesn't require topdown metrics event must be
closely after slots event. Thus it's a overkill to move topdown metrics
event closely after slots event in events regrouping and furtherly cause
the above issue.

Thus delete the code that moving topdown metrics events to fix the
issue.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 tools/perf/arch/x86/util/evlist.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index 332e8907f43e..6046981d61cf 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -82,11 +82,6 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
 			return -1;
 		if (arch_is_topdown_slots(rhs))
 			return 1;
-		/* Followed by topdown events. */
-		if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs))
-			return -1;
-		if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs))
-			return 1;
 	}
 
 	/* Default ordering by insertion index. */
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [Patch v2 4/5] perf tests: Add leader sampling test in record tests
  2024-07-08 14:41 [Patch v2 0/5] Bug fixes on topdown events reordering Dapeng Mi
                   ` (2 preceding siblings ...)
  2024-07-08 14:42 ` [Patch v2 3/5] perf x86/topdown: Don't move topdown metrics events when sorting events Dapeng Mi
@ 2024-07-08 14:42 ` Dapeng Mi
  2024-07-08 14:42 ` [Patch v2 5/5] perf tests: Add topdown events counting and sampling tests Dapeng Mi
  4 siblings, 0 replies; 16+ messages in thread
From: Dapeng Mi @ 2024-07-08 14:42 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Ian Rogers, Adrian Hunter, Alexander Shishkin,
	Kan Liang
  Cc: linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi, Dapeng Mi

Add leader sampling test to validate event counts are captured into
record and the count value is consistent.

Suggested-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 tools/perf/tests/shell/record.sh | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 3d1a7759a7b2..8e3e66780fed 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -17,6 +17,7 @@ skip_test_missing_symbol ${testsym}
 
 err=0
 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+script_output=$(mktemp /tmp/__perf_test.perf.data.XXXXX.script)
 testprog="perf test -w thloop"
 cpu_pmu_dir="/sys/bus/event_source/devices/cpu*"
 br_cntr_file="/caps/branch_counter_nr"
@@ -190,11 +191,38 @@ test_branch_counter() {
   echo "Basic branch counter test [Success]"
 }
 
+test_leader_sampling() {
+  echo "Basic leader sampling test"
+  if ! perf record -o "${perfdata}" -e "{branches,branches}:Su" perf test -w brstack 2> /dev/null
+  then
+    echo "Leader sampling [Failed record]"
+    err=1
+    return
+  fi
+  index=0
+  perf script -i "${perfdata}" > $script_output
+  while IFS= read -r line
+  do
+    # Check if the two branches counts are equal in each record
+    branches=$(echo $line | awk '{for(i=1;i<=NF;i++) if($i=="branches:") print $(i-1)}')
+    if [ $(($index%2)) -ne 0 ] && [ ${branches}x != ${prev_branches}x ]
+    then
+      echo "Leader sampling [Failed inconsistent branches count]"
+      err=1
+      return
+    fi
+    index=$(($index+1))
+    prev_branches=$branches
+  done < $script_output
+  echo "Basic leader sampling test [Success]"
+}
+
 test_per_thread
 test_register_capture
 test_system_wide
 test_workload
 test_branch_counter
+test_leader_sampling
 
 cleanup
 exit $err
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [Patch v2 5/5] perf tests: Add topdown events counting and sampling tests
  2024-07-08 14:41 [Patch v2 0/5] Bug fixes on topdown events reordering Dapeng Mi
                   ` (3 preceding siblings ...)
  2024-07-08 14:42 ` [Patch v2 4/5] perf tests: Add leader sampling test in record tests Dapeng Mi
@ 2024-07-08 14:42 ` Dapeng Mi
  2024-07-08 13:40   ` Liang, Kan
  4 siblings, 1 reply; 16+ messages in thread
From: Dapeng Mi @ 2024-07-08 14:42 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Ian Rogers, Adrian Hunter, Alexander Shishkin,
	Kan Liang
  Cc: linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi, Dapeng Mi

Add counting and leader sampling tests to verify topdown events including
raw format can be reordered correctly.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 tools/perf/tests/shell/record.sh | 6 ++++++
 tools/perf/tests/shell/stat.sh   | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 8e3e66780fed..164f0cf9648e 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -214,6 +214,12 @@ test_leader_sampling() {
     index=$(($index+1))
     prev_branches=$branches
   done < $script_output
+  if ! perf record -o "${perfdata}" -e "{instructions,slots,topdown-retiring}:S" true 2> /dev/null
+  then
+    echo "Leader sampling [Failed topdown events not reordered correctly]"
+    err=1
+    return
+  fi
   echo "Basic leader sampling test [Success]"
 }
 
diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh
index 3f1e67795490..092a7a2abcf8 100755
--- a/tools/perf/tests/shell/stat.sh
+++ b/tools/perf/tests/shell/stat.sh
@@ -79,6 +79,12 @@ test_topdown_groups() {
     err=1
     return
   fi
+  if perf stat -e '{instructions,r400,r8000}' true 2>&1 | grep -E -q "<not supported>"
+  then
+    echo "Topdown event group test [Failed raw format slots not reordered first]"
+    err=1
+    return
+  fi
   echo "Topdown event group test [Success]"
 }
 
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [Patch v2 3/5] perf x86/topdown: Don't move topdown metrics events when sorting events
  2024-07-08 14:42 ` [Patch v2 3/5] perf x86/topdown: Don't move topdown metrics events when sorting events Dapeng Mi
@ 2024-07-08 15:08   ` Ian Rogers
  2024-07-09  4:17     ` Mi, Dapeng
  0 siblings, 1 reply; 16+ messages in thread
From: Ian Rogers @ 2024-07-08 15:08 UTC (permalink / raw)
  To: Dapeng Mi
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Adrian Hunter, Alexander Shishkin, Kan Liang,
	linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi

On Mon, Jul 8, 2024 at 12:40 AM Dapeng Mi <dapeng1.mi@linux.intel.com> wrote:
>
> when running below perf command, we say error is reported.
>
> perf record -e "{slots,instructions,topdown-retiring}:S" -vv -C0 sleep 1
>
> ------------------------------------------------------------
> perf_event_attr:
>   type                             4 (cpu)
>   size                             168
>   config                           0x400 (slots)
>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
>   read_format                      ID|GROUP|LOST
>   disabled                         1
>   sample_id_all                    1
>   exclude_guest                    1
> ------------------------------------------------------------
> sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
> ------------------------------------------------------------
> perf_event_attr:
>   type                             4 (cpu)
>   size                             168
>   config                           0x8000 (topdown-retiring)
>   { sample_period, sample_freq }   4000
>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
>   read_format                      ID|GROUP|LOST
>   freq                             1
>   sample_id_all                    1
>   exclude_guest                    1
> ------------------------------------------------------------
> sys_perf_event_open: pid -1  cpu 0  group_fd 5  flags 0x8
> sys_perf_event_open failed, error -22
>
> Error:
> The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (topdown-retiring).
>
> The reason of error is that the events are regrouped and
> topdown-retiring event is moved to closely after the slots event and
> topdown-retiring event needs to do the sampling, but Intel PMU driver
> doesn't support to sample topdown metrics events.
>
> For topdown metrics events, it just requires to be in a group which has
> slots event as leader. It doesn't require topdown metrics event must be
> closely after slots event. Thus it's a overkill to move topdown metrics
> event closely after slots event in events regrouping and furtherly cause
> the above issue.
>
> Thus delete the code that moving topdown metrics events to fix the
> issue.

I think this is wrong. The topdown events may not be in a group, such
cases can come from metrics due to grouping constraints, and so they
must be sorted together so that they may be gathered into a group to
avoid the perf event opens failing for ungrouped topdown events. I'm
not understanding what these patches are trying to do, if you want to
prioritize the event for leader sampling why not modify it to compare
first?

Thanks,
Ian

> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> ---
>  tools/perf/arch/x86/util/evlist.c | 5 -----
>  1 file changed, 5 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
> index 332e8907f43e..6046981d61cf 100644
> --- a/tools/perf/arch/x86/util/evlist.c
> +++ b/tools/perf/arch/x86/util/evlist.c
> @@ -82,11 +82,6 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
>                         return -1;
>                 if (arch_is_topdown_slots(rhs))
>                         return 1;
> -               /* Followed by topdown events. */
> -               if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs))
> -                       return -1;
> -               if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs))
> -                       return 1;
>         }
>
>         /* Default ordering by insertion index. */
> --
> 2.40.1
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Patch v2 1/5] perf x86/topdown: Complete topdown slots/metrics events check
  2024-07-08 13:28   ` Liang, Kan
@ 2024-07-09  1:58     ` Mi, Dapeng
  0 siblings, 0 replies; 16+ messages in thread
From: Mi, Dapeng @ 2024-07-09  1:58 UTC (permalink / raw)
  To: Liang, Kan, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Ian Rogers, Adrian Hunter, Alexander Shishkin
  Cc: linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi


On 7/8/2024 9:28 PM, Liang, Kan wrote:
>
> On 2024-07-08 10:42 a.m., Dapeng Mi wrote:
>> It's not complete to check whether an event is a topdown slots or
>> topdown metrics event by only comparing the event name since user
>> may assign the event by RAW format, e.g.
>>
>> perf stat -e '{instructions,cpu/r400/,cpu/r8300/}' sleep 1
>>
>>  Performance counter stats for 'sleep 1':
>>
>>      <not counted>      instructions
>>      <not counted>      cpu/r400/
>>    <not supported>      cpu/r8300/
>>
>>        1.002917796 seconds time elapsed
>>
>>        0.002955000 seconds user
>>        0.000000000 seconds sys
>>
>> The RAW format slots and topdown-be-bound events are not recognized and
>> not regroup the events, and eventually cause error.
>>
>> Thus add two helpers arch_is_topdown_slots()/arch_is_topdown_metrics()
>> to detect whether an event is topdown slots/metrics event by comparing
>> the event config directly, and use these two helpers to replace the
>> original event name comparisons.
>>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>> ---
>>  tools/perf/arch/x86/util/evlist.c  |  8 +++---
>>  tools/perf/arch/x86/util/evsel.c   |  3 ++-
>>  tools/perf/arch/x86/util/topdown.c | 43 +++++++++++++++++++++++++++++-
>>  tools/perf/arch/x86/util/topdown.h |  2 ++
>>  4 files changed, 50 insertions(+), 6 deletions(-)
>>
>> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
>> index b1ce0c52d88d..332e8907f43e 100644
>> --- a/tools/perf/arch/x86/util/evlist.c
>> +++ b/tools/perf/arch/x86/util/evlist.c
>> @@ -78,14 +78,14 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
>>  	if (topdown_sys_has_perf_metrics() &&
>>  	    (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) {
>>  		/* Ensure the topdown slots comes first. */
>> -		if (strcasestr(lhs->name, "slots") && !strcasestr(lhs->name, "uops_retired.slots"))
>> +		if (arch_is_topdown_slots(lhs))
>>  			return -1;
>> -		if (strcasestr(rhs->name, "slots") && !strcasestr(rhs->name, "uops_retired.slots"))
>> +		if (arch_is_topdown_slots(rhs))
>>  			return 1;
>>  		/* Followed by topdown events. */
>> -		if (strcasestr(lhs->name, "topdown") && !strcasestr(rhs->name, "topdown"))
>> +		if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs))
>>  			return -1;
>> -		if (!strcasestr(lhs->name, "topdown") && strcasestr(rhs->name, "topdown"))
>> +		if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs))
>>  			return 1;
>>  	}
>>  
>> diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
>> index 090d0f371891..181f2ba0bb2a 100644
>> --- a/tools/perf/arch/x86/util/evsel.c
>> +++ b/tools/perf/arch/x86/util/evsel.c
>> @@ -6,6 +6,7 @@
>>  #include "util/pmu.h"
>>  #include "util/pmus.h"
>>  #include "linux/string.h"
>> +#include "topdown.h"
>>  #include "evsel.h"
>>  #include "util/debug.h"
>>  #include "env.h"
>> @@ -44,7 +45,7 @@ bool arch_evsel__must_be_in_group(const struct evsel *evsel)
>>  	    strcasestr(evsel->name, "uops_retired.slots"))
>>  		return false;
>>  
>> -	return strcasestr(evsel->name, "topdown") || strcasestr(evsel->name, "slots");
>> +	return arch_is_topdown_metrics(evsel) || arch_is_topdown_slots(evsel);
>>  }
>>  
>>  int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
>> diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
>> index 3f9a267d4501..e805065bb7e1 100644
>> --- a/tools/perf/arch/x86/util/topdown.c
>> +++ b/tools/perf/arch/x86/util/topdown.c
>> @@ -32,6 +32,47 @@ bool topdown_sys_has_perf_metrics(void)
>>  }
>>  
>>  #define TOPDOWN_SLOTS		0x0400
>> +bool arch_is_topdown_slots(const struct evsel *evsel)
>> +{
>> +	if (evsel->core.attr.config == TOPDOWN_SLOTS)
>> +		return true;
>> +
>> +	return false;
>> +}
>> +
>> +static int compare_topdown_event(void *vstate, struct pmu_event_info *info)
>> +{
>> +	int *config = vstate;
>> +	int event = 0;
>> +	int umask = 0;
>> +	char *str;
>> +
> The compare is only needed for the "topdown" event.
> Check the name first before the sscanf and compare.
>
> 	if (!strcasestr(info->name, "topdown"))
> 		return 0;

Yes, thanks.


>> +	str = strcasestr(info->str, "event=");
>> +	if (str)
>> +		sscanf(str, "event=%x", &event);
>> +
>> +	str = strcasestr(info->str, "umask=");
>> +	if (str)
>> +		sscanf(str, "umask=%x", &umask);
>> +
>> +	if (strcasestr(info->name, "topdown") && event == 0 &&
>> +	    *config == (event | umask << 8))
>> +		return 1;
>> +
>> +	return 0;
>> +}
>> +
>> +bool arch_is_topdown_metrics(const struct evsel *evsel)
>> +{
>> +	struct perf_pmu *pmu = evsel__find_pmu(evsel);
>> +	int config = evsel->core.attr.config;
>> +
> The topdown events are only available for the core PMU.
> You may want to return earlier for the !core PMUs.
>
> 	if (!pmu || !pmu->is_core)
> 		return false;

Sure. Thanks.


>
> Thanks,
> Kan
>> +	if (pmu && perf_pmu__for_each_event(pmu, false, &config,
>> +					    compare_topdown_event))
>> +		return true;
>> +
>> +	return false;
>> +}
>>  
>>  /*
>>   * Check whether a topdown group supports sample-read.
>> @@ -44,7 +85,7 @@ bool arch_topdown_sample_read(struct evsel *leader)
>>  	if (!evsel__sys_has_perf_metrics(leader))
>>  		return false;
>>  
>> -	if (leader->core.attr.config == TOPDOWN_SLOTS)
>> +	if (arch_is_topdown_slots(leader))
>>  		return true;
>>  
>>  	return false;
>> diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h
>> index 46bf9273e572..1bae9b1822d7 100644
>> --- a/tools/perf/arch/x86/util/topdown.h
>> +++ b/tools/perf/arch/x86/util/topdown.h
>> @@ -3,5 +3,7 @@
>>  #define _TOPDOWN_H 1
>>  
>>  bool topdown_sys_has_perf_metrics(void);
>> +bool arch_is_topdown_slots(const struct evsel *evsel);
>> +bool arch_is_topdown_metrics(const struct evsel *evsel);
>>  
>>  #endif

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Patch v2 3/5] perf x86/topdown: Don't move topdown metrics events when sorting events
  2024-07-08 15:08   ` Ian Rogers
@ 2024-07-09  4:17     ` Mi, Dapeng
  2024-07-09 22:37       ` Ian Rogers
  0 siblings, 1 reply; 16+ messages in thread
From: Mi, Dapeng @ 2024-07-09  4:17 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Adrian Hunter, Alexander Shishkin, Kan Liang,
	linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi


On 7/8/2024 11:08 PM, Ian Rogers wrote:
> On Mon, Jul 8, 2024 at 12:40 AM Dapeng Mi <dapeng1.mi@linux.intel.com> wrote:
>> when running below perf command, we say error is reported.
>>
>> perf record -e "{slots,instructions,topdown-retiring}:S" -vv -C0 sleep 1
>>
>> ------------------------------------------------------------
>> perf_event_attr:
>>   type                             4 (cpu)
>>   size                             168
>>   config                           0x400 (slots)
>>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
>>   read_format                      ID|GROUP|LOST
>>   disabled                         1
>>   sample_id_all                    1
>>   exclude_guest                    1
>> ------------------------------------------------------------
>> sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
>> ------------------------------------------------------------
>> perf_event_attr:
>>   type                             4 (cpu)
>>   size                             168
>>   config                           0x8000 (topdown-retiring)
>>   { sample_period, sample_freq }   4000
>>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
>>   read_format                      ID|GROUP|LOST
>>   freq                             1
>>   sample_id_all                    1
>>   exclude_guest                    1
>> ------------------------------------------------------------
>> sys_perf_event_open: pid -1  cpu 0  group_fd 5  flags 0x8
>> sys_perf_event_open failed, error -22
>>
>> Error:
>> The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (topdown-retiring).
>>
>> The reason of error is that the events are regrouped and
>> topdown-retiring event is moved to closely after the slots event and
>> topdown-retiring event needs to do the sampling, but Intel PMU driver
>> doesn't support to sample topdown metrics events.
>>
>> For topdown metrics events, it just requires to be in a group which has
>> slots event as leader. It doesn't require topdown metrics event must be
>> closely after slots event. Thus it's a overkill to move topdown metrics
>> event closely after slots event in events regrouping and furtherly cause
>> the above issue.
>>
>> Thus delete the code that moving topdown metrics events to fix the
>> issue.
> I think this is wrong. The topdown events may not be in a group, such
> cases can come from metrics due to grouping constraints, and so they
> must be sorted together so that they may be gathered into a group to
> avoid the perf event opens failing for ungrouped topdown events. I'm
> not understanding what these patches are trying to do, if you want to
> prioritize the event for leader sampling why not modify it to compare

Per my understanding, this change doesn't break anything. The events
regrouping can be divided into below several cases.

a. all events in a group

perf stat -e "{instructions,topdown-retiring,slots}" -C0 sleep 1
WARNING: events were regrouped to match PMUs

 Performance counter stats for 'CPU(s) 0':

        15,066,240      slots
         1,899,760      instructions
         2,126,998      topdown-retiring

       1.045783464 seconds time elapsed

In this case, slots event would be adjusted as the leader event and all
events are still in same group.

b. all events not in a group

perf stat -e "instructions,topdown-retiring,slots" -C0 sleep 1
WARNING: events were regrouped to match PMUs

 Performance counter stats for 'CPU(s) 0':

         2,045,561      instructions
        17,108,370      slots
         2,281,116      topdown-retiring

       1.045639284 seconds time elapsed

In this case, slots and topdown-retiring are placed into a group and slots
is the group leader. instructions event is outside the group.

c. slots event in group but topdown metric events outside the group

perf stat -e "{instructions,slots},topdown-retiring"  -C0 sleep 1
WARNING: events were regrouped to match PMUs

 Performance counter stats for 'CPU(s) 0':

        20,323,878      slots
         2,634,884      instructions
         3,028,656      topdown-retiring

       1.045076380 seconds time elapsed

In this case, topdown-retiring event is placed into previous group and
slots is adjusted to leader event.

d. multiple event groups

perf stat -e "{instructions,slots},{topdown-retiring}"  -C0 sleep 1
WARNING: events were regrouped to match PMUs

 Performance counter stats for 'CPU(s) 0':

        26,319,024      slots
         2,427,791      instructions
         2,683,508      topdown-retiring

       1.045495830 seconds time elapsed

In this case, the two groups are merged to one group and slots event is
adjusted as leader.

The key point of this patch is that it's unnecessary to move topdown
metrics events closely after slots event. It's a overkill since Intel core
PMU driver doesn't require that. Intel PMU driver just requires topdown
metrics events are in a group where slots event is the group leader, and
worse the movement for topdown metrics events causes the issue in the
commit message mentioned.

This patch doesn't block to regroup topdown metrics event. It just removes
the unnecessary movement for topdown metrics events.


> first?
>
> Thanks,
> Ian
>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>> ---
>>  tools/perf/arch/x86/util/evlist.c | 5 -----
>>  1 file changed, 5 deletions(-)
>>
>> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
>> index 332e8907f43e..6046981d61cf 100644
>> --- a/tools/perf/arch/x86/util/evlist.c
>> +++ b/tools/perf/arch/x86/util/evlist.c
>> @@ -82,11 +82,6 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
>>                         return -1;
>>                 if (arch_is_topdown_slots(rhs))
>>                         return 1;
>> -               /* Followed by topdown events. */
>> -               if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs))
>> -                       return -1;
>> -               if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs))
>> -                       return 1;
>>         }
>>
>>         /* Default ordering by insertion index. */
>> --
>> 2.40.1
>>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Patch v2 5/5] perf tests: Add topdown events counting and sampling tests
  2024-07-08 13:40   ` Liang, Kan
@ 2024-07-09  5:27     ` Mi, Dapeng
  0 siblings, 0 replies; 16+ messages in thread
From: Mi, Dapeng @ 2024-07-09  5:27 UTC (permalink / raw)
  To: Liang, Kan, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Ian Rogers, Adrian Hunter, Alexander Shishkin
  Cc: linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi


On 7/8/2024 9:40 PM, Liang, Kan wrote:
>
> On 2024-07-08 10:42 a.m., Dapeng Mi wrote:
>> Add counting and leader sampling tests to verify topdown events including
>> raw format can be reordered correctly.
>>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>> ---
>>  tools/perf/tests/shell/record.sh | 6 ++++++
>>  tools/perf/tests/shell/stat.sh   | 6 ++++++
>>  2 files changed, 12 insertions(+)
>>
>> diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
>> index 8e3e66780fed..164f0cf9648e 100755
>> --- a/tools/perf/tests/shell/record.sh
>> +++ b/tools/perf/tests/shell/record.sh
>> @@ -214,6 +214,12 @@ test_leader_sampling() {
>>      index=$(($index+1))
>>      prev_branches=$branches
>>    done < $script_output
> The topdown is a model specific feature and only be available for the
> big core.
>
> We need to check if topdown is supported before doing the test.
>
> The same check in the test_topdown_groups() may be used here as well.
>
>   if ! perf stat -e '{slots,topdown-retiring}' true > /dev/null 2>&1
>   then
>     echo "Topdown sampling read test [Skipped event parsing failed]"
>     return
>   fi

Sure. Thanks.

>
> Thanks,
> Kan
>
>> +  if ! perf record -o "${perfdata}" -e "{instructions,slots,topdown-retiring}:S" true 2> /dev/null
>> +  then
>> +    echo "Leader sampling [Failed topdown events not reordered correctly]"
>> +    err=1
>> +    return
>> +  fi
>>    echo "Basic leader sampling test [Success]"
>>  }
>>  
>> diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh
>> index 3f1e67795490..092a7a2abcf8 100755
>> --- a/tools/perf/tests/shell/stat.sh
>> +++ b/tools/perf/tests/shell/stat.sh
>> @@ -79,6 +79,12 @@ test_topdown_groups() {
>>      err=1
>>      return
>>    fi
>> +  if perf stat -e '{instructions,r400,r8000}' true 2>&1 | grep -E -q "<not supported>"
>> +  then
>> +    echo "Topdown event group test [Failed raw format slots not reordered first]"
>> +    err=1
>> +    return
>> +  fi
>>    echo "Topdown event group test [Success]"
>>  }
>>  

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Patch v2 3/5] perf x86/topdown: Don't move topdown metrics events when sorting events
  2024-07-09  4:17     ` Mi, Dapeng
@ 2024-07-09 22:37       ` Ian Rogers
  2024-07-10  9:40         ` Mi, Dapeng
  0 siblings, 1 reply; 16+ messages in thread
From: Ian Rogers @ 2024-07-09 22:37 UTC (permalink / raw)
  To: Mi, Dapeng
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Adrian Hunter, Alexander Shishkin, Kan Liang,
	linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi

On Mon, Jul 8, 2024 at 9:18 PM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>
>
> On 7/8/2024 11:08 PM, Ian Rogers wrote:
> > On Mon, Jul 8, 2024 at 12:40 AM Dapeng Mi <dapeng1.mi@linux.intel.com> wrote:
> >> when running below perf command, we say error is reported.
> >>
> >> perf record -e "{slots,instructions,topdown-retiring}:S" -vv -C0 sleep 1
> >>
> >> ------------------------------------------------------------
> >> perf_event_attr:
> >>   type                             4 (cpu)
> >>   size                             168
> >>   config                           0x400 (slots)
> >>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
> >>   read_format                      ID|GROUP|LOST
> >>   disabled                         1
> >>   sample_id_all                    1
> >>   exclude_guest                    1
> >> ------------------------------------------------------------
> >> sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
> >> ------------------------------------------------------------
> >> perf_event_attr:
> >>   type                             4 (cpu)
> >>   size                             168
> >>   config                           0x8000 (topdown-retiring)
> >>   { sample_period, sample_freq }   4000
> >>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
> >>   read_format                      ID|GROUP|LOST
> >>   freq                             1
> >>   sample_id_all                    1
> >>   exclude_guest                    1
> >> ------------------------------------------------------------
> >> sys_perf_event_open: pid -1  cpu 0  group_fd 5  flags 0x8
> >> sys_perf_event_open failed, error -22
> >>
> >> Error:
> >> The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (topdown-retiring).
> >>
> >> The reason of error is that the events are regrouped and
> >> topdown-retiring event is moved to closely after the slots event and
> >> topdown-retiring event needs to do the sampling, but Intel PMU driver
> >> doesn't support to sample topdown metrics events.
> >>
> >> For topdown metrics events, it just requires to be in a group which has
> >> slots event as leader. It doesn't require topdown metrics event must be
> >> closely after slots event. Thus it's a overkill to move topdown metrics
> >> event closely after slots event in events regrouping and furtherly cause
> >> the above issue.
> >>
> >> Thus delete the code that moving topdown metrics events to fix the
> >> issue.
> > I think this is wrong. The topdown events may not be in a group, such
> > cases can come from metrics due to grouping constraints, and so they
> > must be sorted together so that they may be gathered into a group to
> > avoid the perf event opens failing for ungrouped topdown events. I'm
> > not understanding what these patches are trying to do, if you want to
> > prioritize the event for leader sampling why not modify it to compare
>
> Per my understanding, this change doesn't break anything. The events
> regrouping can be divided into below several cases.
>
> a. all events in a group
>
> perf stat -e "{instructions,topdown-retiring,slots}" -C0 sleep 1
> WARNING: events were regrouped to match PMUs
>
>  Performance counter stats for 'CPU(s) 0':
>
>         15,066,240      slots
>          1,899,760      instructions
>          2,126,998      topdown-retiring
>
>        1.045783464 seconds time elapsed
>
> In this case, slots event would be adjusted as the leader event and all
> events are still in same group.
>
> b. all events not in a group
>
> perf stat -e "instructions,topdown-retiring,slots" -C0 sleep 1
> WARNING: events were regrouped to match PMUs
>
>  Performance counter stats for 'CPU(s) 0':
>
>          2,045,561      instructions
>         17,108,370      slots
>          2,281,116      topdown-retiring
>
>        1.045639284 seconds time elapsed
>
> In this case, slots and topdown-retiring are placed into a group and slots
> is the group leader. instructions event is outside the group.
>
> c. slots event in group but topdown metric events outside the group
>
> perf stat -e "{instructions,slots},topdown-retiring"  -C0 sleep 1
> WARNING: events were regrouped to match PMUs
>
>  Performance counter stats for 'CPU(s) 0':
>
>         20,323,878      slots
>          2,634,884      instructions
>          3,028,656      topdown-retiring
>
>        1.045076380 seconds time elapsed
>
> In this case, topdown-retiring event is placed into previous group and
> slots is adjusted to leader event.
>
> d. multiple event groups
>
> perf stat -e "{instructions,slots},{topdown-retiring}"  -C0 sleep 1
> WARNING: events were regrouped to match PMUs
>
>  Performance counter stats for 'CPU(s) 0':
>
>         26,319,024      slots
>          2,427,791      instructions
>          2,683,508      topdown-retiring
>
>        1.045495830 seconds time elapsed
>
> In this case, the two groups are merged to one group and slots event is
> adjusted as leader.
>
> The key point of this patch is that it's unnecessary to move topdown
> metrics events closely after slots event. It's a overkill since Intel core
> PMU driver doesn't require that. Intel PMU driver just requires topdown
> metrics events are in a group where slots event is the group leader, and
> worse the movement for topdown metrics events causes the issue in the
> commit message mentioned.
>
> This patch doesn't block to regroup topdown metrics event. It just removes
> the unnecessary movement for topdown metrics events.

But you will get the same behavior because of the non-arch dependent
force group index - I guess you don't care as the sample read only
happens when you have a group.

I'm thinking of cases like (which admittedly is broken):
```
$ perf stat -e "{slots,instructions},cycles,topdown-fe-bound" -a sleep 0.1
[sudo] password for irogers:

Performance counter stats for 'system wide':

    2,589,345,900      slots
      852,492,838      instructions
      583,525,372      cycles
  <not supported>      topdown-fe-bound

      0.103930790 seconds time elapsed
```
As the slots event is grouped there's no force group index on it, we
want to shuffle the topdown-fe-bound into the group so we want it to
compare as less than cycles - ie we're comparing topdown events with
non topdown events and trying to shuffle the topdown events first.

Thanks,
Ian



>
> > first?
> >
> > Thanks,
> > Ian
> >
> >> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> >> ---
> >>  tools/perf/arch/x86/util/evlist.c | 5 -----
> >>  1 file changed, 5 deletions(-)
> >>
> >> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
> >> index 332e8907f43e..6046981d61cf 100644
> >> --- a/tools/perf/arch/x86/util/evlist.c
> >> +++ b/tools/perf/arch/x86/util/evlist.c
> >> @@ -82,11 +82,6 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
> >>                         return -1;
> >>                 if (arch_is_topdown_slots(rhs))
> >>                         return 1;
> >> -               /* Followed by topdown events. */
> >> -               if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs))
> >> -                       return -1;
> >> -               if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs))
> >> -                       return 1;
> >>         }
> >>
> >>         /* Default ordering by insertion index. */
> >> --
> >> 2.40.1
> >>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Patch v2 3/5] perf x86/topdown: Don't move topdown metrics events when sorting events
  2024-07-09 22:37       ` Ian Rogers
@ 2024-07-10  9:40         ` Mi, Dapeng
  2024-07-10 15:07           ` Ian Rogers
  0 siblings, 1 reply; 16+ messages in thread
From: Mi, Dapeng @ 2024-07-10  9:40 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Adrian Hunter, Alexander Shishkin, Kan Liang,
	linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi


On 7/10/2024 6:37 AM, Ian Rogers wrote:
> On Mon, Jul 8, 2024 at 9:18 PM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>>
>> On 7/8/2024 11:08 PM, Ian Rogers wrote:
>>> On Mon, Jul 8, 2024 at 12:40 AM Dapeng Mi <dapeng1.mi@linux.intel.com> wrote:
>>>> when running below perf command, we say error is reported.
>>>>
>>>> perf record -e "{slots,instructions,topdown-retiring}:S" -vv -C0 sleep 1
>>>>
>>>> ------------------------------------------------------------
>>>> perf_event_attr:
>>>>   type                             4 (cpu)
>>>>   size                             168
>>>>   config                           0x400 (slots)
>>>>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
>>>>   read_format                      ID|GROUP|LOST
>>>>   disabled                         1
>>>>   sample_id_all                    1
>>>>   exclude_guest                    1
>>>> ------------------------------------------------------------
>>>> sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
>>>> ------------------------------------------------------------
>>>> perf_event_attr:
>>>>   type                             4 (cpu)
>>>>   size                             168
>>>>   config                           0x8000 (topdown-retiring)
>>>>   { sample_period, sample_freq }   4000
>>>>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
>>>>   read_format                      ID|GROUP|LOST
>>>>   freq                             1
>>>>   sample_id_all                    1
>>>>   exclude_guest                    1
>>>> ------------------------------------------------------------
>>>> sys_perf_event_open: pid -1  cpu 0  group_fd 5  flags 0x8
>>>> sys_perf_event_open failed, error -22
>>>>
>>>> Error:
>>>> The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (topdown-retiring).
>>>>
>>>> The reason of error is that the events are regrouped and
>>>> topdown-retiring event is moved to closely after the slots event and
>>>> topdown-retiring event needs to do the sampling, but Intel PMU driver
>>>> doesn't support to sample topdown metrics events.
>>>>
>>>> For topdown metrics events, it just requires to be in a group which has
>>>> slots event as leader. It doesn't require topdown metrics event must be
>>>> closely after slots event. Thus it's a overkill to move topdown metrics
>>>> event closely after slots event in events regrouping and furtherly cause
>>>> the above issue.
>>>>
>>>> Thus delete the code that moving topdown metrics events to fix the
>>>> issue.
>>> I think this is wrong. The topdown events may not be in a group, such
>>> cases can come from metrics due to grouping constraints, and so they
>>> must be sorted together so that they may be gathered into a group to
>>> avoid the perf event opens failing for ungrouped topdown events. I'm
>>> not understanding what these patches are trying to do, if you want to
>>> prioritize the event for leader sampling why not modify it to compare
>> Per my understanding, this change doesn't break anything. The events
>> regrouping can be divided into below several cases.
>>
>> a. all events in a group
>>
>> perf stat -e "{instructions,topdown-retiring,slots}" -C0 sleep 1
>> WARNING: events were regrouped to match PMUs
>>
>>  Performance counter stats for 'CPU(s) 0':
>>
>>         15,066,240      slots
>>          1,899,760      instructions
>>          2,126,998      topdown-retiring
>>
>>        1.045783464 seconds time elapsed
>>
>> In this case, slots event would be adjusted as the leader event and all
>> events are still in same group.
>>
>> b. all events not in a group
>>
>> perf stat -e "instructions,topdown-retiring,slots" -C0 sleep 1
>> WARNING: events were regrouped to match PMUs
>>
>>  Performance counter stats for 'CPU(s) 0':
>>
>>          2,045,561      instructions
>>         17,108,370      slots
>>          2,281,116      topdown-retiring
>>
>>        1.045639284 seconds time elapsed
>>
>> In this case, slots and topdown-retiring are placed into a group and slots
>> is the group leader. instructions event is outside the group.
>>
>> c. slots event in group but topdown metric events outside the group
>>
>> perf stat -e "{instructions,slots},topdown-retiring"  -C0 sleep 1
>> WARNING: events were regrouped to match PMUs
>>
>>  Performance counter stats for 'CPU(s) 0':
>>
>>         20,323,878      slots
>>          2,634,884      instructions
>>          3,028,656      topdown-retiring
>>
>>        1.045076380 seconds time elapsed
>>
>> In this case, topdown-retiring event is placed into previous group and
>> slots is adjusted to leader event.
>>
>> d. multiple event groups
>>
>> perf stat -e "{instructions,slots},{topdown-retiring}"  -C0 sleep 1
>> WARNING: events were regrouped to match PMUs
>>
>>  Performance counter stats for 'CPU(s) 0':
>>
>>         26,319,024      slots
>>          2,427,791      instructions
>>          2,683,508      topdown-retiring
>>
>>        1.045495830 seconds time elapsed
>>
>> In this case, the two groups are merged to one group and slots event is
>> adjusted as leader.
>>
>> The key point of this patch is that it's unnecessary to move topdown
>> metrics events closely after slots event. It's a overkill since Intel core
>> PMU driver doesn't require that. Intel PMU driver just requires topdown
>> metrics events are in a group where slots event is the group leader, and
>> worse the movement for topdown metrics events causes the issue in the
>> commit message mentioned.
>>
>> This patch doesn't block to regroup topdown metrics event. It just removes
>> the unnecessary movement for topdown metrics events.
> But you will get the same behavior because of the non-arch dependent
> force group index - I guess you don't care as the sample read only
> happens when you have a group.
>
> I'm thinking of cases like (which admittedly is broken):
> ```
> $ perf stat -e "{slots,instructions},cycles,topdown-fe-bound" -a sleep 0.1
> [sudo] password for irogers:
>
> Performance counter stats for 'system wide':
>
>     2,589,345,900      slots
>       852,492,838      instructions
>       583,525,372      cycles
>   <not supported>      topdown-fe-bound
>
>       0.103930790 seconds time elapsed
> ```

I run the upstream code (commit 73e931504f8e0d42978bfcda37b323dbbd1afc08)
without this patchset, I see same issue.

perf stat -e "{slots,instructions},cycles,topdown-fe-bound" -a sleep 0.1

 Performance counter stats for 'system wide':

       262,448,922      slots
        29,630,373      instructions
        43,891,902      cycles
   <not supported>      topdown-fe-bound

       0.150369560 seconds time elapsed

#perf -v
perf version 6.10.rc6.g73e931504f8e

This issue is not caused by this patchset.

> As the slots event is grouped there's no force group index on it, we
> want to shuffle the topdown-fe-bound into the group so we want it to
> compare as less than cycles - ie we're comparing topdown events with
> non topdown events and trying to shuffle the topdown events first.

Current evlist__cmp() won't really swap the order of cycles and
topdown-fe-bound.

if (lhs_sort_idx != rhs_sort_idx)
        return lhs_sort_idx - rhs_sort_idx;

When comparing cycles and topdown-fe-bound events, lhs_sort_idx is 2 and
rhs_sort_idx is 3, so the swap won't happen.

So the event sequence after sorting is still "slots, instructions ,cycles,
topdown-fe-bound". Both cycles and topdown-fe-bound events won't be placed
into the group.


>
> Thanks,
> Ian
>
>
>
>>> first?
>>>
>>> Thanks,
>>> Ian
>>>
>>>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>>>> ---
>>>>  tools/perf/arch/x86/util/evlist.c | 5 -----
>>>>  1 file changed, 5 deletions(-)
>>>>
>>>> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
>>>> index 332e8907f43e..6046981d61cf 100644
>>>> --- a/tools/perf/arch/x86/util/evlist.c
>>>> +++ b/tools/perf/arch/x86/util/evlist.c
>>>> @@ -82,11 +82,6 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
>>>>                         return -1;
>>>>                 if (arch_is_topdown_slots(rhs))
>>>>                         return 1;
>>>> -               /* Followed by topdown events. */
>>>> -               if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs))
>>>> -                       return -1;
>>>> -               if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs))
>>>> -                       return 1;
>>>>         }
>>>>
>>>>         /* Default ordering by insertion index. */
>>>> --
>>>> 2.40.1
>>>>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Patch v2 3/5] perf x86/topdown: Don't move topdown metrics events when sorting events
  2024-07-10  9:40         ` Mi, Dapeng
@ 2024-07-10 15:07           ` Ian Rogers
  2024-07-11  4:48             ` Mi, Dapeng
  0 siblings, 1 reply; 16+ messages in thread
From: Ian Rogers @ 2024-07-10 15:07 UTC (permalink / raw)
  To: Mi, Dapeng
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Adrian Hunter, Alexander Shishkin, Kan Liang,
	linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi

On Wed, Jul 10, 2024 at 2:40 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>
>
> On 7/10/2024 6:37 AM, Ian Rogers wrote:
> > On Mon, Jul 8, 2024 at 9:18 PM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
> >>
> >> On 7/8/2024 11:08 PM, Ian Rogers wrote:
> >>> On Mon, Jul 8, 2024 at 12:40 AM Dapeng Mi <dapeng1.mi@linux.intel.com> wrote:
> >>>> when running below perf command, we say error is reported.
> >>>>
> >>>> perf record -e "{slots,instructions,topdown-retiring}:S" -vv -C0 sleep 1
> >>>>
> >>>> ------------------------------------------------------------
> >>>> perf_event_attr:
> >>>>   type                             4 (cpu)
> >>>>   size                             168
> >>>>   config                           0x400 (slots)
> >>>>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
> >>>>   read_format                      ID|GROUP|LOST
> >>>>   disabled                         1
> >>>>   sample_id_all                    1
> >>>>   exclude_guest                    1
> >>>> ------------------------------------------------------------
> >>>> sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
> >>>> ------------------------------------------------------------
> >>>> perf_event_attr:
> >>>>   type                             4 (cpu)
> >>>>   size                             168
> >>>>   config                           0x8000 (topdown-retiring)
> >>>>   { sample_period, sample_freq }   4000
> >>>>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
> >>>>   read_format                      ID|GROUP|LOST
> >>>>   freq                             1
> >>>>   sample_id_all                    1
> >>>>   exclude_guest                    1
> >>>> ------------------------------------------------------------
> >>>> sys_perf_event_open: pid -1  cpu 0  group_fd 5  flags 0x8
> >>>> sys_perf_event_open failed, error -22
> >>>>
> >>>> Error:
> >>>> The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (topdown-retiring).
> >>>>
> >>>> The reason of error is that the events are regrouped and
> >>>> topdown-retiring event is moved to closely after the slots event and
> >>>> topdown-retiring event needs to do the sampling, but Intel PMU driver
> >>>> doesn't support to sample topdown metrics events.
> >>>>
> >>>> For topdown metrics events, it just requires to be in a group which has
> >>>> slots event as leader. It doesn't require topdown metrics event must be
> >>>> closely after slots event. Thus it's a overkill to move topdown metrics
> >>>> event closely after slots event in events regrouping and furtherly cause
> >>>> the above issue.
> >>>>
> >>>> Thus delete the code that moving topdown metrics events to fix the
> >>>> issue.
> >>> I think this is wrong. The topdown events may not be in a group, such
> >>> cases can come from metrics due to grouping constraints, and so they
> >>> must be sorted together so that they may be gathered into a group to
> >>> avoid the perf event opens failing for ungrouped topdown events. I'm
> >>> not understanding what these patches are trying to do, if you want to
> >>> prioritize the event for leader sampling why not modify it to compare
> >> Per my understanding, this change doesn't break anything. The events
> >> regrouping can be divided into below several cases.
> >>
> >> a. all events in a group
> >>
> >> perf stat -e "{instructions,topdown-retiring,slots}" -C0 sleep 1
> >> WARNING: events were regrouped to match PMUs
> >>
> >>  Performance counter stats for 'CPU(s) 0':
> >>
> >>         15,066,240      slots
> >>          1,899,760      instructions
> >>          2,126,998      topdown-retiring
> >>
> >>        1.045783464 seconds time elapsed
> >>
> >> In this case, slots event would be adjusted as the leader event and all
> >> events are still in same group.
> >>
> >> b. all events not in a group
> >>
> >> perf stat -e "instructions,topdown-retiring,slots" -C0 sleep 1
> >> WARNING: events were regrouped to match PMUs
> >>
> >>  Performance counter stats for 'CPU(s) 0':
> >>
> >>          2,045,561      instructions
> >>         17,108,370      slots
> >>          2,281,116      topdown-retiring
> >>
> >>        1.045639284 seconds time elapsed
> >>
> >> In this case, slots and topdown-retiring are placed into a group and slots
> >> is the group leader. instructions event is outside the group.
> >>
> >> c. slots event in group but topdown metric events outside the group
> >>
> >> perf stat -e "{instructions,slots},topdown-retiring"  -C0 sleep 1
> >> WARNING: events were regrouped to match PMUs
> >>
> >>  Performance counter stats for 'CPU(s) 0':
> >>
> >>         20,323,878      slots
> >>          2,634,884      instructions
> >>          3,028,656      topdown-retiring
> >>
> >>        1.045076380 seconds time elapsed
> >>
> >> In this case, topdown-retiring event is placed into previous group and
> >> slots is adjusted to leader event.
> >>
> >> d. multiple event groups
> >>
> >> perf stat -e "{instructions,slots},{topdown-retiring}"  -C0 sleep 1
> >> WARNING: events were regrouped to match PMUs
> >>
> >>  Performance counter stats for 'CPU(s) 0':
> >>
> >>         26,319,024      slots
> >>          2,427,791      instructions
> >>          2,683,508      topdown-retiring
> >>
> >>        1.045495830 seconds time elapsed
> >>
> >> In this case, the two groups are merged to one group and slots event is
> >> adjusted as leader.
> >>
> >> The key point of this patch is that it's unnecessary to move topdown
> >> metrics events closely after slots event. It's a overkill since Intel core
> >> PMU driver doesn't require that. Intel PMU driver just requires topdown
> >> metrics events are in a group where slots event is the group leader, and
> >> worse the movement for topdown metrics events causes the issue in the
> >> commit message mentioned.
> >>
> >> This patch doesn't block to regroup topdown metrics event. It just removes
> >> the unnecessary movement for topdown metrics events.
> > But you will get the same behavior because of the non-arch dependent
> > force group index - I guess you don't care as the sample read only
> > happens when you have a group.
> >
> > I'm thinking of cases like (which admittedly is broken):
> > ```
> > $ perf stat -e "{slots,instructions},cycles,topdown-fe-bound" -a sleep 0.1
> > [sudo] password for irogers:
> >
> > Performance counter stats for 'system wide':
> >
> >     2,589,345,900      slots
> >       852,492,838      instructions
> >       583,525,372      cycles
> >   <not supported>      topdown-fe-bound
> >
> >       0.103930790 seconds time elapsed
> > ```
>
> I run the upstream code (commit 73e931504f8e0d42978bfcda37b323dbbd1afc08)
> without this patchset, I see same issue.
>
> perf stat -e "{slots,instructions},cycles,topdown-fe-bound" -a sleep 0.1
>
>  Performance counter stats for 'system wide':
>
>        262,448,922      slots
>         29,630,373      instructions
>         43,891,902      cycles
>    <not supported>      topdown-fe-bound
>
>        0.150369560 seconds time elapsed
>
> #perf -v
> perf version 6.10.rc6.g73e931504f8e
>
> This issue is not caused by this patchset.

I agree, but I think what is broken above was caused by the forced
grouping change that I did for Andi. The point of your change here is
to say that topdown events don't need to be moved while sorting, but
what should be happening here is just that. topdown-fe-bound should be
moved into the group with slots and instructions so it isn't "<not
supported>". So yes the current code has issues, but that's not the
same as saying we don't need to move topdown events, we do so that we
may group them better.

Thanks,
Ian

> > As the slots event is grouped there's no force group index on it, we
> > want to shuffle the topdown-fe-bound into the group so we want it to
> > compare as less than cycles - ie we're comparing topdown events with
> > non topdown events and trying to shuffle the topdown events first.
>
> Current evlist__cmp() won't really swap the order of cycles and
> topdown-fe-bound.
>
> if (lhs_sort_idx != rhs_sort_idx)
>         return lhs_sort_idx - rhs_sort_idx;
>
> When comparing cycles and topdown-fe-bound events, lhs_sort_idx is 2 and
> rhs_sort_idx is 3, so the swap won't happen.
>
> So the event sequence after sorting is still "slots, instructions ,cycles,
> topdown-fe-bound". Both cycles and topdown-fe-bound events won't be placed
> into the group.
>
>
> >
> > Thanks,
> > Ian
> >
> >
> >
> >>> first?
> >>>
> >>> Thanks,
> >>> Ian
> >>>
> >>>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> >>>> ---
> >>>>  tools/perf/arch/x86/util/evlist.c | 5 -----
> >>>>  1 file changed, 5 deletions(-)
> >>>>
> >>>> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
> >>>> index 332e8907f43e..6046981d61cf 100644
> >>>> --- a/tools/perf/arch/x86/util/evlist.c
> >>>> +++ b/tools/perf/arch/x86/util/evlist.c
> >>>> @@ -82,11 +82,6 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
> >>>>                         return -1;
> >>>>                 if (arch_is_topdown_slots(rhs))
> >>>>                         return 1;
> >>>> -               /* Followed by topdown events. */
> >>>> -               if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs))
> >>>> -                       return -1;
> >>>> -               if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs))
> >>>> -                       return 1;
> >>>>         }
> >>>>
> >>>>         /* Default ordering by insertion index. */
> >>>> --
> >>>> 2.40.1
> >>>>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Patch v2 3/5] perf x86/topdown: Don't move topdown metrics events when sorting events
  2024-07-10 15:07           ` Ian Rogers
@ 2024-07-11  4:48             ` Mi, Dapeng
  0 siblings, 0 replies; 16+ messages in thread
From: Mi, Dapeng @ 2024-07-11  4:48 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Adrian Hunter, Alexander Shishkin, Kan Liang,
	linux-perf-users, linux-kernel, Yongwei Ma, Dapeng Mi


On 7/10/2024 11:07 PM, Ian Rogers wrote:
> On Wed, Jul 10, 2024 at 2:40 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>>
>> On 7/10/2024 6:37 AM, Ian Rogers wrote:
>>> On Mon, Jul 8, 2024 at 9:18 PM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>>>> On 7/8/2024 11:08 PM, Ian Rogers wrote:
>>>>> On Mon, Jul 8, 2024 at 12:40 AM Dapeng Mi <dapeng1.mi@linux.intel.com> wrote:
>>>>>> when running below perf command, we say error is reported.
>>>>>>
>>>>>> perf record -e "{slots,instructions,topdown-retiring}:S" -vv -C0 sleep 1
>>>>>>
>>>>>> ------------------------------------------------------------
>>>>>> perf_event_attr:
>>>>>>   type                             4 (cpu)
>>>>>>   size                             168
>>>>>>   config                           0x400 (slots)
>>>>>>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
>>>>>>   read_format                      ID|GROUP|LOST
>>>>>>   disabled                         1
>>>>>>   sample_id_all                    1
>>>>>>   exclude_guest                    1
>>>>>> ------------------------------------------------------------
>>>>>> sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
>>>>>> ------------------------------------------------------------
>>>>>> perf_event_attr:
>>>>>>   type                             4 (cpu)
>>>>>>   size                             168
>>>>>>   config                           0x8000 (topdown-retiring)
>>>>>>   { sample_period, sample_freq }   4000
>>>>>>   sample_type                      IP|TID|TIME|READ|CPU|PERIOD|IDENTIFIER
>>>>>>   read_format                      ID|GROUP|LOST
>>>>>>   freq                             1
>>>>>>   sample_id_all                    1
>>>>>>   exclude_guest                    1
>>>>>> ------------------------------------------------------------
>>>>>> sys_perf_event_open: pid -1  cpu 0  group_fd 5  flags 0x8
>>>>>> sys_perf_event_open failed, error -22
>>>>>>
>>>>>> Error:
>>>>>> The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (topdown-retiring).
>>>>>>
>>>>>> The reason of error is that the events are regrouped and
>>>>>> topdown-retiring event is moved to closely after the slots event and
>>>>>> topdown-retiring event needs to do the sampling, but Intel PMU driver
>>>>>> doesn't support to sample topdown metrics events.
>>>>>>
>>>>>> For topdown metrics events, it just requires to be in a group which has
>>>>>> slots event as leader. It doesn't require topdown metrics event must be
>>>>>> closely after slots event. Thus it's a overkill to move topdown metrics
>>>>>> event closely after slots event in events regrouping and furtherly cause
>>>>>> the above issue.
>>>>>>
>>>>>> Thus delete the code that moving topdown metrics events to fix the
>>>>>> issue.
>>>>> I think this is wrong. The topdown events may not be in a group, such
>>>>> cases can come from metrics due to grouping constraints, and so they
>>>>> must be sorted together so that they may be gathered into a group to
>>>>> avoid the perf event opens failing for ungrouped topdown events. I'm
>>>>> not understanding what these patches are trying to do, if you want to
>>>>> prioritize the event for leader sampling why not modify it to compare
>>>> Per my understanding, this change doesn't break anything. The events
>>>> regrouping can be divided into below several cases.
>>>>
>>>> a. all events in a group
>>>>
>>>> perf stat -e "{instructions,topdown-retiring,slots}" -C0 sleep 1
>>>> WARNING: events were regrouped to match PMUs
>>>>
>>>>  Performance counter stats for 'CPU(s) 0':
>>>>
>>>>         15,066,240      slots
>>>>          1,899,760      instructions
>>>>          2,126,998      topdown-retiring
>>>>
>>>>        1.045783464 seconds time elapsed
>>>>
>>>> In this case, slots event would be adjusted as the leader event and all
>>>> events are still in same group.
>>>>
>>>> b. all events not in a group
>>>>
>>>> perf stat -e "instructions,topdown-retiring,slots" -C0 sleep 1
>>>> WARNING: events were regrouped to match PMUs
>>>>
>>>>  Performance counter stats for 'CPU(s) 0':
>>>>
>>>>          2,045,561      instructions
>>>>         17,108,370      slots
>>>>          2,281,116      topdown-retiring
>>>>
>>>>        1.045639284 seconds time elapsed
>>>>
>>>> In this case, slots and topdown-retiring are placed into a group and slots
>>>> is the group leader. instructions event is outside the group.
>>>>
>>>> c. slots event in group but topdown metric events outside the group
>>>>
>>>> perf stat -e "{instructions,slots},topdown-retiring"  -C0 sleep 1
>>>> WARNING: events were regrouped to match PMUs
>>>>
>>>>  Performance counter stats for 'CPU(s) 0':
>>>>
>>>>         20,323,878      slots
>>>>          2,634,884      instructions
>>>>          3,028,656      topdown-retiring
>>>>
>>>>        1.045076380 seconds time elapsed
>>>>
>>>> In this case, topdown-retiring event is placed into previous group and
>>>> slots is adjusted to leader event.
>>>>
>>>> d. multiple event groups
>>>>
>>>> perf stat -e "{instructions,slots},{topdown-retiring}"  -C0 sleep 1
>>>> WARNING: events were regrouped to match PMUs
>>>>
>>>>  Performance counter stats for 'CPU(s) 0':
>>>>
>>>>         26,319,024      slots
>>>>          2,427,791      instructions
>>>>          2,683,508      topdown-retiring
>>>>
>>>>        1.045495830 seconds time elapsed
>>>>
>>>> In this case, the two groups are merged to one group and slots event is
>>>> adjusted as leader.
>>>>
>>>> The key point of this patch is that it's unnecessary to move topdown
>>>> metrics events closely after slots event. It's a overkill since Intel core
>>>> PMU driver doesn't require that. Intel PMU driver just requires topdown
>>>> metrics events are in a group where slots event is the group leader, and
>>>> worse the movement for topdown metrics events causes the issue in the
>>>> commit message mentioned.
>>>>
>>>> This patch doesn't block to regroup topdown metrics event. It just removes
>>>> the unnecessary movement for topdown metrics events.
>>> But you will get the same behavior because of the non-arch dependent
>>> force group index - I guess you don't care as the sample read only
>>> happens when you have a group.
>>>
>>> I'm thinking of cases like (which admittedly is broken):
>>> ```
>>> $ perf stat -e "{slots,instructions},cycles,topdown-fe-bound" -a sleep 0.1
>>> [sudo] password for irogers:
>>>
>>> Performance counter stats for 'system wide':
>>>
>>>     2,589,345,900      slots
>>>       852,492,838      instructions
>>>       583,525,372      cycles
>>>   <not supported>      topdown-fe-bound
>>>
>>>       0.103930790 seconds time elapsed
>>> ```
>> I run the upstream code (commit 73e931504f8e0d42978bfcda37b323dbbd1afc08)
>> without this patchset, I see same issue.
>>
>> perf stat -e "{slots,instructions},cycles,topdown-fe-bound" -a sleep 0.1
>>
>>  Performance counter stats for 'system wide':
>>
>>        262,448,922      slots
>>         29,630,373      instructions
>>         43,891,902      cycles
>>    <not supported>      topdown-fe-bound
>>
>>        0.150369560 seconds time elapsed
>>
>> #perf -v
>> perf version 6.10.rc6.g73e931504f8e
>>
>> This issue is not caused by this patchset.
> I agree, but I think what is broken above was caused by the forced
> grouping change that I did for Andi. The point of your change here is
> to say that topdown events don't need to be moved while sorting, but
> what should be happening here is just that. topdown-fe-bound should be
> moved into the group with slots and instructions so it isn't "<not
> supported>". So yes the current code has issues, but that's not the
> same as saying we don't need to move topdown events, we do so that we
> may group them better.
>
> Thanks,
> Ian

I see your point. I think the key is to ensure the topdown metrics events
in a group which has slots as the leader. As for where the topdown metrics
event is in the group, it doesn't matter. I would see if there is a better
method to fix this issue. Thanks.


>
>>> As the slots event is grouped there's no force group index on it, we
>>> want to shuffle the topdown-fe-bound into the group so we want it to
>>> compare as less than cycles - ie we're comparing topdown events with
>>> non topdown events and trying to shuffle the topdown events first.
>> Current evlist__cmp() won't really swap the order of cycles and
>> topdown-fe-bound.
>>
>> if (lhs_sort_idx != rhs_sort_idx)
>>         return lhs_sort_idx - rhs_sort_idx;
>>
>> When comparing cycles and topdown-fe-bound events, lhs_sort_idx is 2 and
>> rhs_sort_idx is 3, so the swap won't happen.
>>
>> So the event sequence after sorting is still "slots, instructions ,cycles,
>> topdown-fe-bound". Both cycles and topdown-fe-bound events won't be placed
>> into the group.
>>
>>
>>> Thanks,
>>> Ian
>>>
>>>
>>>
>>>>> first?
>>>>>
>>>>> Thanks,
>>>>> Ian
>>>>>
>>>>>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>>>>>> ---
>>>>>>  tools/perf/arch/x86/util/evlist.c | 5 -----
>>>>>>  1 file changed, 5 deletions(-)
>>>>>>
>>>>>> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
>>>>>> index 332e8907f43e..6046981d61cf 100644
>>>>>> --- a/tools/perf/arch/x86/util/evlist.c
>>>>>> +++ b/tools/perf/arch/x86/util/evlist.c
>>>>>> @@ -82,11 +82,6 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
>>>>>>                         return -1;
>>>>>>                 if (arch_is_topdown_slots(rhs))
>>>>>>                         return 1;
>>>>>> -               /* Followed by topdown events. */
>>>>>> -               if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs))
>>>>>> -                       return -1;
>>>>>> -               if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs))
>>>>>> -                       return 1;
>>>>>>         }
>>>>>>
>>>>>>         /* Default ordering by insertion index. */
>>>>>> --
>>>>>> 2.40.1
>>>>>>

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2024-07-11  4:48 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-08 14:41 [Patch v2 0/5] Bug fixes on topdown events reordering Dapeng Mi
2024-07-08 14:42 ` [Patch v2 1/5] perf x86/topdown: Complete topdown slots/metrics events check Dapeng Mi
2024-07-08 13:28   ` Liang, Kan
2024-07-09  1:58     ` Mi, Dapeng
2024-07-08 14:42 ` [Patch v2 2/5] perf x86/topdown: Correct leader selection with sample_read enabled Dapeng Mi
2024-07-08 14:42 ` [Patch v2 3/5] perf x86/topdown: Don't move topdown metrics events when sorting events Dapeng Mi
2024-07-08 15:08   ` Ian Rogers
2024-07-09  4:17     ` Mi, Dapeng
2024-07-09 22:37       ` Ian Rogers
2024-07-10  9:40         ` Mi, Dapeng
2024-07-10 15:07           ` Ian Rogers
2024-07-11  4:48             ` Mi, Dapeng
2024-07-08 14:42 ` [Patch v2 4/5] perf tests: Add leader sampling test in record tests Dapeng Mi
2024-07-08 14:42 ` [Patch v2 5/5] perf tests: Add topdown events counting and sampling tests Dapeng Mi
2024-07-08 13:40   ` Liang, Kan
2024-07-09  5:27     ` Mi, Dapeng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).