linux-perf-users.vger.kernel.org archive mirror
* [PATCH v1 1/2] perf list: Display the PMU name associated with a perf metric in json
@ 2025-05-12 18:46 Ian Rogers
  2025-05-12 18:47 ` [PATCH v1 2/2] perf test: Hybrid improvements for metric value validation test Ian Rogers
  2025-05-13 20:25 ` [PATCH v1 1/2] perf list: Display the PMU name associated with a perf metric in json Arnaldo Carvalho de Melo
  0 siblings, 2 replies; 7+ messages in thread
From: Ian Rogers @ 2025-05-12 18:46 UTC (permalink / raw)
  To: Weilin Wang, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Kan Liang, James Clark, linux-perf-users, linux-kernel

The perf stat --cputype option can be used to filter which metrics
will be applied; for this reason, the json metrics have an associated
PMU. List this PMU name in the perf list output in json mode so that
tooling may access it.

An example of the new field is:
```
{
        "MetricGroup": "Backend",
        "MetricName": "tma_core_bound",
        "MetricExpr": "max(0, tma_backend_bound - tma_memory_bound)",
        "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
        "ScaleUnit": "100%",
        "BriefDescription": "This metric represents fraction of slots where ...
        "PublicDescription": "This metric represents fraction of slots where ...
        "Unit": "cpu_core"
},
```
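
As a rough illustration of how tooling might consume the new field, here
is a minimal sketch (a hypothetical helper, not part of this patch; it
assumes `perf list -j metric` prints a JSON array of objects like the one
above, and it mirrors the "Unit" check done by the validation script in
patch 2/2):
```
import json
import subprocess

# List the metric names whose PMU ("Unit") matches the given cputype.
# Metrics without a "Unit" field are assumed to apply to every PMU.
def metrics_for_pmu(cputype="cpu_core"):
    out = subprocess.check_output(["perf", "list", "-j", "metric"], text=True)
    return [m["MetricName"] for m in json.loads(out)
            if "MetricName" in m and ("Unit" not in m or m["Unit"] == cputype)]

print(metrics_for_pmu("cpu_core"))
```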

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-list.c      | 12 ++++++++++--
 tools/perf/util/metricgroup.c  |  5 ++++-
 tools/perf/util/print-events.h |  3 ++-
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index fed482adb039..e9b595d75df2 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -197,7 +197,8 @@ static void default_print_metric(void *ps,
 				const char *long_desc,
 				const char *expr,
 				const char *threshold,
-				const char *unit __maybe_unused)
+				const char *unit __maybe_unused,
+				const char *pmu_name __maybe_unused)
 {
 	struct print_state *print_state = ps;
 	FILE *fp = print_state->fp;
@@ -433,7 +434,8 @@ static void json_print_event(void *ps, const char *topic, const char *pmu_name,
 static void json_print_metric(void *ps __maybe_unused, const char *group,
 			      const char *name, const char *desc,
 			      const char *long_desc, const char *expr,
-			      const char *threshold, const char *unit)
+			      const char *threshold, const char *unit,
+			      const char *pmu_name)
 {
 	struct json_print_state *print_state = ps;
 	bool need_sep = false;
@@ -483,6 +485,12 @@ static void json_print_metric(void *ps __maybe_unused, const char *group,
 				   long_desc);
 		need_sep = true;
 	}
+	if (pmu_name) {
+		fix_escape_fprintf(fp, &buf, "%s\t\"Unit\": \"%S\"",
+				   need_sep ? ",\n" : "",
+				   pmu_name);
+		need_sep = true;
+	}
 	fprintf(fp, "%s}", need_sep ? "\n" : "");
 	strbuf_release(&buf);
 }
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 126a631686b0..43d35f956a33 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -396,6 +396,7 @@ struct mep {
 	const char *metric_expr;
 	const char *metric_threshold;
 	const char *metric_unit;
+	const char *pmu_name;
 };
 
 static int mep_cmp(struct rb_node *rb_node, const void *entry)
@@ -476,6 +477,7 @@ static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm,
 			me->metric_expr = pm->metric_expr;
 			me->metric_threshold = pm->metric_threshold;
 			me->metric_unit = pm->unit;
+			me->pmu_name = pm->pmu;
 		}
 	}
 	free(omg);
@@ -551,7 +553,8 @@ void metricgroup__print(const struct print_callbacks *print_cb, void *print_stat
 				me->metric_long_desc,
 				me->metric_expr,
 				me->metric_threshold,
-				me->metric_unit);
+				me->metric_unit,
+				me->pmu_name);
 		next = rb_next(node);
 		rblist__remove_node(&groups, node);
 	}
diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
index 445efa1636c1..8f19c2bea64a 100644
--- a/tools/perf/util/print-events.h
+++ b/tools/perf/util/print-events.h
@@ -25,7 +25,8 @@ struct print_callbacks {
 			const char *long_desc,
 			const char *expr,
 			const char *threshold,
-			const char *unit);
+			const char *unit,
+			const char *pmu_name);
 	bool (*skip_duplicate_pmus)(void *print_state);
 };
 
-- 
2.49.0.1045.g170613ef41-goog



* [PATCH v1 2/2] perf test: Hybrid improvements for metric value validation test
  2025-05-12 18:46 [PATCH v1 1/2] perf list: Display the PMU name associated with a perf metric in json Ian Rogers
@ 2025-05-12 18:47 ` Ian Rogers
  2025-05-12 20:30   ` Falcon, Thomas
  2025-05-13 20:25 ` [PATCH v1 1/2] perf list: Display the PMU name associated with a perf metric in json Arnaldo Carvalho de Melo
  1 sibling, 1 reply; 7+ messages in thread
From: Ian Rogers @ 2025-05-12 18:47 UTC (permalink / raw)
  To: Weilin Wang, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Kan Liang, James Clark, linux-perf-users, linux-kernel

On my alderlake I currently see for the "perf metrics value validation" test:
```
Total Test Count:  142
Passed Test Count:  139
[
Metric Relationship Error:      The collected value of metric ['tma_fetch_latency', 'tma_fetch_bandwidth', 'tma_frontend_bound']
                        is [31.137028] in workload(s): ['perf bench futex hash -r 2 -s']
                        but expected value range is [tma_frontend_bound, tma_frontend_bound]
                        Relationship rule description: 'Sum of the level 2 children should equal level 1 parent',
Metric Relationship Error:      The collected value of metric ['tma_memory_bound', 'tma_core_bound', 'tma_backend_bound']
                        is [6.564442] in workload(s): ['perf bench futex hash -r 2 -s']
                        but expected value range is [tma_backend_bound, tma_backend_bound]
                        Relationship rule description: 'Sum of the level 2 children should equal level 1 parent',
Metric Relationship Error:      The collected value of metric ['tma_light_operations', 'tma_heavy_operations', 'tma_retiring']
                        is [57.806179] in workload(s): ['perf bench futex hash -r 2 -s']
                        but expected value range is [tma_retiring, tma_retiring]
                        Relationship rule description: 'Sum of the level 2 children should equal level 1 parent']
Metric validation return with erros. Please check metrics reported with errors.
```
I suspect it is due to two metrics for different CPU types being
enabled. Add a -cputype option to avoid this. The test still fails with:
```
Total Test Count:  115
Passed Test Count:  114
[
Wrong Metric Value Error:       The collected value of metric ['tma_l2_hit_latency']
                        is [117.947088] in workload(s): ['perf bench futex hash -r 2 -s']
                        but expected value range is [0, 100]]
Metric validation return with errors. Please check metrics reported with errors.
```
which is a reproducible genuine error and likely requires a metric fix.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 .../tests/shell/lib/perf_metric_validation.py   | 12 +++++++++---
 tools/perf/tests/shell/stat_metrics_values.sh   | 17 +++++++++++------
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/tools/perf/tests/shell/lib/perf_metric_validation.py b/tools/perf/tests/shell/lib/perf_metric_validation.py
index 0b94216c9c46..dea8ef1977bf 100644
--- a/tools/perf/tests/shell/lib/perf_metric_validation.py
+++ b/tools/perf/tests/shell/lib/perf_metric_validation.py
@@ -35,7 +35,8 @@ class TestError:
 
 
 class Validator:
-    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='', workload='true', metrics=''):
+    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='',
+                 workload='true', metrics='', cputype='cpu'):
         self.rulefname = rulefname
         self.reportfname = reportfname
         self.rules = None
@@ -43,6 +44,7 @@ class Validator:
         self.metrics = self.__set_metrics(metrics)
         self.skiplist = set()
         self.tolerance = t
+        self.cputype = cputype
 
         self.workloads = [x for x in workload.split(",") if x]
         self.wlidx = 0  # idx of current workloads
@@ -377,7 +379,7 @@ class Validator:
 
     def _run_perf(self, metric, workload: str):
         tool = 'perf'
-        command = [tool, 'stat', '-j', '-M', f"{metric}", "-a"]
+        command = [tool, 'stat', '--cputype', self.cputype, '-j', '-M', f"{metric}", "-a"]
         wl = workload.split()
         command.extend(wl)
         print(" ".join(command))
@@ -443,6 +445,8 @@ class Validator:
                 if 'MetricName' not in m:
                     print("Warning: no metric name")
                     continue
+                if 'Unit' in m and m['Unit'] != self.cputype:
+                    continue
                 name = m['MetricName'].lower()
                 self.metrics.add(name)
                 if 'ScaleUnit' in m and (m['ScaleUnit'] == '1%' or m['ScaleUnit'] == '100%'):
@@ -578,6 +582,8 @@ def main() -> None:
     parser.add_argument(
         "-wl", help="Workload to run while data collection", default="true")
     parser.add_argument("-m", help="Metric list to validate", default="")
+    parser.add_argument("-cputype", help="Only test metrics for the given CPU/PMU type",
+                        default="cpu")
     args = parser.parse_args()
     outpath = Path(args.output_dir)
     reportf = Path.joinpath(outpath, 'perf_report.json')
@@ -586,7 +592,7 @@ def main() -> None:
 
     validator = Validator(args.rule, reportf, debug=args.debug,
                           datafname=datafile, fullrulefname=fullrule, workload=args.wl,
-                          metrics=args.m)
+                          metrics=args.m, cputype=args.cputype)
     ret = validator.test()
 
     return ret
diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh
index 279f19c5919a..30566f0b5427 100755
--- a/tools/perf/tests/shell/stat_metrics_values.sh
+++ b/tools/perf/tests/shell/stat_metrics_values.sh
@@ -16,11 +16,16 @@ workload="perf bench futex hash -r 2 -s"
 # Add -debug, save data file and full rule file
 echo "Launch python validation script $pythonvalidator"
 echo "Output will be stored in: $tmpdir"
-$PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}"
-ret=$?
-rm -rf $tmpdir
-if [ $ret -ne 0 ]; then
-	echo "Metric validation return with erros. Please check metrics reported with errors."
-fi
+for cputype in /sys/bus/event_source/devices/cpu_*; do
+	cputype=$(basename "$cputype")
+	echo "Testing metrics for: $cputype"
+	$PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}" \
+		-cputype "${cputype}"
+	ret=$?
+	rm -rf $tmpdir
+	if [ $ret -ne 0 ]; then
+		echo "Metric validation return with errors. Please check metrics reported with errors."
+	fi
+done
 exit $ret
 
-- 
2.49.0.1045.g170613ef41-goog



* Re: [PATCH v1 2/2] perf test: Hybrid improvements for metric value validation test
  2025-05-12 18:47 ` [PATCH v1 2/2] perf test: Hybrid improvements for metric value validation test Ian Rogers
@ 2025-05-12 20:30   ` Falcon, Thomas
  2025-05-12 21:52     ` Ian Rogers
  0 siblings, 1 reply; 7+ messages in thread
From: Falcon, Thomas @ 2025-05-12 20:30 UTC (permalink / raw)
  To: james.clark@linaro.org, alexander.shishkin@linux.intel.com,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	peterz@infradead.org, mark.rutland@arm.com, mingo@redhat.com,
	Hunter, Adrian, acme@kernel.org, namhyung@kernel.org,
	irogers@google.com, Wang, Weilin, kan.liang@linux.intel.com,
	jolsa@kernel.org

On Mon, 2025-05-12 at 11:47 -0700, Ian Rogers wrote:
> On my alderlake I currently see for the "perf metrics value validation" test:
> ```
> Total Test Count:  142
> Passed Test Count:  139
> [
> Metric Relationship Error:      The collected value of metric ['tma_fetch_latency', 'tma_fetch_bandwidth', 'tma_frontend_bound']
>                         is [31.137028] in workload(s): ['perf bench futex hash -r 2 -s']
>                         but expected value range is [tma_frontend_bound, tma_frontend_bound]
>                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent',
> Metric Relationship Error:      The collected value of metric ['tma_memory_bound', 'tma_core_bound', 'tma_backend_bound']
>                         is [6.564442] in workload(s): ['perf bench futex hash -r 2 -s']
>                         but expected value range is [tma_backend_bound, tma_backend_bound]
>                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent',
> Metric Relationship Error:      The collected value of metric ['tma_light_operations', 'tma_heavy_operations', 'tma_retiring']
>                         is [57.806179] in workload(s): ['perf bench futex hash -r 2 -s']
>                         but expected value range is [tma_retiring, tma_retiring]
>                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent']
> Metric validation return with erros. Please check metrics reported with errors.
> ```
> I suspect it is due to two metrics for different CPU types being
> enabled. Add a -cputype option to avoid this. The test still fails with:
> ```
> Total Test Count:  115
> Passed Test Count:  114
> [
> Wrong Metric Value Error:       The collected value of metric ['tma_l2_hit_latency']
>                         is [117.947088] in workload(s): ['perf bench futex hash -r 2 -s']
>                         but expected value range is [0, 100]]
> Metric validation return with errors. Please check metrics reported with errors.
> ```
> which is a reproducible genuine error and likely requires a metric fix.

Hi Ian, I tested this on my alder lake and an arrow lake. All tests, including tma_l2_hit_latency,
pass on my end.

Tested-by: Thomas Falcon <thomas.falcon@intel.com>

Thanks,
Tom
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  .../tests/shell/lib/perf_metric_validation.py   | 12 +++++++++---
>  tools/perf/tests/shell/stat_metrics_values.sh   | 17 +++++++++++------
>  2 files changed, 20 insertions(+), 9 deletions(-)
> 
> diff --git a/tools/perf/tests/shell/lib/perf_metric_validation.py b/tools/perf/tests/shell/lib/perf_metric_validation.py
> index 0b94216c9c46..dea8ef1977bf 100644
> --- a/tools/perf/tests/shell/lib/perf_metric_validation.py
> +++ b/tools/perf/tests/shell/lib/perf_metric_validation.py
> @@ -35,7 +35,8 @@ class TestError:
>  
>  
>  class Validator:
> -    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='', workload='true', metrics=''):
> +    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='',
> +                 workload='true', metrics='', cputype='cpu'):
>          self.rulefname = rulefname
>          self.reportfname = reportfname
>          self.rules = None
> @@ -43,6 +44,7 @@ class Validator:
>          self.metrics = self.__set_metrics(metrics)
>          self.skiplist = set()
>          self.tolerance = t
> +        self.cputype = cputype
>  
>          self.workloads = [x for x in workload.split(",") if x]
>          self.wlidx = 0  # idx of current workloads
> @@ -377,7 +379,7 @@ class Validator:
>  
>      def _run_perf(self, metric, workload: str):
>          tool = 'perf'
> -        command = [tool, 'stat', '-j', '-M', f"{metric}", "-a"]
> +        command = [tool, 'stat', '--cputype', self.cputype, '-j', '-M', f"{metric}", "-a"]
>          wl = workload.split()
>          command.extend(wl)
>          print(" ".join(command))
> @@ -443,6 +445,8 @@ class Validator:
>                  if 'MetricName' not in m:
>                      print("Warning: no metric name")
>                      continue
> +                if 'Unit' in m and m['Unit'] != self.cputype:
> +                    continue
>                  name = m['MetricName'].lower()
>                  self.metrics.add(name)
>                  if 'ScaleUnit' in m and (m['ScaleUnit'] == '1%' or m['ScaleUnit'] == '100%'):
> @@ -578,6 +582,8 @@ def main() -> None:
>      parser.add_argument(
>          "-wl", help="Workload to run while data collection", default="true")
>      parser.add_argument("-m", help="Metric list to validate", default="")
> +    parser.add_argument("-cputype", help="Only test metrics for the given CPU/PMU type",
> +                        default="cpu")
>      args = parser.parse_args()
>      outpath = Path(args.output_dir)
>      reportf = Path.joinpath(outpath, 'perf_report.json')
> @@ -586,7 +592,7 @@ def main() -> None:
>  
>      validator = Validator(args.rule, reportf, debug=args.debug,
>                            datafname=datafile, fullrulefname=fullrule, workload=args.wl,
> -                          metrics=args.m)
> +                          metrics=args.m, cputype=args.cputype)
>      ret = validator.test()
>  
>      return ret
> diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh
> index 279f19c5919a..30566f0b5427 100755
> --- a/tools/perf/tests/shell/stat_metrics_values.sh
> +++ b/tools/perf/tests/shell/stat_metrics_values.sh
> @@ -16,11 +16,16 @@ workload="perf bench futex hash -r 2 -s"
>  # Add -debug, save data file and full rule file
>  echo "Launch python validation script $pythonvalidator"
>  echo "Output will be stored in: $tmpdir"
> -$PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}"
> -ret=$?
> -rm -rf $tmpdir
> -if [ $ret -ne 0 ]; then
> -	echo "Metric validation return with erros. Please check metrics reported with errors."
> -fi
> +for cputype in /sys/bus/event_source/devices/cpu_*; do
> +	cputype=$(basename "$cputype")
> +	echo "Testing metrics for: $cputype"
> +	$PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}" \
> +		-cputype "${cputype}"
> +	ret=$?
> +	rm -rf $tmpdir
> +	if [ $ret -ne 0 ]; then
> +		echo "Metric validation return with errors. Please check metrics reported with errors."
> +	fi
> +done
>  exit $ret
>  



* Re: [PATCH v1 2/2] perf test: Hybrid improvements for metric value validation test
  2025-05-12 20:30   ` Falcon, Thomas
@ 2025-05-12 21:52     ` Ian Rogers
  2025-05-13 17:26       ` Ian Rogers
  0 siblings, 1 reply; 7+ messages in thread
From: Ian Rogers @ 2025-05-12 21:52 UTC (permalink / raw)
  To: Falcon, Thomas, Wang, Weilin
  Cc: james.clark@linaro.org, alexander.shishkin@linux.intel.com,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	peterz@infradead.org, mark.rutland@arm.com, mingo@redhat.com,
	Hunter, Adrian, acme@kernel.org, namhyung@kernel.org,
	kan.liang@linux.intel.com, jolsa@kernel.org

On Mon, May 12, 2025 at 1:30 PM Falcon, Thomas <thomas.falcon@intel.com> wrote:
>
> On Mon, 2025-05-12 at 11:47 -0700, Ian Rogers wrote:
> > On my alderlake I currently see for the "perf metrics value validation" test:
> > ```
> > Total Test Count:  142
> > Passed Test Count:  139
> > [
> > Metric Relationship Error:      The collected value of metric ['tma_fetch_latency', 'tma_fetch_bandwidth', 'tma_frontend_bound']
> >                         is [31.137028] in workload(s): ['perf bench futex hash -r 2 -s']
> >                         but expected value range is [tma_frontend_bound, tma_frontend_bound]
> >                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent',
> > Metric Relationship Error:      The collected value of metric ['tma_memory_bound', 'tma_core_bound', 'tma_backend_bound']
> >                         is [6.564442] in workload(s): ['perf bench futex hash -r 2 -s']
> >                         but expected value range is [tma_backend_bound, tma_backend_bound]
> >                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent',
> > Metric Relationship Error:      The collected value of metric ['tma_light_operations', 'tma_heavy_operations', 'tma_retiring']
> >                         is [57.806179] in workload(s): ['perf bench futex hash -r 2 -s']
> >                         but expected value range is [tma_retiring, tma_retiring]
> >                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent']
> > Metric validation return with erros. Please check metrics reported with errors.
> > ```
> > I suspect it is due to two metrics for different CPU types being
> > enabled. Add a -cputype option to avoid this. The test still fails with:
> > ```
> > Total Test Count:  115
> > Passed Test Count:  114
> > [
> > Wrong Metric Value Error:       The collected value of metric ['tma_l2_hit_latency']
> >                         is [117.947088] in workload(s): ['perf bench futex hash -r 2 -s']
> >                         but expected value range is [0, 100]]
> > Metric validation return with errors. Please check metrics reported with errors.
> > ```
> > which is a reproducible genuine error and likely requires a metric fix.
>
> Hi Ian, I tested this on my alder lake and an arrow lake. All tests, including tma_l2_hit_latency,
> pass on my end.
>
> Tested-by: Thomas Falcon <thomas.falcon@intel.com>

Thanks Thomas! It should also work for core_lowpower on ArrowLake. I
find that sometimes tma_l2_hit_latency passes for me. Trying a few
more times I see other failures, but they all seem to be "No Metric
Value Error" - perhaps these shouldn't fail the test. In the testing
code we're passing '-a' for system-wide profiling:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/tests/shell/lib/perf_metric_validation.py?h=perf-tools-next#n380
I believe this is done so that counters for things like AVX will
gather values. I wonder if something is happening with
tma_l2_hit_latency due to scaling counts:
```
$ sudo /tmp/perf/perf stat -M tma_l2_hit_latency -a sleep 1

 Performance counter stats for 'system wide':

     7,987,903,325      cpu_core/TOPDOWN.SLOTS/          #    210.2 %
tma_l2_hit_latency       (87.27%)
     3,131,119,398      cpu_core/topdown-retiring/
                         (87.27%)
     1,910,718,811      cpu_core/topdown-mem-bound/
                         (87.27%)
       481,456,610      cpu_core/topdown-bad-spec/
                         (87.27%)
     1,681,347,944      cpu_core/topdown-fe-bound/
                         (87.27%)
     2,798,109,902      cpu_core/topdown-be-bound/
                         (87.27%)
       365,736,554      cpu_core/MEMORY_ACTIVITY.STALLS_L1D_MISS/
                                  (87.27%)
       327,668,588      cpu_core/MEMORY_ACTIVITY.STALLS_L2_MISS/
                                 (87.30%)
        12,744,464      cpu_core/MEM_LOAD_RETIRED.L1_MISS/
                           (75.32%)
     1,403,250,041      cpu_core/CPU_CLK_UNHALTED.THREAD/
                          (87.65%)
         6,657,480      cpu_core/MEM_LOAD_RETIRED.L2_HIT/
                          (87.66%)
    59,424,499,192      TSC
        40,830,608      cpu_core/MEM_LOAD_RETIRED.FB_HIT/
                          (62.46%)
     1,461,544,380      cpu_core/CPU_CLK_UNHALTED.REF_TSC/
                           (74.79%)
     1,008,604,319      duration_time

       1.004974560 seconds time elapsed
```
The values in the parentheses are a scaling amount that is meant to
reflect event multiplexing, but for hybrid the events aren't running
while on the other core type, so we're seeing these odd multiplexing
values, and they are used to scale the counts:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/lib/perf/evsel.c?h=perf-tools-next#n599
I find that when I run a benchmark rather than "sleep" the issue seems
harder to reproduce.
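
To make that concrete, here is a small sketch of the enabled/running
extrapolation the linked scaling code performs (made-up numbers; the
helper just mirrors the count * enabled / running formula):
```
# Sketch of how a count is extrapolated when an event was only running
# for part of the time it was enabled, as in the linked scaling code.
def scale_count(val, time_enabled, time_running):
    if time_running == 0:
        return 0  # the event never got to run
    return int(val * time_enabled / time_running)

# A made-up example: an event running ~87% of its enabled time has its
# raw count inflated by roughly 1/0.87.
print(scale_count(6_657_480, time_enabled=1_000_000_000, time_running=872_700_000))
```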

Thanks,
Ian

> Thanks,
> Tom
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  .../tests/shell/lib/perf_metric_validation.py   | 12 +++++++++---
> >  tools/perf/tests/shell/stat_metrics_values.sh   | 17 +++++++++++------
> >  2 files changed, 20 insertions(+), 9 deletions(-)
> >
> > diff --git a/tools/perf/tests/shell/lib/perf_metric_validation.py b/tools/perf/tests/shell/lib/perf_metric_validation.py
> > index 0b94216c9c46..dea8ef1977bf 100644
> > --- a/tools/perf/tests/shell/lib/perf_metric_validation.py
> > +++ b/tools/perf/tests/shell/lib/perf_metric_validation.py
> > @@ -35,7 +35,8 @@ class TestError:
> >
> >
> >  class Validator:
> > -    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='', workload='true', metrics=''):
> > +    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='',
> > +                 workload='true', metrics='', cputype='cpu'):
> >          self.rulefname = rulefname
> >          self.reportfname = reportfname
> >          self.rules = None
> > @@ -43,6 +44,7 @@ class Validator:
> >          self.metrics = self.__set_metrics(metrics)
> >          self.skiplist = set()
> >          self.tolerance = t
> > +        self.cputype = cputype
> >
> >          self.workloads = [x for x in workload.split(",") if x]
> >          self.wlidx = 0  # idx of current workloads
> > @@ -377,7 +379,7 @@ class Validator:
> >
> >      def _run_perf(self, metric, workload: str):
> >          tool = 'perf'
> > -        command = [tool, 'stat', '-j', '-M', f"{metric}", "-a"]
> > +        command = [tool, 'stat', '--cputype', self.cputype, '-j', '-M', f"{metric}", "-a"]
> >          wl = workload.split()
> >          command.extend(wl)
> >          print(" ".join(command))
> > @@ -443,6 +445,8 @@ class Validator:
> >                  if 'MetricName' not in m:
> >                      print("Warning: no metric name")
> >                      continue
> > +                if 'Unit' in m and m['Unit'] != self.cputype:
> > +                    continue
> >                  name = m['MetricName'].lower()
> >                  self.metrics.add(name)
> >                  if 'ScaleUnit' in m and (m['ScaleUnit'] == '1%' or m['ScaleUnit'] == '100%'):
> > @@ -578,6 +582,8 @@ def main() -> None:
> >      parser.add_argument(
> >          "-wl", help="Workload to run while data collection", default="true")
> >      parser.add_argument("-m", help="Metric list to validate", default="")
> > +    parser.add_argument("-cputype", help="Only test metrics for the given CPU/PMU type",
> > +                        default="cpu")
> >      args = parser.parse_args()
> >      outpath = Path(args.output_dir)
> >      reportf = Path.joinpath(outpath, 'perf_report.json')
> > @@ -586,7 +592,7 @@ def main() -> None:
> >
> >      validator = Validator(args.rule, reportf, debug=args.debug,
> >                            datafname=datafile, fullrulefname=fullrule, workload=args.wl,
> > -                          metrics=args.m)
> > +                          metrics=args.m, cputype=args.cputype)
> >      ret = validator.test()
> >
> >      return ret
> > diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh
> > index 279f19c5919a..30566f0b5427 100755
> > --- a/tools/perf/tests/shell/stat_metrics_values.sh
> > +++ b/tools/perf/tests/shell/stat_metrics_values.sh
> > @@ -16,11 +16,16 @@ workload="perf bench futex hash -r 2 -s"
> >  # Add -debug, save data file and full rule file
> >  echo "Launch python validation script $pythonvalidator"
> >  echo "Output will be stored in: $tmpdir"
> > -$PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}"
> > -ret=$?
> > -rm -rf $tmpdir
> > -if [ $ret -ne 0 ]; then
> > -     echo "Metric validation return with erros. Please check metrics reported with errors."
> > -fi
> > +for cputype in /sys/bus/event_source/devices/cpu_*; do
> > +     cputype=$(basename "$cputype")
> > +     echo "Testing metrics for: $cputype"
> > +     $PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}" \
> > +             -cputype "${cputype}"
> > +     ret=$?
> > +     rm -rf $tmpdir
> > +     if [ $ret -ne 0 ]; then
> > +             echo "Metric validation return with errors. Please check metrics reported with errors."
> > +     fi
> > +done
> >  exit $ret
> >
>


* Re: [PATCH v1 2/2] perf test: Hybrid improvements for metric value validation test
  2025-05-12 21:52     ` Ian Rogers
@ 2025-05-13 17:26       ` Ian Rogers
  2025-05-13 19:07         ` Ian Rogers
  0 siblings, 1 reply; 7+ messages in thread
From: Ian Rogers @ 2025-05-13 17:26 UTC (permalink / raw)
  To: Falcon, Thomas, Wang, Weilin
  Cc: james.clark@linaro.org, alexander.shishkin@linux.intel.com,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	peterz@infradead.org, mark.rutland@arm.com, mingo@redhat.com,
	Hunter, Adrian, acme@kernel.org, namhyung@kernel.org,
	kan.liang@linux.intel.com, jolsa@kernel.org

On Mon, May 12, 2025 at 2:52 PM Ian Rogers <irogers@google.com> wrote:
>
> On Mon, May 12, 2025 at 1:30 PM Falcon, Thomas <thomas.falcon@intel.com> wrote:
> >
> > On Mon, 2025-05-12 at 11:47 -0700, Ian Rogers wrote:
> > > On my alderlake I currently see for the "perf metrics value validation" test:
> > > ```
> > > Total Test Count:  142
> > > Passed Test Count:  139
> > > [
> > > Metric Relationship Error:      The collected value of metric ['tma_fetch_latency', 'tma_fetch_bandwidth', 'tma_frontend_bound']
> > >                         is [31.137028] in workload(s): ['perf bench futex hash -r 2 -s']
> > >                         but expected value range is [tma_frontend_bound, tma_frontend_bound]
> > >                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent',
> > > Metric Relationship Error:      The collected value of metric ['tma_memory_bound', 'tma_core_bound', 'tma_backend_bound']
> > >                         is [6.564442] in workload(s): ['perf bench futex hash -r 2 -s']
> > >                         but expected value range is [tma_backend_bound, tma_backend_bound]
> > >                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent',
> > > Metric Relationship Error:      The collected value of metric ['tma_light_operations', 'tma_heavy_operations', 'tma_retiring']
> > >                         is [57.806179] in workload(s): ['perf bench futex hash -r 2 -s']
> > >                         but expected value range is [tma_retiring, tma_retiring]
> > >                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent']
> > > Metric validation return with erros. Please check metrics reported with errors.
> > > ```
> > > I suspect it is due to two metrics for different CPU types being
> > > enabled. Add a -cputype option to avoid this. The test still fails with:
> > > ```
> > > Total Test Count:  115
> > > Passed Test Count:  114
> > > [
> > > Wrong Metric Value Error:       The collected value of metric ['tma_l2_hit_latency']
> > >                         is [117.947088] in workload(s): ['perf bench futex hash -r 2 -s']
> > >                         but expected value range is [0, 100]]
> > > Metric validation return with errors. Please check metrics reported with errors.
> > > ```
> > > which is a reproducible genuine error and likely requires a metric fix.
> >
> > Hi Ian, I tested this on my alder lake and an arrow lake. All tests, including tma_l2_hit_latency,
> > pass on my end.
> >
> > Tested-by: Thomas Falcon <thomas.falcon@intel.com>
>
> Thanks Thomas! It should also work for core_lowpower on ArrowLake. I
> find some times that tma_l2_hit_latency passes for me. Trying a few
> more times I see other failures, but they all seem to be "No Metric
> Value Error" - perhaps these shouldn't fail the test. In the testing
> code we're passing '-a' for system wide profiling:
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/tests/shell/lib/perf_metric_validation.py?h=perf-tools-next#n380
> I believe this is done so that counters for things like AVX will
> gather values. I wonder if the tma_l2_hit_latency something is
> happening due to scaling counts:
> ```
> $ sudo /tmp/perf/perf stat -M tma_l2_hit_latency -a sleep 1
>
>  Performance counter stats for 'system wide':
>
>      7,987,903,325      cpu_core/TOPDOWN.SLOTS/          #    210.2 %
> tma_l2_hit_latency       (87.27%)
>      3,131,119,398      cpu_core/topdown-retiring/
>                          (87.27%)
>      1,910,718,811      cpu_core/topdown-mem-bound/
>                          (87.27%)
>        481,456,610      cpu_core/topdown-bad-spec/
>                          (87.27%)
>      1,681,347,944      cpu_core/topdown-fe-bound/
>                          (87.27%)
>      2,798,109,902      cpu_core/topdown-be-bound/
>                          (87.27%)
>        365,736,554      cpu_core/MEMORY_ACTIVITY.STALLS_L1D_MISS/
>                                   (87.27%)
>        327,668,588      cpu_core/MEMORY_ACTIVITY.STALLS_L2_MISS/
>                                  (87.30%)
>         12,744,464      cpu_core/MEM_LOAD_RETIRED.L1_MISS/
>                            (75.32%)
>      1,403,250,041      cpu_core/CPU_CLK_UNHALTED.THREAD/
>                           (87.65%)
>          6,657,480      cpu_core/MEM_LOAD_RETIRED.L2_HIT/
>                           (87.66%)
>     59,424,499,192      TSC
>         40,830,608      cpu_core/MEM_LOAD_RETIRED.FB_HIT/
>                           (62.46%)
>      1,461,544,380      cpu_core/CPU_CLK_UNHALTED.REF_TSC/
>                            (74.79%)
>      1,008,604,319      duration_time
>
>        1.004974560 seconds time elapsed
> ```
> The values in the parentheses is a scaling amount which should mean
> for event multiplexing but for hybrid the events aren't running when
> on the other core type, so we're seeing these odd multiplexing values
> and these are used to scale counts:
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/lib/perf/evsel.c?h=perf-tools-next#n599
> I find when I run a benchmark rather than "sleep" the issue seems
> harder to reproduce.

Ok, chatting with Weilin and actually paying attention to the warning
messages, I think I see a problem. TSC (msr/tsc/) is aggregating data
across all CPUs (it is a software event and a different performance
monitoring unit from cpu_core) while the counters are only on cpu_core.
So I think this means the TSC value is too large. However, even with
restricting the CPUs I see the >100% problem:
```
$ perf stat -M tma_l2_hit_latency -C 0-15 -a sleep 1

 Performance counter stats for 'system wide':

    27,985,670,146      cpu_core/TOPDOWN.SLOTS/          #    125.6 %
tma_l2_hit_latency       (87.22%)
     9,619,906,383      cpu_core/topdown-retiring/
                         (87.22%)
     2,333,124,385      cpu_core/topdown-mem-bound/
                         (87.22%)
     3,607,656,674      cpu_core/topdown-bad-spec/
                         (87.22%)
     9,839,779,867      cpu_core/topdown-fe-bound/
                         (87.22%)
     5,244,189,749      cpu_core/topdown-be-bound/
                         (87.22%)
       442,932,231      cpu_core/MEMORY_ACTIVITY.STALLS_L1D_MISS/
                                  (87.24%)
       360,126,840      cpu_core/MEMORY_ACTIVITY.STALLS_L2_MISS/
                                 (87.63%)
        31,264,814      cpu_core/MEM_LOAD_RETIRED.L1_MISS/
                           (75.26%)
     4,761,244,040      cpu_core/CPU_CLK_UNHALTED.THREAD/
                          (87.63%)
        28,429,277      cpu_core/MEM_LOAD_RETIRED.L2_HIT/
                          (87.62%)
    33,863,490,835      TSC
        23,533,366      cpu_core/MEM_LOAD_RETIRED.FB_HIT/
                          (62.25%)
     3,158,155,632      cpu_core/CPU_CLK_UNHALTED.REF_TSC/
                           (74.63%)
     1,003,102,327      duration_time

       1.001912038 seconds time elapsed
```
So we still need to figure this one out. The multiplexing numbers
still worry me.

Thanks,
Ian


> Thanks,
> Ian
>
> > Thanks,
> > Tom
> > >
> > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > ---
> > >  .../tests/shell/lib/perf_metric_validation.py   | 12 +++++++++---
> > >  tools/perf/tests/shell/stat_metrics_values.sh   | 17 +++++++++++------
> > >  2 files changed, 20 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/tools/perf/tests/shell/lib/perf_metric_validation.py b/tools/perf/tests/shell/lib/perf_metric_validation.py
> > > index 0b94216c9c46..dea8ef1977bf 100644
> > > --- a/tools/perf/tests/shell/lib/perf_metric_validation.py
> > > +++ b/tools/perf/tests/shell/lib/perf_metric_validation.py
> > > @@ -35,7 +35,8 @@ class TestError:
> > >
> > >
> > >  class Validator:
> > > -    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='', workload='true', metrics=''):
> > > +    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='',
> > > +                 workload='true', metrics='', cputype='cpu'):
> > >          self.rulefname = rulefname
> > >          self.reportfname = reportfname
> > >          self.rules = None
> > > @@ -43,6 +44,7 @@ class Validator:
> > >          self.metrics = self.__set_metrics(metrics)
> > >          self.skiplist = set()
> > >          self.tolerance = t
> > > +        self.cputype = cputype
> > >
> > >          self.workloads = [x for x in workload.split(",") if x]
> > >          self.wlidx = 0  # idx of current workloads
> > > @@ -377,7 +379,7 @@ class Validator:
> > >
> > >      def _run_perf(self, metric, workload: str):
> > >          tool = 'perf'
> > > -        command = [tool, 'stat', '-j', '-M', f"{metric}", "-a"]
> > > +        command = [tool, 'stat', '--cputype', self.cputype, '-j', '-M', f"{metric}", "-a"]
> > >          wl = workload.split()
> > >          command.extend(wl)
> > >          print(" ".join(command))
> > > @@ -443,6 +445,8 @@ class Validator:
> > >                  if 'MetricName' not in m:
> > >                      print("Warning: no metric name")
> > >                      continue
> > > +                if 'Unit' in m and m['Unit'] != self.cputype:
> > > +                    continue
> > >                  name = m['MetricName'].lower()
> > >                  self.metrics.add(name)
> > >                  if 'ScaleUnit' in m and (m['ScaleUnit'] == '1%' or m['ScaleUnit'] == '100%'):
> > > @@ -578,6 +582,8 @@ def main() -> None:
> > >      parser.add_argument(
> > >          "-wl", help="Workload to run while data collection", default="true")
> > >      parser.add_argument("-m", help="Metric list to validate", default="")
> > > +    parser.add_argument("-cputype", help="Only test metrics for the given CPU/PMU type",
> > > +                        default="cpu")
> > >      args = parser.parse_args()
> > >      outpath = Path(args.output_dir)
> > >      reportf = Path.joinpath(outpath, 'perf_report.json')
> > > @@ -586,7 +592,7 @@ def main() -> None:
> > >
> > >      validator = Validator(args.rule, reportf, debug=args.debug,
> > >                            datafname=datafile, fullrulefname=fullrule, workload=args.wl,
> > > -                          metrics=args.m)
> > > +                          metrics=args.m, cputype=args.cputype)
> > >      ret = validator.test()
> > >
> > >      return ret
> > > diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh
> > > index 279f19c5919a..30566f0b5427 100755
> > > --- a/tools/perf/tests/shell/stat_metrics_values.sh
> > > +++ b/tools/perf/tests/shell/stat_metrics_values.sh
> > > @@ -16,11 +16,16 @@ workload="perf bench futex hash -r 2 -s"
> > >  # Add -debug, save data file and full rule file
> > >  echo "Launch python validation script $pythonvalidator"
> > >  echo "Output will be stored in: $tmpdir"
> > > -$PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}"
> > > -ret=$?
> > > -rm -rf $tmpdir
> > > -if [ $ret -ne 0 ]; then
> > > -     echo "Metric validation return with erros. Please check metrics reported with errors."
> > > -fi
> > > +for cputype in /sys/bus/event_source/devices/cpu_*; do
> > > +     cputype=$(basename "$cputype")
> > > +     echo "Testing metrics for: $cputype"
> > > +     $PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}" \
> > > +             -cputype "${cputype}"
> > > +     ret=$?
> > > +     rm -rf $tmpdir
> > > +     if [ $ret -ne 0 ]; then
> > > +             echo "Metric validation return with errors. Please check metrics reported with errors."
> > > +     fi
> > > +done
> > >  exit $ret
> > >
> >


* Re: [PATCH v1 2/2] perf test: Hybrid improvements for metric value validation test
  2025-05-13 17:26       ` Ian Rogers
@ 2025-05-13 19:07         ` Ian Rogers
  0 siblings, 0 replies; 7+ messages in thread
From: Ian Rogers @ 2025-05-13 19:07 UTC (permalink / raw)
  To: Falcon, Thomas, Wang, Weilin, kan.liang@linux.intel.com,
	Stephane Eranian
  Cc: james.clark@linaro.org, alexander.shishkin@linux.intel.com,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	peterz@infradead.org, mark.rutland@arm.com, mingo@redhat.com,
	Hunter, Adrian, acme@kernel.org, namhyung@kernel.org,
	jolsa@kernel.org

On Tue, May 13, 2025 at 10:26 AM Ian Rogers <irogers@google.com> wrote:
>
> On Mon, May 12, 2025 at 2:52 PM Ian Rogers <irogers@google.com> wrote:
> >
> > On Mon, May 12, 2025 at 1:30 PM Falcon, Thomas <thomas.falcon@intel.com> wrote:
> > >
> > > On Mon, 2025-05-12 at 11:47 -0700, Ian Rogers wrote:
> > > > On my alderlake I currently see for the "perf metrics value validation" test:
> > > > ```
> > > > Total Test Count:  142
> > > > Passed Test Count:  139
> > > > [
> > > > Metric Relationship Error:      The collected value of metric ['tma_fetch_latency', 'tma_fetch_bandwidth', 'tma_frontend_bound']
> > > >                         is [31.137028] in workload(s): ['perf bench futex hash -r 2 -s']
> > > >                         but expected value range is [tma_frontend_bound, tma_frontend_bound]
> > > >                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent',
> > > > Metric Relationship Error:      The collected value of metric ['tma_memory_bound', 'tma_core_bound', 'tma_backend_bound']
> > > >                         is [6.564442] in workload(s): ['perf bench futex hash -r 2 -s']
> > > >                         but expected value range is [tma_backend_bound, tma_backend_bound]
> > > >                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent',
> > > > Metric Relationship Error:      The collected value of metric ['tma_light_operations', 'tma_heavy_operations', 'tma_retiring']
> > > >                         is [57.806179] in workload(s): ['perf bench futex hash -r 2 -s']
> > > >                         but expected value range is [tma_retiring, tma_retiring]
> > > >                         Relationship rule description: 'Sum of the level 2 children should equal level 1 parent']
> > > > Metric validation return with erros. Please check metrics reported with errors.
> > > > ```
> > > > I suspect it is due to two metrics for different CPU types being
> > > > enabled. Add a -cputype option to avoid this. The test still fails with:
> > > > ```
> > > > Total Test Count:  115
> > > > Passed Test Count:  114
> > > > [
> > > > Wrong Metric Value Error:       The collected value of metric ['tma_l2_hit_latency']
> > > >                         is [117.947088] in workload(s): ['perf bench futex hash -r 2 -s']
> > > >                         but expected value range is [0, 100]]
> > > > Metric validation return with errors. Please check metrics reported with errors.
> > > > ```
> > > > which is a reproducible genuine error and likely requires a metric fix.
> > >
> > > Hi Ian, I tested this on my alder lake and an arrow lake. All tests, including tma_l2_hit_latency,
> > > pass on my end.
> > >
> > > Tested-by: Thomas Falcon <thomas.falcon@intel.com>
> >
> > Thanks Thomas! It should also work for core_lowpower on ArrowLake. I
> > find some times that tma_l2_hit_latency passes for me. Trying a few
> > more times I see other failures, but they all seem to be "No Metric
> > Value Error" - perhaps these shouldn't fail the test. In the testing
> > code we're passing '-a' for system wide profiling:
> > https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/tests/shell/lib/perf_metric_validation.py?h=perf-tools-next#n380
> > I believe this is done so that counters for things like AVX will
> > gather values. I wonder if the tma_l2_hit_latency something is
> > happening due to scaling counts:
> > ```
> > $ sudo /tmp/perf/perf stat -M tma_l2_hit_latency -a sleep 1
> >
> >  Performance counter stats for 'system wide':
> >
> >      7,987,903,325      cpu_core/TOPDOWN.SLOTS/          #    210.2 %
> > tma_l2_hit_latency       (87.27%)
> >      3,131,119,398      cpu_core/topdown-retiring/
> >                          (87.27%)
> >      1,910,718,811      cpu_core/topdown-mem-bound/
> >                          (87.27%)
> >        481,456,610      cpu_core/topdown-bad-spec/
> >                          (87.27%)
> >      1,681,347,944      cpu_core/topdown-fe-bound/
> >                          (87.27%)
> >      2,798,109,902      cpu_core/topdown-be-bound/
> >                          (87.27%)
> >        365,736,554      cpu_core/MEMORY_ACTIVITY.STALLS_L1D_MISS/
> >                                   (87.27%)
> >        327,668,588      cpu_core/MEMORY_ACTIVITY.STALLS_L2_MISS/
> >                                  (87.30%)
> >         12,744,464      cpu_core/MEM_LOAD_RETIRED.L1_MISS/
> >                            (75.32%)
> >      1,403,250,041      cpu_core/CPU_CLK_UNHALTED.THREAD/
> >                           (87.65%)
> >          6,657,480      cpu_core/MEM_LOAD_RETIRED.L2_HIT/
> >                           (87.66%)
> >     59,424,499,192      TSC
> >         40,830,608      cpu_core/MEM_LOAD_RETIRED.FB_HIT/
> >                           (62.46%)
> >      1,461,544,380      cpu_core/CPU_CLK_UNHALTED.REF_TSC/
> >                            (74.79%)
> >      1,008,604,319      duration_time
> >
> >        1.004974560 seconds time elapsed
> > ```
> > The values in the parentheses is a scaling amount which should mean
> > for event multiplexing but for hybrid the events aren't running when
> > on the other core type, so we're seeing these odd multiplexing values
> > and these are used to scale counts:
> > https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/lib/perf/evsel.c?h=perf-tools-next#n599
> > I find when I run a benchmark rather than "sleep" the issue seems
> > harder to reproduce.
>
> Ok chatting with Weilin and actually paying attention to warning
> messages I think I see a problem. TSC (msr/tsc/) is aggregating data
> across all CPUs (it is a software event and is a different performance
> monitoring unit to cpu_core) while the counters are only on cpu_core.
> So I think this means the TSC value is too large. However, even with
> restricting the CPUs I see the >100% problem:
> ```
> $ perf stat -M tma_l2_hit_latency -C 0-15 -a sleep 1
>
>  Performance counter stats for 'system wide':
>
>     27,985,670,146      cpu_core/TOPDOWN.SLOTS/          #    125.6 %
> tma_l2_hit_latency       (87.22%)
>      9,619,906,383      cpu_core/topdown-retiring/
>                          (87.22%)
>      2,333,124,385      cpu_core/topdown-mem-bound/
>                          (87.22%)
>      3,607,656,674      cpu_core/topdown-bad-spec/
>                          (87.22%)
>      9,839,779,867      cpu_core/topdown-fe-bound/
>                          (87.22%)
>      5,244,189,749      cpu_core/topdown-be-bound/
>                          (87.22%)
>        442,932,231      cpu_core/MEMORY_ACTIVITY.STALLS_L1D_MISS/
>                                   (87.24%)
>        360,126,840      cpu_core/MEMORY_ACTIVITY.STALLS_L2_MISS/
>                                  (87.63%)
>         31,264,814      cpu_core/MEM_LOAD_RETIRED.L1_MISS/
>                            (75.26%)
>      4,761,244,040      cpu_core/CPU_CLK_UNHALTED.THREAD/
>                           (87.63%)
>         28,429,277      cpu_core/MEM_LOAD_RETIRED.L2_HIT/
>                           (87.62%)
>     33,863,490,835      TSC
>         23,533,366      cpu_core/MEM_LOAD_RETIRED.FB_HIT/
>                           (62.25%)
>      3,158,155,632      cpu_core/CPU_CLK_UNHALTED.REF_TSC/
>                            (74.63%)
>      1,003,102,327      duration_time
>
>        1.001912038 seconds time elapsed
> ```
> So we still need to figure this one out. The multiplexing numbers
> still worry me.

So I think the TSC bug is genuine; perhaps Kan has thoughts on how to
restrict the cpu mask to just the core cpus. The tma_l2_hit_latency >100%
bug appears to be global. I see the problem on a tigerlake:
```
$ perf stat --metric-no-threshold -M tma_l2_hit_latency -a sleep 1

Performance counter stats for 'system wide':

       46,745,378      MEM_LOAD_RETIRED.FB_HIT          #    105.8 %
tma_l2_hit_latency
    1,445,788,955      CPU_CLK_UNHALTED.REF_TSC
    2,532,066,403      CPU_CLK_UNHALTED.THREAD
   40,008,507,350      TSC
       11,922,390      MEM_LOAD_RETIRED.L1_MISS
        2,587,517      MEM_LOAD_RETIRED.L2_HIT
    1,002,819,485      duration_time

      1.002198593 seconds time elapsed
```
Anyway, I think this patch series should land and we can worry about
this metric and the hybrid problems separately.

Thanks,
Ian

> >
> > > Thanks,
> > > Tom
> > > >
> > > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > > ---
> > > >  .../tests/shell/lib/perf_metric_validation.py   | 12 +++++++++---
> > > >  tools/perf/tests/shell/stat_metrics_values.sh   | 17 +++++++++++------
> > > >  2 files changed, 20 insertions(+), 9 deletions(-)
> > > >
> > > > diff --git a/tools/perf/tests/shell/lib/perf_metric_validation.py b/tools/perf/tests/shell/lib/perf_metric_validation.py
> > > > index 0b94216c9c46..dea8ef1977bf 100644
> > > > --- a/tools/perf/tests/shell/lib/perf_metric_validation.py
> > > > +++ b/tools/perf/tests/shell/lib/perf_metric_validation.py
> > > > @@ -35,7 +35,8 @@ class TestError:
> > > >
> > > >
> > > >  class Validator:
> > > > -    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='', workload='true', metrics=''):
> > > > +    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='',
> > > > +                 workload='true', metrics='', cputype='cpu'):
> > > >          self.rulefname = rulefname
> > > >          self.reportfname = reportfname
> > > >          self.rules = None
> > > > @@ -43,6 +44,7 @@ class Validator:
> > > >          self.metrics = self.__set_metrics(metrics)
> > > >          self.skiplist = set()
> > > >          self.tolerance = t
> > > > +        self.cputype = cputype
> > > >
> > > >          self.workloads = [x for x in workload.split(",") if x]
> > > >          self.wlidx = 0  # idx of current workloads
> > > > @@ -377,7 +379,7 @@ class Validator:
> > > >
> > > >      def _run_perf(self, metric, workload: str):
> > > >          tool = 'perf'
> > > > -        command = [tool, 'stat', '-j', '-M', f"{metric}", "-a"]
> > > > +        command = [tool, 'stat', '--cputype', self.cputype, '-j', '-M', f"{metric}", "-a"]
> > > >          wl = workload.split()
> > > >          command.extend(wl)
> > > >          print(" ".join(command))
> > > > @@ -443,6 +445,8 @@ class Validator:
> > > >                  if 'MetricName' not in m:
> > > >                      print("Warning: no metric name")
> > > >                      continue
> > > > +                if 'Unit' in m and m['Unit'] != self.cputype:
> > > > +                    continue
> > > >                  name = m['MetricName'].lower()
> > > >                  self.metrics.add(name)
> > > >                  if 'ScaleUnit' in m and (m['ScaleUnit'] == '1%' or m['ScaleUnit'] == '100%'):
> > > > @@ -578,6 +582,8 @@ def main() -> None:
> > > >      parser.add_argument(
> > > >          "-wl", help="Workload to run while data collection", default="true")
> > > >      parser.add_argument("-m", help="Metric list to validate", default="")
> > > > +    parser.add_argument("-cputype", help="Only test metrics for the given CPU/PMU type",
> > > > +                        default="cpu")
> > > >      args = parser.parse_args()
> > > >      outpath = Path(args.output_dir)
> > > >      reportf = Path.joinpath(outpath, 'perf_report.json')
> > > > @@ -586,7 +592,7 @@ def main() -> None:
> > > >
> > > >      validator = Validator(args.rule, reportf, debug=args.debug,
> > > >                            datafname=datafile, fullrulefname=fullrule, workload=args.wl,
> > > > -                          metrics=args.m)
> > > > +                          metrics=args.m, cputype=args.cputype)
> > > >      ret = validator.test()
> > > >
> > > >      return ret
> > > > diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh
> > > > index 279f19c5919a..30566f0b5427 100755
> > > > --- a/tools/perf/tests/shell/stat_metrics_values.sh
> > > > +++ b/tools/perf/tests/shell/stat_metrics_values.sh
> > > > @@ -16,11 +16,16 @@ workload="perf bench futex hash -r 2 -s"
> > > >  # Add -debug, save data file and full rule file
> > > >  echo "Launch python validation script $pythonvalidator"
> > > >  echo "Output will be stored in: $tmpdir"
> > > > -$PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}"
> > > > -ret=$?
> > > > -rm -rf $tmpdir
> > > > -if [ $ret -ne 0 ]; then
> > > > -     echo "Metric validation return with erros. Please check metrics reported with errors."
> > > > -fi
> > > > +for cputype in /sys/bus/event_source/devices/cpu_*; do
> > > > +     cputype=$(basename "$cputype")
> > > > +     echo "Testing metrics for: $cputype"
> > > > +     $PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}" \
> > > > +             -cputype "${cputype}"
> > > > +     ret=$?
> > > > +     rm -rf $tmpdir
> > > > +     if [ $ret -ne 0 ]; then
> > > > +             echo "Metric validation return with errors. Please check metrics reported with errors."
> > > > +     fi
> > > > +done
> > > >  exit $ret
> > > >
> > >


* Re: [PATCH v1 1/2] perf list: Display the PMU name associated with a perf metric in json
  2025-05-12 18:46 [PATCH v1 1/2] perf list: Display the PMU name associated with a perf metric in json Ian Rogers
  2025-05-12 18:47 ` [PATCH v1 2/2] perf test: Hybrid improvements for metric value validation test Ian Rogers
@ 2025-05-13 20:25 ` Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 7+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-05-13 20:25 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Weilin Wang, Peter Zijlstra, Ingo Molnar, Namhyung Kim,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Adrian Hunter,
	Kan Liang, James Clark, linux-perf-users, linux-kernel

On Mon, May 12, 2025 at 11:46:59AM -0700, Ian Rogers wrote:
> The perf stat --cputype option can be used to filter which metrics
> will be applied, for this reason the json metrics have an associated
> PMU. List this PMU name in the perf list output in json mode so that
> tooling may access it.
> 
> An example of the new field is:
> ```
> {
>         "MetricGroup": "Backend",
>         "MetricName": "tma_core_bound",
>         "MetricExpr": "max(0, tma_backend_bound - tma_memory_bound)",
>         "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
>         "ScaleUnit": "100%",
>         "BriefDescription": "This metric represents fraction of slots where ...
>         "PublicDescription": "This metric represents fraction of slots where ...
>         "Unit": "cpu_core"
> },
> ```

Tried testing this, but the Intel system I have (14700k), while hybrid,
doesn't have this "unit" field in its JSON files :-)

Anyway, applying the patches.

- Arnaldo
 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/builtin-list.c      | 12 ++++++++++--
>  tools/perf/util/metricgroup.c  |  5 ++++-
>  tools/perf/util/print-events.h |  3 ++-
>  3 files changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
> index fed482adb039..e9b595d75df2 100644
> --- a/tools/perf/builtin-list.c
> +++ b/tools/perf/builtin-list.c
> @@ -197,7 +197,8 @@ static void default_print_metric(void *ps,
>  				const char *long_desc,
>  				const char *expr,
>  				const char *threshold,
> -				const char *unit __maybe_unused)
> +				const char *unit __maybe_unused,
> +				const char *pmu_name __maybe_unused)
>  {
>  	struct print_state *print_state = ps;
>  	FILE *fp = print_state->fp;
> @@ -433,7 +434,8 @@ static void json_print_event(void *ps, const char *topic, const char *pmu_name,
>  static void json_print_metric(void *ps __maybe_unused, const char *group,
>  			      const char *name, const char *desc,
>  			      const char *long_desc, const char *expr,
> -			      const char *threshold, const char *unit)
> +			      const char *threshold, const char *unit,
> +			      const char *pmu_name)
>  {
>  	struct json_print_state *print_state = ps;
>  	bool need_sep = false;
> @@ -483,6 +485,12 @@ static void json_print_metric(void *ps __maybe_unused, const char *group,
>  				   long_desc);
>  		need_sep = true;
>  	}
> +	if (pmu_name) {
> +		fix_escape_fprintf(fp, &buf, "%s\t\"Unit\": \"%S\"",
> +				   need_sep ? ",\n" : "",
> +				   pmu_name);
> +		need_sep = true;
> +	}
>  	fprintf(fp, "%s}", need_sep ? "\n" : "");
>  	strbuf_release(&buf);
>  }
> diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> index 126a631686b0..43d35f956a33 100644
> --- a/tools/perf/util/metricgroup.c
> +++ b/tools/perf/util/metricgroup.c
> @@ -396,6 +396,7 @@ struct mep {
>  	const char *metric_expr;
>  	const char *metric_threshold;
>  	const char *metric_unit;
> +	const char *pmu_name;
>  };
>  
>  static int mep_cmp(struct rb_node *rb_node, const void *entry)
> @@ -476,6 +477,7 @@ static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm,
>  			me->metric_expr = pm->metric_expr;
>  			me->metric_threshold = pm->metric_threshold;
>  			me->metric_unit = pm->unit;
> +			me->pmu_name = pm->pmu;
>  		}
>  	}
>  	free(omg);
> @@ -551,7 +553,8 @@ void metricgroup__print(const struct print_callbacks *print_cb, void *print_stat
>  				me->metric_long_desc,
>  				me->metric_expr,
>  				me->metric_threshold,
> -				me->metric_unit);
> +				me->metric_unit,
> +				me->pmu_name);
>  		next = rb_next(node);
>  		rblist__remove_node(&groups, node);
>  	}
> diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
> index 445efa1636c1..8f19c2bea64a 100644
> --- a/tools/perf/util/print-events.h
> +++ b/tools/perf/util/print-events.h
> @@ -25,7 +25,8 @@ struct print_callbacks {
>  			const char *long_desc,
>  			const char *expr,
>  			const char *threshold,
> -			const char *unit);
> +			const char *unit,
> +			const char *pmu_name);
>  	bool (*skip_duplicate_pmus)(void *print_state);
>  };
>  
> -- 
> 2.49.0.1045.g170613ef41-goog


end of thread, other threads:[~2025-05-13 20:25 UTC | newest]

Thread overview: 7+ messages
2025-05-12 18:46 [PATCH v1 1/2] perf list: Display the PMU name associated with a perf metric in json Ian Rogers
2025-05-12 18:47 ` [PATCH v1 2/2] perf test: Hybrid improvements for metric value validation test Ian Rogers
2025-05-12 20:30   ` Falcon, Thomas
2025-05-12 21:52     ` Ian Rogers
2025-05-13 17:26       ` Ian Rogers
2025-05-13 19:07         ` Ian Rogers
2025-05-13 20:25 ` [PATCH v1 1/2] perf list: Display the PMU name associated with a perf metric in json Arnaldo Carvalho de Melo
