* [PATCH v6 01/13] perf jevents: Add RAPL event metric for AMD zen models
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 02/13] perf jevents: Add idle " Ian Rogers
` (11 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
Add power per second metrics based on RAPL.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 31 +++++++++++++++++++++++++---
1 file changed, 28 insertions(+), 3 deletions(-)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index 4f728e7aae4a..6fff81cd4db3 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -1,14 +1,37 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-from metric import (JsonEncodeMetric, JsonEncodeMetricGroupDescriptions, LoadEvents,
- MetricGroup)
+from metric import (d_ratio, has_event, Event, JsonEncodeMetric, JsonEncodeMetricGroupDescriptions,
+ LoadEvents, Metric, MetricGroup, Select)
import argparse
import json
+import math
import os
# Global command line arguments.
_args = None
+interval_sec = Event("duration_time")
+
+def Rapl() -> MetricGroup:
+ """Processor socket power consumption estimate.
+
+ Use events from the running average power limit (RAPL) driver.
+ """
+ # Watts = joules/second
+ # Currently only energy-pkg is supported by AMD:
+ # https://lore.kernel.org/lkml/20220105185659.643355-1-eranian@google.com/
+ pkg = Event("power/energy\\-pkg/")
+ # Select the event only when has_event(pkg) holds; otherwise the expression
+ # is math.nan instead of a value computed from a missing event.
+ cond_pkg = Select(pkg, has_event(pkg), math.nan)
+ # The literal equals 2^-32. It multiplies the raw energy-pkg count before the
+ # division by interval_sec; presumably it is the joules-per-count unit of the
+ # RAPL energy counter - TODO confirm against the power PMU's energy-pkg.scale.
+ scale = 2.3283064365386962890625e-10
+ # NOTE(review): the metric's description string (second argument) is empty.
+ metrics = [
+ Metric("lpm_cpu_power_pkg", "",
+ d_ratio(cond_pkg * scale, interval_sec), "Watts"),
+ ]
+
+ return MetricGroup("lpm_cpu_power", metrics,
+ description="Processor socket power consumption estimates")
+
+
def main() -> None:
global _args
@@ -31,7 +54,9 @@ def main() -> None:
directory = f"{_args.events_path}/x86/{_args.model}/"
LoadEvents(directory)
- all_metrics = MetricGroup("",[])
+ all_metrics = MetricGroup("", [
+ Rapl(),
+ ])
if _args.metricgroups:
print(JsonEncodeMetricGroupDescriptions(all_metrics))
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 02/13] perf jevents: Add idle metric for AMD zen models
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
2025-09-04 4:40 ` [PATCH v6 01/13] perf jevents: Add RAPL event metric for AMD zen models Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 03/13] perf jevents: Add upc metric for uops per cycle for AMD Ian Rogers
` (10 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
Use the msr PMU to compute the percentage of wallclock cycles where
the CPUs are in a low power state.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index 6fff81cd4db3..335e8a7e0537 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -1,7 +1,8 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-from metric import (d_ratio, has_event, Event, JsonEncodeMetric, JsonEncodeMetricGroupDescriptions,
- LoadEvents, Metric, MetricGroup, Select)
+from metric import (d_ratio, has_event, max, Event, JsonEncodeMetric,
+ JsonEncodeMetricGroupDescriptions, LoadEvents, Metric,
+ MetricGroup, Select)
import argparse
import json
import math
@@ -12,6 +13,16 @@ _args = None
interval_sec = Event("duration_time")
+def Idle() -> Metric:
+ cyc = Event("msr/mperf/")
+ tsc = Event("msr/tsc/")
+ # Builds the lpm_idle metric: the share of msr/tsc/ counts that is not
+ # covered by msr/mperf/ counts, reported with a "100%" unit.
+ # max(..., 0) clamps the numerator so the ratio cannot go negative if the
+ # mperf count exceeds the tsc count.
+ low = max(tsc - cyc, 0)
+ return Metric(
+ "lpm_idle",
+ "Percentage of total wallclock cycles where CPUs are in low power state (C1 or deeper sleep state)",
+ d_ratio(low, tsc), "100%")
+
+
def Rapl() -> MetricGroup:
"""Processor socket power consumption estimate.
@@ -55,6 +66,7 @@ def main() -> None:
LoadEvents(directory)
all_metrics = MetricGroup("", [
+ Idle(),
Rapl(),
])
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 03/13] perf jevents: Add upc metric for uops per cycle for AMD
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
2025-09-04 4:40 ` [PATCH v6 01/13] perf jevents: Add RAPL event metric for AMD zen models Ian Rogers
2025-09-04 4:40 ` [PATCH v6 02/13] perf jevents: Add idle " Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 04/13] perf jevents: Add br metric group for branch statistics on AMD Ian Rogers
` (9 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
The metric adjusts for whether or not SMT is on.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index 335e8a7e0537..f734f1127ff3 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
from metric import (d_ratio, has_event, max, Event, JsonEncodeMetric,
- JsonEncodeMetricGroupDescriptions, LoadEvents, Metric,
- MetricGroup, Select)
+ JsonEncodeMetricGroupDescriptions, Literal, LoadEvents,
+ Metric, MetricGroup, Select)
import argparse
import json
import math
@@ -10,8 +10,17 @@ import os
# Global command line arguments.
_args = None
-
+_zen_model: int = 1
interval_sec = Event("duration_time")
+cycles = Event("cycles")
+# Number of CPU cycles scaled for SMT.
+smt_cycles = Select(cycles / 2, Literal("#smt_on"), cycles)
+
+def AmdUpc() -> Metric:
+ ops = Event("ex_ret_ops", "ex_ret_cops")
+ upc = d_ratio(ops, smt_cycles)
+ # Builds the lpm_upc metric: retired ops divided by smt_cycles, which is
+ # cycles / 2 when the "#smt_on" literal is set and plain cycles otherwise.
+ # Presumably the second Event() argument is an alternate name used when the
+ # first is not defined for the model - TODO confirm in metric.py.
+ return Metric("lpm_upc", "Micro-ops retired per core cycle (higher is better)",
+ upc, "uops/cycle")
def Idle() -> Metric:
cyc = Event("msr/mperf/")
@@ -45,6 +54,7 @@ def Rapl() -> MetricGroup:
def main() -> None:
global _args
+ global _zen_model
def dir_path(path: str) -> str:
"""Validate path is a directory for argparse."""
@@ -65,7 +75,10 @@ def main() -> None:
directory = f"{_args.events_path}/x86/{_args.model}/"
LoadEvents(directory)
+ _zen_model = int(_args.model[6:])
+
all_metrics = MetricGroup("", [
+ AmdUpc(),
Idle(),
Rapl(),
])
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 04/13] perf jevents: Add br metric group for branch statistics on AMD
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
` (2 preceding siblings ...)
2025-09-04 4:40 ` [PATCH v6 03/13] perf jevents: Add upc metric for uops per cycle for AMD Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 05/13] perf jevents: Add software prefetch (swpf) metric group for AMD Ian Rogers
` (8 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
The br metric group for branches comprises the total, taken,
conditional, fused and far metric groups, all built from json
events. The lack of conditional events on anything but zen2 means this
category is lacking on zen1, zen3 and zen4.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 105 +++++++++++++++++++++++++++
1 file changed, 105 insertions(+)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index f734f1127ff3..172f04b38d78 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -7,15 +7,119 @@ import argparse
import json
import math
import os
+from typing import Optional
# Global command line arguments.
_args = None
_zen_model: int = 1
interval_sec = Event("duration_time")
+ins = Event("instructions")
cycles = Event("cycles")
# Number of CPU cycles scaled for SMT.
smt_cycles = Select(cycles / 2, Literal("#smt_on"), cycles)
+def AmdBr():
+  """Build the "lpm_br" metric group describing retired branch instructions.
+
+  The group nests one sub-group per branch category: total, taken,
+  conditional, fused and far. Per-second rates divide by interval_sec and the
+  "instructions between branches" ratios divide the instructions event by the
+  category's branch event.
+  """
+  def Total() -> MetricGroup:
+    """All retired branches: rate, mispredict ratio and resync rate."""
+    br = Event("ex_ret_brn")
+    br_m_all = Event("ex_ret_brn_misp")
+    br_clr = Event("ex_ret_msprd_brnch_instr_dir_msmtch", "ex_ret_brn_resync")
+
+    br_r = d_ratio(br, interval_sec)
+    ins_r = d_ratio(ins, br)
+    misp_r = d_ratio(br_m_all, br)
+    clr_r = d_ratio(br_clr, interval_sec)
+
+    return MetricGroup("lpm_br_total", [
+        Metric("lpm_br_total_retired",
+               "The number of branch instructions retired per second.", br_r,
+               "insn/s"),
+        Metric(
+            "lpm_br_total_mispred",
+            "The number of branch instructions retired, of any type, that were "
+            "not correctly predicted as a percentage of all branch instructions.",
+            misp_r, "100%"),
+        Metric("lpm_br_total_insn_between_branches",
+               "The number of instructions divided by the number of branches.",
+               ins_r, "insn"),
+        Metric("lpm_br_total_insn_fe_resteers",
+               "The number of resync branches per second.", clr_r, "req/s")
+    ])
+
+  def Taken() -> MetricGroup:
+    """Retired taken branches: rate, mispredict ratio and spacing."""
+    br = Event("ex_ret_brn_tkn")
+    br_m_tk = Event("ex_ret_brn_tkn_misp")
+    br_r = d_ratio(br, interval_sec)
+    ins_r = d_ratio(ins, br)
+    misp_r = d_ratio(br_m_tk, br)
+    return MetricGroup("lpm_br_taken", [
+        Metric("lpm_br_taken_retired",
+               "The number of taken branches that were retired per second.",
+               br_r, "insn/s"),
+        Metric(
+            "lpm_br_taken_mispred",
+            "The number of retired taken branch instructions that were "
+            "mispredicted as a percentage of all taken branches.", misp_r,
+            "100%"),
+        Metric(
+            "lpm_br_taken_insn_between_branches",
+            "The number of instructions divided by the number of taken branches.",
+            ins_r, "insn"),
+    ])
+
+  def Conditional() -> Optional[MetricGroup]:
+    """Retired conditional branches.
+
+    The mispredict metric is only added when _zen_model == 2; the rate and
+    spacing metrics are built for every model.
+    """
+    global _zen_model
+    br = Event("ex_ret_cond")
+    br_r = d_ratio(br, interval_sec)
+    ins_r = d_ratio(ins, br)
+
+    metrics = [
+        Metric("lpm_br_cond_retired", "Retired conditional branch instructions.",
+               br_r, "insn/s"),
+        Metric("lpm_br_cond_insn_between_branches",
+               "The number of instructions divided by the number of conditional "
+               "branches.", ins_r, "insn"),
+    ]
+    if _zen_model == 2:
+      br_m_cond = Event("ex_ret_cond_misp")
+      misp_r = d_ratio(br_m_cond, br)
+      metrics += [
+          Metric("lpm_br_cond_mispred",
+                 "Retired conditional branch instructions mispredicted as a "
+                 "percentage of all conditional branches.", misp_r, "100%"),
+      ]
+
+    return MetricGroup("lpm_br_cond", metrics)
+
+  def Fused() -> MetricGroup:
+    """Retired fused branches: rate and spacing."""
+    br = Event("ex_ret_fused_instr", "ex_ret_fus_brnch_inst")
+    br_r = d_ratio(br, interval_sec)
+    ins_r = d_ratio(ins, br)
+    # Named after its own metrics (lpm_br_fused_*); reusing "lpm_br_cond" here
+    # would duplicate the group name returned by Conditional().
+    return MetricGroup("lpm_br_fused", [
+        Metric("lpm_br_fused_retired",
+               "Retired fused branch instructions per second.", br_r, "insn/s"),
+        Metric(
+            "lpm_br_fused_insn_between_branches",
+            "The number of instructions divided by the number of fused "
+            "branches.", ins_r, "insn"),
+    ])
+
+  def Far() -> MetricGroup:
+    """Retired far control transfers: rate and spacing."""
+    br = Event("ex_ret_brn_far")
+    br_r = d_ratio(br, interval_sec)
+    ins_r = d_ratio(ins, br)
+    return MetricGroup("lpm_br_far", [
+        Metric("lpm_br_far_retired", "Retired far control transfers per second.",
+               br_r, "insn/s"),
+        Metric(
+            "lpm_br_far_insn_between_branches",
+            "The number of instructions divided by the number of far branches.",
+            ins_r, "insn"),
+    ])
+
+  return MetricGroup("lpm_br", [Total(), Taken(), Conditional(), Fused(), Far()],
+                     description="breakdown of retired branch instructions")
+
+
def AmdUpc() -> Metric:
ops = Event("ex_ret_ops", "ex_ret_cops")
upc = d_ratio(ops, smt_cycles)
@@ -78,6 +182,7 @@ def main() -> None:
_zen_model = int(_args.model[6:])
all_metrics = MetricGroup("", [
+ AmdBr(),
AmdUpc(),
Idle(),
Rapl(),
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 05/13] perf jevents: Add software prefetch (swpf) metric group for AMD
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
` (3 preceding siblings ...)
2025-09-04 4:40 ` [PATCH v6 04/13] perf jevents: Add br metric group for branch statistics on AMD Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 06/13] perf jevents: Add hardware prefetch (hwpf) " Ian Rogers
` (7 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
Add metrics that give the utility of software prefetches on zen2, zen3
and zen4.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 96 ++++++++++++++++++++++++++++
1 file changed, 96 insertions(+)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index 172f04b38d78..acbb4e962814 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -120,6 +120,101 @@ def AmdBr():
description="breakdown of retired branch instructions")
+def AmdSwpf() -> Optional[MetricGroup]:
+  """Returns a MetricGroup representing AMD software prefetch metrics."""
+  global _zen_model
+  # No software prefetch group is produced when _zen_model <= 1; None is
+  # returned in its place.
+  if _zen_model <= 1:
+    return None
+
+  swp_ld = Event("ls_dispatch.ld_dispatch")
+  swp_t0 = Event("ls_pref_instr_disp.prefetch")
+  swp_w = Event("ls_pref_instr_disp.prefetch_w") # Missing on Zen1
+  swp_nt = Event("ls_pref_instr_disp.prefetch_nta")
+  swp_mab = Event("ls_inef_sw_pref.mab_mch_cnt")
+  # Each fill event below lists several names; presumably they are alternate
+  # spellings of the same event across models - TODO confirm in metric.py.
+  swp_l2 = Event("ls_sw_pf_dc_fills.local_l2",
+                 "ls_sw_pf_dc_fills.lcl_l2",
+                 "ls_sw_pf_dc_fill.ls_mabresp_lcl_l2")
+  swp_lc = Event("ls_sw_pf_dc_fills.local_ccx",
+                 "ls_sw_pf_dc_fills.int_cache",
+                 "ls_sw_pf_dc_fill.ls_mabresp_lcl_cache")
+  swp_lm = Event("ls_sw_pf_dc_fills.dram_io_near",
+                 "ls_sw_pf_dc_fills.mem_io_local",
+                 "ls_sw_pf_dc_fill.ls_mabresp_lcl_dram")
+  swp_rc = Event("ls_sw_pf_dc_fills.far_cache",
+                 "ls_sw_pf_dc_fills.ext_cache_remote",
+                 "ls_sw_pf_dc_fill.ls_mabresp_rmt_cache")
+  swp_rm = Event("ls_sw_pf_dc_fills.dram_io_far",
+                 "ls_sw_pf_dc_fills.mem_io_remote",
+                 "ls_sw_pf_dc_fill.ls_mabresp_rmt_dram")
+
+  # All the swpf that were satisfied beyond L1D are good.
+  all_pf = swp_t0 + swp_w + swp_nt
+  good_pf = swp_l2 + swp_lc + swp_lm + swp_rc + swp_rm
+  # Clamp at zero so the "bad" counts cannot go negative.
+  bad_pf = max(all_pf - good_pf, 0)
+
+  # Bad prefetches that are not accounted for by the mab_mch_cnt event.
+  req_pend = max(0, bad_pf - swp_mab)
+
+  # r1..r5 are scratch names that are rebound for each sub-group below.
+  r1 = d_ratio(ins, all_pf)
+  r2 = d_ratio(swp_ld, all_pf)
+  r3 = d_ratio(swp_t0, interval_sec)
+  r4 = d_ratio(swp_w, interval_sec)
+  r5 = d_ratio(swp_nt, interval_sec)
+  overview = MetricGroup("lpm_swpf_overview", [
+      Metric("lpm_swpf_ov_insn_bt_swpf", "Insn between SWPF", r1, "insns"),
+      Metric("lpm_swpf_ov_loads_bt_swpf", "Loads between SWPF", r2, "loads"),
+      Metric("lpm_swpf_ov_rate_prefetch_t0_t1_t2", "Rate prefetch T0_T1_T2", r3,
+             "insns/sec"),
+      Metric("lpm_swpf_ov_rate_prefetch_w", "Rate prefetch W", r4, "insns/sec"),
+      Metric("lpm_swpf_ov_rate_prefetch_nta", "Rate prefetch NTA", r5, "insns/sec"),
+  ])
+
+  r1 = d_ratio(swp_mab, all_pf)
+  r2 = d_ratio(req_pend, all_pf)
+  usefulness_bad = MetricGroup("lpm_swpf_usefulness_bad", [
+      Metric("lpm_swpf_use_bad_hit_l1", "Usefulness bad hit L1", r1, "100%"),
+      Metric("lpm_swpf_use_bad_req_pend", "Usefulness bad req pending", r2, "100%"),
+  ])
+
+  r1 = d_ratio(good_pf, all_pf)
+  usefulness_good = MetricGroup("lpm_swpf_usefulness_good", [
+      Metric("lpm_swpf_use_good_other_src", "Usefulness good other src", r1,
+             "100%"),
+  ])
+
+  usefulness = MetricGroup("lpm_swpf_usefulness", [
+      usefulness_bad,
+      usefulness_good,
+  ])
+
+  r1 = d_ratio(swp_l2, good_pf)
+  r2 = d_ratio(swp_lc, good_pf)
+  r3 = d_ratio(swp_lm, good_pf)
+  data_src_local = MetricGroup("lpm_swpf_data_src_local", [
+      Metric("lpm_swpf_data_src_local_l2", "Data source local l2", r1, "100%"),
+      Metric("lpm_swpf_data_src_local_ccx_l3_loc_ccx",
+             "Data source local ccx l3 loc ccx", r2, "100%"),
+      Metric("lpm_swpf_data_src_local_memory_or_io",
+             "Data source local memory or IO", r3, "100%"),
+  ])
+
+  r1 = d_ratio(swp_rc, good_pf)
+  r2 = d_ratio(swp_rm, good_pf)
+  data_src_remote = MetricGroup("lpm_swpf_data_src_remote", [
+      Metric("lpm_swpf_data_src_remote_cache", "Data source remote cache", r1,
+             "100%"),
+      Metric("lpm_swpf_data_src_remote_memory_or_io",
+             "Data source remote memory or IO", r2, "100%"),
+  ])
+
+  data_src = MetricGroup("lpm_swpf_data_src", [data_src_local, data_src_remote])
+
+  return MetricGroup("lpm_swpf", [overview, usefulness, data_src],
+                     description="Software prefetch breakdown (CCX L3 = L3 of current thread, Loc CCX = CCX cache on some socket)")
+
+
def AmdUpc() -> Metric:
ops = Event("ex_ret_ops", "ex_ret_cops")
upc = d_ratio(ops, smt_cycles)
@@ -183,6 +278,7 @@ def main() -> None:
all_metrics = MetricGroup("", [
AmdBr(),
+ AmdSwpf(),
AmdUpc(),
Idle(),
Rapl(),
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 06/13] perf jevents: Add hardware prefetch (hwpf) metric group for AMD
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
` (4 preceding siblings ...)
2025-09-04 4:40 ` [PATCH v6 05/13] perf jevents: Add software prefetch (swpf) metric group for AMD Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 07/13] perf jevents: Add itlb " Ian Rogers
` (6 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
Add metrics that give the utility of hardware prefetches on zen2, zen3
and zen4.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 62 ++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index acbb4e962814..cecc0a706558 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -120,6 +120,67 @@ def AmdBr():
description="breakdown of retired branch instructions")
+def AmdHwpf():
+ """Returns a MetricGroup representing AMD hardware prefetch metrics."""
+ global _zen_model
+ # No hardware prefetch group is produced when _zen_model <= 1; None is
+ # returned in its place.
+ if _zen_model <= 1:
+ return None
+
+ hwp_ld = Event("ls_dispatch.ld_dispatch")
+ # Each fill event below lists several names; presumably they are alternate
+ # spellings of the same event across models - TODO confirm in metric.py.
+ hwp_l2 = Event("ls_hw_pf_dc_fills.local_l2",
+ "ls_hw_pf_dc_fills.lcl_l2",
+ "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2")
+ hwp_lc = Event("ls_hw_pf_dc_fills.local_ccx",
+ "ls_hw_pf_dc_fills.int_cache",
+ "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache")
+ hwp_lm = Event("ls_hw_pf_dc_fills.dram_io_near",
+ "ls_hw_pf_dc_fills.mem_io_local",
+ "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram")
+ hwp_rc = Event("ls_hw_pf_dc_fills.far_cache",
+ "ls_hw_pf_dc_fills.ext_cache_remote",
+ "ls_hw_pf_dc_fill.ls_mabresp_rmt_cache")
+ hwp_rm = Event("ls_hw_pf_dc_fills.dram_io_far",
+ "ls_hw_pf_dc_fills.mem_io_remote",
+ "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram")
+
+ # all_pf is the sum of the five fill events: three local plus two remote.
+ loc_pf = hwp_l2 + hwp_lc + hwp_lm
+ rem_pf = hwp_rc + hwp_rm
+ all_pf = loc_pf + rem_pf
+
+ # r1..r3 are scratch names that are rebound for each sub-group below.
+ r1 = d_ratio(ins, all_pf)
+ r2 = d_ratio(hwp_ld, all_pf)
+ r3 = d_ratio(all_pf, interval_sec)
+
+ overview = MetricGroup("lpm_hwpf_overview", [
+ Metric("lpm_hwpf_ov_insn_bt_hwpf", "Insn between HWPF", r1, "insns"),
+ Metric("lpm_hwpf_ov_loads_bt_hwpf", "Loads between HWPF", r2, "loads"),
+ Metric("lpm_hwpf_ov_rate", "HWPF per second", r3, "hwpf/s"),
+ ])
+ # Data-source metrics are each fill event as a fraction of all_pf.
+ r1 = d_ratio(hwp_l2, all_pf)
+ r2 = d_ratio(hwp_lc, all_pf)
+ r3 = d_ratio(hwp_lm, all_pf)
+ data_src_local = MetricGroup("lpm_hwpf_data_src_local", [
+ Metric("lpm_hwpf_data_src_local_l2", "Data source local l2", r1, "100%"),
+ Metric("lpm_hwpf_data_src_local_ccx_l3_loc_ccx",
+ "Data source local ccx l3 loc ccx", r2, "100%"),
+ Metric("lpm_hwpf_data_src_local_memory_or_io",
+ "Data source local memory or IO", r3, "100%"),
+ ])
+
+ r1 = d_ratio(hwp_rc, all_pf)
+ r2 = d_ratio(hwp_rm, all_pf)
+ data_src_remote = MetricGroup("lpm_hwpf_data_src_remote", [
+ Metric("lpm_hwpf_data_src_remote_cache", "Data source remote cache", r1,
+ "100%"),
+ Metric("lpm_hwpf_data_src_remote_memory_or_io",
+ "Data source remote memory or IO", r2, "100%"),
+ ])
+
+ data_src = MetricGroup("lpm_hwpf_data_src", [data_src_local, data_src_remote])
+ return MetricGroup("lpm_hwpf", [overview, data_src],
+ description="Hardware prefetch breakdown (CCX L3 = L3 of current thread, Loc CCX = CCX cache on some socket)")
+
+
def AmdSwpf() -> Optional[MetricGroup]:
"""Returns a MetricGroup representing AMD software prefetch metrics."""
global _zen_model
@@ -278,6 +339,7 @@ def main() -> None:
all_metrics = MetricGroup("", [
AmdBr(),
+ AmdHwpf(),
AmdSwpf(),
AmdUpc(),
Idle(),
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 07/13] perf jevents: Add itlb metric group for AMD
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
` (5 preceding siblings ...)
2025-09-04 4:40 ` [PATCH v6 06/13] perf jevents: Add hardware prefetch (hwpf) " Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 08/13] perf jevents: Add dtlb " Ian Rogers
` (5 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
Add metrics that give an overview and details of the l1 itlb (zen1,
zen2, zen3) and l2 itlb (all zens).
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 48 ++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index cecc0a706558..40128903eb7d 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -120,6 +120,53 @@ def AmdBr():
description="breakdown of retired branch instructions")
+def AmdItlb():
+  global _zen_model
+  l2h = Event("bp_l1_tlb_miss_l2_tlb_hit", "bp_l1_tlb_miss_l2_hit")
+  # Builds the "lpm_itlb" metric group: an overview sub-group, an L1 ITLB
+  # sub-group (only when _zen_model <= 3, otherwise None is passed in its
+  # place) and an L2 ITLB sub-group.
+  l2m = Event("l2_itlb_misses")
+  l2r = l2h + l2m
+
+  itlb_l1_mg = None
+  # Every L2 ITLB request (hit or miss) is counted as an L1 ITLB miss.
+  l1m = l2r
+  if _zen_model <= 3:
+    l1r = Event("ic_fw32")
+    # L1 hits are derived as requests minus misses, clamped at zero.
+    l1h = max(l1r - l1m, 0)
+    itlb_l1_mg = MetricGroup("lpm_itlb_l1", [
+        Metric("lpm_itlb_l1_hits",
+               "L1 ITLB hits as a percentage of L1 ITLB accesses.",
+               d_ratio(l1h, l1h + l1m), "100%"),
+        Metric("lpm_itlb_l1_miss",
+               "L1 ITLB misses as a percentage of L1 ITLB accesses.",
+               d_ratio(l1m, l1h + l1m), "100%"),
+        Metric("lpm_itlb_l1_reqs",
+               "The number of 32B fetch windows transferred from IC pipe to DE "
+               "instruction decoder per second.", d_ratio(l1r, interval_sec),
+               "windows/sec"),
+    ])
+
+  return MetricGroup("lpm_itlb", [
+      MetricGroup("lpm_itlb_ov", [
+          Metric("lpm_itlb_ov_insn_bt_l1_miss",
+                 "Number of instructions between l1 misses", d_ratio(
+                     ins, l1m), "insns"),
+          Metric("lpm_itlb_ov_insn_bt_l2_miss",
+                 "Number of instructions between l2 misses", d_ratio(
+                     ins, l2m), "insns"),
+      ]),
+      itlb_l1_mg,
+      MetricGroup("lpm_itlb_l2", [
+          Metric("lpm_itlb_l2_hits",
+                 "L2 ITLB hits as a percentage of all L2 ITLB accesses.",
+                 d_ratio(l2h, l2r), "100%"),
+          Metric("lpm_itlb_l2_miss",
+                 "L2 ITLB misses as a percentage of all L2 ITLB accesses.",
+                 d_ratio(l2m, l2r), "100%"),
+          Metric("lpm_itlb_l2_reqs", "ITLB accesses per second.",
+                 d_ratio(l2r, interval_sec), "accesses/sec"),
+      ]),
+  ], description="Instruction TLB breakdown")
+
+
def AmdHwpf():
"""Returns a MetricGroup representing AMD hardware prefetch metrics."""
global _zen_model
@@ -339,6 +386,7 @@ def main() -> None:
all_metrics = MetricGroup("", [
AmdBr(),
+ AmdItlb(),
AmdHwpf(),
AmdSwpf(),
AmdUpc(),
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 08/13] perf jevents: Add dtlb metric group for AMD
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
` (6 preceding siblings ...)
2025-09-04 4:40 ` [PATCH v6 07/13] perf jevents: Add itlb " Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 09/13] perf jevents: Add uncore l3 " Ian Rogers
` (4 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
Add metrics that give an overview and details of the dtlb (zen1, zen2,
zen3).
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 109 +++++++++++++++++++++++++++
1 file changed, 109 insertions(+)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index 40128903eb7d..c3f0f1439c01 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -120,6 +120,114 @@ def AmdBr():
description="breakdown of retired branch instructions")
+def AmdDtlb() -> Optional[MetricGroup]:
+  """Build the "lpm_dtlb" metric group describing data TLB behaviour.
+
+  Returns None when _zen_model >= 4. Otherwise the group holds an overview,
+  L1 and L2 DTLB sub-groups (the L2 one further split by page size) and a
+  page table walk sub-group.
+  """
+  global _zen_model
+  if _zen_model >= 4:
+    return None
+
+  # From here on _zen_model <= 3, so these events are created unconditionally.
+  d_dat = Event("ls_dc_accesses")
+  d_h4k = Event("ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit")
+  # The coalesced page events are only used when _zen_model >= 2; otherwise the
+  # constant 0 stands in for them.
+  d_hcoal = Event("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit") if _zen_model >= 2 else 0
+  d_h2m = Event("ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit")
+  d_h1g = Event("ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit")
+
+  d_m4k = Event("ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss")
+  d_mcoal = Event("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss") if _zen_model >= 2 else 0
+  d_m2m = Event("ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss")
+  d_m1g = Event("ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss")
+
+  d_w0 = Event("ls_tablewalker.dc_type0")
+  d_w1 = Event("ls_tablewalker.dc_type1")
+  walks = d_w0 + d_w1
+  walks_r = d_ratio(walks, interval_sec)
+  ins_w = d_ratio(ins, walks)
+  l1 = d_dat
+  l1_r = d_ratio(l1, interval_sec)
+  l2_hits = d_h4k + d_hcoal + d_h2m + d_h1g
+  l2_miss = d_m4k + d_mcoal + d_m2m + d_m1g
+  l2_r = d_ratio(l2_hits + l2_miss, interval_sec)
+  # L1 misses are everything that reached the L2 DTLB plus the table walks;
+  # L1 hits are derived from the access count and clamped at zero.
+  l1_miss = l2_hits + l2_miss + walks
+  l1_hits = max(l1 - l1_miss, 0)
+  ins_l = d_ratio(ins, l1_miss)
+
+  return MetricGroup("lpm_dtlb", [
+      MetricGroup("lpm_dtlb_ov", [
+          Metric("lpm_dtlb_ov_insn_bt_l1_miss",
+                 "DTLB overview: instructions between l1 misses.", ins_l,
+                 "insns"),
+          Metric("lpm_dtlb_ov_insn_bt_walks",
+                 "DTLB overview: instructions between dtlb page table walks.",
+                 ins_w, "insns"),
+      ]),
+      MetricGroup("lpm_dtlb_l1", [
+          Metric("lpm_dtlb_l1_hits",
+                 "DTLB L1 hits as percentage of all DTLB L1 accesses.",
+                 d_ratio(l1_hits, l1), "100%"),
+          Metric("lpm_dtlb_l1_miss",
+                 "DTLB L1 misses as percentage of all DTLB L1 accesses.",
+                 d_ratio(l1_miss, l1), "100%"),
+          Metric("lpm_dtlb_l1_reqs", "DTLB L1 accesses per second.", l1_r,
+                 "insns/s"),
+      ]),
+      MetricGroup("lpm_dtlb_l2", [
+          Metric("lpm_dtlb_l2_hits",
+                 "DTLB L2 hits as percentage of all DTLB L2 accesses.",
+                 d_ratio(l2_hits, l2_hits + l2_miss), "100%"),
+          Metric("lpm_dtlb_l2_miss",
+                 "DTLB L2 misses as percentage of all DTLB L2 accesses.",
+                 d_ratio(l2_miss, l2_hits + l2_miss), "100%"),
+          Metric("lpm_dtlb_l2_reqs", "DTLB L2 accesses per second.", l2_r,
+                 "insns/s"),
+          MetricGroup("lpm_dtlb_l2_4kb", [
+              Metric(
+                  "lpm_dtlb_l2_4kb_hits",
+                  "DTLB L2 4kb page size hits as percentage of all DTLB L2 4kb "
+                  "accesses.", d_ratio(d_h4k, d_h4k + d_m4k), "100%"),
+              Metric(
+                  "lpm_dtlb_l2_4kb_miss",
+                  "DTLB L2 4kb page size misses as percentage of all DTLB L2 4kb "
+                  "accesses.", d_ratio(d_m4k, d_h4k + d_m4k), "100%")
+          ]),
+          MetricGroup("lpm_dtlb_l2_coalesced", [
+              Metric(
+                  "lpm_dtlb_l2_coal_hits",
+                  "DTLB L2 coalesced page (16kb) hits as percentage of all DTLB "
+                  "L2 coalesced accesses.", d_ratio(d_hcoal,
+                                                    d_hcoal + d_mcoal), "100%"),
+              Metric(
+                  "lpm_dtlb_l2_coal_miss",
+                  "DTLB L2 coalesced page (16kb) misses as percentage of all "
+                  "DTLB L2 coalesced accesses.",
+                  d_ratio(d_mcoal, d_hcoal + d_mcoal), "100%")
+          ]),
+          MetricGroup("lpm_dtlb_l2_2mb", [
+              Metric(
+                  "lpm_dtlb_l2_2mb_hits",
+                  "DTLB L2 2mb page size hits as percentage of all DTLB L2 2mb "
+                  "accesses.", d_ratio(d_h2m, d_h2m + d_m2m), "100%"),
+              Metric(
+                  "lpm_dtlb_l2_2mb_miss",
+                  "DTLB L2 2mb page size misses as percentage of all DTLB L2 2mb "
+                  "accesses.", d_ratio(d_m2m, d_h2m + d_m2m), "100%")
+          ]),
+          MetricGroup("lpm_dtlb_l2_1g", [
+              Metric(
+                  "lpm_dtlb_l2_1g_hits",
+                  "DTLB L2 1gb page size hits as percentage of all DTLB L2 1gb "
+                  "accesses.", d_ratio(d_h1g, d_h1g + d_m1g), "100%"),
+              Metric(
+                  "lpm_dtlb_l2_1g_miss",
+                  "DTLB L2 1gb page size misses as percentage of all DTLB L2 "
+                  "1gb accesses.", d_ratio(d_m1g, d_h1g + d_m1g), "100%")
+          ]),
+      ]),
+      MetricGroup("lpm_dtlb_walks", [
+          Metric("lpm_dtlb_walks_reqs", "DTLB page table walks per second.",
+                 walks_r, "walks/s"),
+      ]),
+  ], description="Data TLB metrics")
+
+
def AmdItlb():
global _zen_model
l2h = Event("bp_l1_tlb_miss_l2_tlb_hit", "bp_l1_tlb_miss_l2_hit")
@@ -386,6 +494,7 @@ def main() -> None:
all_metrics = MetricGroup("", [
AmdBr(),
+ AmdDtlb(),
AmdItlb(),
AmdHwpf(),
AmdSwpf(),
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 09/13] perf jevents: Add uncore l3 metric group for AMD
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
` (7 preceding siblings ...)
2025-09-04 4:40 ` [PATCH v6 08/13] perf jevents: Add dtlb " Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 10/13] perf jevents: Add load store breakdown metrics ldst " Ian Rogers
` (3 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
Metrics use the amd_l3 PMU for access/miss/hit information.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index c3f0f1439c01..a2d1b642b62e 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -466,6 +466,22 @@ def Rapl() -> MetricGroup:
return MetricGroup("lpm_cpu_power", metrics,
description="Processor socket power consumption estimates")
+def UncoreL3():
+    """Build the lpm_l3 group: L3 accesses per second plus hit and miss ratios."""
+    lookups = Event("l3_lookup_state.all_coherent_accesses_to_l3",
+                    "l3_lookup_state.all_l3_req_typs")
+    misses = Event("l3_lookup_state.l3_miss", "l3_comb_clstr_state.request_miss")
+    # Use max(accesses, misses) so that accesses - misses is not negative.
+    lookups = max(lookups, misses)
+    access_rate = d_ratio(lookups, interval_sec)
+    hit_ratio = d_ratio(lookups - misses, lookups)
+    miss_ratio = d_ratio(misses, lookups)
+    return MetricGroup("lpm_l3", [
+        Metric("lpm_l3_accesses", "L3 victim cache accesses", access_rate, "accesses/sec"),
+        Metric("lpm_l3_hits", "L3 victim cache hit rate", hit_ratio, "100%"),
+        Metric("lpm_l3_miss", "L3 victim cache miss rate", miss_ratio, "100%"),
+    ], description="L3 cache breakdown per CCX")
+
def main() -> None:
global _args
@@ -501,6 +517,7 @@ def main() -> None:
AmdUpc(),
Idle(),
Rapl(),
+ UncoreL3(),
])
if _args.metricgroups:
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 10/13] perf jevents: Add load store breakdown metrics ldst for AMD
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
` (8 preceding siblings ...)
2025-09-04 4:40 ` [PATCH v6 09/13] perf jevents: Add uncore l3 " Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 11/13] perf jevents: Add ILP metrics " Ian Rogers
` (2 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
Give a breakdown of load and store instructions. Use the counter mask
(cmask) to show the number of cycles taken to retire the instructions.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 75 ++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index a2d1b642b62e..20dcab5b856d 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -275,6 +275,80 @@ def AmdItlb():
], description="Instruction TLB breakdown")
+def AmdLdSt() -> MetricGroup:
+    """Load/store dispatch metrics: rates, instruction mix and per-cycle buckets.
+
+    The cmask=1/2/3 variants of each dispatch event are differenced into
+    "1", "2" and "3 or more" buckets, each reported against ls_not_halted_cyc.
+    """
+    ldst_ld = Event("ls_dispatch.ld_dispatch")
+    ldst_st = Event("ls_dispatch.store_dispatch")
+    ldst_ldc1 = Event(f"{ldst_ld}/cmask=1/")
+    ldst_stc1 = Event(f"{ldst_st}/cmask=1/")
+    ldst_ldc2 = Event(f"{ldst_ld}/cmask=2/")
+    ldst_stc2 = Event(f"{ldst_st}/cmask=2/")
+    ldst_ldc3 = Event(f"{ldst_ld}/cmask=3/")
+    ldst_stc3 = Event(f"{ldst_st}/cmask=3/")
+    ldst_cyc = Event("ls_not_halted_cyc")
+
+    ld_rate = d_ratio(ldst_ld, interval_sec)
+    st_rate = d_ratio(ldst_st, interval_sec)
+
+    # Clamp each difference at zero so a bucket is never reported as negative.
+    ld_v1 = max(ldst_ldc1 - ldst_ldc2, 0)
+    ld_v2 = max(ldst_ldc2 - ldst_ldc3, 0)
+    ld_v3 = ldst_ldc3
+
+    st_v1 = max(ldst_stc1 - ldst_stc2, 0)
+    st_v2 = max(ldst_stc2 - ldst_stc3, 0)
+    st_v3 = ldst_stc3
+
+    return MetricGroup("lpm_ldst", [
+        MetricGroup("lpm_ldst_total", [
+            Metric("lpm_ldst_total_ld", "Number of loads dispatched per second.",
+                   ld_rate, "insns/sec"),
+            Metric("lpm_ldst_total_st", "Number of stores dispatched per second.",
+                   st_rate, "insns/sec"),
+        ]),
+        MetricGroup("lpm_ldst_percent_insn", [
+            Metric("lpm_ldst_percent_insn_ld",
+                   "Load instructions as a percentage of all instructions.",
+                   d_ratio(ldst_ld, ins), "100%"),
+            Metric("lpm_ldst_percent_insn_st",
+                   "Store instructions as a percentage of all instructions.",
+                   d_ratio(ldst_st, ins), "100%"),
+        ]),
+        MetricGroup("lpm_ldst_ret_loads_per_cycle", [
+            Metric("lpm_ldst_ret_loads_per_cycle_1",
+                   "Load instructions retiring in 1 cycle as a percentage of all "
+                   "unhalted cycles.", d_ratio(ld_v1, ldst_cyc), "100%"),
+            Metric("lpm_ldst_ret_loads_per_cycle_2",
+                   "Load instructions retiring in 2 cycles as a percentage of all "
+                   "unhalted cycles.", d_ratio(ld_v2, ldst_cyc), "100%"),
+            Metric("lpm_ldst_ret_loads_per_cycle_3",
+                   "Load instructions retiring in 3 or more cycles as a percentage "
+                   "of all unhalted cycles.", d_ratio(ld_v3, ldst_cyc), "100%"),
+        ]),
+        MetricGroup("lpm_ldst_ret_stores_per_cycle", [
+            Metric("lpm_ldst_ret_stores_per_cycle_1",
+                   "Store instructions retiring in 1 cycle as a percentage of all "
+                   "unhalted cycles.", d_ratio(st_v1, ldst_cyc), "100%"),
+            Metric("lpm_ldst_ret_stores_per_cycle_2",
+                   "Store instructions retiring in 2 cycles as a percentage of all "
+                   "unhalted cycles.", d_ratio(st_v2, ldst_cyc), "100%"),
+            Metric("lpm_ldst_ret_stores_per_cycle_3",
+                   "Store instructions retiring in 3 or more cycles as a percentage "
+                   "of all unhalted cycles.", d_ratio(st_v3, ldst_cyc), "100%"),
+        ]),
+        MetricGroup("lpm_ldst_insn_bt", [
+            Metric("lpm_ldst_insn_bt_ld", "Number of instructions between loads.",
+                   d_ratio(ins, ldst_ld), "insns"),
+            Metric("lpm_ldst_insn_bt_st", "Number of instructions between stores.",
+                   d_ratio(ins, ldst_st), "insns"),
+        ])
+    ], description="Breakdown of load/store instructions")
+
+
def AmdHwpf():
"""Returns a MetricGroup representing AMD hardware prefetch metrics."""
global _zen_model
@@ -512,6 +586,7 @@ def main() -> None:
AmdBr(),
AmdDtlb(),
AmdItlb(),
+ AmdLdSt(),
AmdHwpf(),
AmdSwpf(),
AmdUpc(),
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 11/13] perf jevents: Add ILP metrics for AMD
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
` (9 preceding siblings ...)
2025-09-04 4:40 ` [PATCH v6 10/13] perf jevents: Add load store breakdown metrics ldst " Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 12/13] perf jevents: Add context switch " Ian Rogers
2025-09-04 4:40 ` [PATCH v6 13/13] perf jevents: Add uop cache hit/miss rates " Ian Rogers
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
Use the counter mask (cmask) to see how many cycles an instruction
takes to retire. Present as a set of ILP metrics.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 36 ++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index 20dcab5b856d..6f1259796c8d 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -120,6 +120,41 @@ def AmdBr():
description="breakdown of retired branch instructions")
+def AmdIlp() -> MetricGroup:
+    """Build lpm_ilp: (tsc - mperf) / tsc plus ex_ret_instr cmask=1..5 buckets."""
+    tsc = Event("msr/tsc/")
+    mperf = Event("msr/mperf/")
+    retired = Event("ex_ret_instr")
+    masked = [Event(f"{retired.name}/cmask={n}/") for n in range(1, 6)]
+    ilp = [d_ratio(max(lo - hi, 0), cycles) for lo, hi in zip(masked, masked[1:])]
+    ilp.append(d_ratio(masked[-1], cycles))  # the cmask=5 count is not differenced
+    ilp0 = 1  # what is left after subtracting every bucket below
+    for share in ilp:
+        ilp0 -= share
+    return MetricGroup("lpm_ilp", [
+        Metric("lpm_ilp_idle", "Lower power cycles as a percentage of all cycles",
+               d_ratio(tsc - mperf, tsc), "100%"),
+        Metric("lpm_ilp_inst_ret_0",
+               "Instructions retired in 0 cycles as a percentage of all cycles",
+               ilp0, "100%"),
+        Metric("lpm_ilp_inst_ret_1",
+               "Instructions retired in 1 cycles as a percentage of all cycles",
+               ilp[0], "100%"),
+        Metric("lpm_ilp_inst_ret_2",
+               "Instructions retired in 2 cycles as a percentage of all cycles",
+               ilp[1], "100%"),
+        Metric("lpm_ilp_inst_ret_3",
+               "Instructions retired in 3 cycles as a percentage of all cycles",
+               ilp[2], "100%"),
+        Metric("lpm_ilp_inst_ret_4",
+               "Instructions retired in 4 cycles as a percentage of all cycles",
+               ilp[3], "100%"),
+        Metric("lpm_ilp_inst_ret_5",
+               "Instructions retired in 5 or more cycles as a percentage of all cycles",
+               ilp[4], "100%"),
+    ])
+
+
def AmdDtlb() -> Optional[MetricGroup]:
global _zen_model
if _zen_model >= 4:
@@ -584,6 +619,7 @@ def main() -> None:
all_metrics = MetricGroup("", [
AmdBr(),
+ AmdIlp(),
AmdDtlb(),
AmdItlb(),
AmdLdSt(),
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 12/13] perf jevents: Add context switch metrics for AMD
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
` (10 preceding siblings ...)
2025-09-04 4:40 ` [PATCH v6 11/13] perf jevents: Add ILP metrics " Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
2025-09-04 4:40 ` [PATCH v6 13/13] perf jevents: Add uop cache hit/miss rates " Ian Rogers
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
Metrics give the context switch rate and the number of instructions,
cycles, loads, stores and taken branches per context switch.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 32 ++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index 6f1259796c8d..a782e41dedca 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -120,6 +120,37 @@ def AmdBr():
description="breakdown of retired branch instructions")
+def AmdCtxSw() -> MetricGroup:
+    """Context switch metrics.
+
+    Reports context switches per second of duration_time and, for
+    instructions, cycles, load/store dispatches and taken branches, the
+    event count per context switch.
+    """
+    cs = Event("context\\-switches")
+    instrs = Event("instructions")
+    cyc = Event("cycles")
+    loads = Event("ls_dispatch.ld_dispatch")
+    stores = Event("ls_dispatch.store_dispatch")
+    taken = Event("ex_ret_brn_tkn")
+
+    return MetricGroup("lpm_cs", [
+        Metric("lpm_cs_rate", "Context switches per second",
+               d_ratio(cs, interval_sec), "ctxsw/s"),
+        Metric("lpm_cs_instr", "Instructions per context switch",
+               d_ratio(instrs, cs), "instr/cs"),
+        Metric("lpm_cs_cycles", "Cycles per context switch",
+               d_ratio(cyc, cs), "cycles/cs"),
+        Metric("lpm_cs_loads", "Loads per context switch",
+               d_ratio(loads, cs), "loads/cs"),
+        Metric("lpm_cs_stores", "Stores per context switch",
+               d_ratio(stores, cs), "stores/cs"),
+        Metric("lpm_cs_br_taken", "Branches taken per context switch",
+               d_ratio(taken, cs), "br_taken/cs"),
+    ], description=("Number of context switches per second, instructions "
+                    "retired & core cycles between context switches"))
+
+
def AmdIlp() -> MetricGroup:
tsc = Event("msr/tsc/")
c0 = Event("msr/mperf/")
@@ -619,6 +650,7 @@ def main() -> None:
all_metrics = MetricGroup("", [
AmdBr(),
+ AmdCtxSw(),
AmdIlp(),
AmdDtlb(),
AmdItlb(),
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v6 13/13] perf jevents: Add uop cache hit/miss rates for AMD
2025-09-04 4:40 [PATCH v6 00/13] Python generated AMD Zen metrics Ian Rogers
` (11 preceding siblings ...)
2025-09-04 4:40 ` [PATCH v6 12/13] perf jevents: Add context switch " Ian Rogers
@ 2025-09-04 4:40 ` Ian Rogers
12 siblings, 0 replies; 14+ messages in thread
From: Ian Rogers @ 2025-09-04 4:40 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Kan Liang, James Clark, Xu Yang,
linux-kernel, linux-perf-users, John Garry, Jing Zhang,
Sandipan Das, Benjamin Gray
Add metrics giving the uop cache hit and miss rates, each as a fraction of hits plus misses.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/amd_metrics.py | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
index a782e41dedca..d971fbf40318 100755
--- a/tools/perf/pmu-events/amd_metrics.py
+++ b/tools/perf/pmu-events/amd_metrics.py
@@ -571,6 +571,23 @@ def AmdSwpf() -> Optional[MetricGroup]:
description="Software prefetch breakdown (CCX L3 = L3 of current thread, Loc CCX = CCX cache on some socket)")
+def AmdUopCache() -> Optional[MetricGroup]:
+    """Uop cache hit and miss ratios, or None if the events cannot be created."""
+    try:
+        op_cache_hit = Event("op_cache_hit_miss.op_cache_hit")
+        op_cache_miss = Event("op_cache_hit_miss.op_cache_miss")
+    except Exception:  # not bare: let KeyboardInterrupt/SystemExit propagate
+        return None
+    # Both ratios share the hit + miss sum as their denominator.
+    op_cache_total = op_cache_hit + op_cache_miss
+    return MetricGroup("lpm_uop_cache", [
+        Metric("lpm_uop_cache_hit_ratio", "Uop cache full or partial hits rate",
+               d_ratio(op_cache_hit, op_cache_total), "100%"),
+        Metric("lpm_uop_cache_miss_ratio", "Uop cache misses rate",
+               d_ratio(op_cache_miss, op_cache_total), "100%"),
+    ], description="Micro-op (uop) hit and miss rates.")
+
+
def AmdUpc() -> Metric:
ops = Event("ex_ret_ops", "ex_ret_cops")
upc = d_ratio(ops, smt_cycles)
@@ -657,6 +674,7 @@ def main() -> None:
AmdLdSt(),
AmdHwpf(),
AmdSwpf(),
+ AmdUopCache(),
AmdUpc(),
Idle(),
Rapl(),
--
2.51.0.338.gd7d06c2dae-goog
^ permalink raw reply related [flat|nested] 14+ messages in thread