* [PATCH v2 01/25] perf arm_spe: Fix memset subclass in operation
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
@ 2025-10-17 10:04 ` Leo Yan
2025-10-17 10:04 ` [PATCH v2 02/25] perf arm_spe: Unify operation naming Leo Yan
` (23 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:04 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
The operation subclass is extracted from bits [7..1] of the payload.
Since bit [0] is not parsed, there is no chance to match the memset type
(0x25). As a result, the memset payload is never parsed successfully.
Instead of extracting a unified bit field, change to extract the
specific bits for each operation subclass.
Fixes: 34fb60400e32 ("perf arm-spe: Add raw decoding for SPEv1.3 MTE and MOPS load/store")
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
.../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 25 ++++++----------------
.../util/arm-spe-decoder/arm-spe-pkt-decoder.h | 15 ++++++-------
2 files changed, 14 insertions(+), 26 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 80561630253dd5c46f7e99b24fc13b99f346459f..1a1ffe50ee73ab4400fd1163d0e84e54f4d8ab0b 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -371,31 +371,20 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR");
}
- switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) {
- case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP:
+ if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_GP_REG:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MTE-TAG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_MEMCPY:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMCPY");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_MEMSET:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMSET");
- break;
- default:
- break;
- }
if (SPE_OP_PKT_IS_LDST_SVE(payload)) {
/* SVE effective vector length */
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index d00c2481712dcc457eab2f5e9848ffc3150e6236..75e355fe3438cc07704cb61a66ca162bd0904042 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -125,14 +125,13 @@ enum arm_spe_events {
#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
-#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1))
-#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0
-#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4
-#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10
-#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30
-#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG 0x14
-#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY 0x20
-#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET 0x25
+#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0)
+#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4)
+#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x10)
+#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(v) (((v) & GENMASK_ULL(7, 1)) == 0x30)
+#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(v) (((v) & GENMASK_ULL(7, 1)) == 0x14)
+#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(v) (((v) & GENMASK_ULL(7, 1)) == 0x20)
+#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET(v) (((v) & GENMASK_ULL(7, 0)) == 0x25)
#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 02/25] perf arm_spe: Unify operation naming
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
2025-10-17 10:04 ` [PATCH v2 01/25] perf arm_spe: Fix memset subclass in operation Leo Yan
@ 2025-10-17 10:04 ` Leo Yan
2025-10-17 10:04 ` [PATCH v2 03/25] perf arm_spe: Decode GCS operation Leo Yan
` (22 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:04 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Rename the extended subclass and the SVE/SME register access subclass, so
that the naming is consistent across all subclasses.
Add a log string "SVE-SME-REG" for the SVE/SME register access, which makes
the output easier to parse.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2 +-
.../perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 21 ++++++++++-----------
.../perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 4 ++--
3 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 96eb7cced6fd1574f5d823e4c67b9051dcf183ed..b0fb896abad48de93aa1ed560029f9bc9ae969e9 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -200,7 +200,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_ST;
else
decoder->record.op |= ARM_SPE_OP_LD;
- if (SPE_OP_PKT_IS_LDST_SVE(payload))
+ if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload))
decoder->record.op |= ARM_SPE_OP_SVE_LDST;
break;
case SPE_OP_PKT_HDR_CLASS_OTHER:
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 1a1ffe50ee73ab4400fd1163d0e84e54f4d8ab0b..f6e9c58ce62f3ae227a79d91caefaef4bd87d98a 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -362,31 +362,30 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len,
payload & 0x1 ? "ST" : "LD");
- if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) {
+ if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) {
if (payload & SPE_OP_PKT_AT)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT");
if (payload & SPE_OP_PKT_EXCL)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL");
if (payload & SPE_OP_PKT_AR)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR");
- }
-
- if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP");
- else if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG");
- else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG");
- else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG");
- else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MTE-TAG");
- else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMCPY");
- else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMSET");
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-SME-REG");
- if (SPE_OP_PKT_IS_LDST_SVE(payload)) {
/* SVE effective vector length */
arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d",
SPE_OP_PKG_SVE_EVL(payload));
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 75e355fe3438cc07704cb61a66ca162bd0904042..cb947e625918922dc1fa25cf8843b09661197782 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -133,14 +133,14 @@ enum arm_spe_events {
#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(v) (((v) & GENMASK_ULL(7, 1)) == 0x20)
#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET(v) (((v) & GENMASK_ULL(7, 0)) == 0x25)
-#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
+#define SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
#define SPE_OP_PKT_AR BIT(4)
#define SPE_OP_PKT_EXCL BIT(3)
#define SPE_OP_PKT_AT BIT(2)
#define SPE_OP_PKT_ST BIT(0)
-#define SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | BIT(1))) == 0x8)
+#define SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(v) (((v) & (BIT(3) | BIT(1))) == 0x8)
#define SPE_OP_PKT_SVE_SG BIT(7)
/*
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 03/25] perf arm_spe: Decode GCS operation
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
2025-10-17 10:04 ` [PATCH v2 01/25] perf arm_spe: Fix memset subclass in operation Leo Yan
2025-10-17 10:04 ` [PATCH v2 02/25] perf arm_spe: Unify operation naming Leo Yan
@ 2025-10-17 10:04 ` Leo Yan
2025-10-17 10:04 ` [PATCH v2 04/25] perf arm_spe: Rename SPE_OP_PKT_IS_OTHER_SVE_OP macro Leo Yan
` (21 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:04 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Decode a load or store from a GCS operation and the associated "common"
field.
After:
. 00000000: 49 44 LD GCS COMM
. 00000002: b2 18 3c d7 83 00 80 ff ff VA 0xffff800083d73c18
. 0000000b: 9a 00 00 LAT 0 XLAT
. 0000000e: 43 00 DATA-SOURCE 0
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 4 ++++
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 4 ++++
2 files changed, 8 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index f6e9c58ce62f3ae227a79d91caefaef4bd87d98a..0046079edaccdd4d0c093f73395d1ecdc0007621 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -394,6 +394,10 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
if (payload & SPE_OP_PKT_SVE_SG)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG");
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) {
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS");
+ if (payload & SPE_OP_PKT_GCS_COMM)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " COMM");
}
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index cb947e625918922dc1fa25cf8843b09661197782..94333e7bc382deef119414e4f1de7c2878620035 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -154,6 +154,10 @@ enum arm_spe_events {
#define SPE_OP_PKT_SVE_PRED BIT(2)
#define SPE_OP_PKT_SVE_FP BIT(1)
+#define SPE_OP_PKT_LDST_SUBCLASS_GCS(v) (((v) & (GENMASK_ULL(7, 3) | BIT(1))) == 0x40)
+
+#define SPE_OP_PKT_GCS_COMM BIT(2)
+
#define SPE_OP_PKT_CR_MASK GENMASK_ULL(4, 3)
#define SPE_OP_PKT_CR_BL(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 1)
#define SPE_OP_PKT_CR_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 2)
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 04/25] perf arm_spe: Rename SPE_OP_PKT_IS_OTHER_SVE_OP macro
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (2 preceding siblings ...)
2025-10-17 10:04 ` [PATCH v2 03/25] perf arm_spe: Decode GCS operation Leo Yan
@ 2025-10-17 10:04 ` Leo Yan
2025-10-17 10:04 ` [PATCH v2 05/25] perf arm_spe: Decode ASE and FP fields in other operation Leo Yan
` (20 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:04 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Rename the macro to SPE_OP_PKT_OTHER_SUBCLASS_SVE to unify naming.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2 +-
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +-
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index b0fb896abad48de93aa1ed560029f9bc9ae969e9..847c29385bea8618e14b2eb21a08896041890d89 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -205,7 +205,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
break;
case SPE_OP_PKT_HDR_CLASS_OTHER:
decoder->record.op |= ARM_SPE_OP_OTHER;
- if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload))
+ if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload))
decoder->record.op |= ARM_SPE_OP_SVE_OTHER;
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 0046079edaccdd4d0c093f73395d1ecdc0007621..533920b738cbcb39136d1ba3d88e99f9d8009e74 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -340,7 +340,7 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
switch (packet->index) {
case SPE_OP_PKT_HDR_CLASS_OTHER:
- if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) {
+ if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER");
/* SVE effective vector length */
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 94333e7bc382deef119414e4f1de7c2878620035..48bd9e9ef132b11b79ffe2e2fbc7cfe4c340ff92 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -123,7 +123,7 @@ enum arm_spe_events {
#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1
#define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2
-#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+#define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0)
#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4)
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 05/25] perf arm_spe: Decode ASE and FP fields in other operation
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (3 preceding siblings ...)
2025-10-17 10:04 ` [PATCH v2 04/25] perf arm_spe: Rename SPE_OP_PKT_IS_OTHER_SVE_OP macro Leo Yan
@ 2025-10-17 10:04 ` Leo Yan
2025-10-17 10:04 ` [PATCH v2 06/25] perf arm_spe: Decode SME data processing packet Leo Yan
` (19 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:04 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Add an explicit check for the "other" operation subclass, which prevents
incorrect classification. Also parse the ASE and FP fields.
After:
. 0000002f: 48 06 OTHER ASE FP INSN-OTHER
. 00000031: b2 08 80 48 01 08 00 ff ff VA 0xffff000801488008
. 0000003a: 9a 00 00 LAT 0 XLAT
. 0000003d: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 +++++-
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 4 ++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 533920b738cbcb39136d1ba3d88e99f9d8009e74..21b65a9b40f481b6cb25aaf01ab627ade046ff72 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -351,8 +351,12 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
if (payload & SPE_OP_PKT_SVE_PRED)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
- } else {
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER");
+ if (payload & SPE_OP_PKT_OTHER_ASE)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " ASE");
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s",
payload & SPE_OP_PKT_COND ?
"COND-SELECT" : "INSN-OTHER");
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 48bd9e9ef132b11b79ffe2e2fbc7cfe4c340ff92..704601c6dbe30e93f83a82670d0d60344a22222a 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -123,8 +123,12 @@ enum arm_spe_events {
#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1
#define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2
+#define SPE_OP_PKT_OTHER_SUBCLASS_OTHER(v) (((v) & GENMASK_ULL(7, 3)) == 0x0)
#define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+#define SPE_OP_PKT_OTHER_ASE BIT(2)
+#define SPE_OP_PKT_OTHER_FP BIT(1)
+
#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0)
#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4)
#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x10)
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 06/25] perf arm_spe: Decode SME data processing packet
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (4 preceding siblings ...)
2025-10-17 10:04 ` [PATCH v2 05/25] perf arm_spe: Decode ASE and FP fields in other operation Leo Yan
@ 2025-10-17 10:04 ` Leo Yan
2025-10-17 10:04 ` [PATCH v2 07/25] perf arm_spe: Remove unused operation types Leo Yan
` (18 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:04 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
For SME data processing, decode its Effective vector length or Tile Size
(ETS), and print "FP" if it is a floating-point operation.
After:
. 00000000: 49 00 SME-OTHER ETS 1024 FP
. 00000002: b2 18 3c d7 83 00 80 ff ff VA 0xffff800083d73c18
. 0000000b: 9a 00 00 LAT 0 XLAT
. 0000000e: 43 00 DATA-SOURCE 0
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 9 +++++++++
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 11 +++++++++++
2 files changed, 20 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 21b65a9b40f481b6cb25aaf01ab627ade046ff72..5769ba2f414049161f271fd8b8f40c440d15a75a 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -351,6 +351,15 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
if (payload & SPE_OP_PKT_SVE_PRED)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) {
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, "SME-OTHER");
+
+ /* SME effective vector length or tile size */
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " ETS %d",
+ SPE_OP_PKG_SME_ETS(payload));
+
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
} else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER");
if (payload & SPE_OP_PKT_OTHER_ASE)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 704601c6dbe30e93f83a82670d0d60344a22222a..adf4cde320aad01265b5232b0d6ff6b1f752f35f 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -125,10 +125,21 @@ enum arm_spe_events {
#define SPE_OP_PKT_OTHER_SUBCLASS_OTHER(v) (((v) & GENMASK_ULL(7, 3)) == 0x0)
#define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+#define SPE_OP_PKT_OTHER_SUBCLASS_SME(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x88)
#define SPE_OP_PKT_OTHER_ASE BIT(2)
#define SPE_OP_PKT_OTHER_FP BIT(1)
+/*
+ * SME effective vector length or tile size (ETS) is stored in byte 0
+ * bits [6:4,2]; the length is rounded up to a power of two and uses 128
+ * as one step, so ETS calculation is:
+ *
+ * 128 * (2 ^ bits [6:4,2]) = 128 << (bits [6:4,2])
+ */
+#define SPE_OP_PKG_SME_ETS(v) (128 << (FIELD_GET(GENMASK_ULL(6, 4), (v)) << 1 | \
+ (FIELD_GET(BIT(2), (v)))))
+
#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0)
#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4)
#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x10)
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 07/25] perf arm_spe: Remove unused operation types
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (5 preceding siblings ...)
2025-10-17 10:04 ` [PATCH v2 06/25] perf arm_spe: Decode SME data processing packet Leo Yan
@ 2025-10-17 10:04 ` Leo Yan
2025-10-17 10:04 ` [PATCH v2 08/25] perf arm_spe: Consolidate " Leo Yan
` (17 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:04 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Remove unused SVE operation types. These operations will be reintroduced
in subsequent refactoring, but with a different format.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 4 ----
1 file changed, 4 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index fbb57f8052371e51d562d9dd6098e97fc099461c..1259cbadfdc8098019afcd4cf65e733475310392 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -39,8 +39,6 @@ enum arm_spe_op_type {
/* Second level operation type for OTHER */
ARM_SPE_OP_SVE_OTHER = 1 << 16,
- ARM_SPE_OP_SVE_FP = 1 << 17,
- ARM_SPE_OP_SVE_PRED_OTHER = 1 << 18,
/* Second level operation type for LDST */
ARM_SPE_OP_LD = 1 << 16,
@@ -53,8 +51,6 @@ enum arm_spe_op_type {
ARM_SPE_OP_UNSPEC_REG = 1 << 23,
ARM_SPE_OP_NV_SYSREG = 1 << 24,
ARM_SPE_OP_SVE_LDST = 1 << 25,
- ARM_SPE_OP_SVE_PRED_LDST = 1 << 26,
- ARM_SPE_OP_SVE_SG = 1 << 27,
/* Second level operation type for BRANCH_ERET */
ARM_SPE_OP_BR_COND = 1 << 16,
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 08/25] perf arm_spe: Consolidate operation types
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (6 preceding siblings ...)
2025-10-17 10:04 ` [PATCH v2 07/25] perf arm_spe: Remove unused operation types Leo Yan
@ 2025-10-17 10:04 ` Leo Yan
2025-10-17 10:04 ` [PATCH v2 09/25] perf arm_spe: Introduce data processing macro for SVE operations Leo Yan
` (16 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:04 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Consolidate operation types in the following way:
(a) Extract the second-level types into separate enums.
(b) The second-level types for memory and SIMD operations are classified
by modules. E.g., an operation may relate to general register,
SIMD/FP, SVE, etc.
(c) The associated information tells details. E.g., an operation is
load or store, whether it is atomic operation, etc.
Start the enum items for the second-level types from bit 8 to accommodate
more entries within a 32-bit integer.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 46 ++++++++++++-----------
1 file changed, 24 insertions(+), 22 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 1259cbadfdc8098019afcd4cf65e733475310392..b555e2cc1dc36f209c23b0d84378da0ee65c1ab3 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -36,29 +36,31 @@ enum arm_spe_op_type {
ARM_SPE_OP_OTHER = 1 << 0,
ARM_SPE_OP_LDST = 1 << 1,
ARM_SPE_OP_BRANCH_ERET = 1 << 2,
+};
+
+enum arm_spe_2nd_op_ldst {
+ ARM_SPE_OP_GP_REG = 1 << 8,
+ ARM_SPE_OP_UNSPEC_REG = 1 << 9,
+ ARM_SPE_OP_NV_SYSREG = 1 << 10,
+ ARM_SPE_OP_SIMD_FP = 1 << 11,
+ ARM_SPE_OP_SVE_OTHER = 1 << 12,
+ ARM_SPE_OP_SVE_LDST = 1 << 13,
+
+ /* Assisted information for memory / SIMD */
+ ARM_SPE_OP_LD = 1 << 20,
+ ARM_SPE_OP_ST = 1 << 21,
+ ARM_SPE_OP_ATOMIC = 1 << 22,
+ ARM_SPE_OP_EXCL = 1 << 23,
+ ARM_SPE_OP_AR = 1 << 24,
+};
- /* Second level operation type for OTHER */
- ARM_SPE_OP_SVE_OTHER = 1 << 16,
-
- /* Second level operation type for LDST */
- ARM_SPE_OP_LD = 1 << 16,
- ARM_SPE_OP_ST = 1 << 17,
- ARM_SPE_OP_ATOMIC = 1 << 18,
- ARM_SPE_OP_EXCL = 1 << 19,
- ARM_SPE_OP_AR = 1 << 20,
- ARM_SPE_OP_SIMD_FP = 1 << 21,
- ARM_SPE_OP_GP_REG = 1 << 22,
- ARM_SPE_OP_UNSPEC_REG = 1 << 23,
- ARM_SPE_OP_NV_SYSREG = 1 << 24,
- ARM_SPE_OP_SVE_LDST = 1 << 25,
-
- /* Second level operation type for BRANCH_ERET */
- ARM_SPE_OP_BR_COND = 1 << 16,
- ARM_SPE_OP_BR_INDIRECT = 1 << 17,
- ARM_SPE_OP_BR_GCS = 1 << 18,
- ARM_SPE_OP_BR_CR_BL = 1 << 19,
- ARM_SPE_OP_BR_CR_RET = 1 << 20,
- ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 21,
+enum arm_spe_2nd_op_branch {
+ ARM_SPE_OP_BR_COND = 1 << 8,
+ ARM_SPE_OP_BR_INDIRECT = 1 << 9,
+ ARM_SPE_OP_BR_GCS = 1 << 10,
+ ARM_SPE_OP_BR_CR_BL = 1 << 11,
+ ARM_SPE_OP_BR_CR_RET = 1 << 12,
+ ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 13,
};
enum arm_spe_common_data_source {
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 09/25] perf arm_spe: Introduce data processing macro for SVE operations
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (7 preceding siblings ...)
2025-10-17 10:04 ` [PATCH v2 08/25] perf arm_spe: Consolidate " Leo Yan
@ 2025-10-17 10:04 ` Leo Yan
2025-10-17 10:04 ` [PATCH v2 10/25] perf arm_spe: Report register access in record Leo Yan
` (15 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:04 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Introduce the ARM_SPE_OP_DP (data processing) macro as associated
information for SVE operations. For SVE register access, only
ARM_SPE_OP_SVE is set; for SVE data processing, both ARM_SPE_OP_SVE and
ARM_SPE_OP_DP are set together.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 4 ++--
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 4 ++--
tools/perf/util/arm-spe.c | 5 +----
3 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 847c29385bea8618e14b2eb21a08896041890d89..6974f594f37c9916fff591ced1e9c2d60cf84f14 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -201,12 +201,12 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
else
decoder->record.op |= ARM_SPE_OP_LD;
if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload))
- decoder->record.op |= ARM_SPE_OP_SVE_LDST;
+ decoder->record.op |= ARM_SPE_OP_SVE;
break;
case SPE_OP_PKT_HDR_CLASS_OTHER:
decoder->record.op |= ARM_SPE_OP_OTHER;
if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload))
- decoder->record.op |= ARM_SPE_OP_SVE_OTHER;
+ decoder->record.op |= ARM_SPE_OP_SVE | ARM_SPE_OP_DP;
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
decoder->record.op |= ARM_SPE_OP_BRANCH_ERET;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index b555e2cc1dc36f209c23b0d84378da0ee65c1ab3..acab6d11096b19b1d31a553c83cba9732ecf5ddb 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -43,8 +43,7 @@ enum arm_spe_2nd_op_ldst {
ARM_SPE_OP_UNSPEC_REG = 1 << 9,
ARM_SPE_OP_NV_SYSREG = 1 << 10,
ARM_SPE_OP_SIMD_FP = 1 << 11,
- ARM_SPE_OP_SVE_OTHER = 1 << 12,
- ARM_SPE_OP_SVE_LDST = 1 << 13,
+ ARM_SPE_OP_SVE = 1 << 12,
/* Assisted information for memory / SIMD */
ARM_SPE_OP_LD = 1 << 20,
@@ -52,6 +51,7 @@ enum arm_spe_2nd_op_ldst {
ARM_SPE_OP_ATOMIC = 1 << 22,
ARM_SPE_OP_EXCL = 1 << 23,
ARM_SPE_OP_AR = 1 << 24,
+ ARM_SPE_OP_DP = 1 << 25, /* Data processing */
};
enum arm_spe_2nd_op_branch {
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 71be979f507718caadc091714c40bcee073c1d60..88f24a8626861393defc89540e4126a124479699 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -346,10 +346,7 @@ static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *
{
struct simd_flags simd_flags = {};
- if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
- simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
-
- if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
+ if (record->op & ARM_SPE_OP_SVE)
simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 10/25] perf arm_spe: Report register access in record
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (8 preceding siblings ...)
2025-10-17 10:04 ` [PATCH v2 09/25] perf arm_spe: Introduce data processing macro for SVE operations Leo Yan
@ 2025-10-17 10:04 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 11/25] perf arm_spe: Report MTE allocation tag " Leo Yan
` (14 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:04 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Record register access info for load / store operations.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 6974f594f37c9916fff591ced1e9c2d60cf84f14..804dce129121b9d2600be01af7f1f2780a9d0fc9 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -200,8 +200,19 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_ST;
else
decoder->record.op |= ARM_SPE_OP_LD;
- if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload))
+
+ if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_GP_REG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) {
+ decoder->record.op |= ARM_SPE_OP_SIMD_FP;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_UNSPEC_REG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_NV_SYSREG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE;
+ }
+
break;
case SPE_OP_PKT_HDR_CLASS_OTHER:
decoder->record.op |= ARM_SPE_OP_OTHER;
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 11/25] perf arm_spe: Report MTE allocation tag in record
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (9 preceding siblings ...)
2025-10-17 10:04 ` [PATCH v2 10/25] perf arm_spe: Report register access in record Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 12/25] perf arm_spe: Report extended memory operations in records Leo Yan
` (13 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Save MTE tag info in memory record.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2 ++
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 804dce129121b9d2600be01af7f1f2780a9d0fc9..6696448bdf4f347e2032a1b4da46fcdd4016f9fc 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -209,6 +209,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_UNSPEC_REG;
} else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) {
decoder->record.op |= ARM_SPE_OP_NV_SYSREG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_MTE_TAG;
} else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE;
}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index acab6d11096b19b1d31a553c83cba9732ecf5ddb..7b4d26f2ebfe17a9187f959e5ec0b2479f70cc2f 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -44,6 +44,7 @@ enum arm_spe_2nd_op_ldst {
ARM_SPE_OP_NV_SYSREG = 1 << 10,
ARM_SPE_OP_SIMD_FP = 1 << 11,
ARM_SPE_OP_SVE = 1 << 12,
+ ARM_SPE_OP_MTE_TAG = 1 << 13,
/* Assisted information for memory / SIMD */
ARM_SPE_OP_LD = 1 << 20,
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 12/25] perf arm_spe: Report extended memory operations in records
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (10 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 11/25] perf arm_spe: Report MTE allocation tag " Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 13/25] perf arm_spe: Report associated info for SVE / SME operations Leo Yan
` (12 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Extended memory operations include atomic (AT), acquire/release (AR),
and exclusive (EXCL) operations. Save the relevant information
in the records.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 6696448bdf4f347e2032a1b4da46fcdd4016f9fc..949c20816826a4d45d6cf9a5efaa125acea90b0a 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -211,6 +211,13 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_NV_SYSREG;
} else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) {
decoder->record.op |= ARM_SPE_OP_MTE_TAG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) {
+ if (payload & SPE_OP_PKT_AR)
+ decoder->record.op |= ARM_SPE_OP_AR;
+ if (payload & SPE_OP_PKT_EXCL)
+ decoder->record.op |= ARM_SPE_OP_EXCL;
+ if (payload & SPE_OP_PKT_AT)
+ decoder->record.op |= ARM_SPE_OP_ATOMIC;
} else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 13/25] perf arm_spe: Report associated info for SVE / SME operations
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (11 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 12/25] perf arm_spe: Report extended memory operations in records Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 14/25] perf arm_spe: Report memset and memcpy in records Leo Yan
` (11 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
SVE / SME operations can be predicated, or can be gather load / scatter
store operations. Save the relevant info into the record.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 4 ++++
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 2 ++
2 files changed, 6 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 949c20816826a4d45d6cf9a5efaa125acea90b0a..5b214fc4ca9f67cf11700a18939f37f4a5400b84 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -220,6 +220,10 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_ATOMIC;
} else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE;
+ if (payload & SPE_OP_PKT_SVE_PRED)
+ decoder->record.op |= ARM_SPE_OP_PRED;
+ if (payload & SPE_OP_PKT_SVE_SG)
+ decoder->record.op |= ARM_SPE_OP_SG;
}
break;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 7b4d26f2ebfe17a9187f959e5ec0b2479f70cc2f..d14cf6b95507bcbdb1f7e6d4908e6c40c4a8279c 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -53,6 +53,8 @@ enum arm_spe_2nd_op_ldst {
ARM_SPE_OP_EXCL = 1 << 23,
ARM_SPE_OP_AR = 1 << 24,
ARM_SPE_OP_DP = 1 << 25, /* Data processing */
+ ARM_SPE_OP_PRED = 1 << 26, /* Predicated */
+ ARM_SPE_OP_SG = 1 << 27, /* Gather/Scatter */
};
enum arm_spe_2nd_op_branch {
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 14/25] perf arm_spe: Report memset and memcpy in records
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (12 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 13/25] perf arm_spe: Report associated info for SVE / SME operations Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 15/25] perf arm_spe: Report GCS in record Leo Yan
` (10 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Expose memset and memcpy related info in records.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 4 ++++
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 2 ++
2 files changed, 6 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 5b214fc4ca9f67cf11700a18939f37f4a5400b84..6f0390d4089599cd6bbf1357fa4cd6ec8190a58c 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -224,6 +224,10 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_PRED;
if (payload & SPE_OP_PKT_SVE_SG)
decoder->record.op |= ARM_SPE_OP_SG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) {
+ decoder->record.op |= ARM_SPE_OP_MEMCPY;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) {
+ decoder->record.op |= ARM_SPE_OP_MEMSET;
}
break;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index d14cf6b95507bcbdb1f7e6d4908e6c40c4a8279c..3f4dae589c062d927d286b73ce53fa39795daffd 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -45,6 +45,8 @@ enum arm_spe_2nd_op_ldst {
ARM_SPE_OP_SIMD_FP = 1 << 11,
ARM_SPE_OP_SVE = 1 << 12,
ARM_SPE_OP_MTE_TAG = 1 << 13,
+ ARM_SPE_OP_MEMCPY = 1 << 14,
+ ARM_SPE_OP_MEMSET = 1 << 15,
/* Assisted information for memory / SIMD */
ARM_SPE_OP_LD = 1 << 20,
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 15/25] perf arm_spe: Report GCS in record
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (13 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 14/25] perf arm_spe: Report memset and memcpy in records Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 16/25] perf arm_spe: Expose SIMD information in other operations Leo Yan
` (9 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Report GCS related info in records.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 4 ++++
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 2 ++
2 files changed, 6 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 6f0390d4089599cd6bbf1357fa4cd6ec8190a58c..649471abef6a4386e1b250a19cda4f4caeb0a2ff 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -228,6 +228,10 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_MEMCPY;
} else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) {
decoder->record.op |= ARM_SPE_OP_MEMSET;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) {
+ decoder->record.op |= ARM_SPE_OP_GCS;
+ if (payload & SPE_OP_PKT_GCS_COMM)
+ decoder->record.op |= ARM_SPE_OP_COMM;
}
break;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 3f4dae589c062d927d286b73ce53fa39795daffd..b838e9c6168c6b7c20bb63b8e7c9d27c35f416dc 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -47,6 +47,7 @@ enum arm_spe_2nd_op_ldst {
ARM_SPE_OP_MTE_TAG = 1 << 13,
ARM_SPE_OP_MEMCPY = 1 << 14,
ARM_SPE_OP_MEMSET = 1 << 15,
+ ARM_SPE_OP_GCS = 1 << 16,
/* Assisted information for memory / SIMD */
ARM_SPE_OP_LD = 1 << 20,
@@ -57,6 +58,7 @@ enum arm_spe_2nd_op_ldst {
ARM_SPE_OP_DP = 1 << 25, /* Data processing */
ARM_SPE_OP_PRED = 1 << 26, /* Predicated */
ARM_SPE_OP_SG = 1 << 27, /* Gather/Scatter */
+ ARM_SPE_OP_COMM = 1 << 28, /* Common */
};
enum arm_spe_2nd_op_branch {
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 16/25] perf arm_spe: Expose SIMD information in other operations
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (14 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 15/25] perf arm_spe: Report GCS in record Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 17/25] perf arm_spe: Synthesize memory samples for SIMD operations Leo Yan
` (8 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
The other operations contain SME data processing, ASE (Advanced SIMD)
and floating-point operations. Expose this info in the records.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 18 +++++++++++++++++-
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 4 ++++
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 649471abef6a4386e1b250a19cda4f4caeb0a2ff..9e02b2bdd1177193996d071dd88f969e25b1ad86 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -237,8 +237,24 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
break;
case SPE_OP_PKT_HDR_CLASS_OTHER:
decoder->record.op |= ARM_SPE_OP_OTHER;
- if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload))
+ if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE | ARM_SPE_OP_DP;
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ decoder->record.op |= ARM_SPE_OP_FP;
+ if (payload & SPE_OP_PKT_SVE_PRED)
+ decoder->record.op |= ARM_SPE_OP_PRED;
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) {
+ decoder->record.op |= ARM_SPE_OP_SME;
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ decoder->record.op |= ARM_SPE_OP_FP;
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
+ if (payload & SPE_OP_PKT_OTHER_ASE)
+ decoder->record.op |= ARM_SPE_OP_ASE;
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ decoder->record.op |= ARM_SPE_OP_FP;
+ if (payload & SPE_OP_PKT_COND)
+ decoder->record.op |= ARM_SPE_OP_COND;
+ }
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
decoder->record.op |= ARM_SPE_OP_BRANCH_ERET;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index b838e9c6168c6b7c20bb63b8e7c9d27c35f416dc..3310e05122f02e8ef32f79f8ed3c6932cc43eecc 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -48,6 +48,8 @@ enum arm_spe_2nd_op_ldst {
ARM_SPE_OP_MEMCPY = 1 << 14,
ARM_SPE_OP_MEMSET = 1 << 15,
ARM_SPE_OP_GCS = 1 << 16,
+ ARM_SPE_OP_SME = 1 << 17,
+ ARM_SPE_OP_ASE = 1 << 18,
/* Assisted information for memory / SIMD */
ARM_SPE_OP_LD = 1 << 20,
@@ -59,6 +61,8 @@ enum arm_spe_2nd_op_ldst {
ARM_SPE_OP_PRED = 1 << 26, /* Predicated */
ARM_SPE_OP_SG = 1 << 27, /* Gather/Scatter */
ARM_SPE_OP_COMM = 1 << 28, /* Common */
+ ARM_SPE_OP_FP = 1 << 29, /* Floating-point */
+ ARM_SPE_OP_COND = 1 << 30, /* Conditional */
};
enum arm_spe_2nd_op_branch {
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 17/25] perf arm_spe: Synthesize memory samples for SIMD operations
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (15 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 16/25] perf arm_spe: Expose SIMD information in other operations Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 18/25] perf/uapi: Extend data source fields Leo Yan
` (7 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Synthesize memory samples for SIMD operations (including Advanced SIMD,
SVE, and SME). To provide complete information, also generate data
source entries for SIMD operations.
Since memory operations are not limited to load and store, set
PERF_MEM_OP_NA if the operation does not fall into these cases.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 88f24a8626861393defc89540e4126a124479699..bc233a5007d20e4dec11eeb1554adc1580f43718 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -39,6 +39,11 @@
#define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST))
+#define is_simd_op(op) (!!((op) & (ARM_SPE_OP_SIMD_FP | ARM_SPE_OP_SVE | \
+ ARM_SPE_OP_SME | ARM_SPE_OP_ASE)))
+
+#define is_mem_op(op) (is_ldst_op(op) || is_simd_op(op))
+
#define ARM_SPE_CACHE_EVENT(lvl) \
(ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS)
@@ -985,8 +990,7 @@ arm_spe__synth_data_source(struct arm_spe_queue *speq,
{
union perf_mem_data_src data_src = {};
- /* Only synthesize data source for LDST operations */
- if (!is_ldst_op(record->op))
+ if (!is_mem_op(record->op))
return data_src;
if (record->op & ARM_SPE_OP_LD)
@@ -994,7 +998,7 @@ arm_spe__synth_data_source(struct arm_spe_queue *speq,
else if (record->op & ARM_SPE_OP_ST)
data_src.mem_op = PERF_MEM_OP_STORE;
else
- return data_src;
+ data_src.mem_op = PERF_MEM_OP_NA;
arm_spe__synth_ds(speq, record, &data_src);
arm_spe__synth_memory_level(speq, record, &data_src);
@@ -1095,11 +1099,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
return err;
}
- /*
- * When data_src is zero it means the record is not a memory operation,
- * skip to synthesize memory sample for this case.
- */
- if (spe->sample_memory && is_ldst_op(record->op)) {
+ if (spe->sample_memory && is_mem_op(record->op)) {
err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
if (err)
return err;
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 18/25] perf/uapi: Extend data source fields
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (16 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 17/25] perf arm_spe: Synthesize memory samples for SIMD operations Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 19/25] tools/include: Sync uapi/linux/perf.h with the kernel sources Leo Yan
` (6 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Arm CPUs introduce several new types of memory operations, like MTE tag
access, system register access for nested virtualization, memcpy &
memset, and Guarded Control Stack (GCS).
For memory operation details, Arm SPE provides information like data
(parallel) processing, floating-point, predicated, atomic, exclusive,
acquire/release, gather/scatter, and conditional.
This commit introduces a field 'mem_op_ext' for the extended operation type.
The extended operation type can be combined with the existing operation
type to express a memory type. For example, PERF_MEM_EXT_OP_GCS can
be set along with PERF_MEM_OP_LOAD to represent a load operation for
GCS register access.
Bit fields are also added to represent detailed operation attributes.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
include/uapi/linux/perf_event.h | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 78a362b8002776e5ce83a0d7816601638c61ecc6..9b9fa59fd828756b5e8e93520da5a269f0dfff52 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1309,14 +1309,32 @@ union perf_mem_data_src {
mem_snoopx : 2, /* Snoop mode, ext */
mem_blk : 3, /* Access blocked */
mem_hops : 3, /* Hop level */
- mem_rsvd : 18;
+ mem_op_ext : 4, /* Extended type of opcode */
+ mem_dp : 1, /* Data processing */
+ mem_fp : 1, /* Floating-point */
+ mem_pred : 1, /* Predicated */
+ mem_atomic : 1, /* Atomic operation */
+ mem_excl : 1, /* Exclusive */
+ mem_ar : 1, /* Acquire/release */
+ mem_sg : 1, /* Scatter/Gather */
+ mem_cond : 1, /* Conditional */
+ mem_rsvd : 6;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd : 18,
+ __u64 mem_rsvd : 6,
+ mem_cond : 1, /* Conditional */
+ mem_sg : 1, /* Scatter/Gather */
+ mem_ar : 1, /* Acquire/release */
+ mem_excl : 1, /* Exclusive */
+ mem_atomic : 1, /* Atomic operation */
+ mem_pred : 1, /* Predicated */
+ mem_fp : 1, /* Floating-point */
+ mem_dp : 1, /* Data processing */
+ mem_op_ext : 4, /* Extended type of opcode */
mem_hops : 3, /* Hop level */
mem_blk : 3, /* Access blocked */
mem_snoopx : 2, /* Snoop mode, ext */
@@ -1426,6 +1444,16 @@ union perf_mem_data_src {
/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
+/* Extended type of memory opcode: */
+#define PERF_MEM_EXT_OP_NA 0x0 /* Not available */
+#define PERF_MEM_EXT_OP_MTE_TAG 0x1 /* MTE tag */
+#define PERF_MEM_EXT_OP_NESTED_VIRT 0x2 /* Nested virtualization */
+#define PERF_MEM_EXT_OP_MEMCPY 0x3 /* Memory copy */
+#define PERF_MEM_EXT_OP_MEMSET 0x4 /* Memory set */
+#define PERF_MEM_EXT_OP_SIMD 0x5 /* SIMD */
+#define PERF_MEM_EXT_OP_GCS 0x6 /* Guarded Control Stack */
+#define PERF_MEM_EXT_OP_SHIFT 46
+
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 19/25] tools/include: Sync uapi/linux/perf.h with the kernel sources
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (17 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 18/25] perf/uapi: Extend data source fields Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 20/25] perf mem: Print extended fields Leo Yan
` (5 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Sync for extended memory operation bit fields.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/include/uapi/linux/perf_event.h | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 78a362b8002776e5ce83a0d7816601638c61ecc6..9b9fa59fd828756b5e8e93520da5a269f0dfff52 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1309,14 +1309,32 @@ union perf_mem_data_src {
mem_snoopx : 2, /* Snoop mode, ext */
mem_blk : 3, /* Access blocked */
mem_hops : 3, /* Hop level */
- mem_rsvd : 18;
+ mem_op_ext : 4, /* Extended type of opcode */
+ mem_dp : 1, /* Data processing */
+ mem_fp : 1, /* Floating-point */
+ mem_pred : 1, /* Predicated */
+ mem_atomic : 1, /* Atomic operation */
+ mem_excl : 1, /* Exclusive */
+ mem_ar : 1, /* Acquire/release */
+ mem_sg : 1, /* Scatter/Gather */
+ mem_cond : 1, /* Conditional */
+ mem_rsvd : 6;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd : 18,
+ __u64 mem_rsvd : 6,
+ mem_cond : 1, /* Conditional */
+ mem_sg : 1, /* Scatter/Gather */
+ mem_ar : 1, /* Acquire/release */
+ mem_excl : 1, /* Exclusive */
+ mem_atomic : 1, /* Atomic operation */
+ mem_pred : 1, /* Predicated */
+ mem_fp : 1, /* Floating-point */
+ mem_dp : 1, /* Data processing */
+ mem_op_ext : 4, /* Extended type of opcode */
mem_hops : 3, /* Hop level */
mem_blk : 3, /* Access blocked */
mem_snoopx : 2, /* Snoop mode, ext */
@@ -1426,6 +1444,16 @@ union perf_mem_data_src {
/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
+/* Extended type of memory opcode: */
+#define PERF_MEM_EXT_OP_NA 0x0 /* Not available */
+#define PERF_MEM_EXT_OP_MTE_TAG 0x1 /* MTE tag */
+#define PERF_MEM_EXT_OP_NESTED_VIRT 0x2 /* Nested virtualization */
+#define PERF_MEM_EXT_OP_MEMCPY 0x3 /* Memory copy */
+#define PERF_MEM_EXT_OP_MEMSET 0x4 /* Memory set */
+#define PERF_MEM_EXT_OP_SIMD 0x5 /* SIMD */
+#define PERF_MEM_EXT_OP_GCS 0x6 /* Guarded Control Stack */
+#define PERF_MEM_EXT_OP_SHIFT 46
+
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 20/25] perf mem: Print extended fields
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (18 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 19/25] tools/include: Sync uapi/linux/perf.h with the kernel sources Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 21/25] perf arm_spe: Set extended fields in data source Leo Yan
` (4 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Print the extended operation types and affiliated info.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/mem-events.c | 66 ++++++++++++++++++++++++++++++++++++++++----
1 file changed, 60 insertions(+), 6 deletions(-)
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 80b3069427bc4bb5ffc3ab0856c01c76d9ba3ba6..c0aee982fb4f1a849c28a8bb01693855922832f6 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -413,11 +413,15 @@ static const char * const mem_hops[] = {
static int perf_mem__op_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
- u64 op = PERF_MEM_LOCK_NA;
+ union perf_mem_data_src data_src;
+ u64 op = PERF_MEM_OP_NA, ext_op = 0;
int l;
- if (mem_info)
- op = mem_info__const_data_src(mem_info)->mem_op;
+ if (mem_info) {
+ data_src = *mem_info__const_data_src(mem_info);
+ op = data_src.mem_op;
+ ext_op = data_src.mem_op_ext;
+ }
if (op & PERF_MEM_OP_NA)
l = scnprintf(out, sz, "N/A");
@@ -432,6 +436,19 @@ static int perf_mem__op_scnprintf(char *out, size_t sz, const struct mem_info *m
else
l = scnprintf(out, sz, "No");
+ if (ext_op == PERF_MEM_EXT_OP_MTE_TAG)
+ l += scnprintf(out + l, sz - l, " MTE");
+ else if (ext_op == PERF_MEM_EXT_OP_NESTED_VIRT)
+ l += scnprintf(out + l, sz - l, " NV");
+ else if (ext_op == PERF_MEM_EXT_OP_MEMCPY)
+ l += scnprintf(out + l, sz - l, " MEMCPY");
+ else if (ext_op == PERF_MEM_EXT_OP_MEMSET)
+ l += scnprintf(out + l, sz - l, " MEMSET");
+ else if (ext_op == PERF_MEM_EXT_OP_SIMD)
+ l += scnprintf(out + l, sz - l, " SIMD");
+ else if (ext_op == PERF_MEM_EXT_OP_GCS)
+ l += scnprintf(out + l, sz - l, " GCS");
+
return l;
}
@@ -582,9 +599,6 @@ int perf_mem__blk_scnprintf(char *out, size_t sz, const struct mem_info *mem_inf
size_t l = 0;
u64 mask = PERF_MEM_BLK_NA;
- sz -= 1; /* -1 for null termination */
- out[0] = '\0';
-
if (mem_info)
mask = mem_info__const_data_src(mem_info)->mem_blk;
@@ -600,6 +614,44 @@ int perf_mem__blk_scnprintf(char *out, size_t sz, const struct mem_info *mem_inf
return l;
}
+static int perf_mem__aff_scnprintf(char *out, size_t sz,
+ const struct mem_info *mem_info)
+{
+ union perf_mem_data_src data_src;
+ size_t l = 0;
+
+ sz -= 1; /* -1 for null termination */
+ out[0] = '\0';
+
+ if (!mem_info)
+ goto out;
+
+ data_src = *mem_info__const_data_src(mem_info);
+
+ if (data_src.mem_dp)
+ l += scnprintf(out + l, sz - l, " DP");
+ if (data_src.mem_fp)
+ l += scnprintf(out + l, sz - l, " FP");
+ if (data_src.mem_pred)
+ l += scnprintf(out + l, sz - l, " PRED");
+ if (data_src.mem_atomic)
+ l += scnprintf(out + l, sz - l, " ATOMIC");
+ if (data_src.mem_excl)
+ l += scnprintf(out + l, sz - l, " EX");
+ if (data_src.mem_ar)
+ l += scnprintf(out + l, sz - l, " AR");
+ if (data_src.mem_sg)
+ l += scnprintf(out + l, sz - l, " SG");
+ if (data_src.mem_cond)
+ l += scnprintf(out + l, sz - l, " COND");
+
+out:
+ if (!l)
+ l += scnprintf(out + l, sz - l, " N/A");
+
+ return l;
+}
+
int perf_script__meminfo_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
int i = 0;
@@ -616,6 +668,8 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, const struct mem_info *
i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
i += scnprintf(out + i, sz - i, "|BLK ");
i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);
+ i += scnprintf(out + i, sz - i, "|AFF");
+ i += perf_mem__aff_scnprintf(out + i, sz - i, mem_info);
return i;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 21/25] perf arm_spe: Set extended fields in data source
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (19 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 20/25] perf mem: Print extended fields Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 22/25] perf sort: Support sort ASE and SME Leo Yan
` (3 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Set extended operation type and affiliated info in the data source.
Before:
perf script -F,dso,sym,data_src
sve-test 6516696.714341: 288100144 |OP STORE|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF N/A
sve-test 6516696.714341: 288100144 |OP STORE|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF N/A
sve-test 6516696.714341: 288100144 |OP STORE|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF N/A
sve-test 6516696.714344: 288800142 |OP LOAD|LVL L1 hit|SNP HitM|TLB Walker hit|LCK No|BLK N/A|AFF N/A
sve-test 6516696.714344: 288800142 |OP LOAD|LVL L1 hit|SNP HitM|TLB Walker hit|LCK No|BLK N/A|AFF N/A
After:
perf script -F,dso,sym,data_src
sve-test 6516696.714341: 444000288100144 |OP STORE SIMD|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF PRED SG
sve-test 6516696.714341: 444000288100144 |OP STORE SIMD|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF PRED SG
sve-test 6516696.714341: 444000288100144 |OP STORE SIMD|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF PRED SG
sve-test 6516696.714344: 288800142 |OP LOAD|LVL L1 hit|SNP HitM|TLB Walker hit|LCK No|BLK N/A|AFF N/A
sve-test 6516696.714344: 288800142 |OP LOAD|LVL L1 hit|SNP HitM|TLB Walker hit|LCK No|BLK N/A|AFF N/A
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index bc233a5007d20e4dec11eeb1554adc1580f43718..e9df50b2cb807185c939f6712e9d65a41aacce87 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -1000,6 +1000,36 @@ arm_spe__synth_data_source(struct arm_spe_queue *speq,
else
data_src.mem_op = PERF_MEM_OP_NA;
+ if (record->op & ARM_SPE_OP_MTE_TAG)
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_MTE_TAG;
+ else if (record->op & ARM_SPE_OP_NV_SYSREG)
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_NESTED_VIRT;
+ else if (record->op & ARM_SPE_OP_MEMCPY)
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_MEMCPY;
+ else if (record->op & ARM_SPE_OP_MEMSET)
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_MEMSET;
+ else if (record->op & ARM_SPE_OP_GCS)
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_GCS;
+ else if (is_simd_op(record->op))
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_SIMD;
+
+ if (record->op & ARM_SPE_OP_DP)
+ data_src.mem_dp = 1;
+ if (record->op & ARM_SPE_OP_FP)
+ data_src.mem_fp = 1;
+ if (record->op & ARM_SPE_OP_PRED)
+ data_src.mem_pred = 1;
+ if (record->op & ARM_SPE_OP_ATOMIC)
+ data_src.mem_atomic = 1;
+ if (record->op & ARM_SPE_OP_EXCL)
+ data_src.mem_excl = 1;
+ if (record->op & ARM_SPE_OP_AR)
+ data_src.mem_ar = 1;
+ if (record->op & ARM_SPE_OP_SG)
+ data_src.mem_sg = 1;
+ if (record->op & ARM_SPE_OP_COND)
+ data_src.mem_cond = 1;
+
arm_spe__synth_ds(speq, record, &data_src);
arm_spe__synth_memory_level(speq, record, &data_src);
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 22/25] perf sort: Support sort ASE and SME
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (20 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 21/25] perf arm_spe: Set extended fields in data source Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 23/25] perf sort: Sort disabled and full predicated flags Leo Yan
` (2 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Support sorting on the Advanced SIMD extension (ASE) and SME.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/sample.h | 12 +++++++++---
tools/perf/util/sort.c | 6 +++++-
2 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index fae834144ef42105d08a59704ee75cd4852bbc5a..3b22cc2f7ad6ba1dc53b5d9ffff2a4e47c89612a 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -67,12 +67,18 @@ struct aux_sample {
};
struct simd_flags {
- u8 arch:1, /* architecture (isa) */
- pred:2; /* predication */
+ u8 arch: 2, /* architecture (isa) */
+ pred: 2, /* predication */
+ resv: 4; /* reserved */
};
/* simd architecture flags */
-#define SIMD_OP_FLAGS_ARCH_SVE 0x01 /* ARM SVE */
+enum simd_op_flags {
+ SIMD_OP_FLAGS_ARCH_NONE = 0x0, /* No SIMD operation */
+ SIMD_OP_FLAGS_ARCH_SVE, /* Arm SVE */
+ SIMD_OP_FLAGS_ARCH_SME, /* Arm SME */
+ SIMD_OP_FLAGS_ARCH_ASE, /* Arm Advanced SIMD */
+};
/* simd predicate flags */
#define SIMD_OP_FLAGS_PRED_PARTIAL 0x01 /* partial predicate */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index f3a565b0e2307a8adf159725f803df5fef0dff83..ad98d5b75f34b49cd39e30cba997a614654eebcb 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -193,8 +193,12 @@ static const char *hist_entry__get_simd_name(struct simd_flags *simd_flags)
{
u64 arch = simd_flags->arch;
- if (arch & SIMD_OP_FLAGS_ARCH_SVE)
+ if (arch == SIMD_OP_FLAGS_ARCH_SVE)
return "SVE";
+ else if (arch == SIMD_OP_FLAGS_ARCH_SME)
+ return "SME";
+ else if (arch == SIMD_OP_FLAGS_ARCH_ASE)
+ return "ASE";
else
return "n/a";
}
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 23/25] perf sort: Sort disabled and full predicated flags
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (21 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 22/25] perf sort: Support sort ASE and SME Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 24/25] perf report: Update document for SIMD flags Leo Yan
2025-10-17 10:05 ` [PATCH v2 25/25] perf arm_spe: Improve SIMD flags setting Leo Yan
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
According to the Arm ARM (ARM DDI 0487, L.a), section D18.2.6
"Events packet", apart from the empty predicate and partial
predicates, an SVE or SME operation can be predicate-disabled
or fully predicated.
To provide complete results, introduce two predicate types for
these cases.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/sample.h | 13 +++++++++----
tools/perf/util/sort.c | 15 ++++++++++-----
2 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 3b22cc2f7ad6ba1dc53b5d9ffff2a4e47c89612a..9477fe9c87402dd9135f25a6eee2e7539e6fdbca 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -68,8 +68,8 @@ struct aux_sample {
struct simd_flags {
u8 arch: 2, /* architecture (isa) */
- pred: 2, /* predication */
- resv: 4; /* reserved */
+ pred: 3, /* predication */
+ resv: 3; /* reserved */
};
/* simd architecture flags */
@@ -81,8 +81,13 @@ enum simd_op_flags {
};
/* simd predicate flags */
-#define SIMD_OP_FLAGS_PRED_PARTIAL 0x01 /* partial predicate */
-#define SIMD_OP_FLAGS_PRED_EMPTY 0x02 /* empty predicate */
+enum simd_pred_flags {
+ SIMD_OP_FLAGS_PRED_NONE = 0x0, /* Not available */
+ SIMD_OP_FLAGS_PRED_PARTIAL, /* partial predicate */
+ SIMD_OP_FLAGS_PRED_EMPTY, /* empty predicate */
+ SIMD_OP_FLAGS_PRED_FULL, /* full predicate */
+ SIMD_OP_FLAGS_PRED_DISABLED, /* disabled predicate */
+};
struct perf_sample {
u64 ip;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index ad98d5b75f34b49cd39e30cba997a614654eebcb..fe203458c6ffbc340450f6fffc13d0e256dfe7cf 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -207,18 +207,23 @@ static int hist_entry__simd_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
const char *name;
+ const char *pred_str = ".";
if (!he->simd_flags.arch)
return repsep_snprintf(bf, size, "");
name = hist_entry__get_simd_name(&he->simd_flags);
- if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_EMPTY)
- return repsep_snprintf(bf, size, "[e] %s", name);
- else if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_PARTIAL)
- return repsep_snprintf(bf, size, "[p] %s", name);
+ if (he->simd_flags.pred == SIMD_OP_FLAGS_PRED_EMPTY)
+ pred_str = "e";
+ else if (he->simd_flags.pred == SIMD_OP_FLAGS_PRED_PARTIAL)
+ pred_str = "p";
+ else if (he->simd_flags.pred == SIMD_OP_FLAGS_PRED_DISABLED)
+ pred_str = "d";
+ else if (he->simd_flags.pred == SIMD_OP_FLAGS_PRED_FULL)
+ pred_str = "f";
- return repsep_snprintf(bf, size, "[.] %s", name);
+ return repsep_snprintf(bf, size, "[%s] %s", pred_str, name);
}
struct sort_entry sort_simd = {
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 24/25] perf report: Update document for SIMD flags
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (22 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 23/25] perf sort: Sort disabled and full predicated flags Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
2025-10-17 10:05 ` [PATCH v2 25/25] perf arm_spe: Improve SIMD flags setting Leo Yan
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Update SIMD architecture and predicate flags.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/Documentation/perf-report.txt | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index acef3ff4178eff66e8f876ae16cdac7b1387f07b..f361081a65dbe9cead539c7cb81d6ed86eb0acc6 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -136,7 +136,10 @@ OPTIONS
- addr: (Full) virtual address of the sampled instruction
- retire_lat: On X86, this reports pipeline stall of this instruction compared
to the previous instruction in cycles. And currently supported only on X86
- - simd: Flags describing a SIMD operation. "e" for empty Arm SVE predicate. "p" for partial Arm SVE predicate
+ - simd: Flags describing a SIMD operation. The architecture type can be Arm's
+ ASE (Advanced SIMD extension), SVE, SME. It provides an extra tag for
+ predicate: "e" for empty predicate, "p" for partial predicate, "d" for
+ predicate disabled, and "f" for full predicate.
- type: Data type of sample memory access.
- typeoff: Offset in the data type of sample memory access.
- symoff: Offset in the symbol.
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread* [PATCH v2 25/25] perf arm_spe: Improve SIMD flags setting
2025-10-17 10:04 [PATCH v2 00/25] perf arm_spe: Extend operations Leo Yan
` (23 preceding siblings ...)
2025-10-17 10:05 ` [PATCH v2 24/25] perf report: Update document for SIMD flags Leo Yan
@ 2025-10-17 10:05 ` Leo Yan
24 siblings, 0 replies; 26+ messages in thread
From: Leo Yan @ 2025-10-17 10:05 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Fill in ASE and SME operations for the SIMD arch field.
Also set the predicate flags for SVE and SME, which differ in one
respect: SME does not have a predicate flag, so the setting is based on
events. SVE provides a predicate flag to indicate whether the predicate
is disabled, which allows four cases to be distinguished: partial
predicates, empty predicates, full predicates, and disabled predicates.
After:
perf report -s +simd
...
0.06% 0.06% sve-test sve-test [.] setz [p] SVE
0.06% 0.06% sve-test [kernel.kallsyms] [k] do_raw_spin_lock
0.06% 0.06% sve-test sve-test [.] getz [p] SVE
0.06% 0.06% sve-test [kernel.kallsyms] [k] timekeeping_advance
0.06% 0.06% sve-test sve-test [.] getz [d] SVE
0.06% 0.06% sve-test [kernel.kallsyms] [k] update_load_avg
0.06% 0.06% sve-test sve-test [.] getz [e] SVE
0.05% 0.05% sve-test sve-test [.] setz [e] SVE
0.05% 0.05% sve-test [kernel.kallsyms] [k] update_curr
0.05% 0.05% sve-test sve-test [.] setz [d] SVE
0.05% 0.05% sve-test [kernel.kallsyms] [k] do_raw_spin_unlock
0.05% 0.05% sve-test [kernel.kallsyms] [k] timekeeping_update_from_shadow.constprop.0
0.05% 0.05% sve-test sve-test [.] getz [f] SVE
0.05% 0.05% sve-test sve-test [.] setz [f] SVE
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe.c | 26 ++++++++++++++++++++------
1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index e9df50b2cb807185c939f6712e9d65a41aacce87..0ffb6c684f4dc55e6029eff6fdd7bef3ffa36352 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -353,12 +353,26 @@ static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *
if (record->op & ARM_SPE_OP_SVE)
simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
-
- if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
- simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
-
- if (record->type & ARM_SPE_SVE_EMPTY_PRED)
- simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
+ else if (record->op & ARM_SPE_OP_SME)
+ simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SME;
+ else if (record->op & (ARM_SPE_OP_ASE | ARM_SPE_OP_SIMD_FP))
+ simd_flags.arch |= SIMD_OP_FLAGS_ARCH_ASE;
+
+ if (record->op & ARM_SPE_OP_SVE) {
+ if (!(record->op & ARM_SPE_OP_PRED))
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_DISABLED;
+ else if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_PARTIAL;
+ else if (record->type & ARM_SPE_SVE_EMPTY_PRED)
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_EMPTY;
+ else
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_FULL;
+ } else {
+ if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_PARTIAL;
+ else if (record->type & ARM_SPE_SVE_EMPTY_PRED)
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_EMPTY;
+ }
return simd_flags;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 26+ messages in thread