* [PATCH 01/25] perf arm_spe: Fix memset subclass in operation
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 02/25] perf arm_spe: Unify operation naming Leo Yan
` (23 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
The operation subclass is extracted from bits [7..1] of the payload.
Since bit [0] is not parsed, there is no chance to match the memset type
(0x25). As a result, the memset payload is never parsed successfully.
Instead of extracting a unified bit field, change to extract the
specific bits for each operation subclass.
Fixes: 34fb60400e32 ("perf arm-spe: Add raw decoding for SPEv1.3 MTE and MOPS load/store")
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
.../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 25 ++++++----------------
.../util/arm-spe-decoder/arm-spe-pkt-decoder.h | 15 ++++++-------
2 files changed, 14 insertions(+), 26 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 80561630253dd5c46f7e99b24fc13b99f346459f..1a1ffe50ee73ab4400fd1163d0e84e54f4d8ab0b 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -371,31 +371,20 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR");
}
- switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) {
- case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP:
+ if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_GP_REG:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MTE-TAG");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_MEMCPY:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMCPY");
- break;
- case SPE_OP_PKT_LDST_SUBCLASS_MEMSET:
+ else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload))
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMSET");
- break;
- default:
- break;
- }
if (SPE_OP_PKT_IS_LDST_SVE(payload)) {
/* SVE effective vector length */
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index d00c2481712dcc457eab2f5e9848ffc3150e6236..75e355fe3438cc07704cb61a66ca162bd0904042 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -125,14 +125,13 @@ enum arm_spe_events {
#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
-#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1))
-#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0
-#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4
-#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10
-#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30
-#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG 0x14
-#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY 0x20
-#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET 0x25
+#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0)
+#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4)
+#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x10)
+#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(v) (((v) & GENMASK_ULL(7, 1)) == 0x30)
+#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(v) (((v) & GENMASK_ULL(7, 1)) == 0x14)
+#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(v) (((v) & GENMASK_ULL(7, 1)) == 0x20)
+#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET(v) (((v) & GENMASK_ULL(7, 0)) == 0x25)
#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 02/25] perf arm_spe: Unify operation naming
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
2025-09-29 16:37 ` [PATCH 01/25] perf arm_spe: Fix memset subclass in operation Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 03/25] perf arm_spe: Decode GCS operation Leo Yan
` (22 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Rename the extended subclass and the SVE/SME register access subclass so
that the naming is consistent across all subclasses.
Add a log string "SVE-SME-REG" for the SVE/SME register access, which
makes the output easier to parse.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2 +-
.../perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 21 ++++++++++-----------
.../perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 4 ++--
3 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 96eb7cced6fd1574f5d823e4c67b9051dcf183ed..b0fb896abad48de93aa1ed560029f9bc9ae969e9 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -200,7 +200,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_ST;
else
decoder->record.op |= ARM_SPE_OP_LD;
- if (SPE_OP_PKT_IS_LDST_SVE(payload))
+ if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload))
decoder->record.op |= ARM_SPE_OP_SVE_LDST;
break;
case SPE_OP_PKT_HDR_CLASS_OTHER:
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 1a1ffe50ee73ab4400fd1163d0e84e54f4d8ab0b..f6e9c58ce62f3ae227a79d91caefaef4bd87d98a 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -362,31 +362,30 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len,
payload & 0x1 ? "ST" : "LD");
- if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) {
+ if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) {
if (payload & SPE_OP_PKT_AT)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT");
if (payload & SPE_OP_PKT_EXCL)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL");
if (payload & SPE_OP_PKT_AR)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR");
- }
-
- if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP");
- else if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG");
- else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG");
- else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG");
- else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MTE-TAG");
- else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMCPY");
- else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload))
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMSET");
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-SME-REG");
- if (SPE_OP_PKT_IS_LDST_SVE(payload)) {
/* SVE effective vector length */
arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d",
SPE_OP_PKG_SVE_EVL(payload));
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 75e355fe3438cc07704cb61a66ca162bd0904042..cb947e625918922dc1fa25cf8843b09661197782 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -133,14 +133,14 @@ enum arm_spe_events {
#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(v) (((v) & GENMASK_ULL(7, 1)) == 0x20)
#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET(v) (((v) & GENMASK_ULL(7, 0)) == 0x25)
-#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
+#define SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
#define SPE_OP_PKT_AR BIT(4)
#define SPE_OP_PKT_EXCL BIT(3)
#define SPE_OP_PKT_AT BIT(2)
#define SPE_OP_PKT_ST BIT(0)
-#define SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | BIT(1))) == 0x8)
+#define SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(v) (((v) & (BIT(3) | BIT(1))) == 0x8)
#define SPE_OP_PKT_SVE_SG BIT(7)
/*
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 03/25] perf arm_spe: Decode GCS operation
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
2025-09-29 16:37 ` [PATCH 01/25] perf arm_spe: Fix memset subclass in operation Leo Yan
2025-09-29 16:37 ` [PATCH 02/25] perf arm_spe: Unify operation naming Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 04/25] perf arm_spe: Rename SPE_OP_PKT_IS_OTHER_SVE_OP macro Leo Yan
` (21 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Decode a load or store from a GCS operation and the associated "common"
field.
After:
. 00000000: 49 44 LD GCS COMM
. 00000002: b2 18 3c d7 83 00 80 ff ff VA 0xffff800083d73c18
. 0000000b: 9a 00 00 LAT 0 XLAT
. 0000000e: 43 00 DATA-SOURCE 0
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 4 ++++
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 4 ++++
2 files changed, 8 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index f6e9c58ce62f3ae227a79d91caefaef4bd87d98a..0046079edaccdd4d0c093f73395d1ecdc0007621 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -394,6 +394,10 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
if (payload & SPE_OP_PKT_SVE_SG)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG");
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) {
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS");
+ if (payload & SPE_OP_PKT_GCS_COMM)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " COMM");
}
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index cb947e625918922dc1fa25cf8843b09661197782..94333e7bc382deef119414e4f1de7c2878620035 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -154,6 +154,10 @@ enum arm_spe_events {
#define SPE_OP_PKT_SVE_PRED BIT(2)
#define SPE_OP_PKT_SVE_FP BIT(1)
+#define SPE_OP_PKT_LDST_SUBCLASS_GCS(v) (((v) & (GENMASK_ULL(7, 3) | BIT(1))) == 0x40)
+
+#define SPE_OP_PKT_GCS_COMM BIT(2)
+
#define SPE_OP_PKT_CR_MASK GENMASK_ULL(4, 3)
#define SPE_OP_PKT_CR_BL(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 1)
#define SPE_OP_PKT_CR_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 2)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 04/25] perf arm_spe: Rename SPE_OP_PKT_IS_OTHER_SVE_OP macro
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (2 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 03/25] perf arm_spe: Decode GCS operation Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 05/25] perf arm_spe: Decode ASE and FP fields in other operation Leo Yan
` (20 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Rename the macro to SPE_OP_PKT_OTHER_SUBCLASS_SVE to unify naming.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2 +-
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +-
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index b0fb896abad48de93aa1ed560029f9bc9ae969e9..847c29385bea8618e14b2eb21a08896041890d89 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -205,7 +205,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
break;
case SPE_OP_PKT_HDR_CLASS_OTHER:
decoder->record.op |= ARM_SPE_OP_OTHER;
- if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload))
+ if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload))
decoder->record.op |= ARM_SPE_OP_SVE_OTHER;
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 0046079edaccdd4d0c093f73395d1ecdc0007621..533920b738cbcb39136d1ba3d88e99f9d8009e74 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -340,7 +340,7 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
switch (packet->index) {
case SPE_OP_PKT_HDR_CLASS_OTHER:
- if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) {
+ if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER");
/* SVE effective vector length */
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 94333e7bc382deef119414e4f1de7c2878620035..48bd9e9ef132b11b79ffe2e2fbc7cfe4c340ff92 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -123,7 +123,7 @@ enum arm_spe_events {
#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1
#define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2
-#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+#define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0)
#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 05/25] perf arm_spe: Decode ASE and FP fields in other operation
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (3 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 04/25] perf arm_spe: Rename SPE_OP_PKT_IS_OTHER_SVE_OP macro Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-10-09 9:04 ` James Clark
2025-09-29 16:37 ` [PATCH 06/25] perf arm_spe: Decode SME data processing packet Leo Yan
` (19 subsequent siblings)
24 siblings, 1 reply; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Add an explicit check for the other operation subclass, which prevents
incorrect classification. Parse the ASE and FP fields.
After:
. 0000002f: 48 06 OTHER ASE FP INSN-OTHER
. 00000031: b2 08 80 48 01 08 00 ff ff VA 0xffff000801488008
. 0000003a: 9a 00 00 LAT 0 XLAT
. 0000003d: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 +++++-
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 4 ++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 533920b738cbcb39136d1ba3d88e99f9d8009e74..21b65a9b40f481b6cb25aaf01ab627ade046ff72 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -351,8 +351,12 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
if (payload & SPE_OP_PKT_SVE_PRED)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
- } else {
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER");
+ if (payload & SPE_OP_PKT_OTHER_ASE)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " ASE");
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s",
payload & SPE_OP_PKT_COND ?
"COND-SELECT" : "INSN-OTHER");
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 48bd9e9ef132b11b79ffe2e2fbc7cfe4c340ff92..704601c6dbe30e93f83a82670d0d60344a22222a 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -123,8 +123,12 @@ enum arm_spe_events {
#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1
#define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2
+#define SPE_OP_PKT_OTHER_SUBCLASS_OTHER(v) (((v) & GENMASK_ULL(7, 3)) == 0x0)
#define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+#define SPE_OP_PKT_OTHER_ASE BIT(2)
+#define SPE_OP_PKT_OTHER_FP BIT(1)
+
#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0)
#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4)
#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x10)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH 05/25] perf arm_spe: Decode ASE and FP fields in other operation
2025-09-29 16:37 ` [PATCH 05/25] perf arm_spe: Decode ASE and FP fields in other operation Leo Yan
@ 2025-10-09 9:04 ` James Clark
2025-10-09 9:06 ` James Clark
0 siblings, 1 reply; 36+ messages in thread
From: James Clark @ 2025-10-09 9:04 UTC (permalink / raw)
To: Leo Yan
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter
On 29/09/2025 5:37 pm, Leo Yan wrote:
> Add a check for other operation, which prevents any incorrectly
> classifying. Parse the ASE and FP fields.
>
> After:
>
> . 0000002f: 48 06 OTHER ASE FP INSN-OTHER
> . 00000031: b2 08 80 48 01 08 00 ff ff VA 0xffff000801488008
> . 0000003a: 9a 00 00 LAT 0 XLAT
> . 0000003d: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS
>
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
> tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 +++++-
> tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 4 ++++
> 2 files changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
> index 533920b738cbcb39136d1ba3d88e99f9d8009e74..21b65a9b40f481b6cb25aaf01ab627ade046ff72 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
> @@ -351,8 +351,12 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
> arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
> if (payload & SPE_OP_PKT_SVE_PRED)
> arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
> - } else {
> + } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
> arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER");
> + if (payload & SPE_OP_PKT_OTHER_ASE)
> + arm_spe_pkt_out_string(&err, &buf, &buf_len, " ASE");
> + if (payload & SPE_OP_PKT_OTHER_FP)
> + arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
> arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s",
> payload & SPE_OP_PKT_COND ?
> "COND-SELECT" : "INSN-OTHER");
A warning for unknown packet type would be useful here now that there is
no final else catch-all.
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
> index 48bd9e9ef132b11b79ffe2e2fbc7cfe4c340ff92..704601c6dbe30e93f83a82670d0d60344a22222a 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
> @@ -123,8 +123,12 @@ enum arm_spe_events {
> #define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1
> #define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2
>
> +#define SPE_OP_PKT_OTHER_SUBCLASS_OTHER(v) (((v) & GENMASK_ULL(7, 3)) == 0x0)
> #define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
>
> +#define SPE_OP_PKT_OTHER_ASE BIT(2)
> +#define SPE_OP_PKT_OTHER_FP BIT(1)
> +
> #define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0)
> #define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4)
> #define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x10)
>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 05/25] perf arm_spe: Decode ASE and FP fields in other operation
2025-10-09 9:04 ` James Clark
@ 2025-10-09 9:06 ` James Clark
0 siblings, 0 replies; 36+ messages in thread
From: James Clark @ 2025-10-09 9:06 UTC (permalink / raw)
To: Leo Yan
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter
On 09/10/2025 10:04 am, James Clark wrote:
>
>
> On 29/09/2025 5:37 pm, Leo Yan wrote:
>> Add a check for other operation, which prevents any incorrectly
>> classifying. Parse the ASE and FP fields.
>>
>> After:
>>
>> . 0000002f: 48 06 OTHER
>> ASE FP INSN-OTHER
>> . 00000031: b2 08 80 48 01 08 00 ff ff VA
>> 0xffff000801488008
>> . 0000003a: 9a 00 00 LAT 0
>> XLAT
>> . 0000003d: 42 16 EV
>> RETIRED L1D-ACCESS TLB-ACCESS
>>
>> Signed-off-by: Leo Yan <leo.yan@arm.com>
>> ---
>> tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 +++++-
>> tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 4 ++++
>> 2 files changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/
>> tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
>> index
>> 533920b738cbcb39136d1ba3d88e99f9d8009e74..21b65a9b40f481b6cb25aaf01ab627ade046ff72 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
>> @@ -351,8 +351,12 @@ static int arm_spe_pkt_desc_op_type(const struct
>> arm_spe_pkt *packet,
>> arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
>> if (payload & SPE_OP_PKT_SVE_PRED)
>> arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
>> - } else {
>> + } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
>> arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER");
>> + if (payload & SPE_OP_PKT_OTHER_ASE)
>> + arm_spe_pkt_out_string(&err, &buf, &buf_len, " ASE");
>> + if (payload & SPE_OP_PKT_OTHER_FP)
>> + arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
>> arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s",
>> payload & SPE_OP_PKT_COND ?
>> "COND-SELECT" : "INSN-OTHER");
>
> A warning for unknown packet type would be useful here now that there is
> no final else catch-all.
>
Although I see it's consistent with other cases now. Maybe it could be a
later fix to add unknown packet type warnings for all cases.
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/
>> tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
>> index
>> 48bd9e9ef132b11b79ffe2e2fbc7cfe4c340ff92..704601c6dbe30e93f83a82670d0d60344a22222a 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
>> @@ -123,8 +123,12 @@ enum arm_spe_events {
>> #define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1
>> #define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2
>> +#define SPE_OP_PKT_OTHER_SUBCLASS_OTHER(v) (((v) & GENMASK_ULL(7,
>> 3)) == 0x0)
>> #define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3)
>> | BIT(0))) == 0x8)
>> +#define SPE_OP_PKT_OTHER_ASE BIT(2)
>> +#define SPE_OP_PKT_OTHER_FP BIT(1)
>> +
>> #define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7,
>> 1)) == 0x0)
>> #define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) &
>> GENMASK_ULL(7, 1)) == 0x4)
>> #define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) &
>> GENMASK_ULL(7, 1)) == 0x10)
>>
>
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH 06/25] perf arm_spe: Decode SME data processing packet
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (4 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 05/25] perf arm_spe: Decode ASE and FP fields in other operation Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 07/25] perf arm_spe: Remove unused operation types Leo Yan
` (18 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
For SME data processing, decode its Effective vector length or Tile Size
(ETS), and print out whether it is a floating-point operation.
After:
. 00000000: 49 00 SME-OTHER ETS 1024 FP
. 00000002: b2 18 3c d7 83 00 80 ff ff VA 0xffff800083d73c18
. 0000000b: 9a 00 00 LAT 0 XLAT
. 0000000e: 43 00 DATA-SOURCE 0
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 9 +++++++++
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 11 +++++++++++
2 files changed, 20 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 21b65a9b40f481b6cb25aaf01ab627ade046ff72..5769ba2f414049161f271fd8b8f40c440d15a75a 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -351,6 +351,15 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
if (payload & SPE_OP_PKT_SVE_PRED)
arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) {
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, "SME-OTHER");
+
+ /* SME effective vector length or tile size */
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " ETS %d",
+ SPE_OP_PKG_SME_ETS(payload));
+
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
} else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER");
if (payload & SPE_OP_PKT_OTHER_ASE)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 704601c6dbe30e93f83a82670d0d60344a22222a..adf4cde320aad01265b5232b0d6ff6b1f752f35f 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -125,10 +125,21 @@ enum arm_spe_events {
#define SPE_OP_PKT_OTHER_SUBCLASS_OTHER(v) (((v) & GENMASK_ULL(7, 3)) == 0x0)
#define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+#define SPE_OP_PKT_OTHER_SUBCLASS_SME(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x88)
#define SPE_OP_PKT_OTHER_ASE BIT(2)
#define SPE_OP_PKT_OTHER_FP BIT(1)
+/*
+ * SME effective vector length or tile size (ETS) is stored in byte 0
+ * bits [6:4,2]; the length is rounded up to a power of two and use 128
+ * as one step, so ETS calculation is:
+ *
+ * 128 * (2 ^ bits [6:4,2]) = 128 << (bits [6:4,2])
+ */
+#define SPE_OP_PKG_SME_ETS(v) (128 << (FIELD_GET(GENMASK_ULL(6, 4), (v)) << 1 | \
+ (FIELD_GET(BIT(2), (v)))))
+
#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0)
#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4)
#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x10)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 07/25] perf arm_spe: Remove unused operation types
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (5 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 06/25] perf arm_spe: Decode SME data processing packet Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 08/25] perf arm_spe: Consolidate " Leo Yan
` (17 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Remove unused SVE operation types. These operations will be reintroduced
in subsequent refactoring, but with a different format.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 4 ----
1 file changed, 4 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index fbb57f8052371e51d562d9dd6098e97fc099461c..1259cbadfdc8098019afcd4cf65e733475310392 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -39,8 +39,6 @@ enum arm_spe_op_type {
/* Second level operation type for OTHER */
ARM_SPE_OP_SVE_OTHER = 1 << 16,
- ARM_SPE_OP_SVE_FP = 1 << 17,
- ARM_SPE_OP_SVE_PRED_OTHER = 1 << 18,
/* Second level operation type for LDST */
ARM_SPE_OP_LD = 1 << 16,
@@ -53,8 +51,6 @@ enum arm_spe_op_type {
ARM_SPE_OP_UNSPEC_REG = 1 << 23,
ARM_SPE_OP_NV_SYSREG = 1 << 24,
ARM_SPE_OP_SVE_LDST = 1 << 25,
- ARM_SPE_OP_SVE_PRED_LDST = 1 << 26,
- ARM_SPE_OP_SVE_SG = 1 << 27,
/* Second level operation type for BRANCH_ERET */
ARM_SPE_OP_BR_COND = 1 << 16,
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 08/25] perf arm_spe: Consolidate operation types
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (6 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 07/25] perf arm_spe: Remove unused operation types Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-10-09 9:18 ` James Clark
2025-09-29 16:37 ` [PATCH 09/25] perf arm_spe: Introduce data processing macro for SVE operations Leo Yan
` (16 subsequent siblings)
24 siblings, 1 reply; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Consolidate operation types as follows:
(a) The second-level types for memory and SIMD operations are classified
by modules. E.g., an operation may relate to general register,
SIMD/FP, SVE, etc.
(b) The associated information gives further details, e.g., whether an
operation is a load or store, whether it is an atomic operation, etc.
Start the enum items for the second-level types from 8 to accommodate
more entries within a 32-bit integer.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 40 +++++++++++------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 1259cbadfdc8098019afcd4cf65e733475310392..8156aa04f82e59ce345fb44223d3d22ecbc149a7 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -37,28 +37,28 @@ enum arm_spe_op_type {
ARM_SPE_OP_LDST = 1 << 1,
ARM_SPE_OP_BRANCH_ERET = 1 << 2,
- /* Second level operation type for OTHER */
- ARM_SPE_OP_SVE_OTHER = 1 << 16,
-
- /* Second level operation type for LDST */
- ARM_SPE_OP_LD = 1 << 16,
- ARM_SPE_OP_ST = 1 << 17,
- ARM_SPE_OP_ATOMIC = 1 << 18,
- ARM_SPE_OP_EXCL = 1 << 19,
- ARM_SPE_OP_AR = 1 << 20,
- ARM_SPE_OP_SIMD_FP = 1 << 21,
- ARM_SPE_OP_GP_REG = 1 << 22,
- ARM_SPE_OP_UNSPEC_REG = 1 << 23,
- ARM_SPE_OP_NV_SYSREG = 1 << 24,
- ARM_SPE_OP_SVE_LDST = 1 << 25,
+ /* Second level operation type for memory / SIMD */
+ ARM_SPE_OP_GP_REG = 1 << 8,
+ ARM_SPE_OP_UNSPEC_REG = 1 << 9,
+ ARM_SPE_OP_NV_SYSREG = 1 << 10,
+ ARM_SPE_OP_SIMD_FP = 1 << 11,
+ ARM_SPE_OP_SVE_OTHER = 1 << 12,
+ ARM_SPE_OP_SVE_LDST = 1 << 13,
+
+ /* Assisted information for memory / SIMD */
+ ARM_SPE_OP_LD = 1 << 20,
+ ARM_SPE_OP_ST = 1 << 21,
+ ARM_SPE_OP_ATOMIC = 1 << 22,
+ ARM_SPE_OP_EXCL = 1 << 23,
+ ARM_SPE_OP_AR = 1 << 24,
/* Second level operation type for BRANCH_ERET */
- ARM_SPE_OP_BR_COND = 1 << 16,
- ARM_SPE_OP_BR_INDIRECT = 1 << 17,
- ARM_SPE_OP_BR_GCS = 1 << 18,
- ARM_SPE_OP_BR_CR_BL = 1 << 19,
- ARM_SPE_OP_BR_CR_RET = 1 << 20,
- ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 21,
+ ARM_SPE_OP_BR_COND = 1 << 8,
+ ARM_SPE_OP_BR_INDIRECT = 1 << 9,
+ ARM_SPE_OP_BR_GCS = 1 << 10,
+ ARM_SPE_OP_BR_CR_BL = 1 << 11,
+ ARM_SPE_OP_BR_CR_RET = 1 << 12,
+ ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 13,
};
enum arm_spe_common_data_source {
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH 08/25] perf arm_spe: Consolidate operation types
2025-09-29 16:37 ` [PATCH 08/25] perf arm_spe: Consolidate " Leo Yan
@ 2025-10-09 9:18 ` James Clark
0 siblings, 0 replies; 36+ messages in thread
From: James Clark @ 2025-10-09 9:18 UTC (permalink / raw)
To: Leo Yan
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter
On 29/09/2025 5:37 pm, Leo Yan wrote:
> Consolidate operation types in a way:
>
> (a) The second-level types for memory and SIMD operations are classified
> by modules. E.g., an operation may relate to general register,
> SIMD/FP, SVE, etc.
>
> (b) The associated information tells details. E.g., an operation is
> load or store, whether it is atomic operation, etc.
>
> Start the enum items for the second-level types from 8 to accommodate
> more entries within a 32-bit integer.
>
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
> tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 40 +++++++++++------------
> 1 file changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 1259cbadfdc8098019afcd4cf65e733475310392..8156aa04f82e59ce345fb44223d3d22ecbc149a7 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -37,28 +37,28 @@ enum arm_spe_op_type {
> ARM_SPE_OP_LDST = 1 << 1,
> ARM_SPE_OP_BRANCH_ERET = 1 << 2,
>
> - /* Second level operation type for OTHER */
> - ARM_SPE_OP_SVE_OTHER = 1 << 16,
> -
> - /* Second level operation type for LDST */
> - ARM_SPE_OP_LD = 1 << 16,
> - ARM_SPE_OP_ST = 1 << 17,
> - ARM_SPE_OP_ATOMIC = 1 << 18,
> - ARM_SPE_OP_EXCL = 1 << 19,
> - ARM_SPE_OP_AR = 1 << 20,
> - ARM_SPE_OP_SIMD_FP = 1 << 21,
> - ARM_SPE_OP_GP_REG = 1 << 22,
> - ARM_SPE_OP_UNSPEC_REG = 1 << 23,
> - ARM_SPE_OP_NV_SYSREG = 1 << 24,
> - ARM_SPE_OP_SVE_LDST = 1 << 25,
> + /* Second level operation type for memory / SIMD */
> + ARM_SPE_OP_GP_REG = 1 << 8,
> + ARM_SPE_OP_UNSPEC_REG = 1 << 9,
> + ARM_SPE_OP_NV_SYSREG = 1 << 10,
> + ARM_SPE_OP_SIMD_FP = 1 << 11,
> + ARM_SPE_OP_SVE_OTHER = 1 << 12,
> + ARM_SPE_OP_SVE_LDST = 1 << 13,
> +
> + /* Assisted information for memory / SIMD */
> + ARM_SPE_OP_LD = 1 << 20,
> + ARM_SPE_OP_ST = 1 << 21,
> + ARM_SPE_OP_ATOMIC = 1 << 22,
> + ARM_SPE_OP_EXCL = 1 << 23,
> + ARM_SPE_OP_AR = 1 << 24,
>
> /* Second level operation type for BRANCH_ERET */
> - ARM_SPE_OP_BR_COND = 1 << 16,
> - ARM_SPE_OP_BR_INDIRECT = 1 << 17,
> - ARM_SPE_OP_BR_GCS = 1 << 18,
> - ARM_SPE_OP_BR_CR_BL = 1 << 19,
> - ARM_SPE_OP_BR_CR_RET = 1 << 20,
> - ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 21,
> + ARM_SPE_OP_BR_COND = 1 << 8,
I know it was already like this, but this should be multiple enums
stored in a union. Having an enum with duplicate values is a bit of an
abuse of the language. It also takes more effort to understand it well
enough to make future modifications carefully.
With multiple enums you don't need to rely on a comment to describe
them, because that info would be in the name, like "enum
arm_spe_2nd_op_ldst", "enum arm_spe_2nd_op_mem" etc.
> + ARM_SPE_OP_BR_INDIRECT = 1 << 9,
> + ARM_SPE_OP_BR_GCS = 1 << 10,
> + ARM_SPE_OP_BR_CR_BL = 1 << 11,
> + ARM_SPE_OP_BR_CR_RET = 1 << 12,
> + ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 13,
> };
>
> enum arm_spe_common_data_source {
>
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH 09/25] perf arm_spe: Introduce data processing macro for SVE operations
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (7 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 08/25] perf arm_spe: Consolidate " Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 10/25] perf arm_spe: Report register access in record Leo Yan
` (15 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Introduce the ARM_SPE_OP_DP (data processing) macro as associated
information for SVE operations. For SVE register access, only
ARM_SPE_OP_SVE is set; for SVE data processing, both ARM_SPE_OP_SVE and
ARM_SPE_OP_DP are set together.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 4 ++--
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 4 ++--
tools/perf/util/arm-spe.c | 5 +----
3 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 847c29385bea8618e14b2eb21a08896041890d89..6974f594f37c9916fff591ced1e9c2d60cf84f14 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -201,12 +201,12 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
else
decoder->record.op |= ARM_SPE_OP_LD;
if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload))
- decoder->record.op |= ARM_SPE_OP_SVE_LDST;
+ decoder->record.op |= ARM_SPE_OP_SVE;
break;
case SPE_OP_PKT_HDR_CLASS_OTHER:
decoder->record.op |= ARM_SPE_OP_OTHER;
if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload))
- decoder->record.op |= ARM_SPE_OP_SVE_OTHER;
+ decoder->record.op |= ARM_SPE_OP_SVE | ARM_SPE_OP_DP;
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
decoder->record.op |= ARM_SPE_OP_BRANCH_ERET;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 8156aa04f82e59ce345fb44223d3d22ecbc149a7..08ef83cb8bf00be75459c78b9ee8a6cbf1971986 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -42,8 +42,7 @@ enum arm_spe_op_type {
ARM_SPE_OP_UNSPEC_REG = 1 << 9,
ARM_SPE_OP_NV_SYSREG = 1 << 10,
ARM_SPE_OP_SIMD_FP = 1 << 11,
- ARM_SPE_OP_SVE_OTHER = 1 << 12,
- ARM_SPE_OP_SVE_LDST = 1 << 13,
+ ARM_SPE_OP_SVE = 1 << 12,
/* Assisted information for memory / SIMD */
ARM_SPE_OP_LD = 1 << 20,
@@ -51,6 +50,7 @@ enum arm_spe_op_type {
ARM_SPE_OP_ATOMIC = 1 << 22,
ARM_SPE_OP_EXCL = 1 << 23,
ARM_SPE_OP_AR = 1 << 24,
+ ARM_SPE_OP_DP = 1 << 25, /* Data processing */
/* Second level operation type for BRANCH_ERET */
ARM_SPE_OP_BR_COND = 1 << 8,
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 71be979f507718caadc091714c40bcee073c1d60..88f24a8626861393defc89540e4126a124479699 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -346,10 +346,7 @@ static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *
{
struct simd_flags simd_flags = {};
- if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
- simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
-
- if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
+ if (record->op & ARM_SPE_OP_SVE)
simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 10/25] perf arm_spe: Report register access in record
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (8 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 09/25] perf arm_spe: Introduce data processing macro for SVE operations Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 11/25] perf arm_spe: Report MTE allocation tag " Leo Yan
` (14 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Record register access info for load / store operations.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 6974f594f37c9916fff591ced1e9c2d60cf84f14..804dce129121b9d2600be01af7f1f2780a9d0fc9 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -200,8 +200,19 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_ST;
else
decoder->record.op |= ARM_SPE_OP_LD;
- if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload))
+
+ if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_GP_REG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) {
+ decoder->record.op |= ARM_SPE_OP_SIMD_FP;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_UNSPEC_REG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_NV_SYSREG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE;
+ }
+
break;
case SPE_OP_PKT_HDR_CLASS_OTHER:
decoder->record.op |= ARM_SPE_OP_OTHER;
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 11/25] perf arm_spe: Report MTE allocation tag in record
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (9 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 10/25] perf arm_spe: Report register access in record Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-10-09 9:32 ` James Clark
2025-09-29 16:37 ` [PATCH 12/25] perf arm_spe: Report extended memory operations in records Leo Yan
` (13 subsequent siblings)
24 siblings, 1 reply; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Save MTE tag info in memory record.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2 ++
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 3 ++-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 804dce129121b9d2600be01af7f1f2780a9d0fc9..6696448bdf4f347e2032a1b4da46fcdd4016f9fc 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -209,6 +209,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_UNSPEC_REG;
} else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) {
decoder->record.op |= ARM_SPE_OP_NV_SYSREG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) {
+ decoder->record.op |= ARM_SPE_OP_MTE_TAG;
} else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE;
}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 08ef83cb8bf00be75459c78b9ee8a6cbf1971986..9b7b6c42505f43c32a188b6e9769390f057adce8 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -42,7 +42,8 @@ enum arm_spe_op_type {
ARM_SPE_OP_UNSPEC_REG = 1 << 9,
ARM_SPE_OP_NV_SYSREG = 1 << 10,
ARM_SPE_OP_SIMD_FP = 1 << 11,
- ARM_SPE_OP_SVE = 1 << 12,
+ ARM_SPE_OP_MTE_TAG = 1 << 12,
+ ARM_SPE_OP_SVE = 1 << 13,
/* Assisted information for memory / SIMD */
ARM_SPE_OP_LD = 1 << 20,
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH 11/25] perf arm_spe: Report MTE allocation tag in record
2025-09-29 16:37 ` [PATCH 11/25] perf arm_spe: Report MTE allocation tag " Leo Yan
@ 2025-10-09 9:32 ` James Clark
0 siblings, 0 replies; 36+ messages in thread
From: James Clark @ 2025-10-09 9:32 UTC (permalink / raw)
To: Leo Yan
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter
On 29/09/2025 5:37 pm, Leo Yan wrote:
> Save MTE tag info in memory record.
>
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
> tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2 ++
> tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 3 ++-
> 2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> index 804dce129121b9d2600be01af7f1f2780a9d0fc9..6696448bdf4f347e2032a1b4da46fcdd4016f9fc 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> @@ -209,6 +209,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
> decoder->record.op |= ARM_SPE_OP_UNSPEC_REG;
> } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) {
> decoder->record.op |= ARM_SPE_OP_NV_SYSREG;
> + } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) {
> + decoder->record.op |= ARM_SPE_OP_MTE_TAG;
> } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
> decoder->record.op |= ARM_SPE_OP_SVE;
> }
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 08ef83cb8bf00be75459c78b9ee8a6cbf1971986..9b7b6c42505f43c32a188b6e9769390f057adce8 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -42,7 +42,8 @@ enum arm_spe_op_type {
> ARM_SPE_OP_UNSPEC_REG = 1 << 9,
> ARM_SPE_OP_NV_SYSREG = 1 << 10,
> ARM_SPE_OP_SIMD_FP = 1 << 11,
> - ARM_SPE_OP_SVE = 1 << 12,
> + ARM_SPE_OP_MTE_TAG = 1 << 12,
> + ARM_SPE_OP_SVE = 1 << 13,
Changing the bit position of ARM_SPE_OP_SVE looks like a mistake. Don't
we just need to add ARM_SPE_OP_MTE_TAG at bit 13?
>
> /* Assisted information for memory / SIMD */
> ARM_SPE_OP_LD = 1 << 20,
>
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH 12/25] perf arm_spe: Report extended memory operations in records
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (10 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 11/25] perf arm_spe: Report MTE allocation tag " Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 13/25] perf arm_spe: Report associated info for SVE / SME operations Leo Yan
` (12 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Extended memory operations include atomic (AT), acquire/release (AR),
and exclusive (EXCL) operations. Save the relevant information
in the records.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 6696448bdf4f347e2032a1b4da46fcdd4016f9fc..949c20816826a4d45d6cf9a5efaa125acea90b0a 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -211,6 +211,13 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_NV_SYSREG;
} else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) {
decoder->record.op |= ARM_SPE_OP_MTE_TAG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) {
+ if (payload & SPE_OP_PKT_AR)
+ decoder->record.op |= ARM_SPE_OP_AR;
+ if (payload & SPE_OP_PKT_EXCL)
+ decoder->record.op |= ARM_SPE_OP_EXCL;
+ if (payload & SPE_OP_PKT_AT)
+ decoder->record.op |= ARM_SPE_OP_ATOMIC;
} else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 13/25] perf arm_spe: Report associated info for SVE / SME operations
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (11 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 12/25] perf arm_spe: Report extended memory operations in records Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 14/25] perf arm_spe: Report memset and memcpy in records Leo Yan
` (11 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
SVE / SME operations can be predicated or gather load / scatter store;
save the relevant info into the record.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 4 ++++
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 2 ++
2 files changed, 6 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 949c20816826a4d45d6cf9a5efaa125acea90b0a..5b214fc4ca9f67cf11700a18939f37f4a5400b84 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -220,6 +220,10 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_ATOMIC;
} else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE;
+ if (payload & SPE_OP_PKT_SVE_PRED)
+ decoder->record.op |= ARM_SPE_OP_PRED;
+ if (payload & SPE_OP_PKT_SVE_SG)
+ decoder->record.op |= ARM_SPE_OP_SG;
}
break;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 9b7b6c42505f43c32a188b6e9769390f057adce8..87d45c11ad7483c167b4d9cb2e481a001f54e5cc 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -52,6 +52,8 @@ enum arm_spe_op_type {
ARM_SPE_OP_EXCL = 1 << 23,
ARM_SPE_OP_AR = 1 << 24,
ARM_SPE_OP_DP = 1 << 25, /* Data processing */
+ ARM_SPE_OP_PRED = 1 << 26, /* Predicated */
+ ARM_SPE_OP_SG = 1 << 27, /* Gather/Scatter */
/* Second level operation type for BRANCH_ERET */
ARM_SPE_OP_BR_COND = 1 << 8,
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 14/25] perf arm_spe: Report memset and memcpy in records
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (12 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 13/25] perf arm_spe: Report associated info for SVE / SME operations Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-10-09 9:33 ` James Clark
2025-09-29 16:37 ` [PATCH 15/25] perf arm_spe: Report GCS in record Leo Yan
` (10 subsequent siblings)
24 siblings, 1 reply; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Expose memset and memcpy related info in records.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 4 ++++
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 4 +++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 5b214fc4ca9f67cf11700a18939f37f4a5400b84..6f0390d4089599cd6bbf1357fa4cd6ec8190a58c 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -224,6 +224,10 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_PRED;
if (payload & SPE_OP_PKT_SVE_SG)
decoder->record.op |= ARM_SPE_OP_SG;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) {
+ decoder->record.op |= ARM_SPE_OP_MEMCPY;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) {
+ decoder->record.op |= ARM_SPE_OP_MEMSET;
}
break;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 87d45c11ad7483c167b4d9cb2e481a001f54e5cc..565458a7a6cbd94d67803cc122154cb5cab4f308 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -43,7 +43,9 @@ enum arm_spe_op_type {
ARM_SPE_OP_NV_SYSREG = 1 << 10,
ARM_SPE_OP_SIMD_FP = 1 << 11,
ARM_SPE_OP_MTE_TAG = 1 << 12,
- ARM_SPE_OP_SVE = 1 << 13,
+ ARM_SPE_OP_MEMCPY = 1 << 13,
+ ARM_SPE_OP_MEMSET = 1 << 14,
+ ARM_SPE_OP_SVE = 1 << 15,
/* Assisted information for memory / SIMD */
ARM_SPE_OP_LD = 1 << 20,
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH 14/25] perf arm_spe: Report memset and memcpy in records
2025-09-29 16:37 ` [PATCH 14/25] perf arm_spe: Report memset and memcpy in records Leo Yan
@ 2025-10-09 9:33 ` James Clark
0 siblings, 0 replies; 36+ messages in thread
From: James Clark @ 2025-10-09 9:33 UTC (permalink / raw)
To: Leo Yan
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter
On 29/09/2025 5:37 pm, Leo Yan wrote:
> Expose memset and memcpy related info in records.
>
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
> tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 4 ++++
> tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 4 +++-
> 2 files changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> index 5b214fc4ca9f67cf11700a18939f37f4a5400b84..6f0390d4089599cd6bbf1357fa4cd6ec8190a58c 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> @@ -224,6 +224,10 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
> decoder->record.op |= ARM_SPE_OP_PRED;
> if (payload & SPE_OP_PKT_SVE_SG)
> decoder->record.op |= ARM_SPE_OP_SG;
> + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) {
> + decoder->record.op |= ARM_SPE_OP_MEMCPY;
> + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) {
> + decoder->record.op |= ARM_SPE_OP_MEMSET;
> }
>
> break;
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 87d45c11ad7483c167b4d9cb2e481a001f54e5cc..565458a7a6cbd94d67803cc122154cb5cab4f308 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -43,7 +43,9 @@ enum arm_spe_op_type {
> ARM_SPE_OP_NV_SYSREG = 1 << 10,
> ARM_SPE_OP_SIMD_FP = 1 << 11,
> ARM_SPE_OP_MTE_TAG = 1 << 12,
> - ARM_SPE_OP_SVE = 1 << 13,
> + ARM_SPE_OP_MEMCPY = 1 << 13,
> + ARM_SPE_OP_MEMSET = 1 << 14,
> + ARM_SPE_OP_SVE = 1 << 15,
Same comment as patch 11
>
> /* Assisted information for memory / SIMD */
> ARM_SPE_OP_LD = 1 << 20,
>
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH 15/25] perf arm_spe: Report GCS in record
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (13 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 14/25] perf arm_spe: Report memset and memcpy in records Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 16/25] perf arm_spe: Expose SIMD information in other operations Leo Yan
` (9 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Report GCS related info in records.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 4 ++++
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 4 +++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 6f0390d4089599cd6bbf1357fa4cd6ec8190a58c..649471abef6a4386e1b250a19cda4f4caeb0a2ff 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -228,6 +228,10 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_MEMCPY;
} else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) {
decoder->record.op |= ARM_SPE_OP_MEMSET;
+ } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) {
+ decoder->record.op |= ARM_SPE_OP_GCS;
+ if (payload & SPE_OP_PKT_GCS_COMM)
+ decoder->record.op |= ARM_SPE_OP_COMM;
}
break;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 565458a7a6cbd94d67803cc122154cb5cab4f308..40d734f284eb8d742de9c53e55a9a541d6d462b2 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -45,7 +45,8 @@ enum arm_spe_op_type {
ARM_SPE_OP_MTE_TAG = 1 << 12,
ARM_SPE_OP_MEMCPY = 1 << 13,
ARM_SPE_OP_MEMSET = 1 << 14,
- ARM_SPE_OP_SVE = 1 << 15,
+ ARM_SPE_OP_GCS = 1 << 15,
+ ARM_SPE_OP_SVE = 1 << 16,
/* Assisted information for memory / SIMD */
ARM_SPE_OP_LD = 1 << 20,
@@ -56,6 +57,7 @@ enum arm_spe_op_type {
ARM_SPE_OP_DP = 1 << 25, /* Data processing */
ARM_SPE_OP_PRED = 1 << 26, /* Predicated */
ARM_SPE_OP_SG = 1 << 27, /* Gather/Scatter */
+ ARM_SPE_OP_COMM = 1 << 28, /* Common */
/* Second level operation type for BRANCH_ERET */
ARM_SPE_OP_BR_COND = 1 << 8,
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 16/25] perf arm_spe: Expose SIMD information in other operations
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (14 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 15/25] perf arm_spe: Report GCS in record Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 17/25] perf arm_spe: Expose length for SVE and SME operations Leo Yan
` (8 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
The other operations contain SME data processing, ASE (Advanced SIMD)
and floating-point operations. Expose this info in the records.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 18 +++++++++++++++++-
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 4 ++++
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 649471abef6a4386e1b250a19cda4f4caeb0a2ff..9e02b2bdd1177193996d071dd88f969e25b1ad86 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -237,8 +237,24 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
break;
case SPE_OP_PKT_HDR_CLASS_OTHER:
decoder->record.op |= ARM_SPE_OP_OTHER;
- if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload))
+ if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE | ARM_SPE_OP_DP;
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ decoder->record.op |= ARM_SPE_OP_FP;
+ if (payload & SPE_OP_PKT_SVE_PRED)
+ decoder->record.op |= ARM_SPE_OP_PRED;
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) {
+ decoder->record.op |= ARM_SPE_OP_SME;
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ decoder->record.op |= ARM_SPE_OP_FP;
+ } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
+ if (payload & SPE_OP_PKT_OTHER_ASE)
+ decoder->record.op |= ARM_SPE_OP_ASE;
+ if (payload & SPE_OP_PKT_OTHER_FP)
+ decoder->record.op |= ARM_SPE_OP_FP;
+ if (payload & SPE_OP_PKT_COND)
+ decoder->record.op |= ARM_SPE_OP_COND;
+ }
break;
case SPE_OP_PKT_HDR_CLASS_BR_ERET:
decoder->record.op |= ARM_SPE_OP_BRANCH_ERET;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 40d734f284eb8d742de9c53e55a9a541d6d462b2..d4574268cf793efe154f252b3e2af9c721bada97 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -47,6 +47,8 @@ enum arm_spe_op_type {
ARM_SPE_OP_MEMSET = 1 << 14,
ARM_SPE_OP_GCS = 1 << 15,
ARM_SPE_OP_SVE = 1 << 16,
+ ARM_SPE_OP_SME = 1 << 17,
+ ARM_SPE_OP_ASE = 1 << 18,
/* Assisted information for memory / SIMD */
ARM_SPE_OP_LD = 1 << 20,
@@ -58,6 +60,8 @@ enum arm_spe_op_type {
ARM_SPE_OP_PRED = 1 << 26, /* Predicated */
ARM_SPE_OP_SG = 1 << 27, /* Gather/Scatter */
ARM_SPE_OP_COMM = 1 << 28, /* Common */
+ ARM_SPE_OP_FP = 1 << 29, /* Floating-point */
+ ARM_SPE_OP_COND = 1 << 30, /* Conditional */
/* Second level operation type for BRANCH_ERET */
ARM_SPE_OP_BR_COND = 1 << 8,
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 17/25] perf arm_spe: Expose length for SVE and SME operations
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (15 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 16/25] perf arm_spe: Expose SIMD information in other operations Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-10-09 9:45 ` James Clark
2025-09-29 16:37 ` [PATCH 18/25] perf arm_spe: Synthesize memory samples for SIMD operations Leo Yan
` (7 subsequent siblings)
24 siblings, 1 reply; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Record length for SVE and SME operations into records.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 3 +++
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 1 +
2 files changed, 4 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 9e02b2bdd1177193996d071dd88f969e25b1ad86..82e3053131db62603553bd092388879ce9a9bcd4 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -220,6 +220,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_ATOMIC;
} else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE;
+ decoder->record.length = SPE_OP_PKG_SVE_EVL(payload);
if (payload & SPE_OP_PKT_SVE_PRED)
decoder->record.op |= ARM_SPE_OP_PRED;
if (payload & SPE_OP_PKT_SVE_SG)
@@ -239,12 +240,14 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.op |= ARM_SPE_OP_OTHER;
if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) {
decoder->record.op |= ARM_SPE_OP_SVE | ARM_SPE_OP_DP;
+ decoder->record.length = SPE_OP_PKG_SVE_EVL(payload);
if (payload & SPE_OP_PKT_OTHER_FP)
decoder->record.op |= ARM_SPE_OP_FP;
if (payload & SPE_OP_PKT_SVE_PRED)
decoder->record.op |= ARM_SPE_OP_PRED;
} else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) {
decoder->record.op |= ARM_SPE_OP_SME;
+ decoder->record.length = SPE_OP_PKG_SME_ETS(payload);
if (payload & SPE_OP_PKT_OTHER_FP)
decoder->record.op |= ARM_SPE_OP_FP;
} else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index d4574268cf793efe154f252b3e2af9c721bada97..b9288cd774bea2534d9991213c942dcbaf2a7232 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -122,6 +122,7 @@ struct arm_spe_record {
u64 phys_addr;
u64 context_id;
u16 source;
+ u32 length;
};
struct arm_spe_insn;
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH 17/25] perf arm_spe: Expose length for SVE and SME operations
2025-09-29 16:37 ` [PATCH 17/25] perf arm_spe: Expose length for SVE and SME operations Leo Yan
@ 2025-10-09 9:45 ` James Clark
0 siblings, 0 replies; 36+ messages in thread
From: James Clark @ 2025-10-09 9:45 UTC (permalink / raw)
To: Leo Yan
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter
On 29/09/2025 5:37 pm, Leo Yan wrote:
> Record length for SVE and SME operations into records.
>
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
> tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 3 +++
> tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 1 +
> 2 files changed, 4 insertions(+)
>
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> index 9e02b2bdd1177193996d071dd88f969e25b1ad86..82e3053131db62603553bd092388879ce9a9bcd4 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
> @@ -220,6 +220,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
> decoder->record.op |= ARM_SPE_OP_ATOMIC;
> } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) {
> decoder->record.op |= ARM_SPE_OP_SVE;
> + decoder->record.length = SPE_OP_PKG_SVE_EVL(payload);
> if (payload & SPE_OP_PKT_SVE_PRED)
> decoder->record.op |= ARM_SPE_OP_PRED;
> if (payload & SPE_OP_PKT_SVE_SG)
> @@ -239,12 +240,14 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
> decoder->record.op |= ARM_SPE_OP_OTHER;
> if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) {
> decoder->record.op |= ARM_SPE_OP_SVE | ARM_SPE_OP_DP;
> + decoder->record.length = SPE_OP_PKG_SVE_EVL(payload);
> if (payload & SPE_OP_PKT_OTHER_FP)
> decoder->record.op |= ARM_SPE_OP_FP;
> if (payload & SPE_OP_PKT_SVE_PRED)
> decoder->record.op |= ARM_SPE_OP_PRED;
> } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) {
> decoder->record.op |= ARM_SPE_OP_SME;
> + decoder->record.length = SPE_OP_PKG_SME_ETS(payload);
These are all assigned but never read. I'm not sure if that's a mistake
or they're just there for completeness. I don't think there's much point
in going for completeness unless it actually makes it all the way to the
user though. It would be better to leave it out and finish it another time.
> if (payload & SPE_OP_PKT_OTHER_FP)
> decoder->record.op |= ARM_SPE_OP_FP;
> } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) {
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index d4574268cf793efe154f252b3e2af9c721bada97..b9288cd774bea2534d9991213c942dcbaf2a7232 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -122,6 +122,7 @@ struct arm_spe_record {
> u64 phys_addr;
> u64 context_id;
> u16 source;
> + u32 length;
> };
>
> struct arm_spe_insn;
>
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH 18/25] perf arm_spe: Synthesize memory samples for SIMD operations
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (16 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 17/25] perf arm_spe: Expose length for SVE and SME operations Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 19/25] perf/uapi: Extend data source fields Leo Yan
` (6 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Synthesize memory samples for SIMD operations (including Advanced SIMD,
SVE, and SME). To provide complete information, also generate data
source entries for SIMD operations.
Since memory operations are not limited to load and store, set
PERF_MEM_OP_NA if the operation does not fall into these cases.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 88f24a8626861393defc89540e4126a124479699..bc233a5007d20e4dec11eeb1554adc1580f43718 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -39,6 +39,11 @@
#define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST))
+#define is_simd_op(op) (!!((op) & (ARM_SPE_OP_SIMD_FP | ARM_SPE_OP_SVE | \
+ ARM_SPE_OP_SME | ARM_SPE_OP_ASE)))
+
+#define is_mem_op(op) (is_ldst_op(op) || is_simd_op(op))
+
#define ARM_SPE_CACHE_EVENT(lvl) \
(ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS)
@@ -985,8 +990,7 @@ arm_spe__synth_data_source(struct arm_spe_queue *speq,
{
union perf_mem_data_src data_src = {};
- /* Only synthesize data source for LDST operations */
- if (!is_ldst_op(record->op))
+ if (!is_mem_op(record->op))
return data_src;
if (record->op & ARM_SPE_OP_LD)
@@ -994,7 +998,7 @@ arm_spe__synth_data_source(struct arm_spe_queue *speq,
else if (record->op & ARM_SPE_OP_ST)
data_src.mem_op = PERF_MEM_OP_STORE;
else
- return data_src;
+ data_src.mem_op = PERF_MEM_OP_NA;
arm_spe__synth_ds(speq, record, &data_src);
arm_spe__synth_memory_level(speq, record, &data_src);
@@ -1095,11 +1099,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
return err;
}
- /*
- * When data_src is zero it means the record is not a memory operation,
- * skip to synthesize memory sample for this case.
- */
- if (spe->sample_memory && is_ldst_op(record->op)) {
+ if (spe->sample_memory && is_mem_op(record->op)) {
err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
if (err)
return err;
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 19/25] perf/uapi: Extend data source fields
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (17 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 18/25] perf arm_spe: Synthesize memory samples for SIMD operations Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-10-09 10:00 ` James Clark
2025-09-29 16:37 ` [PATCH 20/25] perf mem: Print extended fields Leo Yan
` (5 subsequent siblings)
24 siblings, 1 reply; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Arm CPUs introduce several new types of memory operations, like MTE tag
accessing, system register access for nested virtualization, memcpy &
memset, and Guarded Control Stack (GCS).
For memory operation details, Arm SPE provides information like data
(parallel) processing, floating-point, predicated, atomic, exclusive,
acquire/release, gather/scatter, and conditional.
This commit introduces a field 'mem_op_ext' for extended operation type.
The extended operation type can be combined with the existing operation
type to express a memory type. For example, PERF_MEM_EXT_OP_GCS can be
set along with PERF_MEM_OP_LOAD to represent a load operation for a
GCS register access.
Also use a field 'mem_aff' to store affiliate information.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
include/uapi/linux/perf_event.h | 28 ++++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 78a362b8002776e5ce83a0d7816601638c61ecc6..51ab37d44ac31fcdc4bc919c14d5f97e560d9339 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1309,14 +1309,18 @@ union perf_mem_data_src {
mem_snoopx : 2, /* Snoop mode, ext */
mem_blk : 3, /* Access blocked */
mem_hops : 3, /* Hop level */
- mem_rsvd : 18;
+ mem_op_ext : 6, /* Extended type of opcode */
+ mem_aff : 8, /* Affiliate info */
+ mem_rsvd : 4;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd : 18,
+ __u64 mem_rsvd : 4,
+ mem_aff : 8, /* Affiliate info */
+ mem_op_ext : 6, /* Extended type of opcode */
mem_hops : 3, /* Hop level */
mem_blk : 3, /* Access blocked */
mem_snoopx : 2, /* Snoop mode, ext */
@@ -1426,6 +1430,26 @@ union perf_mem_data_src {
/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
+/* Extended type of memory opcode: */
+#define PERF_MEM_EXT_OP_MTE_TAG 0x0001 /* MTE tag */
+#define PERF_MEM_EXT_OP_NESTED_VIRT 0x0002 /* Nested virtualization */
+#define PERF_MEM_EXT_OP_MEMCPY 0x0004 /* Memory copy */
+#define PERF_MEM_EXT_OP_MEMSET 0x0008 /* Memory set */
+#define PERF_MEM_EXT_OP_SIMD 0x0010 /* SIMD */
+#define PERF_MEM_EXT_OP_GCS 0x0020 /* Guarded Control Stack */
+#define PERF_MEM_EXT_OP_SHIFT 46
+
+/* Affiliate info */
+#define PERF_MEM_AFF_DP 0x0001 /* Data processing */
+#define PERF_MEM_AFF_FP 0x0002 /* Floating-point */
+#define PERF_MEM_AFF_PRED 0x0004 /* Predicated */
+#define PERF_MEM_AFF_ATOMIC 0x0008 /* Atomic */
+#define PERF_MEM_AFF_EXCLUSIVE 0x0010 /* Exclusive */
+#define PERF_MEM_AFF_AR 0x0020 /* Acquire/release */
+#define PERF_MEM_AFF_SG 0x0040 /* Gather/Scatter */
+#define PERF_MEM_AFF_CONDITIONAL 0x0080 /* Conditional */
+#define PERF_MEM_AFF_SHIFT 52
+
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH 19/25] perf/uapi: Extend data source fields
2025-09-29 16:37 ` [PATCH 19/25] perf/uapi: Extend data source fields Leo Yan
@ 2025-10-09 10:00 ` James Clark
0 siblings, 0 replies; 36+ messages in thread
From: James Clark @ 2025-10-09 10:00 UTC (permalink / raw)
To: Leo Yan
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter
On 29/09/2025 5:37 pm, Leo Yan wrote:
> Arm CPUs introduce several new types of memory operations, like MTE tag
> accessing, system register access for nested virtualization, memcpy &
> memset, and Guarded Control Stack (GCS).
>
> For memory operation details, Arm SPE provides information like data
> (parallel) processing, floating-point, predicated, atomic, exclusive,
> acquire/release, gather/scatter, and conditional.
>
> This commit introduces a field 'mem_op_ext' for extended operation type.
> The extended operation type can be combined with the existed operation
> type to express a memory type, for examples, a PERF_MEM_OP_GCS type can
> be set along with PERF_MEM_OP_LOAD to present a load operation for
> GCS register access.
>
> Also use a field 'mem_aff' to store affiliate information.
>
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
> include/uapi/linux/perf_event.h | 28 ++++++++++++++++++++++++++--
> 1 file changed, 26 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 78a362b8002776e5ce83a0d7816601638c61ecc6..51ab37d44ac31fcdc4bc919c14d5f97e560d9339 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -1309,14 +1309,18 @@ union perf_mem_data_src {
> mem_snoopx : 2, /* Snoop mode, ext */
> mem_blk : 3, /* Access blocked */
> mem_hops : 3, /* Hop level */
> - mem_rsvd : 18;
> + mem_op_ext : 6, /* Extended type of opcode */
Why have a 6 bit field containing 1 bit per thing when you can have 6
named 1 bit fields? That way you don't have to do a load of bitwise
magic to access it and you don't need separate #defines.
Also, when you set these, you never set more than one bit so they're
exclusive. Would a 3 bit enum be better than a 6 bit bitfield in this case?
> + mem_aff : 8, /* Affiliate info */
> + mem_rsvd : 4;
> };
> };
> #elif defined(__BIG_ENDIAN_BITFIELD)
> union perf_mem_data_src {
> __u64 val;
> struct {
> - __u64 mem_rsvd : 18,
> + __u64 mem_rsvd : 4,
> + mem_aff : 8, /* Affiliate info */
> + mem_op_ext : 6, /* Extended type of opcode */
> mem_hops : 3, /* Hop level */
> mem_blk : 3, /* Access blocked */
> mem_snoopx : 2, /* Snoop mode, ext */
> @@ -1426,6 +1430,26 @@ union perf_mem_data_src {
> /* 5-7 available */
> #define PERF_MEM_HOPS_SHIFT 43
>
> +/* Extended type of memory opcode: */
> +#define PERF_MEM_EXT_OP_MTE_TAG 0x0001 /* MTE tag */
> +#define PERF_MEM_EXT_OP_NESTED_VIRT 0x0002 /* Nested virtualization */
> +#define PERF_MEM_EXT_OP_MEMCPY 0x0004 /* Memory copy */
> +#define PERF_MEM_EXT_OP_MEMSET 0x0008 /* Memory set */
> +#define PERF_MEM_EXT_OP_SIMD 0x0010 /* SIMD */
> +#define PERF_MEM_EXT_OP_GCS 0x0020 /* Guarded Control Stack */
> +#define PERF_MEM_EXT_OP_SHIFT 46
> +
> +/* Affiliate info */
"Affiliate info" doesn't really describe what these are supposed to be,
or why they are separate. Is it implying that they're always set in
addition to another flag? Like "details" or "category"?
Either way, I feel that limitation might be a bit strict for the generic
uapi, or it needs to be described in more detail. If we change this
field to be individual bits like the other one, then maybe we can drop
that it's a separate group and it's just a bunch of bits you can set
however you like.
> +#define PERF_MEM_AFF_DP 0x0001 /* Data processing */
> +#define PERF_MEM_AFF_FP 0x0002 /* Floating-point */
> +#define PERF_MEM_AFF_PRED 0x0004 /* Predicated */
> +#define PERF_MEM_AFF_ATOMIC 0x0008 /* Atomic */
> +#define PERF_MEM_AFF_EXCLUSIVE 0x0010 /* Exclusive */
> +#define PERF_MEM_AFF_AR 0x0020 /* Acquire/release */
> +#define PERF_MEM_AFF_SG 0x0040 /* Gather/Scatter */
> +#define PERF_MEM_AFF_CONDITIONAL 0x0080 /* Conditional */
> +#define PERF_MEM_AFF_SHIFT 52
> +
> #define PERF_MEM_S(a, s) \
> (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
>
>
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH 20/25] perf mem: Print extended fields
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (18 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 19/25] perf/uapi: Extend data source fields Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-10-09 10:02 ` James Clark
2025-09-29 16:37 ` [PATCH 21/25] perf arm_spe: Set extended fields in data source Leo Yan
` (4 subsequent siblings)
24 siblings, 1 reply; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Print the extended operation types and affiliate info.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/include/uapi/linux/perf_event.h | 28 +++++++++++++++--
tools/perf/util/mem-events.c | 58 +++++++++++++++++++++++++++++++++--
2 files changed, 82 insertions(+), 4 deletions(-)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 78a362b8002776e5ce83a0d7816601638c61ecc6..51ab37d44ac31fcdc4bc919c14d5f97e560d9339 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1309,14 +1309,18 @@ union perf_mem_data_src {
mem_snoopx : 2, /* Snoop mode, ext */
mem_blk : 3, /* Access blocked */
mem_hops : 3, /* Hop level */
- mem_rsvd : 18;
+ mem_op_ext : 6, /* Extended type of opcode */
+ mem_aff : 8, /* Affiliate info */
+ mem_rsvd : 4;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd : 18,
+ __u64 mem_rsvd : 4,
+ mem_aff : 8, /* Affiliate info */
+ mem_op_ext : 6, /* Extended type of opcode */
mem_hops : 3, /* Hop level */
mem_blk : 3, /* Access blocked */
mem_snoopx : 2, /* Snoop mode, ext */
@@ -1426,6 +1430,26 @@ union perf_mem_data_src {
/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
+/* Extended type of memory opcode: */
+#define PERF_MEM_EXT_OP_MTE_TAG 0x0001 /* MTE tag */
+#define PERF_MEM_EXT_OP_NESTED_VIRT 0x0002 /* Nested virtualization */
+#define PERF_MEM_EXT_OP_MEMCPY 0x0004 /* Memory copy */
+#define PERF_MEM_EXT_OP_MEMSET 0x0008 /* Memory set */
+#define PERF_MEM_EXT_OP_SIMD 0x0010 /* SIMD */
+#define PERF_MEM_EXT_OP_GCS 0x0020 /* Guarded Control Stack */
+#define PERF_MEM_EXT_OP_SHIFT 46
+
+/* Affiliate info */
+#define PERF_MEM_AFF_DP 0x0001 /* Data processing */
+#define PERF_MEM_AFF_FP 0x0002 /* Floating-point */
+#define PERF_MEM_AFF_PRED 0x0004 /* Predicated */
+#define PERF_MEM_AFF_ATOMIC 0x0008 /* Atomic */
+#define PERF_MEM_AFF_EXCLUSIVE 0x0010 /* Exclusive */
+#define PERF_MEM_AFF_AR 0x0020 /* Acquire/release */
+#define PERF_MEM_AFF_SG 0x0040 /* Gather/Scatter */
+#define PERF_MEM_AFF_CONDITIONAL 0x0080 /* Conditional */
+#define PERF_MEM_AFF_SHIFT 52
+
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 80b3069427bc4bb5ffc3ab0856c01c76d9ba3ba6..2d052abfa39d841f75b4b16143641841d8577d0c 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -413,11 +413,13 @@ static const char * const mem_hops[] = {
static int perf_mem__op_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
- u64 op = PERF_MEM_LOCK_NA;
+ u64 op = PERF_MEM_OP_NA, ext_op = 0;
int l;
- if (mem_info)
+ if (mem_info) {
op = mem_info__const_data_src(mem_info)->mem_op;
+ ext_op = mem_info__const_data_src(mem_info)->mem_op_ext;
+ }
if (op & PERF_MEM_OP_NA)
l = scnprintf(out, sz, "N/A");
@@ -432,6 +434,19 @@ static int perf_mem__op_scnprintf(char *out, size_t sz, const struct mem_info *m
else
l = scnprintf(out, sz, "No");
+ if (ext_op & PERF_MEM_EXT_OP_MTE_TAG)
+ l += scnprintf(out + l, sz - l, " MTE");
+ else if (ext_op & PERF_MEM_EXT_OP_NESTED_VIRT)
+ l += scnprintf(out + l, sz - l, " NV");
+ else if (ext_op & PERF_MEM_EXT_OP_MEMCPY)
+ l += scnprintf(out + l, sz - l, " MEMCPY");
+ else if (ext_op & PERF_MEM_EXT_OP_MEMSET)
+ l += scnprintf(out + l, sz - l, " MEMSET");
+ else if (ext_op & PERF_MEM_EXT_OP_SIMD)
+ l += scnprintf(out + l, sz - l, " SIMD");
+ else if (ext_op & PERF_MEM_EXT_OP_GCS)
+ l += scnprintf(out + l, sz - l, " GCS");
+
return l;
}
@@ -600,6 +615,43 @@ int perf_mem__blk_scnprintf(char *out, size_t sz, const struct mem_info *mem_inf
return l;
}
+static int perf_mem__aff_scnprintf(char *out, size_t sz,
+ const struct mem_info *mem_info)
+{
+ size_t l = 0;
+ u64 mask = 0;
+
+ sz -= 1; /* -1 for null termination */
+ out[0] = '\0';
+
+ if (mem_info)
+ mask = mem_info__const_data_src(mem_info)->mem_aff;
+
+ if (!mask) {
+ l += scnprintf(out + l, sz - l, " N/A");
+ return l;
+ }
+
+ if (mask & PERF_MEM_AFF_DP)
+ l += scnprintf(out + l, sz - l, " DP");
+ if (mask & PERF_MEM_AFF_FP)
+ l += scnprintf(out + l, sz - l, " FP");
+ if (mask & PERF_MEM_AFF_PRED)
+ l += scnprintf(out + l, sz - l, " PRED");
+ if (mask & PERF_MEM_AFF_ATOMIC)
+ l += scnprintf(out + l, sz - l, " ATOMIC");
+ if (mask & PERF_MEM_AFF_EXCLUSIVE)
+ l += scnprintf(out + l, sz - l, " EX");
+ if (mask & PERF_MEM_AFF_AR)
+ l += scnprintf(out + l, sz - l, " AR");
+ if (mask & PERF_MEM_AFF_SG)
+ l += scnprintf(out + l, sz - l, " SG");
+ if (mask & PERF_MEM_AFF_CONDITIONAL)
+ l += scnprintf(out + l, sz - l, " COND");
+
+ return l;
+}
+
int perf_script__meminfo_scnprintf(char *out, size_t sz, const struct mem_info *mem_info)
{
int i = 0;
@@ -616,6 +668,8 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, const struct mem_info *
i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
i += scnprintf(out + i, sz - i, "|BLK ");
i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);
+ i += scnprintf(out + i, sz - i, "|AFF ");
+ i += perf_mem__aff_scnprintf(out + i, sz - i, mem_info);
return i;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH 20/25] perf mem: Print extended fields
2025-09-29 16:37 ` [PATCH 20/25] perf mem: Print extended fields Leo Yan
@ 2025-10-09 10:02 ` James Clark
2025-10-09 12:49 ` Arnaldo Carvalho de Melo
0 siblings, 1 reply; 36+ messages in thread
From: James Clark @ 2025-10-09 10:02 UTC (permalink / raw)
To: Leo Yan
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter
On 29/09/2025 5:37 pm, Leo Yan wrote:
> Print the extended operation types and affiliate info.
>
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
> tools/include/uapi/linux/perf_event.h | 28 +++++++++++++++--
> tools/perf/util/mem-events.c | 58 +++++++++++++++++++++++++++++++++--
Minor nit: I would separate the commit to update the header and the
tools change. That way you can describe that it's a straight header
copy, rather than something else.
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 20/25] perf mem: Print extended fields
2025-10-09 10:02 ` James Clark
@ 2025-10-09 12:49 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 36+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-10-09 12:49 UTC (permalink / raw)
To: James Clark
Cc: Leo Yan, Arnaldo Carvalho de Melo, linux-perf-users,
linux-arm-kernel, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter
On Thu, Oct 09, 2025 at 11:02:35AM +0100, James Clark wrote:
> On 29/09/2025 5:37 pm, Leo Yan wrote:
> > Print the extended operation types and affiliate info.
> >
> > Signed-off-by: Leo Yan <leo.yan@arm.com>
> > ---
> > tools/include/uapi/linux/perf_event.h | 28 +++++++++++++++--
> > tools/perf/util/mem-events.c | 58 +++++++++++++++++++++++++++++++++--
>
> Minor nit: I would separate the commit to update the header and the tools
> change. That way you can describe that it's a straight header copy, rather
> than something else.
Agreed.
- Arnaldo
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH 21/25] perf arm_spe: Set extended fields in data source
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (19 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 20/25] perf mem: Print extended fields Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:37 ` [PATCH 22/25] perf sort: Support sort ASE and SME Leo Yan
` (3 subsequent siblings)
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Set extended operation type and affiliate info in the data source.
Before:
perf script -F,dso,sym,data_src | grep SIMD
sve-test 6516696.714341: 288100144 |OP STORE|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF N/A
sve-test 6516696.714341: 288100144 |OP STORE|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF N/A
sve-test 6516696.714341: 288100144 |OP STORE|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF N/A
sve-test 6516696.714344: 288800142 |OP LOAD|LVL L1 hit|SNP HitM|TLB Walker hit|LCK No|BLK N/A|AFF N/A
sve-test 6516696.714344: 288800142 |OP LOAD|LVL L1 hit|SNP HitM|TLB Walker hit|LCK No|BLK N/A|AFF N/A
After:
perf script -F,dso,sym,data_src | grep SIMD
sve-test 6516696.714341: 444000288100144 |OP STORE SIMD|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF PRED SG
sve-test 6516696.714341: 444000288100144 |OP STORE SIMD|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF PRED SG
sve-test 6516696.714341: 444000288100144 |OP STORE SIMD|LVL L1 hit|SNP None|TLB Walker hit|LCK No|BLK N/A|AFF PRED SG
sve-test 6516696.714344: 288800142 |OP LOAD|LVL L1 hit|SNP HitM|TLB Walker hit|LCK No|BLK N/A|AFF N/A
sve-test 6516696.714344: 288800142 |OP LOAD|LVL L1 hit|SNP HitM|TLB Walker hit|LCK No|BLK N/A|AFF N/A
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index bc233a5007d20e4dec11eeb1554adc1580f43718..5e920fe77ccda6f398cab13fa1e4269ddab12fd9 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -1000,6 +1000,36 @@ arm_spe__synth_data_source(struct arm_spe_queue *speq,
else
data_src.mem_op = PERF_MEM_OP_NA;
+ if (record->op & ARM_SPE_OP_MTE_TAG)
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_MTE_TAG;
+ else if (record->op & ARM_SPE_OP_NV_SYSREG)
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_NESTED_VIRT;
+ else if (record->op & ARM_SPE_OP_MEMCPY)
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_MEMCPY;
+ else if (record->op & ARM_SPE_OP_MEMSET)
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_MEMSET;
+ else if (record->op & ARM_SPE_OP_GCS)
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_GCS;
+ else if (is_simd_op(record->op))
+ data_src.mem_op_ext = PERF_MEM_EXT_OP_SIMD;
+
+ if (record->op & ARM_SPE_OP_DP)
+ data_src.mem_aff |= PERF_MEM_AFF_DP;
+ if (record->op & ARM_SPE_OP_FP)
+ data_src.mem_aff |= PERF_MEM_AFF_FP;
+ if (record->op & ARM_SPE_OP_PRED)
+ data_src.mem_aff |= PERF_MEM_AFF_PRED;
+ if (record->op & ARM_SPE_OP_ATOMIC)
+ data_src.mem_aff |= PERF_MEM_AFF_ATOMIC;
+ if (record->op & ARM_SPE_OP_EXCL)
+ data_src.mem_aff |= PERF_MEM_AFF_EXCLUSIVE;
+ if (record->op & ARM_SPE_OP_AR)
+ data_src.mem_aff |= PERF_MEM_AFF_AR;
+ if (record->op & ARM_SPE_OP_SG)
+ data_src.mem_aff |= PERF_MEM_AFF_SG;
+ if (record->op & ARM_SPE_OP_COND)
+ data_src.mem_aff |= PERF_MEM_AFF_CONDITIONAL;
+
arm_spe__synth_ds(speq, record, &data_src);
arm_spe__synth_memory_level(speq, record, &data_src);
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 22/25] perf sort: Support sort ASE and SME
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (20 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 21/25] perf arm_spe: Set extended fields in data source Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-10-09 10:05 ` James Clark
2025-09-29 16:37 ` [PATCH 23/25] perf sort: Sort disabled and full predicated flags Leo Yan
` (2 subsequent siblings)
24 siblings, 1 reply; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Support sorting by the Advanced SIMD extension (ASE) and SME.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/sample.h | 9 ++++++---
tools/perf/util/sort.c | 4 ++++
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index fae834144ef42105d08a59704ee75cd4852bbc5a..405876885e1f273c039cb67187c6e5c39c91a612 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -67,12 +67,15 @@ struct aux_sample {
};
struct simd_flags {
- u8 arch:1, /* architecture (isa) */
- pred:2; /* predication */
+ u8 arch: 3, /* architecture (isa) */
+ pred: 2, /* predication */
+ resv: 3; /* reserved */
};
/* simd architecture flags */
-#define SIMD_OP_FLAGS_ARCH_SVE 0x01 /* ARM SVE */
+#define SIMD_OP_FLAGS_ARCH_SVE 0x01 /* Arm SVE */
+#define SIMD_OP_FLAGS_ARCH_SME 0x02 /* Arm SME */
+#define SIMD_OP_FLAGS_ARCH_ASE 0x04 /* Arm Advanced SIMD */
/* simd predicate flags */
#define SIMD_OP_FLAGS_PRED_PARTIAL 0x01 /* partial predicate */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index f3a565b0e2307a8adf159725f803df5fef0dff83..c7596e9ca089c170988dbc43a00f6973039261cd 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -195,6 +195,10 @@ static const char *hist_entry__get_simd_name(struct simd_flags *simd_flags)
if (arch & SIMD_OP_FLAGS_ARCH_SVE)
return "SVE";
+ if (arch & SIMD_OP_FLAGS_ARCH_SME)
+ return "SME";
+ if (arch & SIMD_OP_FLAGS_ARCH_ASE)
+ return "ASE";
else
return "n/a";
}
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH 22/25] perf sort: Support sort ASE and SME
2025-09-29 16:37 ` [PATCH 22/25] perf sort: Support sort ASE and SME Leo Yan
@ 2025-10-09 10:05 ` James Clark
0 siblings, 0 replies; 36+ messages in thread
From: James Clark @ 2025-10-09 10:05 UTC (permalink / raw)
To: Leo Yan
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter
On 29/09/2025 5:37 pm, Leo Yan wrote:
> Support sort Advance SIMD extension (ASE) and SME.
>
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
> tools/perf/util/sample.h | 9 ++++++---
> tools/perf/util/sort.c | 4 ++++
> 2 files changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
> index fae834144ef42105d08a59704ee75cd4852bbc5a..405876885e1f273c039cb67187c6e5c39c91a612 100644
> --- a/tools/perf/util/sample.h
> +++ b/tools/perf/util/sample.h
> @@ -67,12 +67,15 @@ struct aux_sample {
> };
>
> struct simd_flags {
> - u8 arch:1, /* architecture (isa) */
> - pred:2; /* predication */
> + u8 arch: 3, /* architecture (isa) */
> + pred: 2, /* predication */
> + resv: 3; /* reserved */
> };
>
> /* simd architecture flags */
> -#define SIMD_OP_FLAGS_ARCH_SVE 0x01 /* ARM SVE */
> +#define SIMD_OP_FLAGS_ARCH_SVE 0x01 /* Arm SVE */
> +#define SIMD_OP_FLAGS_ARCH_SME 0x02 /* Arm SME */
> +#define SIMD_OP_FLAGS_ARCH_ASE 0x04 /* Arm Advanced SIMD */
Same comment as the UAPI change. These are exclusive so it can be an
enum rather than separate bits.
>
> /* simd predicate flags */
> #define SIMD_OP_FLAGS_PRED_PARTIAL 0x01 /* partial predicate */
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index f3a565b0e2307a8adf159725f803df5fef0dff83..c7596e9ca089c170988dbc43a00f6973039261cd 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -195,6 +195,10 @@ static const char *hist_entry__get_simd_name(struct simd_flags *simd_flags)
>
> if (arch & SIMD_OP_FLAGS_ARCH_SVE)
> return "SVE";
> + if (arch & SIMD_OP_FLAGS_ARCH_SME)
> + return "SME";
> + if (arch & SIMD_OP_FLAGS_ARCH_ASE)
> + return "ASE";
> else
> return "n/a";
> }
>
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH 23/25] perf sort: Sort disabled and full predicated flags
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (21 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 22/25] perf sort: Support sort ASE and SME Leo Yan
@ 2025-09-29 16:37 ` Leo Yan
2025-09-29 16:38 ` [PATCH 24/25] perf report: Update document for SIMD flags Leo Yan
2025-09-29 16:38 ` [PATCH 25/25] perf arm_spe: Improve SIMD flags setting Leo Yan
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
According to the Arm ARM (ARM DDI 0487, L.a), section D18.2.6
"Events packet", apart from empty and partial predicates, an SVE
or SME operation can also be predicate-disabled or fully
predicated.
To provide reliable results, introduce two predicate types for
these cases.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/sample.h | 6 ++++--
tools/perf/util/sort.c | 11 ++++++++---
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 405876885e1f273c039cb67187c6e5c39c91a612..d38810021fb9d6cb29f02267ae2a223cbe55839d 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -68,8 +68,8 @@ struct aux_sample {
struct simd_flags {
u8 arch: 3, /* architecture (isa) */
- pred: 2, /* predication */
- resv: 3; /* reserved */
+ pred: 4, /* predication */
+ resv: 1; /* reserved */
};
/* simd architecture flags */
@@ -80,6 +80,8 @@ struct simd_flags {
/* simd predicate flags */
#define SIMD_OP_FLAGS_PRED_PARTIAL 0x01 /* partial predicate */
#define SIMD_OP_FLAGS_PRED_EMPTY 0x02 /* empty predicate */
+#define SIMD_OP_FLAGS_PRED_FULL 0x04 /* full predicate */
+#define SIMD_OP_FLAGS_PRED_DISABLED 0x08 /* disabled predicate */
struct perf_sample {
u64 ip;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index c7596e9ca089c170988dbc43a00f6973039261cd..44c2d2623d529cde78ea16f281a6501c2bde9c66 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -207,6 +207,7 @@ static int hist_entry__simd_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
const char *name;
+ const char *pred_str = ".";
if (!he->simd_flags.arch)
return repsep_snprintf(bf, size, "");
@@ -214,11 +215,15 @@ static int hist_entry__simd_snprintf(struct hist_entry *he, char *bf,
name = hist_entry__get_simd_name(&he->simd_flags);
if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_EMPTY)
- return repsep_snprintf(bf, size, "[e] %s", name);
+ pred_str = "e";
else if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_PARTIAL)
- return repsep_snprintf(bf, size, "[p] %s", name);
+ pred_str = "p";
+ else if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_DISABLED)
+ pred_str = "d";
+ else if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_FULL)
+ pred_str = "f";
- return repsep_snprintf(bf, size, "[.] %s", name);
+ return repsep_snprintf(bf, size, "[%s] %s", pred_str, name);
}
struct sort_entry sort_simd = {
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 24/25] perf report: Update document for SIMD flags
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (22 preceding siblings ...)
2025-09-29 16:37 ` [PATCH 23/25] perf sort: Sort disabled and full predicated flags Leo Yan
@ 2025-09-29 16:38 ` Leo Yan
2025-09-29 16:38 ` [PATCH 25/25] perf arm_spe: Improve SIMD flags setting Leo Yan
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:38 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Update the documentation of the SIMD architecture and predicate flags.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/Documentation/perf-report.txt | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index acef3ff4178eff66e8f876ae16cdac7b1387f07b..f361081a65dbe9cead539c7cb81d6ed86eb0acc6 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -136,7 +136,10 @@ OPTIONS
- addr: (Full) virtual address of the sampled instruction
- retire_lat: On X86, this reports pipeline stall of this instruction compared
to the previous instruction in cycles. And currently supported only on X86
- - simd: Flags describing a SIMD operation. "e" for empty Arm SVE predicate. "p" for partial Arm SVE predicate
+ - simd: Flags describing a SIMD operation. The architecture type can be Arm's
+ ASE (Advanced SIMD extension), SVE, SME. It provides an extra tag for
+ predicate: "e" for empty predicate, "p" for partial predicate, "d" for
+ predicate disabled, and "f" for full predicate.
- type: Data type of sample memory access.
- typeoff: Offset in the data type of sample memory access.
- symoff: Offset in the symbol.
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH 25/25] perf arm_spe: Improve SIMD flags setting
2025-09-29 16:37 [PATCH 00/25] perf arm_spe: Extend operations Leo Yan
` (23 preceding siblings ...)
2025-09-29 16:38 ` [PATCH 24/25] perf report: Update document for SIMD flags Leo Yan
@ 2025-09-29 16:38 ` Leo Yan
24 siblings, 0 replies; 36+ messages in thread
From: Leo Yan @ 2025-09-29 16:38 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark
Cc: Arnaldo Carvalho de Melo, linux-perf-users, linux-arm-kernel,
Leo Yan
Fill in ASE and SME operations for the SIMD arch field.
Also set the predicate flags for SVE and SME, which differ in how the
predicate is reported: SME does not have a predicate flag, so the setting
is based on events only. SVE provides a predicate flag to indicate whether
the predicate is disabled, which allows four cases to be distinguished:
partial predicates, empty predicates, full predicates, and disabled
predicates.
After:
perf report -s,+simd
...
0.06% 0.06% sve-test sve-test [.] setz [p] SVE
0.06% 0.06% sve-test [kernel.kallsyms] [k] do_raw_spin_lock
0.06% 0.06% sve-test sve-test [.] getz [p] SVE
0.06% 0.06% sve-test [kernel.kallsyms] [k] timekeeping_advance
0.06% 0.06% sve-test sve-test [.] getz [d] SVE
0.06% 0.06% sve-test [kernel.kallsyms] [k] update_load_avg
0.06% 0.06% sve-test sve-test [.] getz [e] SVE
0.05% 0.05% sve-test sve-test [.] setz [e] SVE
0.05% 0.05% sve-test [kernel.kallsyms] [k] update_curr
0.05% 0.05% sve-test sve-test [.] setz [d] SVE
0.05% 0.05% sve-test [kernel.kallsyms] [k] do_raw_spin_unlock
0.05% 0.05% sve-test [kernel.kallsyms] [k] timekeeping_update_from_shadow.constprop.0
0.05% 0.05% sve-test sve-test [.] getz [f] SVE
0.05% 0.05% sve-test sve-test [.] setz [f] SVE
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
tools/perf/util/arm-spe.c | 26 ++++++++++++++++++++------
1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 5e920fe77ccda6f398cab13fa1e4269ddab12fd9..969166dcc956cfe191b8d40fb388ce78e797649b 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -353,12 +353,26 @@ static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *
if (record->op & ARM_SPE_OP_SVE)
simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
-
- if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
- simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
-
- if (record->type & ARM_SPE_SVE_EMPTY_PRED)
- simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
+ else if (record->op & ARM_SPE_OP_SME)
+ simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SME;
+ else if (record->op & (ARM_SPE_OP_ASE | ARM_SPE_OP_SIMD_FP))
+ simd_flags.arch |= SIMD_OP_FLAGS_ARCH_ASE;
+
+ if (record->op & ARM_SPE_OP_SVE) {
+ if (!(record->op & ARM_SPE_OP_PRED))
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_DISABLED;
+ else if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_PARTIAL;
+ else if (record->type & ARM_SPE_SVE_EMPTY_PRED)
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_EMPTY;
+ else
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_FULL;
+ } else {
+ if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_PARTIAL;
+ else if (record->type & ARM_SPE_SVE_EMPTY_PRED)
+ simd_flags.pred = SIMD_OP_FLAGS_PRED_EMPTY;
+ }
return simd_flags;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread