* [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
@ 2025-04-08 12:28 Yicong Yang
2025-04-22 9:01 ` Yicong Yang
2025-04-22 9:16 ` James Clark
0 siblings, 2 replies; 10+ messages in thread
From: Yicong Yang @ 2025-04-08 12:28 UTC (permalink / raw)
To: acme, namhyung, catalin.marinas, will, peterz, mingo,
mark.rutland, jolsa, john.g.garry, james.clark, leo.yan, irogers,
linux-arm-kernel, linux-perf-users
Cc: jonathan.cameron, hejunhao3, linuxarm, wangyushan12, caijingtao,
xueshan2, prime.zeng, yangyicong
From: Yicong Yang <yangyicong@hisilicon.com>
Add data source encoding for HiSilicon HIP12 and coresponding mapping
to the perf's memory data source. This will help to synthesize the data
and support upper layer tools like perf-mem and perf-c2c.
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
---
arch/arm64/include/asm/cputype.h | 2 +
tools/arch/arm64/include/asm/cputype.h | 2 +
.../util/arm-spe-decoder/arm-spe-decoder.h | 17 +++
tools/perf/util/arm-spe.c | 101 ++++++++++++++++++
4 files changed, 122 insertions(+)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index d1cc0571798b..36c5bbfbb6e9 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -133,6 +133,7 @@
#define HISI_CPU_PART_TSV110 0xD01
#define HISI_CPU_PART_HIP09 0xD02
+#define HISI_CPU_PART_HIP12 0xD06
#define APPLE_CPU_PART_M1_ICESTORM 0x022
#define APPLE_CPU_PART_M1_FIRESTORM 0x023
@@ -220,6 +221,7 @@
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
+#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 488f8e751349..9a5d85cfd1fb 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -129,6 +129,7 @@
#define FUJITSU_CPU_PART_A64FX 0x001
#define HISI_CPU_PART_TSV110 0xD01
+#define HISI_CPU_PART_HIP12 0xD06
#define APPLE_CPU_PART_M1_ICESTORM 0x022
#define APPLE_CPU_PART_M1_FIRESTORM 0x023
@@ -202,6 +203,7 @@
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 5d232188643b..0410abed5009 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
ARM_SPE_AMPEREONE_L2D = 0x9,
};
+enum arm_spe_hisi_hip_data_source {
+ ARM_SPE_HISI_HIP_PEER_CPU = 0,
+ ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1,
+ ARM_SPE_HISI_HIP_L3 = 2,
+ ARM_SPE_HISI_HIP_L3_HITM = 3,
+ ARM_SPE_HISI_HIP_PEER_CLUSTER = 4,
+ ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5,
+ ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6,
+ ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7,
+ ARM_SPE_HISI_HIP_LOCAL = 8,
+ ARM_SPE_HISI_HIP_REMOTE = 9,
+ ARM_SPE_HISI_HIP_NC_DEV = 13,
+ ARM_SPE_HISI_HIP_L2 = 16,
+ ARM_SPE_HISI_HIP_L2_HITM = 17,
+ ARM_SPE_HISI_HIP_L1 = 18,
+};
+
struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 2a9775649cc2..eceae4219601 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
{},
};
+static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
+ {},
+};
+
static void arm_spe__sample_flags(struct arm_spe_queue *speq)
{
const struct arm_spe_record *record = &speq->decoder->record;
@@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
arm_spe__synth_data_source_common(&common_record, data_src);
}
+static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
+ /* Use common synthesis method to handle store operations */
+ if (record->op & ARM_SPE_OP_ST) {
+ arm_spe__synth_data_source_common(record, data_src);
+ return;
+ }
+
+ switch (record->source) {
+ case ARM_SPE_HISI_HIP_PEER_CPU:
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_L3:
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_HISI_HIP_L3_HITM:
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_PEER_CLUSTER:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_LOCAL:
+ data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_HISI_HIP_REMOTE:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ break;
+ case ARM_SPE_HISI_HIP_NC_DEV:
+ data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_HISI_HIP_L2:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_HISI_HIP_L2_HITM:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_L1:
+ data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ default:
+ break;
+ }
+}
+
static const struct data_source_handle data_source_handles[] = {
DS(common_ds_encoding_cpus, data_source_common),
DS(ampereone_ds_encoding_cpus, data_source_ampereone),
+ DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
};
static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
--
2.24.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
2025-04-08 12:28 [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12 Yicong Yang
@ 2025-04-22 9:01 ` Yicong Yang
2025-04-22 9:16 ` James Clark
1 sibling, 0 replies; 10+ messages in thread
From: Yicong Yang @ 2025-04-22 9:01 UTC (permalink / raw)
To: acme, namhyung, catalin.marinas, will, peterz, mingo,
mark.rutland, jolsa, john.g.garry, james.clark, leo.yan, irogers,
linux-arm-kernel, linux-perf-users
Cc: yangyicong, jonathan.cameron, hejunhao3, linuxarm, wangyushan12,
caijingtao, xueshan2, prime.zeng
A gentle ping for this straightforward support.
Thanks.
On 2025/4/8 20:28, Yicong Yang wrote:
> From: Yicong Yang <yangyicong@hisilicon.com>
>
> Add data source encoding for HiSilicon HIP12 and coresponding mapping
> to the perf's memory data source. This will help to synthesize the data
> and support upper layer tools like perf-mem and perf-c2c.
>
> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> ---
> arch/arm64/include/asm/cputype.h | 2 +
> tools/arch/arm64/include/asm/cputype.h | 2 +
> .../util/arm-spe-decoder/arm-spe-decoder.h | 17 +++
> tools/perf/util/arm-spe.c | 101 ++++++++++++++++++
> 4 files changed, 122 insertions(+)
>
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index d1cc0571798b..36c5bbfbb6e9 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -133,6 +133,7 @@
>
> #define HISI_CPU_PART_TSV110 0xD01
> #define HISI_CPU_PART_HIP09 0xD02
> +#define HISI_CPU_PART_HIP12 0xD06
>
> #define APPLE_CPU_PART_M1_ICESTORM 0x022
> #define APPLE_CPU_PART_M1_FIRESTORM 0x023
> @@ -220,6 +221,7 @@
> #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
> #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
> #define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
> #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
> #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
> #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
> diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
> index 488f8e751349..9a5d85cfd1fb 100644
> --- a/tools/arch/arm64/include/asm/cputype.h
> +++ b/tools/arch/arm64/include/asm/cputype.h
> @@ -129,6 +129,7 @@
> #define FUJITSU_CPU_PART_A64FX 0x001
>
> #define HISI_CPU_PART_TSV110 0xD01
> +#define HISI_CPU_PART_HIP12 0xD06
>
> #define APPLE_CPU_PART_M1_ICESTORM 0x022
> #define APPLE_CPU_PART_M1_FIRESTORM 0x023
> @@ -202,6 +203,7 @@
> #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
> #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
> #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
> #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
> #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
> #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 5d232188643b..0410abed5009 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
> ARM_SPE_AMPEREONE_L2D = 0x9,
> };
>
> +enum arm_spe_hisi_hip_data_source {
> + ARM_SPE_HISI_HIP_PEER_CPU = 0,
> + ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1,
> + ARM_SPE_HISI_HIP_L3 = 2,
> + ARM_SPE_HISI_HIP_L3_HITM = 3,
> + ARM_SPE_HISI_HIP_PEER_CLUSTER = 4,
> + ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5,
> + ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6,
> + ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7,
> + ARM_SPE_HISI_HIP_LOCAL = 8,
> + ARM_SPE_HISI_HIP_REMOTE = 9,
> + ARM_SPE_HISI_HIP_NC_DEV = 13,
> + ARM_SPE_HISI_HIP_L2 = 16,
> + ARM_SPE_HISI_HIP_L2_HITM = 17,
> + ARM_SPE_HISI_HIP_L1 = 18,
> +};
> +
> struct arm_spe_record {
> enum arm_spe_sample_type type;
> int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 2a9775649cc2..eceae4219601 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
> {},
> };
>
> +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
> + MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
> + {},
> +};
> +
> static void arm_spe__sample_flags(struct arm_spe_queue *speq)
> {
> const struct arm_spe_record *record = &speq->decoder->record;
> @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
> arm_spe__synth_data_source_common(&common_record, data_src);
> }
>
> +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
> + union perf_mem_data_src *data_src)
> +{
> + /* Use common synthesis method to handle store operations */
> + if (record->op & ARM_SPE_OP_ST) {
> + arm_spe__synth_data_source_common(record, data_src);
> + return;
> + }
> +
> + switch (record->source) {
> + case ARM_SPE_HISI_HIP_PEER_CPU:
> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_L3:
> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + break;
> + case ARM_SPE_HISI_HIP_L3_HITM:
> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_PEER_CLUSTER:
> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_LOCAL:
> + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + break;
> + case ARM_SPE_HISI_HIP_REMOTE:
> + data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> + break;
> + case ARM_SPE_HISI_HIP_NC_DEV:
> + data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + break;
> + case ARM_SPE_HISI_HIP_L2:
> + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + break;
> + case ARM_SPE_HISI_HIP_L2_HITM:
> + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_L1:
> + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + break;
> + default:
> + break;
> + }
> +}
> +
> static const struct data_source_handle data_source_handles[] = {
> DS(common_ds_encoding_cpus, data_source_common),
> DS(ampereone_ds_encoding_cpus, data_source_ampereone),
> + DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
> };
>
> static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
2025-04-08 12:28 [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12 Yicong Yang
2025-04-22 9:01 ` Yicong Yang
@ 2025-04-22 9:16 ` James Clark
2025-04-22 10:11 ` Yicong Yang
2025-04-22 10:29 ` Leo Yan
1 sibling, 2 replies; 10+ messages in thread
From: James Clark @ 2025-04-22 9:16 UTC (permalink / raw)
To: Yicong Yang
Cc: jonathan.cameron, hejunhao3, linuxarm, wangyushan12, caijingtao,
xueshan2, prime.zeng, yangyicong, acme, namhyung, catalin.marinas,
will, peterz, mingo, mark.rutland, jolsa, john.g.garry, leo.yan,
irogers, linux-arm-kernel, linux-perf-users
On 08/04/2025 1:28 pm, Yicong Yang wrote:
> From: Yicong Yang <yangyicong@hisilicon.com>
>
> Add data source encoding for HiSilicon HIP12 and coresponding mapping
> to the perf's memory data source. This will help to synthesize the data
> and support upper layer tools like perf-mem and perf-c2c.
>
> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> ---
> arch/arm64/include/asm/cputype.h | 2 +
> tools/arch/arm64/include/asm/cputype.h | 2 +
> .../util/arm-spe-decoder/arm-spe-decoder.h | 17 +++
> tools/perf/util/arm-spe.c | 101 ++++++++++++++++++
> 4 files changed, 122 insertions(+)
>
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index d1cc0571798b..36c5bbfbb6e9 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -133,6 +133,7 @@
>
> #define HISI_CPU_PART_TSV110 0xD01
> #define HISI_CPU_PART_HIP09 0xD02
> +#define HISI_CPU_PART_HIP12 0xD06
>
> #define APPLE_CPU_PART_M1_ICESTORM 0x022
> #define APPLE_CPU_PART_M1_FIRESTORM 0x023
> @@ -220,6 +221,7 @@
> #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
> #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
> #define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
> #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
> #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
> #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
> diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
> index 488f8e751349..9a5d85cfd1fb 100644
> --- a/tools/arch/arm64/include/asm/cputype.h
> +++ b/tools/arch/arm64/include/asm/cputype.h
> @@ -129,6 +129,7 @@
> #define FUJITSU_CPU_PART_A64FX 0x001
>
> #define HISI_CPU_PART_TSV110 0xD01
> +#define HISI_CPU_PART_HIP12 0xD06
>
> #define APPLE_CPU_PART_M1_ICESTORM 0x022
> #define APPLE_CPU_PART_M1_FIRESTORM 0x023
> @@ -202,6 +203,7 @@
> #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
> #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
> #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
> #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
> #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
> #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 5d232188643b..0410abed5009 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
> ARM_SPE_AMPEREONE_L2D = 0x9,
> };
>
> +enum arm_spe_hisi_hip_data_source {
> + ARM_SPE_HISI_HIP_PEER_CPU = 0,
> + ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1,
> + ARM_SPE_HISI_HIP_L3 = 2,
> + ARM_SPE_HISI_HIP_L3_HITM = 3,
> + ARM_SPE_HISI_HIP_PEER_CLUSTER = 4,
> + ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5,
> + ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6,
> + ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7,
> + ARM_SPE_HISI_HIP_LOCAL = 8,
> + ARM_SPE_HISI_HIP_REMOTE = 9,
> + ARM_SPE_HISI_HIP_NC_DEV = 13,
> + ARM_SPE_HISI_HIP_L2 = 16,
> + ARM_SPE_HISI_HIP_L2_HITM = 17,
> + ARM_SPE_HISI_HIP_L1 = 18,
> +};
> +
> struct arm_spe_record {
> enum arm_spe_sample_type type;
> int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 2a9775649cc2..eceae4219601 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
> {},
> };
>
> +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
> + MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
> + {},
> +};
> +
> static void arm_spe__sample_flags(struct arm_spe_queue *speq)
> {
> const struct arm_spe_record *record = &speq->decoder->record;
> @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
> arm_spe__synth_data_source_common(&common_record, data_src);
> }
>
> +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
> + union perf_mem_data_src *data_src)
> +{
> + /* Use common synthesis method to handle store operations */
> + if (record->op & ARM_SPE_OP_ST) {
> + arm_spe__synth_data_source_common(record, data_src);
> + return;
> + }
> +
> + switch (record->source) {
> + case ARM_SPE_HISI_HIP_PEER_CPU:
> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_L3:
> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + break;
> + case ARM_SPE_HISI_HIP_L3_HITM:
> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_PEER_CLUSTER:
> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
Hi Yicong,
Is the mem_snoop setting missing from this one?
Other than that, LGTM.
Thanks
James
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
2025-04-22 9:16 ` James Clark
@ 2025-04-22 10:11 ` Yicong Yang
2025-04-22 10:29 ` Leo Yan
1 sibling, 0 replies; 10+ messages in thread
From: Yicong Yang @ 2025-04-22 10:11 UTC (permalink / raw)
To: James Clark
Cc: yangyicong, jonathan.cameron, hejunhao3, linuxarm, wangyushan12,
caijingtao, xueshan2, prime.zeng, acme, namhyung, catalin.marinas,
will, peterz, mingo, mark.rutland, jolsa, john.g.garry, leo.yan,
irogers, linux-arm-kernel, linux-perf-users
On 2025/4/22 17:16, James Clark wrote:
>
>
> On 08/04/2025 1:28 pm, Yicong Yang wrote:
>> From: Yicong Yang <yangyicong@hisilicon.com>
>>
>> Add data source encoding for HiSilicon HIP12 and coresponding mapping
>> to the perf's memory data source. This will help to synthesize the data
>> and support upper layer tools like perf-mem and perf-c2c.
>>
>> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
>> ---
>> arch/arm64/include/asm/cputype.h | 2 +
>> tools/arch/arm64/include/asm/cputype.h | 2 +
>> .../util/arm-spe-decoder/arm-spe-decoder.h | 17 +++
>> tools/perf/util/arm-spe.c | 101 ++++++++++++++++++
>> 4 files changed, 122 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
>> index d1cc0571798b..36c5bbfbb6e9 100644
>> --- a/arch/arm64/include/asm/cputype.h
>> +++ b/arch/arm64/include/asm/cputype.h
>> @@ -133,6 +133,7 @@
>> #define HISI_CPU_PART_TSV110 0xD01
>> #define HISI_CPU_PART_HIP09 0xD02
>> +#define HISI_CPU_PART_HIP12 0xD06
>> #define APPLE_CPU_PART_M1_ICESTORM 0x022
>> #define APPLE_CPU_PART_M1_FIRESTORM 0x023
>> @@ -220,6 +221,7 @@
>> #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
>> #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
>> #define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
>> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
>> #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
>> #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
>> #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
>> diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
>> index 488f8e751349..9a5d85cfd1fb 100644
>> --- a/tools/arch/arm64/include/asm/cputype.h
>> +++ b/tools/arch/arm64/include/asm/cputype.h
>> @@ -129,6 +129,7 @@
>> #define FUJITSU_CPU_PART_A64FX 0x001
>> #define HISI_CPU_PART_TSV110 0xD01
>> +#define HISI_CPU_PART_HIP12 0xD06
>> #define APPLE_CPU_PART_M1_ICESTORM 0x022
>> #define APPLE_CPU_PART_M1_FIRESTORM 0x023
>> @@ -202,6 +203,7 @@
>> #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
>> #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
>> #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
>> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
>> #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
>> #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
>> #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> index 5d232188643b..0410abed5009 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
>> ARM_SPE_AMPEREONE_L2D = 0x9,
>> };
>> +enum arm_spe_hisi_hip_data_source {
>> + ARM_SPE_HISI_HIP_PEER_CPU = 0,
>> + ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1,
>> + ARM_SPE_HISI_HIP_L3 = 2,
>> + ARM_SPE_HISI_HIP_L3_HITM = 3,
>> + ARM_SPE_HISI_HIP_PEER_CLUSTER = 4,
>> + ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5,
>> + ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6,
>> + ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7,
>> + ARM_SPE_HISI_HIP_LOCAL = 8,
>> + ARM_SPE_HISI_HIP_REMOTE = 9,
>> + ARM_SPE_HISI_HIP_NC_DEV = 13,
>> + ARM_SPE_HISI_HIP_L2 = 16,
>> + ARM_SPE_HISI_HIP_L2_HITM = 17,
>> + ARM_SPE_HISI_HIP_L1 = 18,
>> +};
>> +
>> struct arm_spe_record {
>> enum arm_spe_sample_type type;
>> int err;
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index 2a9775649cc2..eceae4219601 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
>> {},
>> };
>> +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
>> + MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
>> + {},
>> +};
>> +
>> static void arm_spe__sample_flags(struct arm_spe_queue *speq)
>> {
>> const struct arm_spe_record *record = &speq->decoder->record;
>> @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
>> arm_spe__synth_data_source_common(&common_record, data_src);
>> }
>> +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
>> + union perf_mem_data_src *data_src)
>> +{
>> + /* Use common synthesis method to handle store operations */
>> + if (record->op & ARM_SPE_OP_ST) {
>> + arm_spe__synth_data_source_common(record, data_src);
>> + return;
>> + }
>> +
>> + switch (record->source) {
>> + case ARM_SPE_HISI_HIP_PEER_CPU:
>> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> + break;
>> + case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
>> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> + break;
>> + case ARM_SPE_HISI_HIP_L3:
>> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> + break;
>> + case ARM_SPE_HISI_HIP_L3_HITM:
>> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> + break;
>> + case ARM_SPE_HISI_HIP_PEER_CLUSTER:
>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> + break;
>> + case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> + break;
>> + case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>
> Hi Yicong,
>
> Is the mem_snoop setting missing from this one?
>
it should be PERF_MEM_SNOOP_NONE, will fix. Thanks for catching this.
> Other than that, LGTM.
>
Thanks.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
2025-04-22 9:16 ` James Clark
2025-04-22 10:11 ` Yicong Yang
@ 2025-04-22 10:29 ` Leo Yan
2025-04-22 12:31 ` Yicong Yang
1 sibling, 1 reply; 10+ messages in thread
From: Leo Yan @ 2025-04-22 10:29 UTC (permalink / raw)
To: James Clark
Cc: Yicong Yang, jonathan.cameron, hejunhao3, linuxarm, wangyushan12,
caijingtao, xueshan2, prime.zeng, yangyicong, acme, namhyung,
catalin.marinas, will, peterz, mingo, mark.rutland, jolsa,
john.g.garry, leo.yan, irogers, linux-arm-kernel,
linux-perf-users
On Tue, Apr 22, 2025 at 10:16:40AM +0100, James Clark wrote:
> >
> > Add data source encoding for HiSilicon HIP12 and coresponding mapping
> > to the perf's memory data source. This will help to synthesize the data
> > and support upper layer tools like perf-mem and perf-c2c.
> >
> > Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> > ---
> > arch/arm64/include/asm/cputype.h | 2 +
> > tools/arch/arm64/include/asm/cputype.h | 2 +
Please split into two patches. One is a kernel patch and another is a
tool patch.
[...]
> > --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> > +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> > @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
> > ARM_SPE_AMPEREONE_L2D = 0x9,
> > };
> > +enum arm_spe_hisi_hip_data_source {
> > + ARM_SPE_HISI_HIP_PEER_CPU = 0,
> > + ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1,
> > + ARM_SPE_HISI_HIP_L3 = 2,
> > + ARM_SPE_HISI_HIP_L3_HITM = 3,
> > + ARM_SPE_HISI_HIP_PEER_CLUSTER = 4,
> > + ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5,
> > + ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6,
> > + ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7,
> > + ARM_SPE_HISI_HIP_LOCAL = 8,
> > + ARM_SPE_HISI_HIP_REMOTE = 9,
> > + ARM_SPE_HISI_HIP_NC_DEV = 13,
> > + ARM_SPE_HISI_HIP_L2 = 16,
> > + ARM_SPE_HISI_HIP_L2_HITM = 17,
> > + ARM_SPE_HISI_HIP_L1 = 18,
> > +};
> > +
> > struct arm_spe_record {
> > enum arm_spe_sample_type type;
> > int err;
> > diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> > index 2a9775649cc2..eceae4219601 100644
> > --- a/tools/perf/util/arm-spe.c
> > +++ b/tools/perf/util/arm-spe.c
> > @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
> > {},
> > };
> > +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
> > + MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
> > + {},
> > +};
> > +
> > static void arm_spe__sample_flags(struct arm_spe_queue *speq)
> > {
> > const struct arm_spe_record *record = &speq->decoder->record;
> > @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
> > arm_spe__synth_data_source_common(&common_record, data_src);
> > }
> > +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
> > + union perf_mem_data_src *data_src)
> > +{
> > + /* Use common synthesis method to handle store operations */
> > + if (record->op & ARM_SPE_OP_ST) {
> > + arm_spe__synth_data_source_common(record, data_src);
> > + return;
> > + }
> > +
> > + switch (record->source) {
> > + case ARM_SPE_HISI_HIP_PEER_CPU:
> > + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
Seems it is conflict to set both 'PERF_MEM_SNOOP_NONE' and
'PERF_MEM_SNOOPX_PEER'.
Remove setting 'PERF_MEM_SNOOP_NONE' in this case?
> > + break;
> > + case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
> > + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> > + break;
> > + case ARM_SPE_HISI_HIP_L3:
> > + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> > + break;
> > + case ARM_SPE_HISI_HIP_L3_HITM:
> > + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> > + break;
> > + case ARM_SPE_HISI_HIP_PEER_CLUSTER:
> > + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
Seems to me, a CPU has L3 cache, would the cluster has a higher level's
cache?
> > + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
Ditto for the confliction for the two flags 'SNOOP_NONE' and
'SNOOPX_PEER'.
> > + break;
> > + case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
> > + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> > + break;
> > + case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
> > + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
> > + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> > + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> > + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>
> Hi Yicong,
>
> Is the mem_snoop setting missing from this one?
The field 'mem_snoopx' is an extension to the field 'mem_snoop'.
If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.
Thanks,
Leo
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
2025-04-22 10:29 ` Leo Yan
@ 2025-04-22 12:31 ` Yicong Yang
2025-04-22 13:20 ` Leo Yan
0 siblings, 1 reply; 10+ messages in thread
From: Yicong Yang @ 2025-04-22 12:31 UTC (permalink / raw)
To: Leo Yan, James Clark
Cc: yangyicong, jonathan.cameron, hejunhao3, linuxarm, wangyushan12,
caijingtao, xueshan2, prime.zeng, acme, namhyung, catalin.marinas,
will, peterz, mingo, mark.rutland, jolsa, john.g.garry, leo.yan,
irogers, linux-arm-kernel, linux-perf-users
On 2025/4/22 18:29, Leo Yan wrote:
> On Tue, Apr 22, 2025 at 10:16:40AM +0100, James Clark wrote:
>>>
>>> Add data source encoding for HiSilicon HIP12 and coresponding mapping
>>> to the perf's memory data source. This will help to synthesize the data
>>> and support upper layer tools like perf-mem and perf-c2c.
>>>
>>> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
>>> ---
>>> arch/arm64/include/asm/cputype.h | 2 +
>>> tools/arch/arm64/include/asm/cputype.h | 2 +
>
> Please split into two patches. One is a kernel patch and another is a
> tool patch.
will do in next version.
>
> [...]
>
>>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>>> @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
>>> ARM_SPE_AMPEREONE_L2D = 0x9,
>>> };
>>> +enum arm_spe_hisi_hip_data_source {
>>> + ARM_SPE_HISI_HIP_PEER_CPU = 0,
>>> + ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1,
>>> + ARM_SPE_HISI_HIP_L3 = 2,
>>> + ARM_SPE_HISI_HIP_L3_HITM = 3,
>>> + ARM_SPE_HISI_HIP_PEER_CLUSTER = 4,
>>> + ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5,
>>> + ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6,
>>> + ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7,
>>> + ARM_SPE_HISI_HIP_LOCAL = 8,
>>> + ARM_SPE_HISI_HIP_REMOTE = 9,
>>> + ARM_SPE_HISI_HIP_NC_DEV = 13,
>>> + ARM_SPE_HISI_HIP_L2 = 16,
>>> + ARM_SPE_HISI_HIP_L2_HITM = 17,
>>> + ARM_SPE_HISI_HIP_L1 = 18,
>>> +};
>>> +
>>> struct arm_spe_record {
>>> enum arm_spe_sample_type type;
>>> int err;
>>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>>> index 2a9775649cc2..eceae4219601 100644
>>> --- a/tools/perf/util/arm-spe.c
>>> +++ b/tools/perf/util/arm-spe.c
>>> @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
>>> {},
>>> };
>>> +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
>>> + MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
>>> + {},
>>> +};
>>> +
>>> static void arm_spe__sample_flags(struct arm_spe_queue *speq)
>>> {
>>> const struct arm_spe_record *record = &speq->decoder->record;
>>> @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
>>> arm_spe__synth_data_source_common(&common_record, data_src);
>>> }
>>> +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
>>> + union perf_mem_data_src *data_src)
>>> +{
>>> + /* Use common synthesis method to handle store operations */
>>> + if (record->op & ARM_SPE_OP_ST) {
>>> + arm_spe__synth_data_source_common(record, data_src);
>>> + return;
>>> + }
>>> +
>>> + switch (record->source) {
>>> + case ARM_SPE_HISI_HIP_PEER_CPU:
>>> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>
> Seems it is conflict to set both 'PERF_MEM_SNOOP_NONE' and
> 'PERF_MEM_SNOOPX_PEER'.
>
> Remove setting 'PERF_MEM_SNOOP_NONE' in this case?
>
>>> + break;
>>> + case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
>>> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>> + break;
>>> + case ARM_SPE_HISI_HIP_L3:
>>> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>>> + break;
>>> + case ARM_SPE_HISI_HIP_L3_HITM:
>>> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>> + break;
>>> + case ARM_SPE_HISI_HIP_PEER_CLUSTER:
>>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>
> Seems to me, a CPU has L3 cache, would the cluster has a higher level's
> cache?
In my case, the cluster CPUs share the L3 cache and there's several clusters.
L3's the highest level cache in the system.
>
>>> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>
> Ditto for the confliction for the two flags 'SNOOP_NONE' and
> 'SNOOPX_PEER'.
>
>>> + break;
>>> + case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
>>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> + data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>> + break;
>>> + case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
>>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
>>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>>> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>
>> Hi Yicong,
>>
>> Is the mem_snoop setting missing from this one?
>
> The field 'mem_snoopx' is an extension to the field 'mem_snoop'.
>
> If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.
>
they should not be mutal exclusive. mem_snoopx provides the information where
the cacheline comes from while mem_snoop provides the status of the cacheline.
if hardware supports we can gather both information from the data source, like
above for ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM.
for other cases if there's mem_snoopx information I think mem_snoop can be dropped,
this won't make differeces. Checked c2c_decode_stats(), only PERF_MEM_SNOOP_HIT and
PERF_MEM_SNOOP_HITM is useful when summarizing c2c statistics.
Thanks.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
2025-04-22 12:31 ` Yicong Yang
@ 2025-04-22 13:20 ` Leo Yan
2025-04-23 7:57 ` Yicong Yang
0 siblings, 1 reply; 10+ messages in thread
From: Leo Yan @ 2025-04-22 13:20 UTC (permalink / raw)
To: Yicong Yang
Cc: James Clark, yangyicong, jonathan.cameron, hejunhao3, linuxarm,
wangyushan12, caijingtao, xueshan2, prime.zeng, acme, namhyung,
catalin.marinas, will, peterz, mingo, mark.rutland, jolsa,
john.g.garry, leo.yan, irogers, linux-arm-kernel,
linux-perf-users
On Tue, Apr 22, 2025 at 08:31:43PM +0800, Yicong Yang wrote:
[...]
> >>> + case ARM_SPE_HISI_HIP_PEER_CLUSTER:
> >>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> >>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> >
> > Seems to me, a CPU has L3 cache, would the cluster has a higher level's
> > cache?
>
> In my case, the cluster CPUs share the L3 cache and there's several clusters.
> L3's the highest level cache in the system.
If so, you might need to revise the cache levels for:
ARM_SPE_HISI_HIP_PEER_CPU
ARM_SPE_HISI_HIP_PEER_CPU_HITM
IIUC, cluster CPUs share L3 cache, and every CPU in a cluster has
L1/L2 cache, for PEER_CPU cases, the memory level should be L2.
[...]
> >>> + case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
> >>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
> >>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> >>> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> >>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> >>
> >> Hi Yicong,
> >>
> >> Is the mem_snoop setting missing from this one?
> >
> > The field 'mem_snoopx' is an extension to the field 'mem_snoop'.
> >
> > If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.
> >
>
> they should not be mutal exclusive. mem_snoopx provides the information where
> the cacheline comes from while mem_snoop provides the status of the cacheline.
> if hardware supports we can gather both information from the data source, like
> above for ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM.
My understanding is the PERF_MEM_SNOOPX_PEER flag was extended for
support Arm SPE. Other snoop flags were original from x86 arch.
I agreed that in some cases above, both the flags PERF_MEM_SNOOPX_PEER
and PERF_MEM_SNOOP_HITM can be set together, you can parse cache sharing
with different --display options:
perf c2c report --display tot => based on HITM flags
perf c2c report --display peer => based on SNOOPX_PEER flag
> for other cases if there's mem_snoopx information I think mem_snoop can be dropped,
> this won't make differeces. Checked c2c_decode_stats(), only PERF_MEM_SNOOP_HIT and
> PERF_MEM_SNOOP_HITM is useful when summarizing c2c statistics.
It is about how to present accurate results.
E.g., for REMOTE_SOCKET type, it is hard to say the data from remote
DRAM or any level's cache. Since more hardware details are absent, this
is why I suggested not to set 'mem_snoop' for REMOTE_SOCKET.
Thanks,
Leo
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
2025-04-22 13:20 ` Leo Yan
@ 2025-04-23 7:57 ` Yicong Yang
2025-04-24 11:57 ` Yicong Yang
0 siblings, 1 reply; 10+ messages in thread
From: Yicong Yang @ 2025-04-23 7:57 UTC (permalink / raw)
To: Leo Yan
Cc: yangyicong, James Clark, jonathan.cameron, hejunhao3, linuxarm,
wangyushan12, caijingtao, xueshan2, prime.zeng, acme, namhyung,
catalin.marinas, will, peterz, mingo, mark.rutland, jolsa,
john.g.garry, leo.yan, irogers, linux-arm-kernel,
linux-perf-users
On 2025/4/22 21:20, Leo Yan wrote:
> On Tue, Apr 22, 2025 at 08:31:43PM +0800, Yicong Yang wrote:
>
> [...]
>
>>>>> + case ARM_SPE_HISI_HIP_PEER_CLUSTER:
>>>>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>>>>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>>
>>> Seems to me, a CPU has L3 cache, would the cluster has a higher level's
>>> cache?
>>
>> In my case, the cluster CPUs share the L3 cache and there's several clusters.
>> L3's the highest level cache in the system.
>
> If so, you might need to revise the cache levels for:
>
> ARM_SPE_HISI_HIP_PEER_CPU
> ARM_SPE_HISI_HIP_PEER_CPU_HITM
>
> IIUC, cluster CPUs share L3 cache, and every CPU in a cluster has
> L1/L2 cache, for PEER_CPU cases, the memory level should be L2.
>
confirmed with our hardware people, should be L2 for these two data sources.
I misunderstood here, thanks for pointing it out.
> [...]
>
>>>>> + case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
>>>>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
>>>>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>>>>> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>>>>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>>>
>>>> Hi Yicong,
>>>>
>>>> Is the mem_snoop setting missing from this one?
>>>
>>> The field 'mem_snoopx' is an extension to the field 'mem_snoop'.
>>>
>>> If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.
>>>
>>
>> they should not be mutal exclusive. mem_snoopx provides the information where
>> the cacheline comes from while mem_snoop provides the status of the cacheline.
>> if hardware supports we can gather both information from the data source, like
>> above for ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM.
>
> My understanding is the PERF_MEM_SNOOPX_PEER flag was extended for
> support Arm SPE. Other snoop flags were original from x86 arch.
>
> I agreed that in some cases above, both the flags PERF_MEM_SNOOPX_PEER
> and PERF_MEM_SNOOP_HITM can be set together, you can parse cache sharing
> with different --display options:
>
> perf c2c report --display tot => based on HITM flags
> perf c2c report --display peer => based on SNOOPX_PEER flag
>
that's exactly what we want to support :)
>> for other cases if there's mem_snoopx information I think mem_snoop can be dropped,
>> this won't make differeces. Checked c2c_decode_stats(), only PERF_MEM_SNOOP_HIT and
>> PERF_MEM_SNOOP_HITM is useful when summarizing c2c statistics.
>
> It is about how to present accurate results.
>
> E.g., for REMOTE_SOCKET type, it is hard to say the data from remote
> DRAM or any level's cache. Since more hardware details are absent, this
> is why I suggested not to set 'mem_snoop' for REMOTE_SOCKET.
>
this makes sense. will drop mem_snoop if no indications from the data source.
Thanks.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
2025-04-23 7:57 ` Yicong Yang
@ 2025-04-24 11:57 ` Yicong Yang
2025-04-24 12:48 ` Leo Yan
0 siblings, 1 reply; 10+ messages in thread
From: Yicong Yang @ 2025-04-24 11:57 UTC (permalink / raw)
To: Leo Yan
Cc: yangyicong, James Clark, jonathan.cameron, hejunhao3, linuxarm,
wangyushan12, caijingtao, xueshan2, prime.zeng, acme, namhyung,
catalin.marinas, will, peterz, mingo, mark.rutland, jolsa,
john.g.garry, leo.yan, irogers, linux-arm-kernel,
linux-perf-users
Hi Leo,
On 2025/4/23 15:57, Yicong Yang wrote:
> On 2025/4/22 21:20, Leo Yan wrote:
>> On Tue, Apr 22, 2025 at 08:31:43PM +0800, Yicong Yang wrote:
>>
>> [...]
>>
>>>>>> + case ARM_SPE_HISI_HIP_PEER_CLUSTER:
>>>>>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>>>>>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>>>
>>>> Seems to me, a CPU has L3 cache, would the cluster has a higher level's
>>>> cache?
>>>
>>> In my case, the cluster CPUs share the L3 cache and there's several clusters.
>>> L3's the highest level cache in the system.
>>
>> If so, you might need to revise the cache levels for:
>>
>> ARM_SPE_HISI_HIP_PEER_CPU
>> ARM_SPE_HISI_HIP_PEER_CPU_HITM
>>
>> IIUC, cluster CPUs share L3 cache, and every CPU in a cluster has
>> L1/L2 cache, for PEER_CPU cases, the memory level should be L2.
>>
>
> confirmed with our hardware people, should be L2 for these two data sources.
> I misunderstood here, thanks for pointing it out.
>
I recalled why the handling is like this. considering 2 threads have potential false
sharing issues which are running on core0 thread0 and core 1 thread 0 in the same
cluster, we'll have some ARM_SPE_HISI_HIP_PEER_CPU_HITM samples to indicate the
cacheline contention. If the cache level is L2 then we cannot observe this by
`perf c2c report -d tot`, since L2 is not counted for HITM.
does it make sense to have below change to account L2 hitm for lcl_hitm? just like
we account L2 peer snoop for lcl_peer.
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 884d9aebce91..a384a866a562 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -680,7 +680,10 @@ do { \
if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
if (lvl & P(LVL, L2)) {
- stats->ld_l2hit++;
+ if (snoop & P(SNOOP, HITM))
+ HITM_INC(lcl_hitm);
+ else
+ stats->ld_l2hit++;
if (snoopx & P(SNOOPX, PEER))
PEER_INC(lcl_peer);
>> [...]
>>
>>>>>> + case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
>>>>>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
>>>>>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>>>>>> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>>>>>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>>>>
>>>>> Hi Yicong,
>>>>>
>>>>> Is the mem_snoop setting missing from this one?
>>>>
>>>> The field 'mem_snoopx' is an extension to the field 'mem_snoop'.
>>>>
>>>> If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.
>>>>
>>>
>>> they should not be mutal exclusive. mem_snoopx provides the information where
>>> the cacheline comes from while mem_snoop provides the status of the cacheline.
>>> if hardware supports we can gather both information from the data source, like
>>> above for ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM.
>>
>> My understanding is the PERF_MEM_SNOOPX_PEER flag was extended for
>> support Arm SPE. Other snoop flags were original from x86 arch.
>>
>> I agreed that in some cases above, both the flags PERF_MEM_SNOOPX_PEER
>> and PERF_MEM_SNOOP_HITM can be set together, you can parse cache sharing
>> with different --display options:
>>
>> perf c2c report --display tot => based on HITM flags
>> perf c2c report --display peer => based on SNOOPX_PEER flag
>>
>
> that's exactly what we want to support :)
>
>>> for other cases if there's mem_snoopx information I think mem_snoop can be dropped,
>>> this won't make differeces. Checked c2c_decode_stats(), only PERF_MEM_SNOOP_HIT and
>>> PERF_MEM_SNOOP_HITM is useful when summarizing c2c statistics.
>>
>> It is about how to present accurate results.
>>
>> E.g., for REMOTE_SOCKET type, it is hard to say the data from remote
>> DRAM or any level's cache. Since more hardware details are absent, this
>> is why I suggested not to set 'mem_snoop' for REMOTE_SOCKET.
>>
>
> this makes sense. will drop mem_snoop if no indications from the data source.
>
> Thanks.
>
> .
>
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
2025-04-24 11:57 ` Yicong Yang
@ 2025-04-24 12:48 ` Leo Yan
0 siblings, 0 replies; 10+ messages in thread
From: Leo Yan @ 2025-04-24 12:48 UTC (permalink / raw)
To: Yicong Yang
Cc: yangyicong, James Clark, jonathan.cameron, hejunhao3, linuxarm,
wangyushan12, caijingtao, xueshan2, prime.zeng, acme, namhyung,
catalin.marinas, will, peterz, mingo, mark.rutland, jolsa,
john.g.garry, leo.yan, irogers, linux-arm-kernel,
linux-perf-users
Hi Yicong,
On Thu, Apr 24, 2025 at 07:57:39PM +0800, Yicong Yang wrote:
[...]
> >> If so, you might need to revise the cache levels for:
> >>
> >> ARM_SPE_HISI_HIP_PEER_CPU
> >> ARM_SPE_HISI_HIP_PEER_CPU_HITM
> >>
> >> IIUC, cluster CPUs share L3 cache, and every CPU in a cluster has
> >> L1/L2 cache, for PEER_CPU cases, the memory level should be L2.
> >>
> >
> > confirmed with our hardware people, should be L2 for these two data sources.
> > I misunderstood here, thanks for pointing it out.
>
> I recalled why the handling is like this. considering 2 threads have potential false
> sharing issues which are running on core0 thread0 and core 1 thread 0 in the same
> cluster, we'll have some ARM_SPE_HISI_HIP_PEER_CPU_HITM samples to indicate the
> cacheline contention. If the cache level is L2 then we cannot observe this by
> `perf c2c report -d tot`, since L2 is not counted for HITM.
>
> does it make sense to have below change to account L2 hitm for lcl_hitm? just like
> we account L2 peer snoop for lcl_peer.
Yes. The change below makes sense to me.
> diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
> index 884d9aebce91..a384a866a562 100644
> --- a/tools/perf/util/mem-events.c
> +++ b/tools/perf/util/mem-events.c
> @@ -680,7 +680,10 @@ do { \
> if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
> if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
> if (lvl & P(LVL, L2)) {
> - stats->ld_l2hit++;
> + if (snoop & P(SNOOP, HITM))
> + HITM_INC(lcl_hitm);
> + else
> + stats->ld_l2hit++;
>
> if (snoopx & P(SNOOPX, PEER))
> PEER_INC(lcl_peer);
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2025-04-24 12:48 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-04-08 12:28 [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12 Yicong Yang
2025-04-22 9:01 ` Yicong Yang
2025-04-22 9:16 ` James Clark
2025-04-22 10:11 ` Yicong Yang
2025-04-22 10:29 ` Leo Yan
2025-04-22 12:31 ` Yicong Yang
2025-04-22 13:20 ` Leo Yan
2025-04-23 7:57 ` Yicong Yang
2025-04-24 11:57 ` Yicong Yang
2025-04-24 12:48 ` Leo Yan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).