linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
@ 2025-04-08 12:28 Yicong Yang
  2025-04-22  9:01 ` Yicong Yang
  2025-04-22  9:16 ` James Clark
  0 siblings, 2 replies; 10+ messages in thread
From: Yicong Yang @ 2025-04-08 12:28 UTC (permalink / raw)
  To: acme, namhyung, catalin.marinas, will, peterz, mingo,
	mark.rutland, jolsa, john.g.garry, james.clark, leo.yan, irogers,
	linux-arm-kernel, linux-perf-users
  Cc: jonathan.cameron, hejunhao3, linuxarm, wangyushan12, caijingtao,
	xueshan2, prime.zeng, yangyicong

From: Yicong Yang <yangyicong@hisilicon.com>

Add data source encoding for HiSilicon HIP12 and coresponding mapping
to the perf's memory data source. This will help to synthesize the data
and support upper layer tools like perf-mem and perf-c2c.

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
---
 arch/arm64/include/asm/cputype.h              |   2 +
 tools/arch/arm64/include/asm/cputype.h        |   2 +
 .../util/arm-spe-decoder/arm-spe-decoder.h    |  17 +++
 tools/perf/util/arm-spe.c                     | 101 ++++++++++++++++++
 4 files changed, 122 insertions(+)

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index d1cc0571798b..36c5bbfbb6e9 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -133,6 +133,7 @@
 
 #define HISI_CPU_PART_TSV110		0xD01
 #define HISI_CPU_PART_HIP09			0xD02
+#define HISI_CPU_PART_HIP12		0xD06
 
 #define APPLE_CPU_PART_M1_ICESTORM	0x022
 #define APPLE_CPU_PART_M1_FIRESTORM	0x023
@@ -220,6 +221,7 @@
 #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
 #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
 #define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
+#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
 #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
 #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
 #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 488f8e751349..9a5d85cfd1fb 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -129,6 +129,7 @@
 #define FUJITSU_CPU_PART_A64FX		0x001
 
 #define HISI_CPU_PART_TSV110		0xD01
+#define HISI_CPU_PART_HIP12		0xD06
 
 #define APPLE_CPU_PART_M1_ICESTORM	0x022
 #define APPLE_CPU_PART_M1_FIRESTORM	0x023
@@ -202,6 +203,7 @@
 #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
 #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
 #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
 #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
 #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
 #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 5d232188643b..0410abed5009 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
 	ARM_SPE_AMPEREONE_L2D                           = 0x9,
 };
 
+enum arm_spe_hisi_hip_data_source {
+	ARM_SPE_HISI_HIP_PEER_CPU		= 0,
+	ARM_SPE_HISI_HIP_PEER_CPU_HITM		= 1,
+	ARM_SPE_HISI_HIP_L3			= 2,
+	ARM_SPE_HISI_HIP_L3_HITM		= 3,
+	ARM_SPE_HISI_HIP_PEER_CLUSTER		= 4,
+	ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM	= 5,
+	ARM_SPE_HISI_HIP_REMOTE_SOCKET		= 6,
+	ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM	= 7,
+	ARM_SPE_HISI_HIP_LOCAL			= 8,
+	ARM_SPE_HISI_HIP_REMOTE			= 9,
+	ARM_SPE_HISI_HIP_NC_DEV			= 13,
+	ARM_SPE_HISI_HIP_L2			= 16,
+	ARM_SPE_HISI_HIP_L2_HITM		= 17,
+	ARM_SPE_HISI_HIP_L1			= 18,
+};
+
 struct arm_spe_record {
 	enum arm_spe_sample_type type;
 	int err;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 2a9775649cc2..eceae4219601 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
 	{},
 };
 
+static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
+	MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
+	{},
+};
+
 static void arm_spe__sample_flags(struct arm_spe_queue *speq)
 {
 	const struct arm_spe_record *record = &speq->decoder->record;
@@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
 	arm_spe__synth_data_source_common(&common_record, data_src);
 }
 
+static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
+						union perf_mem_data_src *data_src)
+{
+	/* Use common synthesis method to handle store operations */
+	if (record->op & ARM_SPE_OP_ST) {
+		arm_spe__synth_data_source_common(record, data_src);
+		return;
+	}
+
+	switch (record->source) {
+	case ARM_SPE_HISI_HIP_PEER_CPU:
+		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
+		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_HISI_HIP_L3:
+		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_HISI_HIP_L3_HITM:
+		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
+		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
+		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
+		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
+		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_HISI_HIP_LOCAL:
+		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_HISI_HIP_REMOTE:
+		data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+		break;
+	case ARM_SPE_HISI_HIP_NC_DEV:
+		data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_HISI_HIP_L2:
+		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_HISI_HIP_L2_HITM:
+		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_HISI_HIP_L1:
+		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	default:
+		break;
+	}
+}
+
 static const struct data_source_handle data_source_handles[] = {
 	DS(common_ds_encoding_cpus, data_source_common),
 	DS(ampereone_ds_encoding_cpus, data_source_ampereone),
+	DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
 };
 
 static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
  2025-04-08 12:28 [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12 Yicong Yang
@ 2025-04-22  9:01 ` Yicong Yang
  2025-04-22  9:16 ` James Clark
  1 sibling, 0 replies; 10+ messages in thread
From: Yicong Yang @ 2025-04-22  9:01 UTC (permalink / raw)
  To: acme, namhyung, catalin.marinas, will, peterz, mingo,
	mark.rutland, jolsa, john.g.garry, james.clark, leo.yan, irogers,
	linux-arm-kernel, linux-perf-users
  Cc: yangyicong, jonathan.cameron, hejunhao3, linuxarm, wangyushan12,
	caijingtao, xueshan2, prime.zeng

A gentle ping for this straightforward support.

Thanks.

On 2025/4/8 20:28, Yicong Yang wrote:
> From: Yicong Yang <yangyicong@hisilicon.com>
> 
> Add data source encoding for HiSilicon HIP12 and coresponding mapping
> to the perf's memory data source. This will help to synthesize the data
> and support upper layer tools like perf-mem and perf-c2c.
> 
> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> ---
>  arch/arm64/include/asm/cputype.h              |   2 +
>  tools/arch/arm64/include/asm/cputype.h        |   2 +
>  .../util/arm-spe-decoder/arm-spe-decoder.h    |  17 +++
>  tools/perf/util/arm-spe.c                     | 101 ++++++++++++++++++
>  4 files changed, 122 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index d1cc0571798b..36c5bbfbb6e9 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -133,6 +133,7 @@
>  
>  #define HISI_CPU_PART_TSV110		0xD01
>  #define HISI_CPU_PART_HIP09			0xD02
> +#define HISI_CPU_PART_HIP12		0xD06
>  
>  #define APPLE_CPU_PART_M1_ICESTORM	0x022
>  #define APPLE_CPU_PART_M1_FIRESTORM	0x023
> @@ -220,6 +221,7 @@
>  #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
>  #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
>  #define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
>  #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
>  #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
>  #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
> diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
> index 488f8e751349..9a5d85cfd1fb 100644
> --- a/tools/arch/arm64/include/asm/cputype.h
> +++ b/tools/arch/arm64/include/asm/cputype.h
> @@ -129,6 +129,7 @@
>  #define FUJITSU_CPU_PART_A64FX		0x001
>  
>  #define HISI_CPU_PART_TSV110		0xD01
> +#define HISI_CPU_PART_HIP12		0xD06
>  
>  #define APPLE_CPU_PART_M1_ICESTORM	0x022
>  #define APPLE_CPU_PART_M1_FIRESTORM	0x023
> @@ -202,6 +203,7 @@
>  #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
>  #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
>  #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
>  #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
>  #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
>  #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 5d232188643b..0410abed5009 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
>  	ARM_SPE_AMPEREONE_L2D                           = 0x9,
>  };
>  
> +enum arm_spe_hisi_hip_data_source {
> +	ARM_SPE_HISI_HIP_PEER_CPU		= 0,
> +	ARM_SPE_HISI_HIP_PEER_CPU_HITM		= 1,
> +	ARM_SPE_HISI_HIP_L3			= 2,
> +	ARM_SPE_HISI_HIP_L3_HITM		= 3,
> +	ARM_SPE_HISI_HIP_PEER_CLUSTER		= 4,
> +	ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM	= 5,
> +	ARM_SPE_HISI_HIP_REMOTE_SOCKET		= 6,
> +	ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM	= 7,
> +	ARM_SPE_HISI_HIP_LOCAL			= 8,
> +	ARM_SPE_HISI_HIP_REMOTE			= 9,
> +	ARM_SPE_HISI_HIP_NC_DEV			= 13,
> +	ARM_SPE_HISI_HIP_L2			= 16,
> +	ARM_SPE_HISI_HIP_L2_HITM		= 17,
> +	ARM_SPE_HISI_HIP_L1			= 18,
> +};
> +
>  struct arm_spe_record {
>  	enum arm_spe_sample_type type;
>  	int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 2a9775649cc2..eceae4219601 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
>  	{},
>  };
>  
> +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
> +	MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
> +	{},
> +};
> +
>  static void arm_spe__sample_flags(struct arm_spe_queue *speq)
>  {
>  	const struct arm_spe_record *record = &speq->decoder->record;
> @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
>  	arm_spe__synth_data_source_common(&common_record, data_src);
>  }
>  
> +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
> +						union perf_mem_data_src *data_src)
> +{
> +	/* Use common synthesis method to handle store operations */
> +	if (record->op & ARM_SPE_OP_ST) {
> +		arm_spe__synth_data_source_common(record, data_src);
> +		return;
> +	}
> +
> +	switch (record->source) {
> +	case ARM_SPE_HISI_HIP_PEER_CPU:
> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_L3:
> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +		break;
> +	case ARM_SPE_HISI_HIP_L3_HITM:
> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> +		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> +		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_LOCAL:
> +		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +		break;
> +	case ARM_SPE_HISI_HIP_REMOTE:
> +		data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> +		break;
> +	case ARM_SPE_HISI_HIP_NC_DEV:
> +		data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +		break;
> +	case ARM_SPE_HISI_HIP_L2:
> +		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +		break;
> +	case ARM_SPE_HISI_HIP_L2_HITM:
> +		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_L1:
> +		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +		break;
> +	default:
> +		break;
> +	}
> +}
> +
>  static const struct data_source_handle data_source_handles[] = {
>  	DS(common_ds_encoding_cpus, data_source_common),
>  	DS(ampereone_ds_encoding_cpus, data_source_ampereone),
> +	DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
>  };
>  
>  static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
  2025-04-08 12:28 [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12 Yicong Yang
  2025-04-22  9:01 ` Yicong Yang
@ 2025-04-22  9:16 ` James Clark
  2025-04-22 10:11   ` Yicong Yang
  2025-04-22 10:29   ` Leo Yan
  1 sibling, 2 replies; 10+ messages in thread
From: James Clark @ 2025-04-22  9:16 UTC (permalink / raw)
  To: Yicong Yang
  Cc: jonathan.cameron, hejunhao3, linuxarm, wangyushan12, caijingtao,
	xueshan2, prime.zeng, yangyicong, acme, namhyung, catalin.marinas,
	will, peterz, mingo, mark.rutland, jolsa, john.g.garry, leo.yan,
	irogers, linux-arm-kernel, linux-perf-users



On 08/04/2025 1:28 pm, Yicong Yang wrote:
> From: Yicong Yang <yangyicong@hisilicon.com>
> 
> Add data source encoding for HiSilicon HIP12 and coresponding mapping
> to the perf's memory data source. This will help to synthesize the data
> and support upper layer tools like perf-mem and perf-c2c.
> 
> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> ---
>   arch/arm64/include/asm/cputype.h              |   2 +
>   tools/arch/arm64/include/asm/cputype.h        |   2 +
>   .../util/arm-spe-decoder/arm-spe-decoder.h    |  17 +++
>   tools/perf/util/arm-spe.c                     | 101 ++++++++++++++++++
>   4 files changed, 122 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index d1cc0571798b..36c5bbfbb6e9 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -133,6 +133,7 @@
>   
>   #define HISI_CPU_PART_TSV110		0xD01
>   #define HISI_CPU_PART_HIP09			0xD02
> +#define HISI_CPU_PART_HIP12		0xD06
>   
>   #define APPLE_CPU_PART_M1_ICESTORM	0x022
>   #define APPLE_CPU_PART_M1_FIRESTORM	0x023
> @@ -220,6 +221,7 @@
>   #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
>   #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
>   #define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
>   #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
>   #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
>   #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
> diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
> index 488f8e751349..9a5d85cfd1fb 100644
> --- a/tools/arch/arm64/include/asm/cputype.h
> +++ b/tools/arch/arm64/include/asm/cputype.h
> @@ -129,6 +129,7 @@
>   #define FUJITSU_CPU_PART_A64FX		0x001
>   
>   #define HISI_CPU_PART_TSV110		0xD01
> +#define HISI_CPU_PART_HIP12		0xD06
>   
>   #define APPLE_CPU_PART_M1_ICESTORM	0x022
>   #define APPLE_CPU_PART_M1_FIRESTORM	0x023
> @@ -202,6 +203,7 @@
>   #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
>   #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
>   #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
>   #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
>   #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
>   #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 5d232188643b..0410abed5009 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
>   	ARM_SPE_AMPEREONE_L2D                           = 0x9,
>   };
>   
> +enum arm_spe_hisi_hip_data_source {
> +	ARM_SPE_HISI_HIP_PEER_CPU		= 0,
> +	ARM_SPE_HISI_HIP_PEER_CPU_HITM		= 1,
> +	ARM_SPE_HISI_HIP_L3			= 2,
> +	ARM_SPE_HISI_HIP_L3_HITM		= 3,
> +	ARM_SPE_HISI_HIP_PEER_CLUSTER		= 4,
> +	ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM	= 5,
> +	ARM_SPE_HISI_HIP_REMOTE_SOCKET		= 6,
> +	ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM	= 7,
> +	ARM_SPE_HISI_HIP_LOCAL			= 8,
> +	ARM_SPE_HISI_HIP_REMOTE			= 9,
> +	ARM_SPE_HISI_HIP_NC_DEV			= 13,
> +	ARM_SPE_HISI_HIP_L2			= 16,
> +	ARM_SPE_HISI_HIP_L2_HITM		= 17,
> +	ARM_SPE_HISI_HIP_L1			= 18,
> +};
> +
>   struct arm_spe_record {
>   	enum arm_spe_sample_type type;
>   	int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 2a9775649cc2..eceae4219601 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
>   	{},
>   };
>   
> +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
> +	MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
> +	{},
> +};
> +
>   static void arm_spe__sample_flags(struct arm_spe_queue *speq)
>   {
>   	const struct arm_spe_record *record = &speq->decoder->record;
> @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
>   	arm_spe__synth_data_source_common(&common_record, data_src);
>   }
>   
> +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
> +						union perf_mem_data_src *data_src)
> +{
> +	/* Use common synthesis method to handle store operations */
> +	if (record->op & ARM_SPE_OP_ST) {
> +		arm_spe__synth_data_source_common(record, data_src);
> +		return;
> +	}
> +
> +	switch (record->source) {
> +	case ARM_SPE_HISI_HIP_PEER_CPU:
> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_L3:
> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +		break;
> +	case ARM_SPE_HISI_HIP_L3_HITM:
> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +		break;
> +	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> +		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;

Hi Yicong,

Is the mem_snoop setting missing from this one?

Other than that, LGTM.

Thanks
James


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
  2025-04-22  9:16 ` James Clark
@ 2025-04-22 10:11   ` Yicong Yang
  2025-04-22 10:29   ` Leo Yan
  1 sibling, 0 replies; 10+ messages in thread
From: Yicong Yang @ 2025-04-22 10:11 UTC (permalink / raw)
  To: James Clark
  Cc: yangyicong, jonathan.cameron, hejunhao3, linuxarm, wangyushan12,
	caijingtao, xueshan2, prime.zeng, acme, namhyung, catalin.marinas,
	will, peterz, mingo, mark.rutland, jolsa, john.g.garry, leo.yan,
	irogers, linux-arm-kernel, linux-perf-users

On 2025/4/22 17:16, James Clark wrote:
> 
> 
> On 08/04/2025 1:28 pm, Yicong Yang wrote:
>> From: Yicong Yang <yangyicong@hisilicon.com>
>>
>> Add data source encoding for HiSilicon HIP12 and coresponding mapping
>> to the perf's memory data source. This will help to synthesize the data
>> and support upper layer tools like perf-mem and perf-c2c.
>>
>> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
>> ---
>>   arch/arm64/include/asm/cputype.h              |   2 +
>>   tools/arch/arm64/include/asm/cputype.h        |   2 +
>>   .../util/arm-spe-decoder/arm-spe-decoder.h    |  17 +++
>>   tools/perf/util/arm-spe.c                     | 101 ++++++++++++++++++
>>   4 files changed, 122 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
>> index d1cc0571798b..36c5bbfbb6e9 100644
>> --- a/arch/arm64/include/asm/cputype.h
>> +++ b/arch/arm64/include/asm/cputype.h
>> @@ -133,6 +133,7 @@
>>     #define HISI_CPU_PART_TSV110        0xD01
>>   #define HISI_CPU_PART_HIP09            0xD02
>> +#define HISI_CPU_PART_HIP12        0xD06
>>     #define APPLE_CPU_PART_M1_ICESTORM    0x022
>>   #define APPLE_CPU_PART_M1_FIRESTORM    0x023
>> @@ -220,6 +221,7 @@
>>   #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
>>   #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
>>   #define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
>> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
>>   #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
>>   #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
>>   #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
>> diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
>> index 488f8e751349..9a5d85cfd1fb 100644
>> --- a/tools/arch/arm64/include/asm/cputype.h
>> +++ b/tools/arch/arm64/include/asm/cputype.h
>> @@ -129,6 +129,7 @@
>>   #define FUJITSU_CPU_PART_A64FX        0x001
>>     #define HISI_CPU_PART_TSV110        0xD01
>> +#define HISI_CPU_PART_HIP12        0xD06
>>     #define APPLE_CPU_PART_M1_ICESTORM    0x022
>>   #define APPLE_CPU_PART_M1_FIRESTORM    0x023
>> @@ -202,6 +203,7 @@
>>   #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
>>   #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
>>   #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
>> +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
>>   #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
>>   #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
>>   #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> index 5d232188643b..0410abed5009 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
>>       ARM_SPE_AMPEREONE_L2D                           = 0x9,
>>   };
>>   +enum arm_spe_hisi_hip_data_source {
>> +    ARM_SPE_HISI_HIP_PEER_CPU        = 0,
>> +    ARM_SPE_HISI_HIP_PEER_CPU_HITM        = 1,
>> +    ARM_SPE_HISI_HIP_L3            = 2,
>> +    ARM_SPE_HISI_HIP_L3_HITM        = 3,
>> +    ARM_SPE_HISI_HIP_PEER_CLUSTER        = 4,
>> +    ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM    = 5,
>> +    ARM_SPE_HISI_HIP_REMOTE_SOCKET        = 6,
>> +    ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM    = 7,
>> +    ARM_SPE_HISI_HIP_LOCAL            = 8,
>> +    ARM_SPE_HISI_HIP_REMOTE            = 9,
>> +    ARM_SPE_HISI_HIP_NC_DEV            = 13,
>> +    ARM_SPE_HISI_HIP_L2            = 16,
>> +    ARM_SPE_HISI_HIP_L2_HITM        = 17,
>> +    ARM_SPE_HISI_HIP_L1            = 18,
>> +};
>> +
>>   struct arm_spe_record {
>>       enum arm_spe_sample_type type;
>>       int err;
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index 2a9775649cc2..eceae4219601 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
>>       {},
>>   };
>>   +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
>> +    MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
>> +    {},
>> +};
>> +
>>   static void arm_spe__sample_flags(struct arm_spe_queue *speq)
>>   {
>>       const struct arm_spe_record *record = &speq->decoder->record;
>> @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
>>       arm_spe__synth_data_source_common(&common_record, data_src);
>>   }
>>   +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
>> +                        union perf_mem_data_src *data_src)
>> +{
>> +    /* Use common synthesis method to handle store operations */
>> +    if (record->op & ARM_SPE_OP_ST) {
>> +        arm_spe__synth_data_source_common(record, data_src);
>> +        return;
>> +    }
>> +
>> +    switch (record->source) {
>> +    case ARM_SPE_HISI_HIP_PEER_CPU:
>> +        data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>> +        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> +        data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> +        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> +        break;
>> +    case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
>> +        data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>> +        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> +        data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>> +        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> +        break;
>> +    case ARM_SPE_HISI_HIP_L3:
>> +        data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>> +        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> +        data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> +        break;
>> +    case ARM_SPE_HISI_HIP_L3_HITM:
>> +        data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>> +        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> +        data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>> +        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> +        break;
>> +    case ARM_SPE_HISI_HIP_PEER_CLUSTER:
>> +        data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>> +        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> +        data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> +        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> +        break;
>> +    case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
>> +        data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>> +        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> +        data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>> +        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> +        break;
>> +    case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
>> +        data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
>> +        data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>> +        data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>> +        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> 
> Hi Yicong,
> 
> Is the mem_snoop setting missing from this one?
> 

it should be PERF_MEM_SNOOP_NONE, will fix. Thanks for catching this.

> Other than that, LGTM.
> 

Thanks.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
  2025-04-22  9:16 ` James Clark
  2025-04-22 10:11   ` Yicong Yang
@ 2025-04-22 10:29   ` Leo Yan
  2025-04-22 12:31     ` Yicong Yang
  1 sibling, 1 reply; 10+ messages in thread
From: Leo Yan @ 2025-04-22 10:29 UTC (permalink / raw)
  To: James Clark
  Cc: Yicong Yang, jonathan.cameron, hejunhao3, linuxarm, wangyushan12,
	caijingtao, xueshan2, prime.zeng, yangyicong, acme, namhyung,
	catalin.marinas, will, peterz, mingo, mark.rutland, jolsa,
	john.g.garry, leo.yan, irogers, linux-arm-kernel,
	linux-perf-users

On Tue, Apr 22, 2025 at 10:16:40AM +0100, James Clark wrote:
> > 
> > Add data source encoding for HiSilicon HIP12 and coresponding mapping
> > to the perf's memory data source. This will help to synthesize the data
> > and support upper layer tools like perf-mem and perf-c2c.
> > 
> > Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> > ---
> >   arch/arm64/include/asm/cputype.h              |   2 +
> >   tools/arch/arm64/include/asm/cputype.h        |   2 +

Please split into two patches.  One is a kernel patch and another is a
tool patch.

[...]

> > --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> > +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> > @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
> >   	ARM_SPE_AMPEREONE_L2D                           = 0x9,
> >   };
> > +enum arm_spe_hisi_hip_data_source {
> > +	ARM_SPE_HISI_HIP_PEER_CPU		= 0,
> > +	ARM_SPE_HISI_HIP_PEER_CPU_HITM		= 1,
> > +	ARM_SPE_HISI_HIP_L3			= 2,
> > +	ARM_SPE_HISI_HIP_L3_HITM		= 3,
> > +	ARM_SPE_HISI_HIP_PEER_CLUSTER		= 4,
> > +	ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM	= 5,
> > +	ARM_SPE_HISI_HIP_REMOTE_SOCKET		= 6,
> > +	ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM	= 7,
> > +	ARM_SPE_HISI_HIP_LOCAL			= 8,
> > +	ARM_SPE_HISI_HIP_REMOTE			= 9,
> > +	ARM_SPE_HISI_HIP_NC_DEV			= 13,
> > +	ARM_SPE_HISI_HIP_L2			= 16,
> > +	ARM_SPE_HISI_HIP_L2_HITM		= 17,
> > +	ARM_SPE_HISI_HIP_L1			= 18,
> > +};
> > +
> >   struct arm_spe_record {
> >   	enum arm_spe_sample_type type;
> >   	int err;
> > diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> > index 2a9775649cc2..eceae4219601 100644
> > --- a/tools/perf/util/arm-spe.c
> > +++ b/tools/perf/util/arm-spe.c
> > @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
> >   	{},
> >   };
> > +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
> > +	MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
> > +	{},
> > +};
> > +
> >   static void arm_spe__sample_flags(struct arm_spe_queue *speq)
> >   {
> >   	const struct arm_spe_record *record = &speq->decoder->record;
> > @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
> >   	arm_spe__synth_data_source_common(&common_record, data_src);
> >   }
> > +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
> > +						union perf_mem_data_src *data_src)
> > +{
> > +	/* Use common synthesis method to handle store operations */
> > +	if (record->op & ARM_SPE_OP_ST) {
> > +		arm_spe__synth_data_source_common(record, data_src);
> > +		return;
> > +	}
> > +
> > +	switch (record->source) {
> > +	case ARM_SPE_HISI_HIP_PEER_CPU:
> > +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> > +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;

Seems it is conflict to set both 'PERF_MEM_SNOOP_NONE' and
'PERF_MEM_SNOOPX_PEER'.

Remove setting 'PERF_MEM_SNOOP_NONE' in this case?

> > +		break;
> > +	case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
> > +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> > +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> > +		break;
> > +	case ARM_SPE_HISI_HIP_L3:
> > +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> > +		break;
> > +	case ARM_SPE_HISI_HIP_L3_HITM:
> > +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> > +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> > +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> > +		break;
> > +	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
> > +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> > +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;

Seems to me, a CPU has L3 cache, would the cluster has a higher level's
cache?

> > +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> > +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;

Ditto for the confliction for the two flags 'SNOOP_NONE' and
'SNOOPX_PEER'.

> > +		break;
> > +	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
> > +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> > +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
> > +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> > +		break;
> > +	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
> > +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
> > +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> > +		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> > +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> 
> Hi Yicong,
> 
> Is the mem_snoop setting missing from this one?

The field 'mem_snoopx' is an extension to the field 'mem_snoop'.

If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.

Thanks,
Leo

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
  2025-04-22 10:29   ` Leo Yan
@ 2025-04-22 12:31     ` Yicong Yang
  2025-04-22 13:20       ` Leo Yan
  0 siblings, 1 reply; 10+ messages in thread
From: Yicong Yang @ 2025-04-22 12:31 UTC (permalink / raw)
  To: Leo Yan, James Clark
  Cc: yangyicong, jonathan.cameron, hejunhao3, linuxarm, wangyushan12,
	caijingtao, xueshan2, prime.zeng, acme, namhyung, catalin.marinas,
	will, peterz, mingo, mark.rutland, jolsa, john.g.garry, leo.yan,
	irogers, linux-arm-kernel, linux-perf-users

On 2025/4/22 18:29, Leo Yan wrote:
> On Tue, Apr 22, 2025 at 10:16:40AM +0100, James Clark wrote:
>>>
>>> Add data source encoding for HiSilicon HIP12 and coresponding mapping
>>> to the perf's memory data source. This will help to synthesize the data
>>> and support upper layer tools like perf-mem and perf-c2c.
>>>
>>> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
>>> ---
>>>   arch/arm64/include/asm/cputype.h              |   2 +
>>>   tools/arch/arm64/include/asm/cputype.h        |   2 +
> 
> Please split into two patches.  One is a kernel patch and another is a
> tool patch.

will do in next version.

> 
> [...]
> 
>>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>>> @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
>>>   	ARM_SPE_AMPEREONE_L2D                           = 0x9,
>>>   };
>>> +enum arm_spe_hisi_hip_data_source {
>>> +	ARM_SPE_HISI_HIP_PEER_CPU		= 0,
>>> +	ARM_SPE_HISI_HIP_PEER_CPU_HITM		= 1,
>>> +	ARM_SPE_HISI_HIP_L3			= 2,
>>> +	ARM_SPE_HISI_HIP_L3_HITM		= 3,
>>> +	ARM_SPE_HISI_HIP_PEER_CLUSTER		= 4,
>>> +	ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM	= 5,
>>> +	ARM_SPE_HISI_HIP_REMOTE_SOCKET		= 6,
>>> +	ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM	= 7,
>>> +	ARM_SPE_HISI_HIP_LOCAL			= 8,
>>> +	ARM_SPE_HISI_HIP_REMOTE			= 9,
>>> +	ARM_SPE_HISI_HIP_NC_DEV			= 13,
>>> +	ARM_SPE_HISI_HIP_L2			= 16,
>>> +	ARM_SPE_HISI_HIP_L2_HITM		= 17,
>>> +	ARM_SPE_HISI_HIP_L1			= 18,
>>> +};
>>> +
>>>   struct arm_spe_record {
>>>   	enum arm_spe_sample_type type;
>>>   	int err;
>>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>>> index 2a9775649cc2..eceae4219601 100644
>>> --- a/tools/perf/util/arm-spe.c
>>> +++ b/tools/perf/util/arm-spe.c
>>> @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
>>>   	{},
>>>   };
>>> +static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
>>> +	MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
>>> +	{},
>>> +};
>>> +
>>>   static void arm_spe__sample_flags(struct arm_spe_queue *speq)
>>>   {
>>>   	const struct arm_spe_record *record = &speq->decoder->record;
>>> @@ -718,9 +723,105 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
>>>   	arm_spe__synth_data_source_common(&common_record, data_src);
>>>   }
>>> +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
>>> +						union perf_mem_data_src *data_src)
>>> +{
>>> +	/* Use common synthesis method to handle store operations */
>>> +	if (record->op & ARM_SPE_OP_ST) {
>>> +		arm_spe__synth_data_source_common(record, data_src);
>>> +		return;
>>> +	}
>>> +
>>> +	switch (record->source) {
>>> +	case ARM_SPE_HISI_HIP_PEER_CPU:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> 
> Seems it is conflict to set both 'PERF_MEM_SNOOP_NONE' and
> 'PERF_MEM_SNOOPX_PEER'.
> 
> Remove setting 'PERF_MEM_SNOOP_NONE' in this case?
> 
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_L3:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_L3_HITM:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> 
> Seems to me, a CPU has L3 cache, would the cluster has a higher level's
> cache?

In my case, the cluster CPUs share the L3 cache and there's several clusters.
L3's the highest level cache in the system.

> 
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> 
> Ditto for the confliction for the two flags 'SNOOP_NONE' and
> 'SNOOPX_PEER'.
> 
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>> +		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>> +		break;
>>> +	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
>>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>>> +		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>
>> Hi Yicong,
>>
>> Is the mem_snoop setting missing from this one?
> 
> The field 'mem_snoopx' is an extension to the field 'mem_snoop'.
> 
> If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.
> 

they should not be mutal exclusive. mem_snoopx provides the information where
the cacheline comes from while mem_snoop provides the status of the cacheline.
if hardware supports we can gather both information from the data source, like
above for ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM.

for other cases if there's mem_snoopx information I think mem_snoop can be dropped,
this won't make differeces. Checked c2c_decode_stats(), only PERF_MEM_SNOOP_HIT and
PERF_MEM_SNOOP_HITM is useful when summarizing c2c statistics.

Thanks.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
  2025-04-22 12:31     ` Yicong Yang
@ 2025-04-22 13:20       ` Leo Yan
  2025-04-23  7:57         ` Yicong Yang
  0 siblings, 1 reply; 10+ messages in thread
From: Leo Yan @ 2025-04-22 13:20 UTC (permalink / raw)
  To: Yicong Yang
  Cc: James Clark, yangyicong, jonathan.cameron, hejunhao3, linuxarm,
	wangyushan12, caijingtao, xueshan2, prime.zeng, acme, namhyung,
	catalin.marinas, will, peterz, mingo, mark.rutland, jolsa,
	john.g.garry, leo.yan, irogers, linux-arm-kernel,
	linux-perf-users

On Tue, Apr 22, 2025 at 08:31:43PM +0800, Yicong Yang wrote:

[...]

> >>> +	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
> >>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> >>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> > 
> > Seems to me, a CPU has L3 cache, would the cluster has a higher level's
> > cache?
> 
> In my case, the cluster CPUs share the L3 cache and there's several clusters.
> L3's the highest level cache in the system.

If so, you might need to revise the cache levels for:

  ARM_SPE_HISI_HIP_PEER_CPU
  ARM_SPE_HISI_HIP_PEER_CPU_HITM

IIUC, cluster CPUs share L3 cache, and every CPU in a cluster has
L1/L2 cache, for PEER_CPU cases, the memory level should be L2.

[...]

> >>> +	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
> >>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
> >>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> >>> +		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> >>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> >>
> >> Hi Yicong,
> >>
> >> Is the mem_snoop setting missing from this one?
> > 
> > The field 'mem_snoopx' is an extension to the field 'mem_snoop'.
> > 
> > If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.
> > 
> 
> they should not be mutal exclusive. mem_snoopx provides the information where
> the cacheline comes from while mem_snoop provides the status of the cacheline.
> if hardware supports we can gather both information from the data source, like
> above for ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM.

My understanding is the PERF_MEM_SNOOPX_PEER flag was extended for
support Arm SPE.  Other snoop flags were original from x86 arch.

I agreed that in some cases above, both the flags PERF_MEM_SNOOPX_PEER
and PERF_MEM_SNOOP_HITM can be set together, you can parse cache sharing
with different --display options:

  perf c2c report --display tot    => based on HITM flags
  perf c2c report --display peer   => based on SNOOPX_PEER flag

> for other cases if there's mem_snoopx information I think mem_snoop can be dropped,
> this won't make differeces. Checked c2c_decode_stats(), only PERF_MEM_SNOOP_HIT and
> PERF_MEM_SNOOP_HITM is useful when summarizing c2c statistics.

It is about how to present accurate results.

E.g., for REMOTE_SOCKET type, it is hard to say the data from remote
DRAM or any level's cache.  Since more hardware details are absent, this
is why I suggested not to set 'mem_snoop' for REMOTE_SOCKET.

Thanks,
Leo

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
  2025-04-22 13:20       ` Leo Yan
@ 2025-04-23  7:57         ` Yicong Yang
  2025-04-24 11:57           ` Yicong Yang
  0 siblings, 1 reply; 10+ messages in thread
From: Yicong Yang @ 2025-04-23  7:57 UTC (permalink / raw)
  To: Leo Yan
  Cc: yangyicong, James Clark, jonathan.cameron, hejunhao3, linuxarm,
	wangyushan12, caijingtao, xueshan2, prime.zeng, acme, namhyung,
	catalin.marinas, will, peterz, mingo, mark.rutland, jolsa,
	john.g.garry, leo.yan, irogers, linux-arm-kernel,
	linux-perf-users

On 2025/4/22 21:20, Leo Yan wrote:
> On Tue, Apr 22, 2025 at 08:31:43PM +0800, Yicong Yang wrote:
> 
> [...]
> 
>>>>> +	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
>>>>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>>>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>>
>>> Seems to me, a CPU has L3 cache, would the cluster has a higher level's
>>> cache?
>>
>> In my case, the cluster CPUs share the L3 cache and there's several clusters.
>> L3's the highest level cache in the system.
> 
> If so, you might need to revise the cache levels for:
> 
>   ARM_SPE_HISI_HIP_PEER_CPU
>   ARM_SPE_HISI_HIP_PEER_CPU_HITM
> 
> IIUC, cluster CPUs share L3 cache, and every CPU in a cluster has
> L1/L2 cache, for PEER_CPU cases, the memory level should be L2.
> 

confirmed with our hardware people, should be L2 for these two data sources.
I misunderstood here, thanks for pointing it out.

> [...]
> 
>>>>> +	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
>>>>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
>>>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>>>>> +		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>>>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>>>
>>>> Hi Yicong,
>>>>
>>>> Is the mem_snoop setting missing from this one?
>>>
>>> The field 'mem_snoopx' is an extension to the field 'mem_snoop'.
>>>
>>> If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.
>>>
>>
>> they should not be mutal exclusive. mem_snoopx provides the information where
>> the cacheline comes from while mem_snoop provides the status of the cacheline.
>> if hardware supports we can gather both information from the data source, like
>> above for ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM.
> 
> My understanding is the PERF_MEM_SNOOPX_PEER flag was extended for
> support Arm SPE.  Other snoop flags were original from x86 arch.
> 
> I agreed that in some cases above, both the flags PERF_MEM_SNOOPX_PEER
> and PERF_MEM_SNOOP_HITM can be set together, you can parse cache sharing
> with different --display options:
> 
>   perf c2c report --display tot    => based on HITM flags
>   perf c2c report --display peer   => based on SNOOPX_PEER flag
> 

that's exactly what we want to support :)

>> for other cases if there's mem_snoopx information I think mem_snoop can be dropped,
>> this won't make differeces. Checked c2c_decode_stats(), only PERF_MEM_SNOOP_HIT and
>> PERF_MEM_SNOOP_HITM is useful when summarizing c2c statistics.
> 
> It is about how to present accurate results.
> 
> E.g., for REMOTE_SOCKET type, it is hard to say the data from remote
> DRAM or any level's cache.  Since more hardware details are absent, this
> is why I suggested not to set 'mem_snoop' for REMOTE_SOCKET.
> 

this makes sense. will drop mem_snoop if no indications from the data source.

Thanks.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
  2025-04-23  7:57         ` Yicong Yang
@ 2025-04-24 11:57           ` Yicong Yang
  2025-04-24 12:48             ` Leo Yan
  0 siblings, 1 reply; 10+ messages in thread
From: Yicong Yang @ 2025-04-24 11:57 UTC (permalink / raw)
  To: Leo Yan
  Cc: yangyicong, James Clark, jonathan.cameron, hejunhao3, linuxarm,
	wangyushan12, caijingtao, xueshan2, prime.zeng, acme, namhyung,
	catalin.marinas, will, peterz, mingo, mark.rutland, jolsa,
	john.g.garry, leo.yan, irogers, linux-arm-kernel,
	linux-perf-users

Hi Leo,

On 2025/4/23 15:57, Yicong Yang wrote:
> On 2025/4/22 21:20, Leo Yan wrote:
>> On Tue, Apr 22, 2025 at 08:31:43PM +0800, Yicong Yang wrote:
>>
>> [...]
>>
>>>>>> +	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
>>>>>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
>>>>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>>>>
>>>> Seems to me, a CPU has L3 cache, would the cluster has a higher level's
>>>> cache?
>>>
>>> In my case, the cluster CPUs share the L3 cache and there's several clusters.
>>> L3's the highest level cache in the system.
>>
>> If so, you might need to revise the cache levels for:
>>
>>   ARM_SPE_HISI_HIP_PEER_CPU
>>   ARM_SPE_HISI_HIP_PEER_CPU_HITM
>>
>> IIUC, cluster CPUs share L3 cache, and every CPU in a cluster has
>> L1/L2 cache, for PEER_CPU cases, the memory level should be L2.
>>
> 
> confirmed with our hardware people, should be L2 for these two data sources.
> I misunderstood here, thanks for pointing it out.
> 

I recalled why the handling is like this. considering 2 threads have potential false
sharing issues which are running on core0 thread0 and core 1 thread 0 in the same
cluster, we'll have some ARM_SPE_HISI_HIP_PEER_CPU_HITM samples to indicate the
cacheline contention. If the cache level is L2 then we cannot observe this by
`perf c2c report -d tot`, since L2 is not counted for HITM.

does it make sense to have below change to account L2 hitm for lcl_hitm? just like
we account L2 peer snoop for lcl_peer.

diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 884d9aebce91..a384a866a562 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -680,7 +680,10 @@ do {                               \
                        if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
                        if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
                        if (lvl & P(LVL, L2)) {
-                               stats->ld_l2hit++;
+                               if (snoop & P(SNOOP, HITM))
+                                       HITM_INC(lcl_hitm);
+                               else
+                                       stats->ld_l2hit++;

                                if (snoopx & P(SNOOPX, PEER))
                                        PEER_INC(lcl_peer);


>> [...]
>>
>>>>>> +	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
>>>>>> +		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
>>>>>> +		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>>>>>> +		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>>>>>> +		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>>>>>
>>>>> Hi Yicong,
>>>>>
>>>>> Is the mem_snoop setting missing from this one?
>>>>
>>>> The field 'mem_snoopx' is an extension to the field 'mem_snoop'.
>>>>
>>>> If the field 'mem_snoopx' is set, it is no need to set 'mem_snoop'.
>>>>
>>>
>>> they should not be mutal exclusive. mem_snoopx provides the information where
>>> the cacheline comes from while mem_snoop provides the status of the cacheline.
>>> if hardware supports we can gather both information from the data source, like
>>> above for ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM.
>>
>> My understanding is the PERF_MEM_SNOOPX_PEER flag was extended for
>> support Arm SPE.  Other snoop flags were original from x86 arch.
>>
>> I agreed that in some cases above, both the flags PERF_MEM_SNOOPX_PEER
>> and PERF_MEM_SNOOP_HITM can be set together, you can parse cache sharing
>> with different --display options:
>>
>>   perf c2c report --display tot    => based on HITM flags
>>   perf c2c report --display peer   => based on SNOOPX_PEER flag
>>
> 
> that's exactly what we want to support :)
> 
>>> for other cases if there's mem_snoopx information I think mem_snoop can be dropped,
>>> this won't make differeces. Checked c2c_decode_stats(), only PERF_MEM_SNOOP_HIT and
>>> PERF_MEM_SNOOP_HITM is useful when summarizing c2c statistics.
>>
>> It is about how to present accurate results.
>>
>> E.g., for REMOTE_SOCKET type, it is hard to say the data from remote
>> DRAM or any level's cache.  Since more hardware details are absent, this
>> is why I suggested not to set 'mem_snoop' for REMOTE_SOCKET.
>>
> 
> this makes sense. will drop mem_snoop if no indications from the data source.
> 
> Thanks.
> 
> .
> 

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12
  2025-04-24 11:57           ` Yicong Yang
@ 2025-04-24 12:48             ` Leo Yan
  0 siblings, 0 replies; 10+ messages in thread
From: Leo Yan @ 2025-04-24 12:48 UTC (permalink / raw)
  To: Yicong Yang
  Cc: yangyicong, James Clark, jonathan.cameron, hejunhao3, linuxarm,
	wangyushan12, caijingtao, xueshan2, prime.zeng, acme, namhyung,
	catalin.marinas, will, peterz, mingo, mark.rutland, jolsa,
	john.g.garry, leo.yan, irogers, linux-arm-kernel,
	linux-perf-users

Hi Yicong,

On Thu, Apr 24, 2025 at 07:57:39PM +0800, Yicong Yang wrote:

[...]

> >> If so, you might need to revise the cache levels for:
> >>
> >>   ARM_SPE_HISI_HIP_PEER_CPU
> >>   ARM_SPE_HISI_HIP_PEER_CPU_HITM
> >>
> >> IIUC, cluster CPUs share L3 cache, and every CPU in a cluster has
> >> L1/L2 cache, for PEER_CPU cases, the memory level should be L2.
> >>
> > 
> > confirmed with our hardware people, should be L2 for these two data sources.
> > I misunderstood here, thanks for pointing it out.
> 
> I recalled why the handling is like this. considering 2 threads have potential false
> sharing issues which are running on core0 thread0 and core 1 thread 0 in the same
> cluster, we'll have some ARM_SPE_HISI_HIP_PEER_CPU_HITM samples to indicate the
> cacheline contention. If the cache level is L2 then we cannot observe this by
> `perf c2c report -d tot`, since L2 is not counted for HITM.
> 
> does it make sense to have below change to account L2 hitm for lcl_hitm? just like
> we account L2 peer snoop for lcl_peer.

Yes.  The change below makes sense to me.

> diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
> index 884d9aebce91..a384a866a562 100644
> --- a/tools/perf/util/mem-events.c
> +++ b/tools/perf/util/mem-events.c
> @@ -680,7 +680,10 @@ do {                               \
>                         if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
>                         if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
>                         if (lvl & P(LVL, L2)) {
> -                               stats->ld_l2hit++;
> +                               if (snoop & P(SNOOP, HITM))
> +                                       HITM_INC(lcl_hitm);
> +                               else
> +                                       stats->ld_l2hit++;
> 
>                                 if (snoopx & P(SNOOPX, PEER))
>                                         PEER_INC(lcl_peer);

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2025-04-24 12:48 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-04-08 12:28 [PATCH] perf arm-spe: Add support for SPE Data Source packet on HiSilicon HIP12 Yicong Yang
2025-04-22  9:01 ` Yicong Yang
2025-04-22  9:16 ` James Clark
2025-04-22 10:11   ` Yicong Yang
2025-04-22 10:29   ` Leo Yan
2025-04-22 12:31     ` Yicong Yang
2025-04-22 13:20       ` Leo Yan
2025-04-23  7:57         ` Yicong Yang
2025-04-24 11:57           ` Yicong Yang
2025-04-24 12:48             ` Leo Yan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).