linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
@ 2024-10-24 23:30 Ilkka Koskinen
  2024-10-25  8:35 ` James Clark
  2024-10-25 14:43 ` Leo Yan
  0 siblings, 2 replies; 5+ messages in thread
From: Ilkka Koskinen @ 2024-10-24 23:30 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Liang, Kan, John Garry, Will Deacon,
	James Clark, Mike Leach, Leo Yan
  Cc: Ilkka Koskinen, linux-perf-users, linux-kernel, linux-arm-kernel

Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.

Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
---
 .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
 tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 1443c28545a9..e4115b1e92b2 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
 	ARM_SPE_NV_DRAM		 = 0xe,
 };
 
+enum arm_spe_ampereone_data_source {
+	ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE	= 0x0,
+	ARM_SPE_AMPEREONE_SLC				= 0x3,
+	ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE		= 0x5,
+	ARM_SPE_AMPEREONE_DDR				= 0x7,
+	ARM_SPE_AMPEREONE_L1D				= 0x8,
+	ARM_SPE_AMPEREONE_L2D				= 0x9,
+};
+
 struct arm_spe_record {
 	enum arm_spe_sample_type type;
 	int err;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 138ffc71b32d..04bd21ad7ea8 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
 		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
 }
 
+static const struct midr_range ampereone_source_spe[] = {
+	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+	{},
+};
+
+static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
+						 union perf_mem_data_src *data_src,
+						 u64 midr)
+{
+	if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
+		arm_spe__synth_data_source_generic(record, data_src);
+		return;
+	}
+
+	if (record->op & ARM_SPE_OP_ST) {
+		data_src->mem_lvl = PERF_MEM_LVL_NA;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+		return;
+	}
+
+	switch (record->source) {
+	case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
+		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_AMPEREONE_SLC:
+		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+		break;
+	case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
+		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_AMPEREONE_DDR:
+		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_AMPEREONE_L1D:
+		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_AMPEREONE_L2D:
+		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	default:
+		break;
+	}
+}
+
 static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
 {
 	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
 	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
+	bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
 
 	if (record->op & ARM_SPE_OP_LD)
 		data_src.mem_op = PERF_MEM_OP_LOAD;
@@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
 
 	if (is_neoverse)
 		arm_spe__synth_data_source_neoverse(record, &data_src);
+	else if (is_ampereone)
+		arm_spe__synth_data_source_ampereone(record, &data_src, midr);
 	else
 		arm_spe__synth_data_source_generic(record, &data_src);
 
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
  2024-10-24 23:30 [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne Ilkka Koskinen
@ 2024-10-25  8:35 ` James Clark
  2024-10-29  2:06   ` Ilkka Koskinen
  2024-10-25 14:43 ` Leo Yan
  1 sibling, 1 reply; 5+ messages in thread
From: James Clark @ 2024-10-25  8:35 UTC (permalink / raw)
  To: Ilkka Koskinen
  Cc: linux-perf-users, linux-kernel, linux-arm-kernel, Peter Zijlstra,
	Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, John Garry, Will Deacon, Mike Leach, Leo Yan



On 25/10/2024 12:30 am, Ilkka Koskinen wrote:
> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
> 
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> ---
>   .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
>   tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
>   2 files changed, 70 insertions(+)
> 
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 1443c28545a9..e4115b1e92b2 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
>   	ARM_SPE_NV_DRAM		 = 0xe,
>   };
>   
> +enum arm_spe_ampereone_data_source {
> +	ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE	= 0x0,
> +	ARM_SPE_AMPEREONE_SLC				= 0x3,
> +	ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE		= 0x5,
> +	ARM_SPE_AMPEREONE_DDR				= 0x7,
> +	ARM_SPE_AMPEREONE_L1D				= 0x8,
> +	ARM_SPE_AMPEREONE_L2D				= 0x9,
> +};
> +
>   struct arm_spe_record {
>   	enum arm_spe_sample_type type;
>   	int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 138ffc71b32d..04bd21ad7ea8 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
>   		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
>   }
>   
> +static const struct midr_range ampereone_source_spe[] = {
> +	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
> +	{},
> +};
> +
> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
> +						 union perf_mem_data_src *data_src,
> +						 u64 midr)
> +{
> +	if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
> +		arm_spe__synth_data_source_generic(record, data_src);
> +		return;
> +	}
[...]
>   static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
>   {
>   	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
>   	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
> +	bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
>   

Hi Ilkka,

I think this read_cpuid_implementor() is for the device that's running 
report, rather than record. You need to use the midr that's saved into 
the file.

But it looks like you've done that for is_midr_in_range_list(midr, 
ampereone_source_spe) above. Is it possible to just do that and not 
read_cpuid_implementor() and then it's done the same way as neoverse and 
also works off target?

Thanks

James


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
  2024-10-24 23:30 [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne Ilkka Koskinen
  2024-10-25  8:35 ` James Clark
@ 2024-10-25 14:43 ` Leo Yan
  2024-10-29  2:28   ` Ilkka Koskinen
  1 sibling, 1 reply; 5+ messages in thread
From: Leo Yan @ 2024-10-25 14:43 UTC (permalink / raw)
  To: Ilkka Koskinen
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Liang, Kan, John Garry, Will Deacon,
	James Clark, Mike Leach, Leo Yan, linux-perf-users, linux-kernel,
	linux-arm-kernel

On Thu, Oct 24, 2024 at 11:30:35PM +0000, Ilkka Koskinen wrote:
> 
> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
> 
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> ---
>  .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
>  tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
>  2 files changed, 70 insertions(+)
> 
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 1443c28545a9..e4115b1e92b2 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
>         ARM_SPE_NV_DRAM          = 0xe,
>  };
> 
> +enum arm_spe_ampereone_data_source {
> +       ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE    = 0x0,
> +       ARM_SPE_AMPEREONE_SLC                           = 0x3,
> +       ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE             = 0x5,
> +       ARM_SPE_AMPEREONE_DDR                           = 0x7,
> +       ARM_SPE_AMPEREONE_L1D                           = 0x8,
> +       ARM_SPE_AMPEREONE_L2D                           = 0x9,
> +};
> +
>  struct arm_spe_record {
>         enum arm_spe_sample_type type;
>         int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 138ffc71b32d..04bd21ad7ea8 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
>                 data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
>  }
> 
> +static const struct midr_range ampereone_source_spe[] = {
> +       MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
> +       {},
> +};
> +
> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
> +                                                union perf_mem_data_src *data_src,
> +                                                u64 midr)
> +{
> +       if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
> +               arm_spe__synth_data_source_generic(record, data_src);
> +               return;
> +       }

With James' suggestion, I don't think here need to check the CPU
variant again.  All generic data source generating should run in the 
arm_spe__synth_data_source() function.

> +
> +       if (record->op & ARM_SPE_OP_ST) {
> +               data_src->mem_lvl = PERF_MEM_LVL_NA;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NA;
> +               return;
> +       }
> +
> +       switch (record->source) {
> +       case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
> +               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> +               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +               break;
> +       case ARM_SPE_AMPEREONE_SLC:
> +               data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
> +               break;
> +       case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
> +               data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> +               data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> +               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +               break;
> +       case ARM_SPE_AMPEREONE_DDR:
> +               data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +               break;
> +       case ARM_SPE_AMPEREONE_L1D:
> +               data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +               break;
> +       case ARM_SPE_AMPEREONE_L2D:
> +               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +               break;

We have another way to do this.  If convert the SoC specific data source
to common data source values, e.g.

  ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE -> ARM_SPE_NV_PEER_CORE
  ARM_SPE_AMPEREONE_SLC -> ARM_SPE_NV_SYS_CACHE
  ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE -> ARM_SPE_NV_REMOTE
  ARM_SPE_AMPEREONE_DDR -> ARM_SPE_NV_DRAM
  ...

Then we don't need to maintain two functions with almost same setting.

I have no strong opinion for this. A dedicated function for Ampere CPU
might give a bit flexiblity for later tweaking. It is up to you.

Last thing, please work on the the latest perf-tools-next branch:

  https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git
  branch: perf-tools-next

Recently we have Arm SPE data source refactoring, please rebase on it.

Thanks,
Leo

> +       default:
> +               break;
> +       }
> +}
> +
>  static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
>  {
>         union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
>         bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
> +       bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
> 
>         if (record->op & ARM_SPE_OP_LD)
>                 data_src.mem_op = PERF_MEM_OP_LOAD;
> @@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
> 
>         if (is_neoverse)
>                 arm_spe__synth_data_source_neoverse(record, &data_src);
> +       else if (is_ampereone)
> +               arm_spe__synth_data_source_ampereone(record, &data_src, midr);
>         else
>                 arm_spe__synth_data_source_generic(record, &data_src);
> 
> --
> 2.47.0
> 
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
  2024-10-25  8:35 ` James Clark
@ 2024-10-29  2:06   ` Ilkka Koskinen
  0 siblings, 0 replies; 5+ messages in thread
From: Ilkka Koskinen @ 2024-10-29  2:06 UTC (permalink / raw)
  To: James Clark
  Cc: Ilkka Koskinen, linux-perf-users, linux-kernel, linux-arm-kernel,
	Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Liang, Kan, John Garry, Will Deacon,
	Mike Leach, Leo Yan



Hi James,

On Fri, 25 Oct 2024, James Clark wrote:
> On 25/10/2024 12:30 am, Ilkka Koskinen wrote:
>> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
>> 
>> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
>> ---
>>   .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
>>   tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
>>   2 files changed, 70 insertions(+)
>> 
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h 
>> b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> index 1443c28545a9..e4115b1e92b2 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
>>   	ARM_SPE_NV_DRAM		 = 0xe,
>>   };
>>   +enum arm_spe_ampereone_data_source {
>> +	ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE	= 0x0,
>> +	ARM_SPE_AMPEREONE_SLC				= 0x3,
>> +	ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE		= 0x5,
>> +	ARM_SPE_AMPEREONE_DDR				= 0x7,
>> +	ARM_SPE_AMPEREONE_L1D				= 0x8,
>> +	ARM_SPE_AMPEREONE_L2D				= 0x9,
>> +};
>> +
>>   struct arm_spe_record {
>>   	enum arm_spe_sample_type type;
>>   	int err;
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index 138ffc71b32d..04bd21ad7ea8 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const 
>> struct arm_spe_record *reco
>>   		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
>>   }
>>   +static const struct midr_range ampereone_source_spe[] = {
>> +	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
>> +	{},
>> +};
>> +
>> +static void arm_spe__synth_data_source_ampereone(const struct 
>> arm_spe_record *record,
>> +						 union perf_mem_data_src 
>> *data_src,
>> +						 u64 midr)
>> +{
>> +	if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
>> +		arm_spe__synth_data_source_generic(record, data_src);
>> +		return;
>> +	}
> [...]
>>   static u64 arm_spe__synth_data_source(const struct arm_spe_record 
>> *record, u64 midr)
>>   {
>>   	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
>>   	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
>> +	bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
>> 
>
> Hi Ilkka,
>
> I think this read_cpuid_implementor() is for the device that's running 
> report, rather than record. You need to use the midr that's saved into the 
> file.

Uh, that didn't come to my mind at all.

> But it looks like you've done that for is_midr_in_range_list(midr, 
> ampereone_source_spe) above. Is it possible to just do that and not 
> read_cpuid_implementor() and then it's done the same way as neoverse and also 
> works off target?

You're right, it's wrong and there's no even need for separate implementor 
id check. I fix it.

Cheers, Ilkka

>
> Thanks
>
> James
>
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
  2024-10-25 14:43 ` Leo Yan
@ 2024-10-29  2:28   ` Ilkka Koskinen
  0 siblings, 0 replies; 5+ messages in thread
From: Ilkka Koskinen @ 2024-10-29  2:28 UTC (permalink / raw)
  To: Leo Yan
  Cc: Ilkka Koskinen, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, John Garry, Will Deacon, James Clark, Mike Leach,
	Leo Yan, linux-perf-users, linux-kernel, linux-arm-kernel


Hi Leo,

On Fri, 25 Oct 2024, Leo Yan wrote:
> On Thu, Oct 24, 2024 at 11:30:35PM +0000, Ilkka Koskinen wrote:
>>
>> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
>>
>> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
>> ---
>>  .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
>>  tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
>>  2 files changed, 70 insertions(+)
>>
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> index 1443c28545a9..e4115b1e92b2 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
>>         ARM_SPE_NV_DRAM          = 0xe,
>>  };
>>
>> +enum arm_spe_ampereone_data_source {
>> +       ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE    = 0x0,
>> +       ARM_SPE_AMPEREONE_SLC                           = 0x3,
>> +       ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE             = 0x5,
>> +       ARM_SPE_AMPEREONE_DDR                           = 0x7,
>> +       ARM_SPE_AMPEREONE_L1D                           = 0x8,
>> +       ARM_SPE_AMPEREONE_L2D                           = 0x9,
>> +};
>> +
>>  struct arm_spe_record {
>>         enum arm_spe_sample_type type;
>>         int err;
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index 138ffc71b32d..04bd21ad7ea8 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
>>                 data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
>>  }
>>
>> +static const struct midr_range ampereone_source_spe[] = {
>> +       MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
>> +       {},
>> +};
>> +
>> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
>> +                                                union perf_mem_data_src *data_src,
>> +                                                u64 midr)
>> +{
>> +       if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
>> +               arm_spe__synth_data_source_generic(record, data_src);
>> +               return;
>> +       }
>
> With James' suggestion, I don't think here need to check the CPU
> variant again.  All generic data source generating should run in the
> arm_spe__synth_data_source() function.

Yep, checking the implementor ID was just wrong and unnecessary.
I fix that.

>
>> +
>> +       if (record->op & ARM_SPE_OP_ST) {
>> +               data_src->mem_lvl = PERF_MEM_LVL_NA;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
>> +               data_src->mem_snoop = PERF_MEM_SNOOP_NA;
>> +               return;
>> +       }
>> +
>> +       switch (record->source) {
>> +       case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
>> +               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
>> +               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> +               break;
>> +       case ARM_SPE_AMPEREONE_SLC:
>> +               data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> +               data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
>> +               break;
>> +       case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
>> +               data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>> +               data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>> +               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> +               break;
>> +       case ARM_SPE_AMPEREONE_DDR:
>> +               data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
>> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> +               break;
>> +       case ARM_SPE_AMPEREONE_L1D:
>> +               data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
>> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> +               break;
>> +       case ARM_SPE_AMPEREONE_L2D:
>> +               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
>> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> +               break;
>
> We have another way to do this.  If convert the SoC specific data source
> to common data source values, e.g.
>
>  ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE -> ARM_SPE_NV_PEER_CORE
>  ARM_SPE_AMPEREONE_SLC -> ARM_SPE_NV_SYS_CACHE
>  ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE -> ARM_SPE_NV_REMOTE
>  ARM_SPE_AMPEREONE_DDR -> ARM_SPE_NV_DRAM
>  ...
>
> Then we don't need to maintain two functions with almost same setting.
>
> I have no strong opinion for this. A dedicated function for Ampere CPU
> might give a bit flexiblity for later tweaking. It is up to you.

Let me think about it and see how it would look like.

>
> Last thing, please work on the the latest perf-tools-next branch:
>
>  https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git
>  branch: perf-tools-next
>
> Recently we have Arm SPE data source refactoring, please rebase on it.

Uh, for some reason I rebased it on top of Will's arm64 tree. Sorry about 
that.

Cheers, Ilkka

>
> Thanks,
> Leo
>
>> +       default:
>> +               break;
>> +       }
>> +}
>> +
>>  static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
>>  {
>>         union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
>>         bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
>> +       bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
>>
>>         if (record->op & ARM_SPE_OP_LD)
>>                 data_src.mem_op = PERF_MEM_OP_LOAD;
>> @@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
>>
>>         if (is_neoverse)
>>                 arm_spe__synth_data_source_neoverse(record, &data_src);
>> +       else if (is_ampereone)
>> +               arm_spe__synth_data_source_ampereone(record, &data_src, midr);
>>         else
>>                 arm_spe__synth_data_source_generic(record, &data_src);
>>
>> --
>> 2.47.0
>>
>>
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-10-29  2:28 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-10-24 23:30 [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne Ilkka Koskinen
2024-10-25  8:35 ` James Clark
2024-10-29  2:06   ` Ilkka Koskinen
2024-10-25 14:43 ` Leo Yan
2024-10-29  2:28   ` Ilkka Koskinen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).