* [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
@ 2024-10-24 23:30 Ilkka Koskinen
2024-10-25 8:35 ` James Clark
2024-10-25 14:43 ` Leo Yan
0 siblings, 2 replies; 5+ messages in thread
From: Ilkka Koskinen @ 2024-10-24 23:30 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Liang, Kan, John Garry, Will Deacon,
James Clark, Mike Leach, Leo Yan
Cc: Ilkka Koskinen, linux-perf-users, linux-kernel, linux-arm-kernel
Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
---
.../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++
tools/perf/util/arm-spe.c | 61 +++++++++++++++++++
2 files changed, 70 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 1443c28545a9..e4115b1e92b2 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
ARM_SPE_NV_DRAM = 0xe,
};
+enum arm_spe_ampereone_data_source {
+ ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0,
+ ARM_SPE_AMPEREONE_SLC = 0x3,
+ ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5,
+ ARM_SPE_AMPEREONE_DDR = 0x7,
+ ARM_SPE_AMPEREONE_L1D = 0x8,
+ ARM_SPE_AMPEREONE_L2D = 0x9,
+};
+
struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 138ffc71b32d..04bd21ad7ea8 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}
+static const struct midr_range ampereone_source_spe[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+
+static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src,
+ u64 midr)
+{
+ if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
+ arm_spe__synth_data_source_generic(record, data_src);
+ return;
+ }
+
+ if (record->op & ARM_SPE_OP_ST) {
+ data_src->mem_lvl = PERF_MEM_LVL_NA;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+ return;
+ }
+
+ switch (record->source) {
+ case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_AMPEREONE_SLC:
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+ break;
+ case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_AMPEREONE_DDR:
+ data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_AMPEREONE_L1D:
+ data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_AMPEREONE_L2D:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ default:
+ break;
+ }
+}
+
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
{
union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
+ bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
if (record->op & ARM_SPE_OP_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
@@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
if (is_neoverse)
arm_spe__synth_data_source_neoverse(record, &data_src);
+ else if (is_ampereone)
+ arm_spe__synth_data_source_ampereone(record, &data_src, midr);
else
arm_spe__synth_data_source_generic(record, &data_src);
--
2.47.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
2024-10-24 23:30 [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne Ilkka Koskinen
@ 2024-10-25 8:35 ` James Clark
2024-10-29 2:06 ` Ilkka Koskinen
2024-10-25 14:43 ` Leo Yan
1 sibling, 1 reply; 5+ messages in thread
From: James Clark @ 2024-10-25 8:35 UTC (permalink / raw)
To: Ilkka Koskinen
Cc: linux-perf-users, linux-kernel, linux-arm-kernel, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, John Garry, Will Deacon, Mike Leach, Leo Yan
On 25/10/2024 12:30 am, Ilkka Koskinen wrote:
> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
>
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> ---
> .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++
> tools/perf/util/arm-spe.c | 61 +++++++++++++++++++
> 2 files changed, 70 insertions(+)
>
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 1443c28545a9..e4115b1e92b2 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
> ARM_SPE_NV_DRAM = 0xe,
> };
>
> +enum arm_spe_ampereone_data_source {
> + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0,
> + ARM_SPE_AMPEREONE_SLC = 0x3,
> + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5,
> + ARM_SPE_AMPEREONE_DDR = 0x7,
> + ARM_SPE_AMPEREONE_L1D = 0x8,
> + ARM_SPE_AMPEREONE_L2D = 0x9,
> +};
> +
> struct arm_spe_record {
> enum arm_spe_sample_type type;
> int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 138ffc71b32d..04bd21ad7ea8 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
> data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
> }
>
> +static const struct midr_range ampereone_source_spe[] = {
> + MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
> + {},
> +};
> +
> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
> + union perf_mem_data_src *data_src,
> + u64 midr)
> +{
> + if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
> + arm_spe__synth_data_source_generic(record, data_src);
> + return;
> + }
[...]
> static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
> {
> union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
> bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
> + bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
>
Hi Ilkka,
I think this read_cpuid_implementor() is for the device that's running
report, rather than record. You need to use the midr that's saved into
the file.
But it looks like you've done that for is_midr_in_range_list(midr,
ampereone_source_spe) above. Is it possible to just do that and not
read_cpuid_implementor() and then it's done the same way as neoverse and
also works off target?
Thanks
James
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
2024-10-24 23:30 [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne Ilkka Koskinen
2024-10-25 8:35 ` James Clark
@ 2024-10-25 14:43 ` Leo Yan
2024-10-29 2:28 ` Ilkka Koskinen
1 sibling, 1 reply; 5+ messages in thread
From: Leo Yan @ 2024-10-25 14:43 UTC (permalink / raw)
To: Ilkka Koskinen
Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Liang, Kan, John Garry, Will Deacon,
James Clark, Mike Leach, Leo Yan, linux-perf-users, linux-kernel,
linux-arm-kernel
On Thu, Oct 24, 2024 at 11:30:35PM +0000, Ilkka Koskinen wrote:
>
> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
>
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> ---
> .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++
> tools/perf/util/arm-spe.c | 61 +++++++++++++++++++
> 2 files changed, 70 insertions(+)
>
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 1443c28545a9..e4115b1e92b2 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
> ARM_SPE_NV_DRAM = 0xe,
> };
>
> +enum arm_spe_ampereone_data_source {
> + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0,
> + ARM_SPE_AMPEREONE_SLC = 0x3,
> + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5,
> + ARM_SPE_AMPEREONE_DDR = 0x7,
> + ARM_SPE_AMPEREONE_L1D = 0x8,
> + ARM_SPE_AMPEREONE_L2D = 0x9,
> +};
> +
> struct arm_spe_record {
> enum arm_spe_sample_type type;
> int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 138ffc71b32d..04bd21ad7ea8 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
> data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
> }
>
> +static const struct midr_range ampereone_source_spe[] = {
> + MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
> + {},
> +};
> +
> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
> + union perf_mem_data_src *data_src,
> + u64 midr)
> +{
> + if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
> + arm_spe__synth_data_source_generic(record, data_src);
> + return;
> + }
With James' suggestion, I don't think here need to check the CPU
variant again. All generic data source generating should run in the
arm_spe__synth_data_source() function.
> +
> + if (record->op & ARM_SPE_OP_ST) {
> + data_src->mem_lvl = PERF_MEM_LVL_NA;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NA;
> + return;
> + }
> +
> + switch (record->source) {
> + case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
> + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_AMPEREONE_SLC:
> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> + data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
> + break;
> + case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> + break;
> + case ARM_SPE_AMPEREONE_DDR:
> + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + break;
> + case ARM_SPE_AMPEREONE_L1D:
> + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + break;
> + case ARM_SPE_AMPEREONE_L2D:
> + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> + break;
We have another way to do this. If convert the SoC specific data source
to common data source values, e.g.
ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE -> ARM_SPE_NV_PEER_CORE
ARM_SPE_AMPEREONE_SLC -> ARM_SPE_NV_SYS_CACHE
ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE -> ARM_SPE_NV_REMOTE
ARM_SPE_AMPEREONE_DDR -> ARM_SPE_NV_DRAM
...
Then we don't need to maintain two functions with almost same setting.
I have no strong opinion for this. A dedicated function for Ampere CPU
might give a bit flexiblity for later tweaking. It is up to you.
Last thing, please work on the the latest perf-tools-next branch:
https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git
branch: perf-tools-next
Recently we have Arm SPE data source refactoring, please rebase on it.
Thanks,
Leo
> + default:
> + break;
> + }
> +}
> +
> static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
> {
> union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
> bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
> + bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
>
> if (record->op & ARM_SPE_OP_LD)
> data_src.mem_op = PERF_MEM_OP_LOAD;
> @@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
>
> if (is_neoverse)
> arm_spe__synth_data_source_neoverse(record, &data_src);
> + else if (is_ampereone)
> + arm_spe__synth_data_source_ampereone(record, &data_src, midr);
> else
> arm_spe__synth_data_source_generic(record, &data_src);
>
> --
> 2.47.0
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
2024-10-25 8:35 ` James Clark
@ 2024-10-29 2:06 ` Ilkka Koskinen
0 siblings, 0 replies; 5+ messages in thread
From: Ilkka Koskinen @ 2024-10-29 2:06 UTC (permalink / raw)
To: James Clark
Cc: Ilkka Koskinen, linux-perf-users, linux-kernel, linux-arm-kernel,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, Liang, Kan, John Garry, Will Deacon,
Mike Leach, Leo Yan
Hi James,
On Fri, 25 Oct 2024, James Clark wrote:
> On 25/10/2024 12:30 am, Ilkka Koskinen wrote:
>> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
>>
>> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
>> ---
>> .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++
>> tools/perf/util/arm-spe.c | 61 +++++++++++++++++++
>> 2 files changed, 70 insertions(+)
>>
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> index 1443c28545a9..e4115b1e92b2 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
>> ARM_SPE_NV_DRAM = 0xe,
>> };
>> +enum arm_spe_ampereone_data_source {
>> + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0,
>> + ARM_SPE_AMPEREONE_SLC = 0x3,
>> + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5,
>> + ARM_SPE_AMPEREONE_DDR = 0x7,
>> + ARM_SPE_AMPEREONE_L1D = 0x8,
>> + ARM_SPE_AMPEREONE_L2D = 0x9,
>> +};
>> +
>> struct arm_spe_record {
>> enum arm_spe_sample_type type;
>> int err;
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index 138ffc71b32d..04bd21ad7ea8 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const
>> struct arm_spe_record *reco
>> data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
>> }
>> +static const struct midr_range ampereone_source_spe[] = {
>> + MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
>> + {},
>> +};
>> +
>> +static void arm_spe__synth_data_source_ampereone(const struct
>> arm_spe_record *record,
>> + union perf_mem_data_src
>> *data_src,
>> + u64 midr)
>> +{
>> + if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
>> + arm_spe__synth_data_source_generic(record, data_src);
>> + return;
>> + }
> [...]
>> static u64 arm_spe__synth_data_source(const struct arm_spe_record
>> *record, u64 midr)
>> {
>> union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
>> bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
>> + bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
>>
>
> Hi Ilkka,
>
> I think this read_cpuid_implementor() is for the device that's running
> report, rather than record. You need to use the midr that's saved into the
> file.
Uh, that didn't come to my mind at all.
> But it looks like you've done that for is_midr_in_range_list(midr,
> ampereone_source_spe) above. Is it possible to just do that and not
> read_cpuid_implementor() and then it's done the same way as neoverse and also
> works off target?
You're right, it's wrong and there's no even need for separate implementor
id check. I fix it.
Cheers, Ilkka
>
> Thanks
>
> James
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
2024-10-25 14:43 ` Leo Yan
@ 2024-10-29 2:28 ` Ilkka Koskinen
0 siblings, 0 replies; 5+ messages in thread
From: Ilkka Koskinen @ 2024-10-29 2:28 UTC (permalink / raw)
To: Leo Yan
Cc: Ilkka Koskinen, Peter Zijlstra, Ingo Molnar,
Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, John Garry, Will Deacon, James Clark, Mike Leach,
Leo Yan, linux-perf-users, linux-kernel, linux-arm-kernel
Hi Leo,
On Fri, 25 Oct 2024, Leo Yan wrote:
> On Thu, Oct 24, 2024 at 11:30:35PM +0000, Ilkka Koskinen wrote:
>>
>> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
>>
>> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
>> ---
>> .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++
>> tools/perf/util/arm-spe.c | 61 +++++++++++++++++++
>> 2 files changed, 70 insertions(+)
>>
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> index 1443c28545a9..e4115b1e92b2 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
>> ARM_SPE_NV_DRAM = 0xe,
>> };
>>
>> +enum arm_spe_ampereone_data_source {
>> + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0,
>> + ARM_SPE_AMPEREONE_SLC = 0x3,
>> + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5,
>> + ARM_SPE_AMPEREONE_DDR = 0x7,
>> + ARM_SPE_AMPEREONE_L1D = 0x8,
>> + ARM_SPE_AMPEREONE_L2D = 0x9,
>> +};
>> +
>> struct arm_spe_record {
>> enum arm_spe_sample_type type;
>> int err;
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index 138ffc71b32d..04bd21ad7ea8 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
>> data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
>> }
>>
>> +static const struct midr_range ampereone_source_spe[] = {
>> + MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
>> + {},
>> +};
>> +
>> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
>> + union perf_mem_data_src *data_src,
>> + u64 midr)
>> +{
>> + if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
>> + arm_spe__synth_data_source_generic(record, data_src);
>> + return;
>> + }
>
> With James' suggestion, I don't think here need to check the CPU
> variant again. All generic data source generating should run in the
> arm_spe__synth_data_source() function.
Yep, checking the implementor ID was just wrong and unnecessary.
I fix that.
>
>> +
>> + if (record->op & ARM_SPE_OP_ST) {
>> + data_src->mem_lvl = PERF_MEM_LVL_NA;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
>> + data_src->mem_snoop = PERF_MEM_SNOOP_NA;
>> + return;
>> + }
>> +
>> + switch (record->source) {
>> + case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
>> + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> + break;
>> + case ARM_SPE_AMPEREONE_SLC:
>> + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> + data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
>> + break;
>> + case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
>> + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>> + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>> + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> + break;
>> + case ARM_SPE_AMPEREONE_DDR:
>> + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
>> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> + break;
>> + case ARM_SPE_AMPEREONE_L1D:
>> + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
>> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> + break;
>> + case ARM_SPE_AMPEREONE_L2D:
>> + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
>> + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
>> + data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> + break;
>
> We have another way to do this. If convert the SoC specific data source
> to common data source values, e.g.
>
> ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE -> ARM_SPE_NV_PEER_CORE
> ARM_SPE_AMPEREONE_SLC -> ARM_SPE_NV_SYS_CACHE
> ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE -> ARM_SPE_NV_REMOTE
> ARM_SPE_AMPEREONE_DDR -> ARM_SPE_NV_DRAM
> ...
>
> Then we don't need to maintain two functions with almost same setting.
>
> I have no strong opinion for this. A dedicated function for Ampere CPU
> might give a bit flexiblity for later tweaking. It is up to you.
Let me think about it and see how it would look like.
>
> Last thing, please work on the the latest perf-tools-next branch:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git
> branch: perf-tools-next
>
> Recently we have Arm SPE data source refactoring, please rebase on it.
Uh, for some reason I rebased it on top of Will's arm64 tree. Sorry about
that.
Cheers, Ilkka
>
> Thanks,
> Leo
>
>> + default:
>> + break;
>> + }
>> +}
>> +
>> static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
>> {
>> union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
>> bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
>> + bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
>>
>> if (record->op & ARM_SPE_OP_LD)
>> data_src.mem_op = PERF_MEM_OP_LOAD;
>> @@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
>>
>> if (is_neoverse)
>> arm_spe__synth_data_source_neoverse(record, &data_src);
>> + else if (is_ampereone)
>> + arm_spe__synth_data_source_ampereone(record, &data_src, midr);
>> else
>> arm_spe__synth_data_source_generic(record, &data_src);
>>
>> --
>> 2.47.0
>>
>>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-10-29 2:28 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-10-24 23:30 [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne Ilkka Koskinen
2024-10-25 8:35 ` James Clark
2024-10-29 2:06 ` Ilkka Koskinen
2024-10-25 14:43 ` Leo Yan
2024-10-29 2:28 ` Ilkka Koskinen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).