From: Ali Saidi <alisaidi@amazon.com>
To: <linux-kernel@vger.kernel.org>,
<linux-perf-users@vger.kernel.org>,
<linux-arm-kernel@lists.infradead.org>, <german.gomez@arm.com>,
<leo.yan@linaro.org>
Cc: <alisaidi@amazon.com>, <benh@kernel.crashing.org>,
Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>,
"Arnaldo Carvalho de Melo" <acme@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
"Alexander Shishkin" <alexander.shishkin@linux.intel.com>,
Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
John Garry <john.garry@huawei.com>,
"Will Deacon" <will@kernel.org>,
Mathieu Poirier <mathieu.poirier@linaro.org>,
"James Clark" <james.clark@arm.com>,
Andrew Kilroy <andrew.kilroy@arm.com>,
Jin Yao <yao.jin@linux.intel.com>,
Kajol Jain <kjain@linux.ibm.com>,
Li Huafei <lihuafei1@huawei.com>
Subject: [PATCH v2 1/2] perf arm-spe: Use SPE data source for neoverse cores
Date: Mon, 21 Feb 2022 22:47:59 +0000 [thread overview]
Message-ID: <20220221224807.18172-1-alisaidi@amazon.com> (raw)
When synthesizing data from SPE, augment the type with source information
for Arm Neoverse cores. The field is IMPLDEF but the Neoverse cores all use
the same encoding. I can't find encoding information for any other SPE
implementations to unify their choices with Arm's thus that is left for
future work.
This change populates the mem_lvl_num for Neoverse cores instead of the
deprecated mem_lvl namespace.
Signed-off-by: Ali Saidi <alisaidi@amazon.com>
---
.../util/arm-spe-decoder/arm-spe-decoder.c | 1 +
.../util/arm-spe-decoder/arm-spe-decoder.h | 12 ++
tools/perf/util/arm-spe.c | 106 +++++++++++++++---
3 files changed, 104 insertions(+), 15 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 5e390a1a79ab..091987dd3966 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -220,6 +220,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
break;
case ARM_SPE_DATA_SOURCE:
+ decoder->record.source = payload;
break;
case ARM_SPE_BAD:
break;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 69b31084d6be..c81bf90c0996 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -29,6 +29,17 @@ enum arm_spe_op_type {
ARM_SPE_ST = 1 << 1,
};
+enum arm_spe_neoverse_data_source {
+ ARM_SPE_NV_L1D = 0x0,
+ ARM_SPE_NV_L2 = 0x8,
+ ARM_SPE_NV_PEER_CORE = 0x9,
+ ARM_SPE_NV_LCL_CLSTR = 0xa,
+ ARM_SPE_NV_SYS_CACHE = 0xb,
+ ARM_SPE_NV_PEER_CLSTR = 0xc,
+ ARM_SPE_NV_REMOTE = 0xd,
+ ARM_SPE_NV_DRAM = 0xe,
+};
+
struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
@@ -40,6 +51,7 @@ struct arm_spe_record {
u64 virt_addr;
u64 phys_addr;
u64 context_id;
+ u16 source;
};
struct arm_spe_insn;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index d2b64e3f588b..e0243c2fed5f 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -34,6 +34,7 @@
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
+#include <../../../arch/arm64/include/asm/cputype.h>
#define MAX_TIMESTAMP (~0ULL)
struct arm_spe {
@@ -45,6 +46,7 @@ struct arm_spe {
struct perf_session *session;
struct machine *machine;
u32 pmu_type;
+ u64 midr;
struct perf_tsc_conversion tc;
@@ -399,33 +401,103 @@ static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
return false;
}
-static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
-{
- union perf_mem_data_src data_src = { 0 };
+static const struct midr_range neoverse_spe[] = {
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ {},
+};
- if (record->op == ARM_SPE_LD)
- data_src.mem_op = PERF_MEM_OP_LOAD;
- else
- data_src.mem_op = PERF_MEM_OP_STORE;
+static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
+ switch (record->source) {
+ case ARM_SPE_NV_L1D:
+ data_src->mem_lvl = PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ break;
+ case ARM_SPE_NV_L2:
+ data_src->mem_lvl = PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ break;
+ case ARM_SPE_NV_PEER_CORE:
+ data_src->mem_lvl = PERF_MEM_LVL_HIT;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+ break;
+ /*
+ * We don't know if this is L1, L2, or even L3 (for the cases the system
+ * has an L3, but we do know it was a cache-2-cache transfer, so set
+ * SNOOP_HITM
+ */
+ case ARM_SPE_NV_LCL_CLSTR:
+ case ARM_SPE_NV_PEER_CLSTR:
+ data_src->mem_lvl = PERF_MEM_LVL_HIT;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+ break;
+ /*
+ * System cache is assumed to be L4, as cluster cache (if it exists)
+ * would be L3 cache on Neoverse platforms
+ */
+ case ARM_SPE_NV_SYS_CACHE:
+ data_src->mem_lvl = PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L4;
+ break;
+ /*
+ * We don't know what level it hit in, except it came from the other
+ * socket
+ */
+ case ARM_SPE_NV_REMOTE:
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ break;
+ case ARM_SPE_NV_DRAM:
+ data_src->mem_lvl = PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+ break;
+ default:
+ break;
+ }
+}
+
+static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
- data_src.mem_lvl = PERF_MEM_LVL_L3;
+ data_src->mem_lvl = PERF_MEM_LVL_L3;
if (record->type & ARM_SPE_LLC_MISS)
- data_src.mem_lvl |= PERF_MEM_LVL_MISS;
+ data_src->mem_lvl |= PERF_MEM_LVL_MISS;
else
- data_src.mem_lvl |= PERF_MEM_LVL_HIT;
+ data_src->mem_lvl |= PERF_MEM_LVL_HIT;
} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
- data_src.mem_lvl = PERF_MEM_LVL_L1;
+ data_src->mem_lvl = PERF_MEM_LVL_L1;
if (record->type & ARM_SPE_L1D_MISS)
- data_src.mem_lvl |= PERF_MEM_LVL_MISS;
+ data_src->mem_lvl |= PERF_MEM_LVL_MISS;
else
- data_src.mem_lvl |= PERF_MEM_LVL_HIT;
+ data_src->mem_lvl |= PERF_MEM_LVL_HIT;
}
if (record->type & ARM_SPE_REMOTE_ACCESS)
- data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+ data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+}
+
+static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
+{
+ union perf_mem_data_src data_src = { 0 };
+ bool is_neoverse = is_midr_in_range(midr, neoverse_spe);
+
+ if (record->op & ARM_SPE_LD)
+ data_src.mem_op = PERF_MEM_OP_LOAD;
+ else
+ data_src.mem_op = PERF_MEM_OP_STORE;
+
+ if (is_neoverse)
+ arm_spe__synth_data_source_neoverse(record, &data_src);
+ else
+ arm_spe__synth_data_source_generic(record, &data_src);
if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
data_src.mem_dtlb = PERF_MEM_TLB_WK;
@@ -446,7 +518,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
u64 data_src;
int err;
- data_src = arm_spe__synth_data_source(record);
+ data_src = arm_spe__synth_data_source(record, spe->midr);
if (spe->sample_flc) {
if (record->type & ARM_SPE_L1D_MISS) {
@@ -796,6 +868,10 @@ static int arm_spe_process_event(struct perf_session *session,
u64 timestamp;
struct arm_spe *spe = container_of(session->auxtrace,
struct arm_spe, auxtrace);
+ const char *cpuid = perf_env__cpuid(session->evlist->env);
+ u64 midr = strtol(cpuid, NULL, 16);
+
+ spe->midr = midr;
if (dump_trace)
return 0;
--
2.32.0
next reply other threads:[~2022-02-21 22:48 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-21 22:47 Ali Saidi [this message]
2022-02-21 22:48 ` [PATCH v2 2/2] perf mem: Support HITM for when mem_lvl_num is used Ali Saidi
2022-03-02 15:39 ` German Gomez
2022-03-13 12:44 ` Leo Yan
2022-03-13 19:19 ` Ali Saidi
2022-03-14 6:33 ` Leo Yan
2022-03-14 18:00 ` German Gomez
2022-03-14 18:37 ` Ali Saidi
2022-03-15 18:44 ` German Gomez
2022-03-16 11:43 ` German Gomez
2022-03-16 12:42 ` Leo Yan
2022-03-16 15:10 ` German Gomez
2022-03-02 11:59 ` [PATCH v2 1/2] perf arm-spe: Use SPE data source for neoverse cores German Gomez
2022-03-13 11:46 ` Leo Yan
2022-03-13 19:06 ` Ali Saidi
2022-03-14 4:05 ` Leo Yan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220221224807.18172-1-alisaidi@amazon.com \
--to=alisaidi@amazon.com \
--cc=acme@kernel.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=andrew.kilroy@arm.com \
--cc=benh@kernel.crashing.org \
--cc=german.gomez@arm.com \
--cc=james.clark@arm.com \
--cc=john.garry@huawei.com \
--cc=jolsa@redhat.com \
--cc=kjain@linux.ibm.com \
--cc=leo.yan@linaro.org \
--cc=lihuafei1@huawei.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mathieu.poirier@linaro.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=will@kernel.org \
--cc=yao.jin@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).