* [PATCH v2 0/8] perf/arm-cmn: Fixes and updates
@ 2024-09-02 17:51 Robin Murphy
2024-09-02 17:51 ` [PATCH v2 1/8] perf/arm-cmn: Refactor node ID handling. Again Robin Murphy
` (9 more replies)
0 siblings, 10 replies; 11+ messages in thread
From: Robin Murphy @ 2024-09-02 17:51 UTC (permalink / raw)
To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka
v1: https://lore.kernel.org/linux-arm-kernel/cover.1723229941.git.robin.murphy@arm.com/
Hi all,
Here's v2 addressing Mark and Ilkka's comments. I hope I've added
sufficient clarification to patch #5, but at worst I'd be OK with
landing the rest while that discussion continues.
Thanks,
Robin.
Robin Murphy (8):
perf/arm-cmn: Refactor node ID handling. Again.
perf/arm-cmn: Fix CCLA register offset
perf/arm-cmn: Ensure dtm_idx is big enough
perf/arm-cmn: Improve build-time assertion
perf/arm-cmn: Make cycle counts less surprising
perf/arm-cmn: Refactor DTC PMU register access
dt-bindings: perf: arm-cmn: Add CMN S3
perf/arm-cmn: Support CMN S3
.../devicetree/bindings/perf/arm,cmn.yaml | 1 +
drivers/perf/arm-cmn.c | 308 ++++++++++--------
2 files changed, 175 insertions(+), 134 deletions(-)
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 1/8] perf/arm-cmn: Refactor node ID handling. Again.
2024-09-02 17:51 [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
@ 2024-09-02 17:51 ` Robin Murphy
2024-09-02 17:51 ` [PATCH v2 2/8] perf/arm-cmn: Fix CCLA register offset Robin Murphy
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Robin Murphy @ 2024-09-02 17:51 UTC (permalink / raw)
To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka
The scope of the "extra device ports" configuration is not made clear by
the CMN documentation - so far we've assumed it applies globally, based
on the sole example which suggests as much. However it transpires that
this is incorrect, and the format does in fact vary based on each
individual XP's port configuration. As a consequence, we're currenly
liable to decode the port/device indices from a node ID incorrectly,
thus program the wrong event source in the DTM leading to bogus event
counts, and also show device topology on the wrong ports in debugfs.
To put this right, rework node IDs yet again to carry around the
additional data necessary to decode them properly per-XP. At this point
the notion of fully decomposing an ID becomes more impractical than it's
worth, so unabstracting the XY mesh coordinates (where 2/3 users were
just debug anyway) ends up leaving things a bit simpler overall.
Fixes: 60d1504070c2 ("perf/arm-cmn: Support new IP features")
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
v2: Fix bitshift typo, expand commit messgae
---
drivers/perf/arm-cmn.c | 94 ++++++++++++++++++------------------------
1 file changed, 40 insertions(+), 54 deletions(-)
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index c932d9d355cf..b59ae8513dce 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -24,14 +24,6 @@
#define CMN_NI_NODE_ID GENMASK_ULL(31, 16)
#define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32)
-#define CMN_NODEID_DEVID(reg) ((reg) & 3)
-#define CMN_NODEID_EXT_DEVID(reg) ((reg) & 1)
-#define CMN_NODEID_PID(reg) (((reg) >> 2) & 1)
-#define CMN_NODEID_EXT_PID(reg) (((reg) >> 1) & 3)
-#define CMN_NODEID_1x1_PID(reg) (((reg) >> 2) & 7)
-#define CMN_NODEID_X(reg, bits) ((reg) >> (3 + (bits)))
-#define CMN_NODEID_Y(reg, bits) (((reg) >> 3) & ((1U << (bits)) - 1))
-
#define CMN_CHILD_INFO 0x0080
#define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0)
#define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16)
@@ -280,8 +272,11 @@ struct arm_cmn_node {
u16 id, logid;
enum cmn_node_type type;
+ /* XP properties really, but replicated to children for convenience */
u8 dtm;
s8 dtc;
+ u8 portid_bits:4;
+ u8 deviceid_bits:4;
/* DN/HN-F/CXHA */
struct {
u8 val : 4;
@@ -357,49 +352,33 @@ struct arm_cmn {
static int arm_cmn_hp_state;
struct arm_cmn_nodeid {
- u8 x;
- u8 y;
u8 port;
u8 dev;
};
static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
{
- return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2);
+ return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1));
}
-static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
+static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn_node *dn)
{
struct arm_cmn_nodeid nid;
- if (cmn->num_xps == 1) {
- nid.x = 0;
- nid.y = 0;
- nid.port = CMN_NODEID_1x1_PID(id);
- nid.dev = CMN_NODEID_DEVID(id);
- } else {
- int bits = arm_cmn_xyidbits(cmn);
-
- nid.x = CMN_NODEID_X(id, bits);
- nid.y = CMN_NODEID_Y(id, bits);
- if (cmn->ports_used & 0xc) {
- nid.port = CMN_NODEID_EXT_PID(id);
- nid.dev = CMN_NODEID_EXT_DEVID(id);
- } else {
- nid.port = CMN_NODEID_PID(id);
- nid.dev = CMN_NODEID_DEVID(id);
- }
- }
+ nid.dev = dn->id & ((1U << dn->deviceid_bits) - 1);
+ nid.port = (dn->id >> dn->deviceid_bits) & ((1U << dn->portid_bits) - 1);
return nid;
}
static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
const struct arm_cmn_node *dn)
{
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
- int xp_idx = cmn->mesh_x * nid.y + nid.x;
+ int id = dn->id >> (dn->portid_bits + dn->deviceid_bits);
+ int bits = arm_cmn_xyidbits(cmn);
+ int x = id >> bits;
+ int y = id & ((1U << bits) - 1);
- return cmn->xps + xp_idx;
+ return cmn->xps + cmn->mesh_x * y + x;
}
static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
enum cmn_node_type type)
@@ -485,13 +464,13 @@ static const char *arm_cmn_device_type(u8 type)
}
}
-static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
+static void arm_cmn_show_logid(struct seq_file *s, const struct arm_cmn_node *xp, int p, int d)
{
struct arm_cmn *cmn = s->private;
struct arm_cmn_node *dn;
+ u16 id = xp->id | d | (p << xp->deviceid_bits);
for (dn = cmn->dns; dn->type; dn++) {
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
int pad = dn->logid < 10;
if (dn->type == CMN_TYPE_XP)
@@ -500,7 +479,7 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
if (dn->type < CMN_TYPE_HNI)
continue;
- if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d)
+ if (dn->id != id)
continue;
seq_printf(s, " %*c#%-*d |", pad + 1, ' ', 3 - pad, dn->logid);
@@ -521,6 +500,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
y = cmn->mesh_y;
while (y--) {
int xp_base = cmn->mesh_x * y;
+ struct arm_cmn_node *xp = cmn->xps + xp_base;
u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION];
for (x = 0; x < cmn->mesh_x; x++)
@@ -528,16 +508,14 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
seq_printf(s, "\n%-2d |", y);
for (x = 0; x < cmn->mesh_x; x++) {
- struct arm_cmn_node *xp = cmn->xps + xp_base + x;
-
for (p = 0; p < CMN_MAX_PORTS; p++)
- port[p][x] = arm_cmn_device_connect_info(cmn, xp, p);
+ port[p][x] = arm_cmn_device_connect_info(cmn, xp + x, p);
seq_printf(s, " XP #%-3d|", xp_base + x);
}
seq_puts(s, "\n |");
for (x = 0; x < cmn->mesh_x; x++) {
- s8 dtc = cmn->xps[xp_base + x].dtc;
+ s8 dtc = xp[x].dtc;
if (dtc < 0)
seq_puts(s, " DTC ?? |");
@@ -554,10 +532,10 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
seq_puts(s, arm_cmn_device_type(port[p][x]));
seq_puts(s, "\n 0|");
for (x = 0; x < cmn->mesh_x; x++)
- arm_cmn_show_logid(s, x, y, p, 0);
+ arm_cmn_show_logid(s, xp + x, p, 0);
seq_puts(s, "\n 1|");
for (x = 0; x < cmn->mesh_x; x++)
- arm_cmn_show_logid(s, x, y, p, 1);
+ arm_cmn_show_logid(s, xp + x, p, 1);
}
seq_puts(s, "\n-----+");
}
@@ -1815,10 +1793,7 @@ static int arm_cmn_event_init(struct perf_event *event)
}
if (!hw->num_dns) {
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid);
-
- dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n",
- nodeid, nid.x, nid.y, nid.port, nid.dev, type);
+ dev_dbg(cmn->dev, "invalid node 0x%x type 0x%x\n", nodeid, type);
return -EINVAL;
}
@@ -1921,7 +1896,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
arm_cmn_claim_wp_idx(dtm, event, d, wp_idx, i);
writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
} else {
- struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+ struct arm_cmn_nodeid nid = arm_cmn_nid(dn);
if (cmn->multi_dtm)
nid.port %= 2;
@@ -2168,10 +2143,12 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
continue;
xp = arm_cmn_node_to_xp(cmn, dn);
+ dn->portid_bits = xp->portid_bits;
+ dn->deviceid_bits = xp->deviceid_bits;
dn->dtc = xp->dtc;
dn->dtm = xp->dtm;
if (cmn->multi_dtm)
- dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
+ dn->dtm += arm_cmn_nid(dn).port / 2;
if (dn->type == CMN_TYPE_DTC) {
int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
@@ -2341,18 +2318,27 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
arm_cmn_init_dtm(dtm++, xp, 0);
/*
* Keeping track of connected ports will let us filter out
- * unnecessary XP events easily. We can also reliably infer the
- * "extra device ports" configuration for the node ID format
- * from this, since in that case we will see at least one XP
- * with port 2 connected, for the HN-D.
+ * unnecessary XP events easily, and also infer the per-XP
+ * part of the node ID format.
*/
for (int p = 0; p < CMN_MAX_PORTS; p++)
if (arm_cmn_device_connect_info(cmn, xp, p))
xp_ports |= BIT(p);
- if (cmn->multi_dtm && (xp_ports & 0xc))
+ if (cmn->num_xps == 1) {
+ xp->portid_bits = 3;
+ xp->deviceid_bits = 2;
+ } else if (xp_ports > 0x3) {
+ xp->portid_bits = 2;
+ xp->deviceid_bits = 1;
+ } else {
+ xp->portid_bits = 1;
+ xp->deviceid_bits = 2;
+ }
+
+ if (cmn->multi_dtm && (xp_ports > 0x3))
arm_cmn_init_dtm(dtm++, xp, 1);
- if (cmn->multi_dtm && (xp_ports & 0x30))
+ if (cmn->multi_dtm && (xp_ports > 0xf))
arm_cmn_init_dtm(dtm++, xp, 2);
cmn->ports_used |= xp_ports;
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 2/8] perf/arm-cmn: Fix CCLA register offset
2024-09-02 17:51 [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
2024-09-02 17:51 ` [PATCH v2 1/8] perf/arm-cmn: Refactor node ID handling. Again Robin Murphy
@ 2024-09-02 17:51 ` Robin Murphy
2024-09-02 17:51 ` [PATCH v2 3/8] perf/arm-cmn: Ensure dtm_idx is big enough Robin Murphy
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Robin Murphy @ 2024-09-02 17:51 UTC (permalink / raw)
To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka
Apparently pmu_event_sel is offset by 8 for all CCLA nodes, not just
the CCLA_RNI combination type.
Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support")
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
v2: Add CCLA-specific definition for clarity
---
drivers/perf/arm-cmn.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index b59ae8513dce..4e2338afe669 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -70,7 +70,8 @@
/* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */
#define CMN__PMU_OCCUP1_ID GENMASK_ULL(34, 32)
-/* HN-Ps are weird... */
+/* Some types are designed to coexist with another device in the same node */
+#define CMN_CCLA_PMU_EVENT_SEL 0x008
#define CMN_HNP_PMU_EVENT_SEL 0x008
/* DTMs live in the PMU space of XP registers */
@@ -2393,10 +2394,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_CXHA:
case CMN_TYPE_CCRA:
case CMN_TYPE_CCHA:
- case CMN_TYPE_CCLA:
case CMN_TYPE_HNS:
dn++;
break;
+ case CMN_TYPE_CCLA:
+ dn->pmu_base += CMN_CCLA_PMU_EVENT_SEL;
+ dn++;
+ break;
/* Nothing to see here */
case CMN_TYPE_MPAM_S:
case CMN_TYPE_MPAM_NS:
@@ -2414,7 +2418,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_HNP:
case CMN_TYPE_CCLA_RNI:
dn[1] = dn[0];
- dn[0].pmu_base += CMN_HNP_PMU_EVENT_SEL;
+ dn[0].pmu_base += CMN_CCLA_PMU_EVENT_SEL;
dn[1].type = arm_cmn_subtype(dn->type);
dn += 2;
break;
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 3/8] perf/arm-cmn: Ensure dtm_idx is big enough
2024-09-02 17:51 [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
2024-09-02 17:51 ` [PATCH v2 1/8] perf/arm-cmn: Refactor node ID handling. Again Robin Murphy
2024-09-02 17:51 ` [PATCH v2 2/8] perf/arm-cmn: Fix CCLA register offset Robin Murphy
@ 2024-09-02 17:51 ` Robin Murphy
2024-09-02 17:52 ` [PATCH v2 4/8] perf/arm-cmn: Improve build-time assertion Robin Murphy
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Robin Murphy @ 2024-09-02 17:51 UTC (permalink / raw)
To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka
While CMN_MAX_DIMENSION was bumped to 12 for CMN-650, that only supports
up to a 10x10 mesh, so bumping dtm_idx to 256 bits at the time worked
out OK in practice. However CMN-700 did finally support up to 144 XPs,
and thus needs a worst-case 288 bits of dtm_idx for an aggregated XP
event on a maxed-out config. Oops.
Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
v2: Make the calculation more explicit
---
drivers/perf/arm-cmn.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 4e2338afe669..48863b31ccfb 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -35,6 +35,9 @@
#define CMN_MAX_XPS (CMN_MAX_DIMENSION * CMN_MAX_DIMENSION)
#define CMN_MAX_DTMS (CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4)
+/* Currently XPs are the node type we can have most of; others top out at 128 */
+#define CMN_MAX_NODES_PER_EVENT CMN_MAX_XPS
+
/* The CFG node has various info besides the discovery tree */
#define CMN_CFGM_PERIPH_ID_01 0x0008
#define CMN_CFGM_PID0_PART_0 GENMASK_ULL(7, 0)
@@ -564,7 +567,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
struct arm_cmn_hw_event {
struct arm_cmn_node *dn;
- u64 dtm_idx[4];
+ u64 dtm_idx[DIV_ROUND_UP(CMN_MAX_NODES_PER_EVENT * 2, 64)];
s8 dtc_idx[CMN_MAX_DTCS];
u8 num_dns;
u8 dtm_offset;
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 4/8] perf/arm-cmn: Improve build-time assertion
2024-09-02 17:51 [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
` (2 preceding siblings ...)
2024-09-02 17:51 ` [PATCH v2 3/8] perf/arm-cmn: Ensure dtm_idx is big enough Robin Murphy
@ 2024-09-02 17:52 ` Robin Murphy
2024-09-02 17:52 ` [PATCH v2 5/8] perf/arm-cmn: Make cycle counts less surprising Robin Murphy
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Robin Murphy @ 2024-09-02 17:52 UTC (permalink / raw)
To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka
These days we can use static_assert() in the logical place rather than
jamming a BUILD_BUG_ON() into the nearest function scope.
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
v2: Drop now-unnecessary dtm_idx check
---
drivers/perf/arm-cmn.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 48863b31ccfb..5b0edeb69394 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -581,6 +581,7 @@ struct arm_cmn_hw_event {
bool wide_sel;
enum cmn_filter_select filter_sel;
};
+static_assert(sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event, target));
#define for_each_hw_dn(hw, dn, i) \
for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
@@ -591,7 +592,6 @@ struct arm_cmn_hw_event {
static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
{
- BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
return (struct arm_cmn_hw_event *)&event->hw;
}
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 5/8] perf/arm-cmn: Make cycle counts less surprising
2024-09-02 17:51 [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
` (3 preceding siblings ...)
2024-09-02 17:52 ` [PATCH v2 4/8] perf/arm-cmn: Improve build-time assertion Robin Murphy
@ 2024-09-02 17:52 ` Robin Murphy
2024-09-02 17:52 ` [PATCH v2 6/8] perf/arm-cmn: Refactor DTC PMU register access Robin Murphy
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Robin Murphy @ 2024-09-02 17:52 UTC (permalink / raw)
To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka
By default, CMN has automatic clock-gating with the implication that
a DTC's cycle counter may not increment while the DTC is sufficiently
idle. Given that we may have up to 4 DTCs to choose from when scheduling
a cycles event, this may potentially lead to surprising results if
trying to measure metrics based on activity in a different DTC domain
from where cycles end up being counted. Furthermore, since the details
of internal clock gating are not documented, we can't even reason about
what "active" cycles for a DTC actually mean relative to the activity of
other nodes within the same nominal DTC domain.
Make the reasonable assumption that if the user wants to count cycles,
they almost certainly want to count all of the cycles, and disable clock
gating while a DTC's cycle counter is in use.
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
v2: Expand commit message
---
drivers/perf/arm-cmn.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 5b0edeb69394..2205c183ec1b 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -119,6 +119,7 @@
/* The DTC node is where the magic happens */
#define CMN_DT_DTC_CTL 0x0a00
#define CMN_DT_DTC_CTL_DT_EN BIT(0)
+#define CMN_DT_DTC_CTL_CG_DISABLE BIT(10)
/* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */
#define _CMN_DT_CNT_REG(n) ((((n) / 2) * 4 + (n) % 2) * 4)
@@ -1546,9 +1547,12 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
int i;
if (type == CMN_TYPE_DTC) {
- i = hw->dtc_idx[0];
- writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
- cmn->dtc[i].cc_active = true;
+ struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
+
+ writel_relaxed(CMN_DT_DTC_CTL_DT_EN | CMN_DT_DTC_CTL_CG_DISABLE,
+ dtc->base + CMN_DT_DTC_CTL);
+ writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
+ dtc->cc_active = true;
} else if (type == CMN_TYPE_WP) {
u64 val = CMN_EVENT_WP_VAL(event);
u64 mask = CMN_EVENT_WP_MASK(event);
@@ -1577,8 +1581,10 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
int i;
if (type == CMN_TYPE_DTC) {
- i = hw->dtc_idx[0];
- cmn->dtc[i].cc_active = false;
+ struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
+
+ dtc->cc_active = false;
+ writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
} else if (type == CMN_TYPE_WP) {
for_each_hw_dn(hw, dn, i) {
void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 6/8] perf/arm-cmn: Refactor DTC PMU register access
2024-09-02 17:51 [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
` (4 preceding siblings ...)
2024-09-02 17:52 ` [PATCH v2 5/8] perf/arm-cmn: Make cycle counts less surprising Robin Murphy
@ 2024-09-02 17:52 ` Robin Murphy
2024-09-02 17:52 ` [PATCH v2 7/8] dt-bindings: perf: arm-cmn: Add CMN S3 Robin Murphy
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Robin Murphy @ 2024-09-02 17:52 UTC (permalink / raw)
To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka
Annoyingly, we're soon going to have to cope with PMU registers moving
about. This will mostly be straightforward, except for the hard-coding
of CMN_PMU_OFFSET for the DTC PMU registers. As a first step, refactor
those accessors to allow for encapsulating a variable offset without
making a big mess all over. As a bonus, we can repack the arm_cmn_dtc
structure to accommodate the new pointer without growing any larger,
since irq_friend only encodes a range of +/-3.
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
v2: Mention deliberate struct repack
---
drivers/perf/arm-cmn.c | 64 ++++++++++++++++++++++++------------------
1 file changed, 36 insertions(+), 28 deletions(-)
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 2205c183ec1b..ac7dd4c352e8 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -123,24 +123,24 @@
/* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */
#define _CMN_DT_CNT_REG(n) ((((n) / 2) * 4 + (n) % 2) * 4)
-#define CMN_DT_PMEVCNT(n) (CMN_PMU_OFFSET + _CMN_DT_CNT_REG(n))
-#define CMN_DT_PMCCNTR (CMN_PMU_OFFSET + 0x40)
+#define CMN_DT_PMEVCNT(dtc, n) ((dtc)->pmu_base + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTR(dtc) ((dtc)->pmu_base + 0x40)
-#define CMN_DT_PMEVCNTSR(n) (CMN_PMU_OFFSET + 0x50 + _CMN_DT_CNT_REG(n))
-#define CMN_DT_PMCCNTRSR (CMN_PMU_OFFSET + 0x90)
+#define CMN_DT_PMEVCNTSR(dtc, n) ((dtc)->pmu_base + 0x50 + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTRSR(dtc) ((dtc)->pmu_base + 0x90)
-#define CMN_DT_PMCR (CMN_PMU_OFFSET + 0x100)
+#define CMN_DT_PMCR(dtc) ((dtc)->pmu_base + 0x100)
#define CMN_DT_PMCR_PMU_EN BIT(0)
#define CMN_DT_PMCR_CNTR_RST BIT(5)
#define CMN_DT_PMCR_OVFL_INTR_EN BIT(6)
-#define CMN_DT_PMOVSR (CMN_PMU_OFFSET + 0x118)
-#define CMN_DT_PMOVSR_CLR (CMN_PMU_OFFSET + 0x120)
+#define CMN_DT_PMOVSR(dtc) ((dtc)->pmu_base + 0x118)
+#define CMN_DT_PMOVSR_CLR(dtc) ((dtc)->pmu_base + 0x120)
-#define CMN_DT_PMSSR (CMN_PMU_OFFSET + 0x128)
+#define CMN_DT_PMSSR(dtc) ((dtc)->pmu_base + 0x128)
#define CMN_DT_PMSSR_SS_STATUS(n) BIT(n)
-#define CMN_DT_PMSRR (CMN_PMU_OFFSET + 0x130)
+#define CMN_DT_PMSRR(dtc) ((dtc)->pmu_base + 0x130)
#define CMN_DT_PMSRR_SS_REQ BIT(0)
#define CMN_DT_NUM_COUNTERS 8
@@ -307,8 +307,9 @@ struct arm_cmn_dtm {
struct arm_cmn_dtc {
void __iomem *base;
+ void __iomem *pmu_base;
int irq;
- int irq_friend;
+ s8 irq_friend;
bool cc_active;
struct perf_event *counters[CMN_DT_NUM_COUNTERS];
@@ -412,10 +413,15 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
};
}
+static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn)
+{
+ return CMN_PMU_OFFSET;
+}
+
static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn,
const struct arm_cmn_node *xp, int port)
{
- int offset = CMN_MXP__CONNECT_INFO(port);
+ int offset = CMN_MXP__CONNECT_INFO(port) - arm_cmn_pmu_offset(cmn, xp);
if (port >= 2) {
if (cmn->part == PART_CMN600 || cmn->part == PART_CMN650)
@@ -428,7 +434,7 @@ static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn,
offset += CI700_CONNECT_INFO_P2_5_OFFSET;
}
- return readl_relaxed(xp->pmu_base - CMN_PMU_OFFSET + offset);
+ return readl_relaxed(xp->pmu_base + offset);
}
static struct dentry *arm_cmn_debugfs;
@@ -1398,7 +1404,7 @@ static u32 arm_cmn_wp_config(struct perf_event *event, int wp_idx)
static void arm_cmn_set_state(struct arm_cmn *cmn, u32 state)
{
if (!cmn->state)
- writel_relaxed(0, cmn->dtc[0].base + CMN_DT_PMCR);
+ writel_relaxed(0, CMN_DT_PMCR(&cmn->dtc[0]));
cmn->state |= state;
}
@@ -1407,7 +1413,7 @@ static void arm_cmn_clear_state(struct arm_cmn *cmn, u32 state)
cmn->state &= ~state;
if (!cmn->state)
writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN,
- cmn->dtc[0].base + CMN_DT_PMCR);
+ CMN_DT_PMCR(&cmn->dtc[0]));
}
static void arm_cmn_pmu_enable(struct pmu *pmu)
@@ -1442,18 +1448,19 @@ static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw,
static u64 arm_cmn_read_cc(struct arm_cmn_dtc *dtc)
{
- u64 val = readq_relaxed(dtc->base + CMN_DT_PMCCNTR);
+ void __iomem *pmccntr = CMN_DT_PMCCNTR(dtc);
+ u64 val = readq_relaxed(pmccntr);
- writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
+ writeq_relaxed(CMN_CC_INIT, pmccntr);
return (val - CMN_CC_INIT) & ((CMN_CC_INIT << 1) - 1);
}
static u32 arm_cmn_read_counter(struct arm_cmn_dtc *dtc, int idx)
{
- u32 val, pmevcnt = CMN_DT_PMEVCNT(idx);
+ void __iomem *pmevcnt = CMN_DT_PMEVCNT(dtc, idx);
+ u32 val = readl_relaxed(pmevcnt);
- val = readl_relaxed(dtc->base + pmevcnt);
- writel_relaxed(CMN_COUNTER_INIT, dtc->base + pmevcnt);
+ writel_relaxed(CMN_COUNTER_INIT, pmevcnt);
return val - CMN_COUNTER_INIT;
}
@@ -1464,7 +1471,7 @@ static void arm_cmn_init_counter(struct perf_event *event)
u64 count;
for_each_hw_dtc_idx(hw, i, idx) {
- writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + CMN_DT_PMEVCNT(idx));
+ writel_relaxed(CMN_COUNTER_INIT, CMN_DT_PMEVCNT(&cmn->dtc[i], idx));
cmn->dtc[i].counters[idx] = event;
}
@@ -1551,7 +1558,7 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
writel_relaxed(CMN_DT_DTC_CTL_DT_EN | CMN_DT_DTC_CTL_CG_DISABLE,
dtc->base + CMN_DT_DTC_CTL);
- writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
+ writeq_relaxed(CMN_CC_INIT, CMN_DT_PMCCNTR(dtc));
dtc->cc_active = true;
} else if (type == CMN_TYPE_WP) {
u64 val = CMN_EVENT_WP_VAL(event);
@@ -2028,7 +2035,7 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id)
irqreturn_t ret = IRQ_NONE;
for (;;) {
- u32 status = readl_relaxed(dtc->base + CMN_DT_PMOVSR);
+ u32 status = readl_relaxed(CMN_DT_PMOVSR(dtc));
u64 delta;
int i;
@@ -2050,7 +2057,7 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id)
}
}
- writel_relaxed(status, dtc->base + CMN_DT_PMOVSR_CLR);
+ writel_relaxed(status, CMN_DT_PMOVSR_CLR(dtc));
if (!dtc->irq_friend)
return ret;
@@ -2104,15 +2111,16 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id
{
struct arm_cmn_dtc *dtc = cmn->dtc + idx;
- dtc->base = dn->pmu_base - CMN_PMU_OFFSET;
+ dtc->pmu_base = dn->pmu_base;
+ dtc->base = dtc->pmu_base - arm_cmn_pmu_offset(cmn, dn);
dtc->irq = platform_get_irq(to_platform_device(cmn->dev), idx);
if (dtc->irq < 0)
return dtc->irq;
writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
- writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR);
- writeq_relaxed(0, dtc->base + CMN_DT_PMCCNTR);
- writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR);
+ writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, CMN_DT_PMCR(dtc));
+ writeq_relaxed(0, CMN_DT_PMCCNTR(dtc));
+ writel_relaxed(0x1ff, CMN_DT_PMOVSR_CLR(dtc));
return 0;
}
@@ -2200,7 +2208,7 @@ static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_c
node->id = FIELD_GET(CMN_NI_NODE_ID, reg);
node->logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg);
- node->pmu_base = cmn->base + offset + CMN_PMU_OFFSET;
+ node->pmu_base = cmn->base + offset + arm_cmn_pmu_offset(cmn, node);
if (node->type == CMN_TYPE_CFG)
level = 0;
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 7/8] dt-bindings: perf: arm-cmn: Add CMN S3
2024-09-02 17:51 [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
` (5 preceding siblings ...)
2024-09-02 17:52 ` [PATCH v2 6/8] perf/arm-cmn: Refactor DTC PMU register access Robin Murphy
@ 2024-09-02 17:52 ` Robin Murphy
2024-09-02 17:52 ` [PATCH v2 8/8] perf/arm-cmn: Support " Robin Murphy
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Robin Murphy @ 2024-09-02 17:52 UTC (permalink / raw)
To: will
Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka, devicetree,
Rob Herring
The CMN S3 PMU is functionally still very similar to CMN-700, however
while the register contents are compatible, many of them are moved to
different offsets. While this is technically discoverable by a careful
driver that understands the part number in the peripheral ID registers
(which do at least remain in the same place), a new unique compatible
seems warranted to avoid any surprises.
CC: <devicetree@vger.kernel.org>
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
Documentation/devicetree/bindings/perf/arm,cmn.yaml | 1 +
1 file changed, 1 insertion(+)
diff --git a/Documentation/devicetree/bindings/perf/arm,cmn.yaml b/Documentation/devicetree/bindings/perf/arm,cmn.yaml
index 2e51072e794a..0e9d665584e6 100644
--- a/Documentation/devicetree/bindings/perf/arm,cmn.yaml
+++ b/Documentation/devicetree/bindings/perf/arm,cmn.yaml
@@ -16,6 +16,7 @@ properties:
- arm,cmn-600
- arm,cmn-650
- arm,cmn-700
+ - arm,cmn-s3
- arm,ci-700
reg:
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 8/8] perf/arm-cmn: Support CMN S3
2024-09-02 17:51 [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
` (6 preceding siblings ...)
2024-09-02 17:52 ` [PATCH v2 7/8] dt-bindings: perf: arm-cmn: Add CMN S3 Robin Murphy
@ 2024-09-02 17:52 ` Robin Murphy
2024-09-04 16:12 ` [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Will Deacon
2024-09-05 6:35 ` Ilkka Koskinen
9 siblings, 0 replies; 11+ messages in thread
From: Robin Murphy @ 2024-09-02 17:52 UTC (permalink / raw)
To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka
CMN S3 is the latest and greatest evolution for 2024, although most of
the new features don't impact the PMU, so from our point of view it ends
up looking a lot like CMN-700 r3 still. We have some new device types to
ignore, a mildly irritating rearrangement of the register layouts, and a
scary new configuration option that makes it potentially unsafe to even
walk the full discovery tree, let alone attempt to use the PMU.
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
drivers/perf/arm-cmn.c | 119 ++++++++++++++++++++++++++---------------
1 file changed, 76 insertions(+), 43 deletions(-)
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index ac7dd4c352e8..0266ff041e0b 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -45,24 +45,28 @@
#define CMN_CFGM_PERIPH_ID_23 0x0010
#define CMN_CFGM_PID2_REVISION GENMASK_ULL(7, 4)
-#define CMN_CFGM_INFO_GLOBAL 0x900
+#define CMN_CFGM_INFO_GLOBAL 0x0900
#define CMN_INFO_MULTIPLE_DTM_EN BIT_ULL(63)
#define CMN_INFO_RSP_VC_NUM GENMASK_ULL(53, 52)
#define CMN_INFO_DAT_VC_NUM GENMASK_ULL(51, 50)
+#define CMN_INFO_DEVICE_ISO_ENABLE BIT_ULL(44)
-#define CMN_CFGM_INFO_GLOBAL_1 0x908
+#define CMN_CFGM_INFO_GLOBAL_1 0x0908
#define CMN_INFO_SNP_VC_NUM GENMASK_ULL(3, 2)
#define CMN_INFO_REQ_VC_NUM GENMASK_ULL(1, 0)
/* XPs also have some local topology info which has uses too */
#define CMN_MXP__CONNECT_INFO(p) (0x0008 + 8 * (p))
-#define CMN__CONNECT_INFO_DEVICE_TYPE GENMASK_ULL(4, 0)
+#define CMN__CONNECT_INFO_DEVICE_TYPE GENMASK_ULL(5, 0)
#define CMN_MAX_PORTS 6
#define CI700_CONNECT_INFO_P2_5_OFFSET 0x10
/* PMU registers occupy the 3rd 4KB page of each node's region */
#define CMN_PMU_OFFSET 0x2000
+/* ...except when they don't :( */
+#define CMN_S3_DTM_OFFSET 0xa000
+#define CMN_S3_PMU_OFFSET 0xd900
/* For most nodes, this is all there is */
#define CMN_PMU_EVENT_SEL 0x000
@@ -195,10 +199,11 @@ enum cmn_model {
CMN650 = 2,
CMN700 = 4,
CI700 = 8,
+ CMNS3 = 16,
/* ...and then we can use bitmap tricks for commonality */
CMN_ANY = -1,
NOT_CMN600 = -2,
- CMN_650ON = CMN650 | CMN700,
+ CMN_650ON = CMN650 | CMN700 | CMNS3,
};
/* Actual part numbers and revision IDs defined by the hardware */
@@ -207,6 +212,7 @@ enum cmn_part {
PART_CMN650 = 0x436,
PART_CMN700 = 0x43c,
PART_CI700 = 0x43a,
+ PART_CMN_S3 = 0x43e,
};
/* CMN-600 r0px shouldn't exist in silicon, thankfully */
@@ -258,6 +264,7 @@ enum cmn_node_type {
CMN_TYPE_HNS = 0x200,
CMN_TYPE_HNS_MPAM_S,
CMN_TYPE_HNS_MPAM_NS,
+ CMN_TYPE_APB = 0x1000,
/* Not a real node type */
CMN_TYPE_WP = 0x7770
};
@@ -408,6 +415,8 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
return CMN700;
case PART_CI700:
return CI700;
+ case PART_CMN_S3:
+ return CMNS3;
default:
return 0;
};
@@ -415,6 +424,11 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn)
{
+ if (cmn->part == PART_CMN_S3) {
+ if (dn->type == CMN_TYPE_XP)
+ return CMN_S3_DTM_OFFSET;
+ return CMN_S3_PMU_OFFSET;
+ }
return CMN_PMU_OFFSET;
}
@@ -468,9 +482,14 @@ static const char *arm_cmn_device_type(u8 type)
case 0x17: return "RN-F_C_E|";
case 0x18: return " RN-F_E |";
case 0x19: return "RN-F_E_E|";
+ case 0x1a: return " HN-S |";
+ case 0x1b: return " LCN |";
case 0x1c: return " MTSX |";
case 0x1d: return " HN-V |";
case 0x1e: return " CCG |";
+ case 0x20: return " RN-F_F |";
+ case 0x21: return "RN-F_F_E|";
+ case 0x22: return " SN-F_F |";
default: return " ???? |";
}
}
@@ -779,8 +798,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
CMN_EVENT_ATTR(CMN_ANY, cxha_##_name, CMN_TYPE_CXHA, _event)
#define CMN_EVENT_CCRA(_name, _event) \
CMN_EVENT_ATTR(CMN_ANY, ccra_##_name, CMN_TYPE_CCRA, _event)
-#define CMN_EVENT_CCHA(_name, _event) \
- CMN_EVENT_ATTR(CMN_ANY, ccha_##_name, CMN_TYPE_CCHA, _event)
+#define CMN_EVENT_CCHA(_model, _name, _event) \
+ CMN_EVENT_ATTR(_model, ccha_##_name, CMN_TYPE_CCHA, _event)
#define CMN_EVENT_CCLA(_name, _event) \
CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
#define CMN_EVENT_CCLA_RNI(_name, _event) \
@@ -1138,42 +1157,43 @@ static struct attribute *arm_cmn_event_attrs[] = {
CMN_EVENT_CCRA(wdb_alloc, 0x59),
CMN_EVENT_CCRA(ssb_alloc, 0x5a),
- CMN_EVENT_CCHA(rddatbyp, 0x61),
- CMN_EVENT_CCHA(chirsp_up_stall, 0x62),
- CMN_EVENT_CCHA(chidat_up_stall, 0x63),
- CMN_EVENT_CCHA(snppcrd_link0_stall, 0x64),
- CMN_EVENT_CCHA(snppcrd_link1_stall, 0x65),
- CMN_EVENT_CCHA(snppcrd_link2_stall, 0x66),
- CMN_EVENT_CCHA(reqtrk_occ, 0x67),
- CMN_EVENT_CCHA(rdb_occ, 0x68),
- CMN_EVENT_CCHA(rdbyp_occ, 0x69),
- CMN_EVENT_CCHA(wdb_occ, 0x6a),
- CMN_EVENT_CCHA(snptrk_occ, 0x6b),
- CMN_EVENT_CCHA(sdb_occ, 0x6c),
- CMN_EVENT_CCHA(snphaz_occ, 0x6d),
- CMN_EVENT_CCHA(reqtrk_alloc, 0x6e),
- CMN_EVENT_CCHA(rdb_alloc, 0x6f),
- CMN_EVENT_CCHA(rdbyp_alloc, 0x70),
- CMN_EVENT_CCHA(wdb_alloc, 0x71),
- CMN_EVENT_CCHA(snptrk_alloc, 0x72),
- CMN_EVENT_CCHA(sdb_alloc, 0x73),
- CMN_EVENT_CCHA(snphaz_alloc, 0x74),
- CMN_EVENT_CCHA(pb_rhu_req_occ, 0x75),
- CMN_EVENT_CCHA(pb_rhu_req_alloc, 0x76),
- CMN_EVENT_CCHA(pb_rhu_pcie_req_occ, 0x77),
- CMN_EVENT_CCHA(pb_rhu_pcie_req_alloc, 0x78),
- CMN_EVENT_CCHA(pb_pcie_wr_req_occ, 0x79),
- CMN_EVENT_CCHA(pb_pcie_wr_req_alloc, 0x7a),
- CMN_EVENT_CCHA(pb_pcie_reg_req_occ, 0x7b),
- CMN_EVENT_CCHA(pb_pcie_reg_req_alloc, 0x7c),
- CMN_EVENT_CCHA(pb_pcie_rsvd_req_occ, 0x7d),
- CMN_EVENT_CCHA(pb_pcie_rsvd_req_alloc, 0x7e),
- CMN_EVENT_CCHA(pb_rhu_dat_occ, 0x7f),
- CMN_EVENT_CCHA(pb_rhu_dat_alloc, 0x80),
- CMN_EVENT_CCHA(pb_rhu_pcie_dat_occ, 0x81),
- CMN_EVENT_CCHA(pb_rhu_pcie_dat_alloc, 0x82),
- CMN_EVENT_CCHA(pb_pcie_wr_dat_occ, 0x83),
- CMN_EVENT_CCHA(pb_pcie_wr_dat_alloc, 0x84),
+ CMN_EVENT_CCHA(CMN_ANY, rddatbyp, 0x61),
+ CMN_EVENT_CCHA(CMN_ANY, chirsp_up_stall, 0x62),
+ CMN_EVENT_CCHA(CMN_ANY, chidat_up_stall, 0x63),
+ CMN_EVENT_CCHA(CMN_ANY, snppcrd_link0_stall, 0x64),
+ CMN_EVENT_CCHA(CMN_ANY, snppcrd_link1_stall, 0x65),
+ CMN_EVENT_CCHA(CMN_ANY, snppcrd_link2_stall, 0x66),
+ CMN_EVENT_CCHA(CMN_ANY, reqtrk_occ, 0x67),
+ CMN_EVENT_CCHA(CMN_ANY, rdb_occ, 0x68),
+ CMN_EVENT_CCHA(CMN_ANY, rdbyp_occ, 0x69),
+ CMN_EVENT_CCHA(CMN_ANY, wdb_occ, 0x6a),
+ CMN_EVENT_CCHA(CMN_ANY, snptrk_occ, 0x6b),
+ CMN_EVENT_CCHA(CMN_ANY, sdb_occ, 0x6c),
+ CMN_EVENT_CCHA(CMN_ANY, snphaz_occ, 0x6d),
+ CMN_EVENT_CCHA(CMN_ANY, reqtrk_alloc, 0x6e),
+ CMN_EVENT_CCHA(CMN_ANY, rdb_alloc, 0x6f),
+ CMN_EVENT_CCHA(CMN_ANY, rdbyp_alloc, 0x70),
+ CMN_EVENT_CCHA(CMN_ANY, wdb_alloc, 0x71),
+ CMN_EVENT_CCHA(CMN_ANY, snptrk_alloc, 0x72),
+ CMN_EVENT_CCHA(CMN_ANY, db_alloc, 0x73),
+ CMN_EVENT_CCHA(CMN_ANY, snphaz_alloc, 0x74),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_occ, 0x75),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_alloc, 0x76),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_occ, 0x77),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_alloc, 0x78),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_occ, 0x79),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_alloc, 0x7a),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_occ, 0x7b),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_alloc, 0x7c),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_occ, 0x7d),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_alloc, 0x7e),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_occ, 0x7f),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_alloc, 0x80),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_occ, 0x81),
+ CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_alloc, 0x82),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_occ, 0x83),
+ CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_alloc, 0x84),
+ CMN_EVENT_CCHA(CMNS3, chirsp1_up_stall, 0x85),
CMN_EVENT_CCLA(rx_cxs, 0x21),
CMN_EVENT_CCLA(tx_cxs, 0x22),
@@ -1779,7 +1799,8 @@ static int arm_cmn_event_init(struct perf_event *event)
/* ...but the DTM may depend on which port we're watching */
if (cmn->multi_dtm)
hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
- } else if (type == CMN_TYPE_XP && cmn->part == PART_CMN700) {
+ } else if (type == CMN_TYPE_XP &&
+ (cmn->part == PART_CMN700 || cmn->part == PART_CMN_S3)) {
hw->wide_sel = true;
}
@@ -2266,7 +2287,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_23);
cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg);
+ /*
+ * With the device isolation feature, if firmware has neglected to enable
+ * an XP port then we risk locking up if we try to access anything behind
+ * it; however we also have no way to tell from Non-Secure whether any
+ * given port is disabled or not, so the only way to win is not to play...
+ */
reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL);
+ if (reg & CMN_INFO_DEVICE_ISO_ENABLE) {
+ dev_err(cmn->dev, "Device isolation enabled, not continuing due to risk of lockup\n");
+ return -ENODEV;
+ }
cmn->multi_dtm = reg & CMN_INFO_MULTIPLE_DTM_EN;
cmn->rsp_vc_num = FIELD_GET(CMN_INFO_RSP_VC_NUM, reg);
cmn->dat_vc_num = FIELD_GET(CMN_INFO_DAT_VC_NUM, reg);
@@ -2425,6 +2456,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
case CMN_TYPE_CXLA:
case CMN_TYPE_HNS_MPAM_S:
case CMN_TYPE_HNS_MPAM_NS:
+ case CMN_TYPE_APB:
break;
/*
* Split "optimised" combination nodes into separate
@@ -2610,6 +2642,7 @@ static const struct of_device_id arm_cmn_of_match[] = {
{ .compatible = "arm,cmn-600", .data = (void *)PART_CMN600 },
{ .compatible = "arm,cmn-650" },
{ .compatible = "arm,cmn-700" },
+ { .compatible = "arm,cmn-s3" },
{ .compatible = "arm,ci-700" },
{}
};
--
2.39.2.101.g768bb238c484.dirty
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v2 0/8] perf/arm-cmn: Fixes and updates
2024-09-02 17:51 [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
` (7 preceding siblings ...)
2024-09-02 17:52 ` [PATCH v2 8/8] perf/arm-cmn: Support " Robin Murphy
@ 2024-09-04 16:12 ` Will Deacon
2024-09-05 6:35 ` Ilkka Koskinen
9 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2024-09-04 16:12 UTC (permalink / raw)
To: Robin Murphy
Cc: catalin.marinas, kernel-team, Will Deacon, mark.rutland,
linux-arm-kernel, linux-kernel, ilkka
On Mon, 02 Sep 2024 18:51:56 +0100, Robin Murphy wrote:
> v1: https://lore.kernel.org/linux-arm-kernel/cover.1723229941.git.robin.murphy@arm.com/
>
> Hi all,
>
> Here's v2 addressing Mark and Ilkka's comments. I hope I've added
> sufficient clarification to patch #5, but at worst I'd be OK with
> landing the rest while that discussion continues.
>
> [...]
Applied to will (for-next/perf), thanks!
[1/8] perf/arm-cmn: Refactor node ID handling. Again.
https://git.kernel.org/will/c/e79634b53e39
[2/8] perf/arm-cmn: Fix CCLA register offset
https://git.kernel.org/will/c/88b63a82c84e
[3/8] perf/arm-cmn: Ensure dtm_idx is big enough
https://git.kernel.org/will/c/359414b33e00
[4/8] perf/arm-cmn: Improve build-time assertion
https://git.kernel.org/will/c/ff436cee694e
[5/8] perf/arm-cmn: Make cycle counts less surprising
https://git.kernel.org/will/c/c5b15ddf11a8
[6/8] perf/arm-cmn: Refactor DTC PMU register access
https://git.kernel.org/will/c/67acca3504ea
[7/8] dt-bindings: perf: arm-cmn: Add CMN S3
https://git.kernel.org/will/c/a87ef537f961
[8/8] perf/arm-cmn: Support CMN S3
https://git.kernel.org/will/c/0dc2f4963f7e
Cheers,
--
Will
https://fixes.arm64.dev
https://next.arm64.dev
https://will.arm64.dev
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 0/8] perf/arm-cmn: Fixes and updates
2024-09-02 17:51 [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
` (8 preceding siblings ...)
2024-09-04 16:12 ` [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Will Deacon
@ 2024-09-05 6:35 ` Ilkka Koskinen
9 siblings, 0 replies; 11+ messages in thread
From: Ilkka Koskinen @ 2024-09-05 6:35 UTC (permalink / raw)
To: Robin Murphy; +Cc: will, mark.rutland, linux-arm-kernel, linux-kernel, ilkka
On Mon, 2 Sep 2024, Robin Murphy wrote:
> v1: https://lore.kernel.org/linux-arm-kernel/cover.1723229941.git.robin.murphy@arm.com/
>
> Hi all,
>
> Here's v2 addressing Mark and Ilkka's comments. I hope I've added
> sufficient clarification to patch #5, but at worst I'd be OK with
> landing the rest while that discussion continues.
I was a little slow again. Anyways, the patch set looked good to me
and I'm glad Will already picked it.
Cheers, Ilkka
>
> Thanks,
> Robin.
>
>
> Robin Murphy (8):
> perf/arm-cmn: Refactor node ID handling. Again.
> perf/arm-cmn: Fix CCLA register offset
> perf/arm-cmn: Ensure dtm_idx is big enough
> perf/arm-cmn: Improve build-time assertion
> perf/arm-cmn: Make cycle counts less surprising
> perf/arm-cmn: Refactor DTC PMU register access
> dt-bindings: perf: arm-cmn: Add CMN S3
> perf/arm-cmn: Support CMN S3
>
> .../devicetree/bindings/perf/arm,cmn.yaml | 1 +
> drivers/perf/arm-cmn.c | 308 ++++++++++--------
> 2 files changed, 175 insertions(+), 134 deletions(-)
>
> --
> 2.39.2.101.g768bb238c484.dirty
>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2024-09-05 6:36 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-09-02 17:51 [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
2024-09-02 17:51 ` [PATCH v2 1/8] perf/arm-cmn: Refactor node ID handling. Again Robin Murphy
2024-09-02 17:51 ` [PATCH v2 2/8] perf/arm-cmn: Fix CCLA register offset Robin Murphy
2024-09-02 17:51 ` [PATCH v2 3/8] perf/arm-cmn: Ensure dtm_idx is big enough Robin Murphy
2024-09-02 17:52 ` [PATCH v2 4/8] perf/arm-cmn: Improve build-time assertion Robin Murphy
2024-09-02 17:52 ` [PATCH v2 5/8] perf/arm-cmn: Make cycle counts less surprising Robin Murphy
2024-09-02 17:52 ` [PATCH v2 6/8] perf/arm-cmn: Refactor DTC PMU register access Robin Murphy
2024-09-02 17:52 ` [PATCH v2 7/8] dt-bindings: perf: arm-cmn: Add CMN S3 Robin Murphy
2024-09-02 17:52 ` [PATCH v2 8/8] perf/arm-cmn: Support " Robin Murphy
2024-09-04 16:12 ` [PATCH v2 0/8] perf/arm-cmn: Fixes and updates Will Deacon
2024-09-05 6:35 ` Ilkka Koskinen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).