linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/8] perf/arm-cmn: Fixes and updates
@ 2024-08-09 19:15 Robin Murphy
  2024-08-09 19:15 ` [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again Robin Murphy
                   ` (7 more replies)
  0 siblings, 8 replies; 32+ messages in thread
From: Robin Murphy @ 2024-08-09 19:15 UTC (permalink / raw)
  To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka

Hi Will,

Here's the latest collection of accrued CMN fixes, along with a
surprisingly small update (way more refactoring than new content!)
for the newest product iteration.

Thanks,
Robin.


Robin Murphy (8):
  perf/arm-cmn: Refactor node ID handling. Again.
  perf/arm-cmn: Fix CCLA register offset
  perf/arm-cmn: Ensure dtm_idx is big enough
  perf/arm-cmn: Improve build-time assertions
  perf/arm-cmn: Make cycle counts less surprising
  perf/arm-cmn: Refactor DTC PMU register access
  dt-bindings: perf: arm-cmn: Add CMN S3
  perf/arm-cmn: Support CMN S3

 .../devicetree/bindings/perf/arm,cmn.yaml     |   1 +
 drivers/perf/arm-cmn.c                        | 300 ++++++++++--------
 2 files changed, 169 insertions(+), 132 deletions(-)

-- 
2.39.2.101.g768bb238c484.dirty



^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again.
  2024-08-09 19:15 [PATCH 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
@ 2024-08-09 19:15 ` Robin Murphy
  2024-08-16  9:33   ` Mark Rutland
  2024-08-24  1:00   ` Ilkka Koskinen
  2024-08-09 19:15 ` [PATCH 2/8] perf/arm-cmn: Fix CCLA register offset Robin Murphy
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 32+ messages in thread
From: Robin Murphy @ 2024-08-09 19:15 UTC (permalink / raw)
  To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka

It transpires that despite the explicit example to the contrary in the
CMN-700 TRM, the "extra device ports" config is in fact a per-XP thing
rather than a global one. To that end, rework node IDs yet again to
carry around the additional data necessary to decode them properly. At
this point the notion of fully decomposing an ID becomes more
impractical than it's worth, so unabstracting the XY mesh coordinates
(where 2/3 users were just debug anyway) ends up leaving things a bit
simpler overall.

Fixes: 60d1504070c2 ("perf/arm-cmn: Support new IP features")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/perf/arm-cmn.c | 94 ++++++++++++++++++------------------------
 1 file changed, 40 insertions(+), 54 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index c932d9d355cf..fd2122a37f22 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -24,14 +24,6 @@
 #define CMN_NI_NODE_ID			GENMASK_ULL(31, 16)
 #define CMN_NI_LOGICAL_ID		GENMASK_ULL(47, 32)
 
-#define CMN_NODEID_DEVID(reg)		((reg) & 3)
-#define CMN_NODEID_EXT_DEVID(reg)	((reg) & 1)
-#define CMN_NODEID_PID(reg)		(((reg) >> 2) & 1)
-#define CMN_NODEID_EXT_PID(reg)		(((reg) >> 1) & 3)
-#define CMN_NODEID_1x1_PID(reg)		(((reg) >> 2) & 7)
-#define CMN_NODEID_X(reg, bits)		((reg) >> (3 + (bits)))
-#define CMN_NODEID_Y(reg, bits)		(((reg) >> 3) & ((1U << (bits)) - 1))
-
 #define CMN_CHILD_INFO			0x0080
 #define CMN_CI_CHILD_COUNT		GENMASK_ULL(15, 0)
 #define CMN_CI_CHILD_PTR_OFFSET		GENMASK_ULL(31, 16)
@@ -280,8 +272,11 @@ struct arm_cmn_node {
 	u16 id, logid;
 	enum cmn_node_type type;
 
+	/* XP properties really, but replicated to children for convenience */
 	u8 dtm;
 	s8 dtc;
+	u8 portid_bits:4;
+	u8 deviceid_bits:4;
 	/* DN/HN-F/CXHA */
 	struct {
 		u8 val : 4;
@@ -357,49 +352,33 @@ struct arm_cmn {
 static int arm_cmn_hp_state;
 
 struct arm_cmn_nodeid {
-	u8 x;
-	u8 y;
 	u8 port;
 	u8 dev;
 };
 
 static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
 {
-	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2);
+	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1));
 }
 
-static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
+static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn_node *dn)
 {
 	struct arm_cmn_nodeid nid;
 
-	if (cmn->num_xps == 1) {
-		nid.x = 0;
-		nid.y = 0;
-		nid.port = CMN_NODEID_1x1_PID(id);
-		nid.dev = CMN_NODEID_DEVID(id);
-	} else {
-		int bits = arm_cmn_xyidbits(cmn);
-
-		nid.x = CMN_NODEID_X(id, bits);
-		nid.y = CMN_NODEID_Y(id, bits);
-		if (cmn->ports_used & 0xc) {
-			nid.port = CMN_NODEID_EXT_PID(id);
-			nid.dev = CMN_NODEID_EXT_DEVID(id);
-		} else {
-			nid.port = CMN_NODEID_PID(id);
-			nid.dev = CMN_NODEID_DEVID(id);
-		}
-	}
+	nid.dev = dn->id & ((1U << dn->deviceid_bits) - 1);
+	nid.port = (dn->id >> dn->deviceid_bits) & ((1U << dn->portid_bits) - 1);
 	return nid;
 }
 
 static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
 					       const struct arm_cmn_node *dn)
 {
-	struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
-	int xp_idx = cmn->mesh_x * nid.y + nid.x;
+	int id = dn->id >> (dn->portid_bits + dn->deviceid_bits);
+	int bits = arm_cmn_xyidbits(cmn);
+	int x = id > bits;
+	int y = id & ((1U << bits) - 1);
 
-	return cmn->xps + xp_idx;
+	return cmn->xps + cmn->mesh_x * y + x;
 }
 static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
 					 enum cmn_node_type type)
@@ -485,13 +464,13 @@ static const char *arm_cmn_device_type(u8 type)
 	}
 }
 
-static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
+static void arm_cmn_show_logid(struct seq_file *s, const struct arm_cmn_node *xp, int p, int d)
 {
 	struct arm_cmn *cmn = s->private;
 	struct arm_cmn_node *dn;
+	u16 id = xp->id | d | (p << xp->deviceid_bits);
 
 	for (dn = cmn->dns; dn->type; dn++) {
-		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
 		int pad = dn->logid < 10;
 
 		if (dn->type == CMN_TYPE_XP)
@@ -500,7 +479,7 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
 		if (dn->type < CMN_TYPE_HNI)
 			continue;
 
-		if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d)
+		if (dn->id != id)
 			continue;
 
 		seq_printf(s, " %*c#%-*d  |", pad + 1, ' ', 3 - pad, dn->logid);
@@ -521,6 +500,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
 	y = cmn->mesh_y;
 	while (y--) {
 		int xp_base = cmn->mesh_x * y;
+		struct arm_cmn_node *xp = cmn->xps + xp_base;
 		u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION];
 
 		for (x = 0; x < cmn->mesh_x; x++)
@@ -528,16 +508,14 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
 
 		seq_printf(s, "\n%-2d   |", y);
 		for (x = 0; x < cmn->mesh_x; x++) {
-			struct arm_cmn_node *xp = cmn->xps + xp_base + x;
-
 			for (p = 0; p < CMN_MAX_PORTS; p++)
-				port[p][x] = arm_cmn_device_connect_info(cmn, xp, p);
+				port[p][x] = arm_cmn_device_connect_info(cmn, xp + x, p);
 			seq_printf(s, " XP #%-3d|", xp_base + x);
 		}
 
 		seq_puts(s, "\n     |");
 		for (x = 0; x < cmn->mesh_x; x++) {
-			s8 dtc = cmn->xps[xp_base + x].dtc;
+			s8 dtc = xp[x].dtc;
 
 			if (dtc < 0)
 				seq_puts(s, " DTC ?? |");
@@ -554,10 +532,10 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
 				seq_puts(s, arm_cmn_device_type(port[p][x]));
 			seq_puts(s, "\n    0|");
 			for (x = 0; x < cmn->mesh_x; x++)
-				arm_cmn_show_logid(s, x, y, p, 0);
+				arm_cmn_show_logid(s, xp + x, p, 0);
 			seq_puts(s, "\n    1|");
 			for (x = 0; x < cmn->mesh_x; x++)
-				arm_cmn_show_logid(s, x, y, p, 1);
+				arm_cmn_show_logid(s, xp + x, p, 1);
 		}
 		seq_puts(s, "\n-----+");
 	}
@@ -1815,10 +1793,7 @@ static int arm_cmn_event_init(struct perf_event *event)
 	}
 
 	if (!hw->num_dns) {
-		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid);
-
-		dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n",
-			nodeid, nid.x, nid.y, nid.port, nid.dev, type);
+		dev_dbg(cmn->dev, "invalid node 0x%x type 0x%x\n", nodeid, type);
 		return -EINVAL;
 	}
 
@@ -1921,7 +1896,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
 			arm_cmn_claim_wp_idx(dtm, event, d, wp_idx, i);
 			writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
 		} else {
-			struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+			struct arm_cmn_nodeid nid = arm_cmn_nid(dn);
 
 			if (cmn->multi_dtm)
 				nid.port %= 2;
@@ -2168,10 +2143,12 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
 			continue;
 
 		xp = arm_cmn_node_to_xp(cmn, dn);
+		dn->portid_bits = xp->portid_bits;
+		dn->deviceid_bits = xp->deviceid_bits;
 		dn->dtc = xp->dtc;
 		dn->dtm = xp->dtm;
 		if (cmn->multi_dtm)
-			dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
+			dn->dtm += arm_cmn_nid(dn).port / 2;
 
 		if (dn->type == CMN_TYPE_DTC) {
 			int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
@@ -2341,18 +2318,27 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 		arm_cmn_init_dtm(dtm++, xp, 0);
 		/*
 		 * Keeping track of connected ports will let us filter out
-		 * unnecessary XP events easily. We can also reliably infer the
-		 * "extra device ports" configuration for the node ID format
-		 * from this, since in that case we will see at least one XP
-		 * with port 2 connected, for the HN-D.
+		 * unnecessary XP events easily, and also infer the per-XP
+		 * part of the node ID format.
 		 */
 		for (int p = 0; p < CMN_MAX_PORTS; p++)
 			if (arm_cmn_device_connect_info(cmn, xp, p))
 				xp_ports |= BIT(p);
 
-		if (cmn->multi_dtm && (xp_ports & 0xc))
+		if (cmn->num_xps == 1) {
+			xp->portid_bits = 3;
+			xp->deviceid_bits = 2;
+		} else if (xp_ports > 0x3) {
+			xp->portid_bits = 2;
+			xp->deviceid_bits = 1;
+		} else {
+			xp->portid_bits = 1;
+			xp->deviceid_bits = 2;
+		}
+
+		if (cmn->multi_dtm && (xp_ports > 0x3))
 			arm_cmn_init_dtm(dtm++, xp, 1);
-		if (cmn->multi_dtm && (xp_ports & 0x30))
+		if (cmn->multi_dtm && (xp_ports > 0xf))
 			arm_cmn_init_dtm(dtm++, xp, 2);
 
 		cmn->ports_used |= xp_ports;
-- 
2.39.2.101.g768bb238c484.dirty



^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 2/8] perf/arm-cmn: Fix CCLA register offset
  2024-08-09 19:15 [PATCH 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
  2024-08-09 19:15 ` [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again Robin Murphy
@ 2024-08-09 19:15 ` Robin Murphy
  2024-08-16 10:00   ` Mark Rutland
  2024-08-09 19:15 ` [PATCH 3/8] perf/arm-cmn: Ensure dtm_idx is big enough Robin Murphy
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 32+ messages in thread
From: Robin Murphy @ 2024-08-09 19:15 UTC (permalink / raw)
  To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka

Apparently pmu_event_sel is offset by 8 for all CCLA nodes, not just
the CCLA_RNI combination type.

Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/perf/arm-cmn.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index fd2122a37f22..0e2e12e2f4fb 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -2393,10 +2393,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 			case CMN_TYPE_CXHA:
 			case CMN_TYPE_CCRA:
 			case CMN_TYPE_CCHA:
-			case CMN_TYPE_CCLA:
 			case CMN_TYPE_HNS:
 				dn++;
 				break;
+			case CMN_TYPE_CCLA:
+				dn->pmu_base += CMN_HNP_PMU_EVENT_SEL;
+				dn++;
+				break;
 			/* Nothing to see here */
 			case CMN_TYPE_MPAM_S:
 			case CMN_TYPE_MPAM_NS:
-- 
2.39.2.101.g768bb238c484.dirty



^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 3/8] perf/arm-cmn: Ensure dtm_idx is big enough
  2024-08-09 19:15 [PATCH 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
  2024-08-09 19:15 ` [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again Robin Murphy
  2024-08-09 19:15 ` [PATCH 2/8] perf/arm-cmn: Fix CCLA register offset Robin Murphy
@ 2024-08-09 19:15 ` Robin Murphy
  2024-08-16 10:14   ` Mark Rutland
  2024-08-09 19:15 ` [PATCH 4/8] perf/arm-cmn: Improve build-time assertions Robin Murphy
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 32+ messages in thread
From: Robin Murphy @ 2024-08-09 19:15 UTC (permalink / raw)
  To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka

While CMN_MAX_DIMENSION was bumped to 12 for CMN-650, that only supports
up to a 10x10 mesh, so bumping dtm_idx to 256 bits at the time worked
out OK in practice. However CMN-700 did finally support up to 144 XPs,
and thus needs a worst-case 288 bits of dtm_idx for an aggregated XP
event on a maxed-out config. Oops.

Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/perf/arm-cmn.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 0e2e12e2f4fb..c9a2b21a7aec 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -563,7 +563,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
 
 struct arm_cmn_hw_event {
 	struct arm_cmn_node *dn;
-	u64 dtm_idx[4];
+	u64 dtm_idx[5];
 	s8 dtc_idx[CMN_MAX_DTCS];
 	u8 num_dns;
 	u8 dtm_offset;
-- 
2.39.2.101.g768bb238c484.dirty



^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 4/8] perf/arm-cmn: Improve build-time assertions
  2024-08-09 19:15 [PATCH 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
                   ` (2 preceding siblings ...)
  2024-08-09 19:15 ` [PATCH 3/8] perf/arm-cmn: Ensure dtm_idx is big enough Robin Murphy
@ 2024-08-09 19:15 ` Robin Murphy
  2024-08-16 10:20   ` Mark Rutland
  2024-08-09 19:15 ` [PATCH 5/8] perf/arm-cmn: Make cycle counts less surprising Robin Murphy
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 32+ messages in thread
From: Robin Murphy @ 2024-08-09 19:15 UTC (permalink / raw)
  To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka

These days we can use static_assert() in the logical place rather than
jamming a BUILD_BUG_ON() into the nearest function scope. And since it
is now so convenient to do so, let's add an extra one to reinforce the
dtm_idx bitmap as well.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/perf/arm-cmn.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index c9a2b21a7aec..8f7a1a6f8ab7 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -577,6 +577,9 @@ struct arm_cmn_hw_event {
 	bool wide_sel;
 	enum cmn_filter_select filter_sel;
 };
+static_assert(sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event, target));
+/* Currently XPs are the node type we can have most of; others top out at 128 */
+static_assert(sizeof_field(struct arm_cmn_hw_event, dtm_idx) >= CMN_MAX_XPS / 4);
 
 #define for_each_hw_dn(hw, dn, i) \
 	for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
@@ -587,7 +590,6 @@ struct arm_cmn_hw_event {
 
 static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
 {
-	BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
 	return (struct arm_cmn_hw_event *)&event->hw;
 }
 
-- 
2.39.2.101.g768bb238c484.dirty



^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 5/8] perf/arm-cmn: Make cycle counts less surprising
  2024-08-09 19:15 [PATCH 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
                   ` (3 preceding siblings ...)
  2024-08-09 19:15 ` [PATCH 4/8] perf/arm-cmn: Improve build-time assertions Robin Murphy
@ 2024-08-09 19:15 ` Robin Murphy
  2024-08-16 10:25   ` Mark Rutland
  2024-08-09 19:15 ` [PATCH 6/8] perf/arm-cmn: Refactor DTC PMU register access Robin Murphy
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 32+ messages in thread
From: Robin Murphy @ 2024-08-09 19:15 UTC (permalink / raw)
  To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka

By default, CMN has automatic clock-gating with the implication that a
DTC's cycle counter may not increment while the domain is sufficiently
idle. Given that we may have up to 4 DTCs to choose from when scheduling
a cycles event, this may potentially lead to surprising results if
trying to measure metrics based on activity in a different DTC domain
from where cycles end up being counted. Make the reasonable assumption
that if the user wants to count cycles, they almost certainly want to
count all of the cycles, and disable clock gating while a DTC's cycle
counter is in use.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/perf/arm-cmn.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 8f7a1a6f8ab7..4d128db2040c 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -115,6 +115,7 @@
 /* The DTC node is where the magic happens */
 #define CMN_DT_DTC_CTL			0x0a00
 #define CMN_DT_DTC_CTL_DT_EN		BIT(0)
+#define CMN_DT_DTC_CTL_CG_DISABLE	BIT(10)
 
 /* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */
 #define _CMN_DT_CNT_REG(n)		((((n) / 2) * 4 + (n) % 2) * 4)
@@ -1544,9 +1545,12 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
 	int i;
 
 	if (type == CMN_TYPE_DTC) {
-		i = hw->dtc_idx[0];
-		writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
-		cmn->dtc[i].cc_active = true;
+		struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
+
+		writel_relaxed(CMN_DT_DTC_CTL_DT_EN | CMN_DT_DTC_CTL_CG_DISABLE,
+			       dtc->base + CMN_DT_DTC_CTL);
+		writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
+		dtc->cc_active = true;
 	} else if (type == CMN_TYPE_WP) {
 		u64 val = CMN_EVENT_WP_VAL(event);
 		u64 mask = CMN_EVENT_WP_MASK(event);
@@ -1575,8 +1579,10 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
 	int i;
 
 	if (type == CMN_TYPE_DTC) {
-		i = hw->dtc_idx[0];
-		cmn->dtc[i].cc_active = false;
+		struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
+
+		dtc->cc_active = false;
+		writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
 	} else if (type == CMN_TYPE_WP) {
 		for_each_hw_dn(hw, dn, i) {
 			void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
-- 
2.39.2.101.g768bb238c484.dirty



^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 6/8] perf/arm-cmn: Refactor DTC PMU register access
  2024-08-09 19:15 [PATCH 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
                   ` (4 preceding siblings ...)
  2024-08-09 19:15 ` [PATCH 5/8] perf/arm-cmn: Make cycle counts less surprising Robin Murphy
@ 2024-08-09 19:15 ` Robin Murphy
  2024-08-16 10:29   ` Mark Rutland
  2024-08-09 19:15 ` [PATCH 7/8] dt-bindings: perf: arm-cmn: Add CMN S3 Robin Murphy
  2024-08-09 19:15 ` [PATCH 8/8] perf/arm-cmn: Support " Robin Murphy
  7 siblings, 1 reply; 32+ messages in thread
From: Robin Murphy @ 2024-08-09 19:15 UTC (permalink / raw)
  To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka

Annoyingly, we're soon going to have to cope with PMU registers moving
about. This will mostly be straightforward, except for the hard-coding
of CMN_PMU_OFFSET for the DTC PMU registers. As a first step, refactor
those accessors to allow for encapsulating a variable offset without
making a big mess all over.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/perf/arm-cmn.c | 64 ++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 28 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 4d128db2040c..12bbb689a1af 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -119,24 +119,24 @@
 
 /* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */
 #define _CMN_DT_CNT_REG(n)		((((n) / 2) * 4 + (n) % 2) * 4)
-#define CMN_DT_PMEVCNT(n)		(CMN_PMU_OFFSET + _CMN_DT_CNT_REG(n))
-#define CMN_DT_PMCCNTR			(CMN_PMU_OFFSET + 0x40)
+#define CMN_DT_PMEVCNT(dtc, n)		((dtc)->pmu_base + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTR(dtc)		((dtc)->pmu_base + 0x40)
 
-#define CMN_DT_PMEVCNTSR(n)		(CMN_PMU_OFFSET + 0x50 + _CMN_DT_CNT_REG(n))
-#define CMN_DT_PMCCNTRSR		(CMN_PMU_OFFSET + 0x90)
+#define CMN_DT_PMEVCNTSR(dtc, n)	((dtc)->pmu_base + 0x50 + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTRSR(dtc)		((dtc)->pmu_base + 0x90)
 
-#define CMN_DT_PMCR			(CMN_PMU_OFFSET + 0x100)
+#define CMN_DT_PMCR(dtc)		((dtc)->pmu_base + 0x100)
 #define CMN_DT_PMCR_PMU_EN		BIT(0)
 #define CMN_DT_PMCR_CNTR_RST		BIT(5)
 #define CMN_DT_PMCR_OVFL_INTR_EN	BIT(6)
 
-#define CMN_DT_PMOVSR			(CMN_PMU_OFFSET + 0x118)
-#define CMN_DT_PMOVSR_CLR		(CMN_PMU_OFFSET + 0x120)
+#define CMN_DT_PMOVSR(dtc)		((dtc)->pmu_base + 0x118)
+#define CMN_DT_PMOVSR_CLR(dtc)		((dtc)->pmu_base + 0x120)
 
-#define CMN_DT_PMSSR			(CMN_PMU_OFFSET + 0x128)
+#define CMN_DT_PMSSR(dtc)		((dtc)->pmu_base + 0x128)
 #define CMN_DT_PMSSR_SS_STATUS(n)	BIT(n)
 
-#define CMN_DT_PMSRR			(CMN_PMU_OFFSET + 0x130)
+#define CMN_DT_PMSRR(dtc)		((dtc)->pmu_base + 0x130)
 #define CMN_DT_PMSRR_SS_REQ		BIT(0)
 
 #define CMN_DT_NUM_COUNTERS		8
@@ -303,8 +303,9 @@ struct arm_cmn_dtm {
 
 struct arm_cmn_dtc {
 	void __iomem *base;
+	void __iomem *pmu_base;
 	int irq;
-	int irq_friend;
+	s8 irq_friend;
 	bool cc_active;
 
 	struct perf_event *counters[CMN_DT_NUM_COUNTERS];
@@ -408,10 +409,15 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
 	};
 }
 
+static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn)
+{
+	return CMN_PMU_OFFSET;
+}
+
 static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn,
 				       const struct arm_cmn_node *xp, int port)
 {
-	int offset = CMN_MXP__CONNECT_INFO(port);
+	int offset = CMN_MXP__CONNECT_INFO(port) - arm_cmn_pmu_offset(cmn, xp);
 
 	if (port >= 2) {
 		if (cmn->part == PART_CMN600 || cmn->part == PART_CMN650)
@@ -424,7 +430,7 @@ static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn,
 			offset += CI700_CONNECT_INFO_P2_5_OFFSET;
 	}
 
-	return readl_relaxed(xp->pmu_base - CMN_PMU_OFFSET + offset);
+	return readl_relaxed(xp->pmu_base + offset);
 }
 
 static struct dentry *arm_cmn_debugfs;
@@ -1396,7 +1402,7 @@ static u32 arm_cmn_wp_config(struct perf_event *event, int wp_idx)
 static void arm_cmn_set_state(struct arm_cmn *cmn, u32 state)
 {
 	if (!cmn->state)
-		writel_relaxed(0, cmn->dtc[0].base + CMN_DT_PMCR);
+		writel_relaxed(0, CMN_DT_PMCR(&cmn->dtc[0]));
 	cmn->state |= state;
 }
 
@@ -1405,7 +1411,7 @@ static void arm_cmn_clear_state(struct arm_cmn *cmn, u32 state)
 	cmn->state &= ~state;
 	if (!cmn->state)
 		writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN,
-			       cmn->dtc[0].base + CMN_DT_PMCR);
+			       CMN_DT_PMCR(&cmn->dtc[0]));
 }
 
 static void arm_cmn_pmu_enable(struct pmu *pmu)
@@ -1440,18 +1446,19 @@ static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw,
 
 static u64 arm_cmn_read_cc(struct arm_cmn_dtc *dtc)
 {
-	u64 val = readq_relaxed(dtc->base + CMN_DT_PMCCNTR);
+	void __iomem *pmccntr = CMN_DT_PMCCNTR(dtc);
+	u64 val = readq_relaxed(pmccntr);
 
-	writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
+	writeq_relaxed(CMN_CC_INIT, pmccntr);
 	return (val - CMN_CC_INIT) & ((CMN_CC_INIT << 1) - 1);
 }
 
 static u32 arm_cmn_read_counter(struct arm_cmn_dtc *dtc, int idx)
 {
-	u32 val, pmevcnt = CMN_DT_PMEVCNT(idx);
+	void __iomem *pmevcnt = CMN_DT_PMEVCNT(dtc, idx);
+	u32 val = readl_relaxed(pmevcnt);
 
-	val = readl_relaxed(dtc->base + pmevcnt);
-	writel_relaxed(CMN_COUNTER_INIT, dtc->base + pmevcnt);
+	writel_relaxed(CMN_COUNTER_INIT, pmevcnt);
 	return val - CMN_COUNTER_INIT;
 }
 
@@ -1462,7 +1469,7 @@ static void arm_cmn_init_counter(struct perf_event *event)
 	u64 count;
 
 	for_each_hw_dtc_idx(hw, i, idx) {
-		writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + CMN_DT_PMEVCNT(idx));
+		writel_relaxed(CMN_COUNTER_INIT, CMN_DT_PMEVCNT(&cmn->dtc[i], idx));
 		cmn->dtc[i].counters[idx] = event;
 	}
 
@@ -1549,7 +1556,7 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
 
 		writel_relaxed(CMN_DT_DTC_CTL_DT_EN | CMN_DT_DTC_CTL_CG_DISABLE,
 			       dtc->base + CMN_DT_DTC_CTL);
-		writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
+		writeq_relaxed(CMN_CC_INIT, CMN_DT_PMCCNTR(dtc));
 		dtc->cc_active = true;
 	} else if (type == CMN_TYPE_WP) {
 		u64 val = CMN_EVENT_WP_VAL(event);
@@ -2026,7 +2033,7 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id)
 	irqreturn_t ret = IRQ_NONE;
 
 	for (;;) {
-		u32 status = readl_relaxed(dtc->base + CMN_DT_PMOVSR);
+		u32 status = readl_relaxed(CMN_DT_PMOVSR(dtc));
 		u64 delta;
 		int i;
 
@@ -2048,7 +2055,7 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id)
 			}
 		}
 
-		writel_relaxed(status, dtc->base + CMN_DT_PMOVSR_CLR);
+		writel_relaxed(status, CMN_DT_PMOVSR_CLR(dtc));
 
 		if (!dtc->irq_friend)
 			return ret;
@@ -2102,15 +2109,16 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id
 {
 	struct arm_cmn_dtc *dtc = cmn->dtc + idx;
 
-	dtc->base = dn->pmu_base - CMN_PMU_OFFSET;
+	dtc->pmu_base = dn->pmu_base;
+	dtc->base = dtc->pmu_base - arm_cmn_pmu_offset(cmn, dn);
 	dtc->irq = platform_get_irq(to_platform_device(cmn->dev), idx);
 	if (dtc->irq < 0)
 		return dtc->irq;
 
 	writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
-	writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR);
-	writeq_relaxed(0, dtc->base + CMN_DT_PMCCNTR);
-	writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR);
+	writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, CMN_DT_PMCR(dtc));
+	writeq_relaxed(0, CMN_DT_PMCCNTR(dtc));
+	writel_relaxed(0x1ff, CMN_DT_PMOVSR_CLR(dtc));
 
 	return 0;
 }
@@ -2198,7 +2206,7 @@ static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_c
 	node->id = FIELD_GET(CMN_NI_NODE_ID, reg);
 	node->logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg);
 
-	node->pmu_base = cmn->base + offset + CMN_PMU_OFFSET;
+	node->pmu_base = cmn->base + offset + arm_cmn_pmu_offset(cmn, node);
 
 	if (node->type == CMN_TYPE_CFG)
 		level = 0;
-- 
2.39.2.101.g768bb238c484.dirty



^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 7/8] dt-bindings: perf: arm-cmn: Add CMN S3
  2024-08-09 19:15 [PATCH 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
                   ` (5 preceding siblings ...)
  2024-08-09 19:15 ` [PATCH 6/8] perf/arm-cmn: Refactor DTC PMU register access Robin Murphy
@ 2024-08-09 19:15 ` Robin Murphy
  2024-08-15 15:20   ` Rob Herring (Arm)
  2024-08-09 19:15 ` [PATCH 8/8] perf/arm-cmn: Support " Robin Murphy
  7 siblings, 1 reply; 32+ messages in thread
From: Robin Murphy @ 2024-08-09 19:15 UTC (permalink / raw)
  To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka, devicetree

The CMN S3 PMU is functionally still very similar to CMN-700, however
while the register contents are compatible, many of them are moved to
different offsets. While this is technically discoverable by a careful
driver that understands the part number in the peripheral ID registers
(which do at least remain in the same place), a new unique compatible
seems warranted to avoid any surprises.

CC: <devicetree@vger.kernel.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 Documentation/devicetree/bindings/perf/arm,cmn.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/perf/arm,cmn.yaml b/Documentation/devicetree/bindings/perf/arm,cmn.yaml
index 2e51072e794a..0e9d665584e6 100644
--- a/Documentation/devicetree/bindings/perf/arm,cmn.yaml
+++ b/Documentation/devicetree/bindings/perf/arm,cmn.yaml
@@ -16,6 +16,7 @@ properties:
       - arm,cmn-600
       - arm,cmn-650
       - arm,cmn-700
+      - arm,cmn-s3
       - arm,ci-700
 
   reg:
-- 
2.39.2.101.g768bb238c484.dirty



^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 8/8] perf/arm-cmn: Support CMN S3
  2024-08-09 19:15 [PATCH 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
                   ` (6 preceding siblings ...)
  2024-08-09 19:15 ` [PATCH 7/8] dt-bindings: perf: arm-cmn: Add CMN S3 Robin Murphy
@ 2024-08-09 19:15 ` Robin Murphy
  2024-08-16 10:32   ` Mark Rutland
  2024-08-23 23:38   ` Ilkka Koskinen
  7 siblings, 2 replies; 32+ messages in thread
From: Robin Murphy @ 2024-08-09 19:15 UTC (permalink / raw)
  To: will; +Cc: mark.rutland, linux-arm-kernel, linux-kernel, ilkka

CMN S3 is the latest and greatest evolution for 2024, although most of
the new features don't impact the PMU, so from our point of view it ends
up looking a lot like CMN-700 r3 still. We have some new device types to
ignore, a mildly irritating rearrangement of the register layouts, and a
scary new configuration option that makes it potentially unsafe to even
walk the full discovery tree, let alone attempt to use the PMU.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/perf/arm-cmn.c | 117 ++++++++++++++++++++++++++---------------
 1 file changed, 74 insertions(+), 43 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 12bbb689a1af..0d19163fba5a 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -42,24 +42,28 @@
 #define CMN_CFGM_PERIPH_ID_23		0x0010
 #define CMN_CFGM_PID2_REVISION		GENMASK_ULL(7, 4)
 
-#define CMN_CFGM_INFO_GLOBAL		0x900
+#define CMN_CFGM_INFO_GLOBAL		0x0900
 #define CMN_INFO_MULTIPLE_DTM_EN	BIT_ULL(63)
 #define CMN_INFO_RSP_VC_NUM		GENMASK_ULL(53, 52)
 #define CMN_INFO_DAT_VC_NUM		GENMASK_ULL(51, 50)
+#define CMN_INFO_DEVICE_ISO_ENABLE	BIT_ULL(44)
 
-#define CMN_CFGM_INFO_GLOBAL_1		0x908
+#define CMN_CFGM_INFO_GLOBAL_1		0x0908
 #define CMN_INFO_SNP_VC_NUM		GENMASK_ULL(3, 2)
 #define CMN_INFO_REQ_VC_NUM		GENMASK_ULL(1, 0)
 
 /* XPs also have some local topology info which has uses too */
 #define CMN_MXP__CONNECT_INFO(p)	(0x0008 + 8 * (p))
-#define CMN__CONNECT_INFO_DEVICE_TYPE	GENMASK_ULL(4, 0)
+#define CMN__CONNECT_INFO_DEVICE_TYPE	GENMASK_ULL(5, 0)
 
 #define CMN_MAX_PORTS			6
 #define CI700_CONNECT_INFO_P2_5_OFFSET	0x10
 
 /* PMU registers occupy the 3rd 4KB page of each node's region */
 #define CMN_PMU_OFFSET			0x2000
+/* ...except when they don't :( */
+#define CMN_S3_DTM_OFFSET		0xa000
+#define CMN_S3_PMU_OFFSET		0xd900
 
 /* For most nodes, this is all there is */
 #define CMN_PMU_EVENT_SEL		0x000
@@ -191,10 +195,11 @@ enum cmn_model {
 	CMN650 = 2,
 	CMN700 = 4,
 	CI700 = 8,
+	CMNS3 = 16,
 	/* ...and then we can use bitmap tricks for commonality */
 	CMN_ANY = -1,
 	NOT_CMN600 = -2,
-	CMN_650ON = CMN650 | CMN700,
+	CMN_650ON = CMN650 | CMN700 | CMNS3,
 };
 
 /* Actual part numbers and revision IDs defined by the hardware */
@@ -203,6 +208,7 @@ enum cmn_part {
 	PART_CMN650 = 0x436,
 	PART_CMN700 = 0x43c,
 	PART_CI700 = 0x43a,
+	PART_CMN_S3 = 0x43e,
 };
 
 /* CMN-600 r0px shouldn't exist in silicon, thankfully */
@@ -254,6 +260,7 @@ enum cmn_node_type {
 	CMN_TYPE_HNS = 0x200,
 	CMN_TYPE_HNS_MPAM_S,
 	CMN_TYPE_HNS_MPAM_NS,
+	CMN_TYPE_APB = 0x1000,
 	/* Not a real node type */
 	CMN_TYPE_WP = 0x7770
 };
@@ -404,6 +411,8 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
 		return CMN700;
 	case PART_CI700:
 		return CI700;
+	case PART_CMN_S3:
+		return CMNS3;
 	default:
 		return 0;
 	};
@@ -411,6 +420,11 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
 
 static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn)
 {
+	if (cmn->part == PART_CMN_S3) {
+		if (dn->type == CMN_TYPE_XP)
+			return CMN_S3_DTM_OFFSET;
+		return CMN_S3_PMU_OFFSET;
+	}
 	return CMN_PMU_OFFSET;
 }
 
@@ -467,6 +481,9 @@ static const char *arm_cmn_device_type(u8 type)
 		case 0x1c: return "  MTSX  |";
 		case 0x1d: return "  HN-V  |";
 		case 0x1e: return "  CCG   |";
+		case 0x20: return " RN-F_F |";
+		case 0x21: return "RN-F_F_E|";
+		case 0x22: return " SN-F_F |";
 		default:   return "  ????  |";
 	}
 }
@@ -777,8 +794,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 	CMN_EVENT_ATTR(CMN_ANY, cxha_##_name, CMN_TYPE_CXHA, _event)
 #define CMN_EVENT_CCRA(_name, _event)				\
 	CMN_EVENT_ATTR(CMN_ANY, ccra_##_name, CMN_TYPE_CCRA, _event)
-#define CMN_EVENT_CCHA(_name, _event)				\
-	CMN_EVENT_ATTR(CMN_ANY, ccha_##_name, CMN_TYPE_CCHA, _event)
+#define CMN_EVENT_CCHA(_model, _name, _event)				\
+	CMN_EVENT_ATTR(_model, ccha_##_name, CMN_TYPE_CCHA, _event)
 #define CMN_EVENT_CCLA(_name, _event)				\
 	CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
 #define CMN_EVENT_CCLA_RNI(_name, _event)				\
@@ -1136,42 +1153,43 @@ static struct attribute *arm_cmn_event_attrs[] = {
 	CMN_EVENT_CCRA(wdb_alloc,			0x59),
 	CMN_EVENT_CCRA(ssb_alloc,			0x5a),
 
-	CMN_EVENT_CCHA(rddatbyp,			0x61),
-	CMN_EVENT_CCHA(chirsp_up_stall,			0x62),
-	CMN_EVENT_CCHA(chidat_up_stall,			0x63),
-	CMN_EVENT_CCHA(snppcrd_link0_stall,		0x64),
-	CMN_EVENT_CCHA(snppcrd_link1_stall,		0x65),
-	CMN_EVENT_CCHA(snppcrd_link2_stall,		0x66),
-	CMN_EVENT_CCHA(reqtrk_occ,			0x67),
-	CMN_EVENT_CCHA(rdb_occ,				0x68),
-	CMN_EVENT_CCHA(rdbyp_occ,			0x69),
-	CMN_EVENT_CCHA(wdb_occ,				0x6a),
-	CMN_EVENT_CCHA(snptrk_occ,			0x6b),
-	CMN_EVENT_CCHA(sdb_occ,				0x6c),
-	CMN_EVENT_CCHA(snphaz_occ,			0x6d),
-	CMN_EVENT_CCHA(reqtrk_alloc,			0x6e),
-	CMN_EVENT_CCHA(rdb_alloc,			0x6f),
-	CMN_EVENT_CCHA(rdbyp_alloc,			0x70),
-	CMN_EVENT_CCHA(wdb_alloc,			0x71),
-	CMN_EVENT_CCHA(snptrk_alloc,			0x72),
-	CMN_EVENT_CCHA(sdb_alloc,			0x73),
-	CMN_EVENT_CCHA(snphaz_alloc,			0x74),
-	CMN_EVENT_CCHA(pb_rhu_req_occ,			0x75),
-	CMN_EVENT_CCHA(pb_rhu_req_alloc,		0x76),
-	CMN_EVENT_CCHA(pb_rhu_pcie_req_occ,		0x77),
-	CMN_EVENT_CCHA(pb_rhu_pcie_req_alloc,		0x78),
-	CMN_EVENT_CCHA(pb_pcie_wr_req_occ,		0x79),
-	CMN_EVENT_CCHA(pb_pcie_wr_req_alloc,		0x7a),
-	CMN_EVENT_CCHA(pb_pcie_reg_req_occ,		0x7b),
-	CMN_EVENT_CCHA(pb_pcie_reg_req_alloc,		0x7c),
-	CMN_EVENT_CCHA(pb_pcie_rsvd_req_occ,		0x7d),
-	CMN_EVENT_CCHA(pb_pcie_rsvd_req_alloc,		0x7e),
-	CMN_EVENT_CCHA(pb_rhu_dat_occ,			0x7f),
-	CMN_EVENT_CCHA(pb_rhu_dat_alloc,		0x80),
-	CMN_EVENT_CCHA(pb_rhu_pcie_dat_occ,		0x81),
-	CMN_EVENT_CCHA(pb_rhu_pcie_dat_alloc,		0x82),
-	CMN_EVENT_CCHA(pb_pcie_wr_dat_occ,		0x83),
-	CMN_EVENT_CCHA(pb_pcie_wr_dat_alloc,		0x84),
+	CMN_EVENT_CCHA(CMN_ANY, rddatbyp,		0x61),
+	CMN_EVENT_CCHA(CMN_ANY, chirsp_up_stall,	0x62),
+	CMN_EVENT_CCHA(CMN_ANY, chidat_up_stall,	0x63),
+	CMN_EVENT_CCHA(CMN_ANY, snppcrd_link0_stall,	0x64),
+	CMN_EVENT_CCHA(CMN_ANY, snppcrd_link1_stall,	0x65),
+	CMN_EVENT_CCHA(CMN_ANY, snppcrd_link2_stall,	0x66),
+	CMN_EVENT_CCHA(CMN_ANY, reqtrk_occ,		0x67),
+	CMN_EVENT_CCHA(CMN_ANY, rdb_occ,		0x68),
+	CMN_EVENT_CCHA(CMN_ANY, rdbyp_occ,		0x69),
+	CMN_EVENT_CCHA(CMN_ANY, wdb_occ,		0x6a),
+	CMN_EVENT_CCHA(CMN_ANY, snptrk_occ,		0x6b),
+	CMN_EVENT_CCHA(CMN_ANY, sdb_occ,		0x6c),
+	CMN_EVENT_CCHA(CMN_ANY, snphaz_occ,		0x6d),
+	CMN_EVENT_CCHA(CMN_ANY, reqtrk_alloc,		0x6e),
+	CMN_EVENT_CCHA(CMN_ANY, rdb_alloc,		0x6f),
+	CMN_EVENT_CCHA(CMN_ANY, rdbyp_alloc,		0x70),
+	CMN_EVENT_CCHA(CMN_ANY, wdb_alloc,		0x71),
+	CMN_EVENT_CCHA(CMN_ANY, snptrk_alloc,		0x72),
+	CMN_EVENT_CCHA(CMN_ANY, db_alloc,		0x73),
+	CMN_EVENT_CCHA(CMN_ANY, snphaz_alloc,		0x74),
+	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_occ,		0x75),
+	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_alloc,	0x76),
+	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_occ,	0x77),
+	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_alloc,	0x78),
+	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_occ,	0x79),
+	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_alloc,	0x7a),
+	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_occ,	0x7b),
+	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_alloc,	0x7c),
+	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_occ,	0x7d),
+	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_alloc,	0x7e),
+	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_occ,		0x7f),
+	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_alloc,	0x80),
+	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_occ,	0x81),
+	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_alloc,	0x82),
+	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_occ,	0x83),
+	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_alloc,	0x84),
+	CMN_EVENT_CCHA(CMNS3, chirsp1_up_stall,		0x85),
 
 	CMN_EVENT_CCLA(rx_cxs,				0x21),
 	CMN_EVENT_CCLA(tx_cxs,				0x22),
@@ -1777,7 +1795,8 @@ static int arm_cmn_event_init(struct perf_event *event)
 		/* ...but the DTM may depend on which port we're watching */
 		if (cmn->multi_dtm)
 			hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
-	} else if (type == CMN_TYPE_XP && cmn->part == PART_CMN700) {
+	} else if (type == CMN_TYPE_XP &&
+		   (cmn->part == PART_CMN700 || cmn->part == PART_CMN_S3)) {
 		hw->wide_sel = true;
 	}
 
@@ -2264,7 +2283,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 	reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_23);
 	cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg);
 
+	/*
+	 * With the device isolation feature, if firmware has neglected to enable
+	 * an XP port then we risk locking up if we try to access anything behind
+	 * it; however we also have no way to tell from Non-Secure whether any
+	 * given port is disabled or not, so the only way to win is not to play...
+	 */
 	reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL);
+	if (reg & CMN_INFO_DEVICE_ISO_ENABLE) {
+		dev_err(cmn->dev, "Device isolation enabled, not continuing due to risk of lockup\n");
+		return -ENODEV;
+	}
 	cmn->multi_dtm = reg & CMN_INFO_MULTIPLE_DTM_EN;
 	cmn->rsp_vc_num = FIELD_GET(CMN_INFO_RSP_VC_NUM, reg);
 	cmn->dat_vc_num = FIELD_GET(CMN_INFO_DAT_VC_NUM, reg);
@@ -2423,6 +2452,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 			case CMN_TYPE_CXLA:
 			case CMN_TYPE_HNS_MPAM_S:
 			case CMN_TYPE_HNS_MPAM_NS:
+			case CMN_TYPE_APB:
 				break;
 			/*
 			 * Split "optimised" combination nodes into separate
@@ -2608,6 +2638,7 @@ static const struct of_device_id arm_cmn_of_match[] = {
 	{ .compatible = "arm,cmn-600", .data = (void *)PART_CMN600 },
 	{ .compatible = "arm,cmn-650" },
 	{ .compatible = "arm,cmn-700" },
+	{ .compatible = "arm,cmn-s3" },
 	{ .compatible = "arm,ci-700" },
 	{}
 };
-- 
2.39.2.101.g768bb238c484.dirty



^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 7/8] dt-bindings: perf: arm-cmn: Add CMN S3
  2024-08-09 19:15 ` [PATCH 7/8] dt-bindings: perf: arm-cmn: Add CMN S3 Robin Murphy
@ 2024-08-15 15:20   ` Rob Herring (Arm)
  0 siblings, 0 replies; 32+ messages in thread
From: Rob Herring (Arm) @ 2024-08-15 15:20 UTC (permalink / raw)
  To: Robin Murphy
  Cc: mark.rutland, ilkka, devicetree, linux-kernel, will,
	linux-arm-kernel


On Fri, 09 Aug 2024 20:15:46 +0100, Robin Murphy wrote:
> The CMN S3 PMU is functionally still very similar to CMN-700, however
> while the register contents are compatible, many of them are moved to
> different offsets. While this is technically discoverable by a careful
> driver that understands the part number in the peripheral ID registers
> (which do at least remain in the same place), a new unique compatible
> seems warranted to avoid any surprises.
> 
> CC: <devicetree@vger.kernel.org>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
>  Documentation/devicetree/bindings/perf/arm,cmn.yaml | 1 +
>  1 file changed, 1 insertion(+)
> 

Acked-by: Rob Herring (Arm) <robh@kernel.org>



^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again.
  2024-08-09 19:15 ` [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again Robin Murphy
@ 2024-08-16  9:33   ` Mark Rutland
  2024-08-16 11:56     ` Robin Murphy
  2024-08-24  1:00   ` Ilkka Koskinen
  1 sibling, 1 reply; 32+ messages in thread
From: Mark Rutland @ 2024-08-16  9:33 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

Hi Robin,

On Fri, Aug 09, 2024 at 08:15:40PM +0100, Robin Murphy wrote:
> It transpires that despite the explicit example to the contrary in the
> CMN-700 TRM, the "extra device ports" config is in fact a per-XP thing
> rather than a global one. To that end, rework node IDs yet again to
> carry around the additional data necessary to decode them properly. At
> this point the notion of fully decomposing an ID becomes more
> impractical than it's worth, so unabstracting the XY mesh coordinates
> (where 2/3 users were just debug anyway) ends up leaving things a bit
> simpler overall.
> 
> Fixes: 60d1504070c2 ("perf/arm-cmn: Support new IP features")

Does this fix an observable functional issue? It's difficult to tell
what the impact of this is from the commit message.

i.e. what is the impact:

* On the CMN-700 programmers model?

* As observed by a user of the PMU driver?

... and is there anything in the manual that spells out that this is a
per-XP property? I'm struggling to find that in the CMN-700 TRM, as it
seems to talk about "mesh configuration(s) with extra device ports".

Mark.

> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
>  drivers/perf/arm-cmn.c | 94 ++++++++++++++++++------------------------
>  1 file changed, 40 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index c932d9d355cf..fd2122a37f22 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -24,14 +24,6 @@
>  #define CMN_NI_NODE_ID			GENMASK_ULL(31, 16)
>  #define CMN_NI_LOGICAL_ID		GENMASK_ULL(47, 32)
>  
> -#define CMN_NODEID_DEVID(reg)		((reg) & 3)
> -#define CMN_NODEID_EXT_DEVID(reg)	((reg) & 1)
> -#define CMN_NODEID_PID(reg)		(((reg) >> 2) & 1)
> -#define CMN_NODEID_EXT_PID(reg)		(((reg) >> 1) & 3)
> -#define CMN_NODEID_1x1_PID(reg)		(((reg) >> 2) & 7)
> -#define CMN_NODEID_X(reg, bits)		((reg) >> (3 + (bits)))
> -#define CMN_NODEID_Y(reg, bits)		(((reg) >> 3) & ((1U << (bits)) - 1))
> -
>  #define CMN_CHILD_INFO			0x0080
>  #define CMN_CI_CHILD_COUNT		GENMASK_ULL(15, 0)
>  #define CMN_CI_CHILD_PTR_OFFSET		GENMASK_ULL(31, 16)
> @@ -280,8 +272,11 @@ struct arm_cmn_node {
>  	u16 id, logid;
>  	enum cmn_node_type type;
>  
> +	/* XP properties really, but replicated to children for convenience */
>  	u8 dtm;
>  	s8 dtc;
> +	u8 portid_bits:4;
> +	u8 deviceid_bits:4;
>  	/* DN/HN-F/CXHA */
>  	struct {
>  		u8 val : 4;
> @@ -357,49 +352,33 @@ struct arm_cmn {
>  static int arm_cmn_hp_state;
>  
>  struct arm_cmn_nodeid {
> -	u8 x;
> -	u8 y;
>  	u8 port;
>  	u8 dev;
>  };
>  
>  static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
>  {
> -	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2);
> +	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1));
>  }
>  
> -static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
> +static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn_node *dn)
>  {
>  	struct arm_cmn_nodeid nid;
>  
> -	if (cmn->num_xps == 1) {
> -		nid.x = 0;
> -		nid.y = 0;
> -		nid.port = CMN_NODEID_1x1_PID(id);
> -		nid.dev = CMN_NODEID_DEVID(id);
> -	} else {
> -		int bits = arm_cmn_xyidbits(cmn);
> -
> -		nid.x = CMN_NODEID_X(id, bits);
> -		nid.y = CMN_NODEID_Y(id, bits);
> -		if (cmn->ports_used & 0xc) {
> -			nid.port = CMN_NODEID_EXT_PID(id);
> -			nid.dev = CMN_NODEID_EXT_DEVID(id);
> -		} else {
> -			nid.port = CMN_NODEID_PID(id);
> -			nid.dev = CMN_NODEID_DEVID(id);
> -		}
> -	}
> +	nid.dev = dn->id & ((1U << dn->deviceid_bits) - 1);
> +	nid.port = (dn->id >> dn->deviceid_bits) & ((1U << dn->portid_bits) - 1);
>  	return nid;
>  }
>  
>  static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
>  					       const struct arm_cmn_node *dn)
>  {
> -	struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
> -	int xp_idx = cmn->mesh_x * nid.y + nid.x;
> +	int id = dn->id >> (dn->portid_bits + dn->deviceid_bits);
> +	int bits = arm_cmn_xyidbits(cmn);
> +	int x = id > bits;
> +	int y = id & ((1U << bits) - 1);
>  
> -	return cmn->xps + xp_idx;
> +	return cmn->xps + cmn->mesh_x * y + x;
>  }
>  static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
>  					 enum cmn_node_type type)
> @@ -485,13 +464,13 @@ static const char *arm_cmn_device_type(u8 type)
>  	}
>  }
>  
> -static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
> +static void arm_cmn_show_logid(struct seq_file *s, const struct arm_cmn_node *xp, int p, int d)
>  {
>  	struct arm_cmn *cmn = s->private;
>  	struct arm_cmn_node *dn;
> +	u16 id = xp->id | d | (p << xp->deviceid_bits);
>  
>  	for (dn = cmn->dns; dn->type; dn++) {
> -		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
>  		int pad = dn->logid < 10;
>  
>  		if (dn->type == CMN_TYPE_XP)
> @@ -500,7 +479,7 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
>  		if (dn->type < CMN_TYPE_HNI)
>  			continue;
>  
> -		if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d)
> +		if (dn->id != id)
>  			continue;
>  
>  		seq_printf(s, " %*c#%-*d  |", pad + 1, ' ', 3 - pad, dn->logid);
> @@ -521,6 +500,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
>  	y = cmn->mesh_y;
>  	while (y--) {
>  		int xp_base = cmn->mesh_x * y;
> +		struct arm_cmn_node *xp = cmn->xps + xp_base;
>  		u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION];
>  
>  		for (x = 0; x < cmn->mesh_x; x++)
> @@ -528,16 +508,14 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
>  
>  		seq_printf(s, "\n%-2d   |", y);
>  		for (x = 0; x < cmn->mesh_x; x++) {
> -			struct arm_cmn_node *xp = cmn->xps + xp_base + x;
> -
>  			for (p = 0; p < CMN_MAX_PORTS; p++)
> -				port[p][x] = arm_cmn_device_connect_info(cmn, xp, p);
> +				port[p][x] = arm_cmn_device_connect_info(cmn, xp + x, p);
>  			seq_printf(s, " XP #%-3d|", xp_base + x);
>  		}
>  
>  		seq_puts(s, "\n     |");
>  		for (x = 0; x < cmn->mesh_x; x++) {
> -			s8 dtc = cmn->xps[xp_base + x].dtc;
> +			s8 dtc = xp[x].dtc;
>  
>  			if (dtc < 0)
>  				seq_puts(s, " DTC ?? |");
> @@ -554,10 +532,10 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
>  				seq_puts(s, arm_cmn_device_type(port[p][x]));
>  			seq_puts(s, "\n    0|");
>  			for (x = 0; x < cmn->mesh_x; x++)
> -				arm_cmn_show_logid(s, x, y, p, 0);
> +				arm_cmn_show_logid(s, xp + x, p, 0);
>  			seq_puts(s, "\n    1|");
>  			for (x = 0; x < cmn->mesh_x; x++)
> -				arm_cmn_show_logid(s, x, y, p, 1);
> +				arm_cmn_show_logid(s, xp + x, p, 1);
>  		}
>  		seq_puts(s, "\n-----+");
>  	}
> @@ -1815,10 +1793,7 @@ static int arm_cmn_event_init(struct perf_event *event)
>  	}
>  
>  	if (!hw->num_dns) {
> -		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid);
> -
> -		dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n",
> -			nodeid, nid.x, nid.y, nid.port, nid.dev, type);
> +		dev_dbg(cmn->dev, "invalid node 0x%x type 0x%x\n", nodeid, type);
>  		return -EINVAL;
>  	}
>  
> @@ -1921,7 +1896,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
>  			arm_cmn_claim_wp_idx(dtm, event, d, wp_idx, i);
>  			writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
>  		} else {
> -			struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
> +			struct arm_cmn_nodeid nid = arm_cmn_nid(dn);
>  
>  			if (cmn->multi_dtm)
>  				nid.port %= 2;
> @@ -2168,10 +2143,12 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
>  			continue;
>  
>  		xp = arm_cmn_node_to_xp(cmn, dn);
> +		dn->portid_bits = xp->portid_bits;
> +		dn->deviceid_bits = xp->deviceid_bits;
>  		dn->dtc = xp->dtc;
>  		dn->dtm = xp->dtm;
>  		if (cmn->multi_dtm)
> -			dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
> +			dn->dtm += arm_cmn_nid(dn).port / 2;
>  
>  		if (dn->type == CMN_TYPE_DTC) {
>  			int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
> @@ -2341,18 +2318,27 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
>  		arm_cmn_init_dtm(dtm++, xp, 0);
>  		/*
>  		 * Keeping track of connected ports will let us filter out
> -		 * unnecessary XP events easily. We can also reliably infer the
> -		 * "extra device ports" configuration for the node ID format
> -		 * from this, since in that case we will see at least one XP
> -		 * with port 2 connected, for the HN-D.
> +		 * unnecessary XP events easily, and also infer the per-XP
> +		 * part of the node ID format.
>  		 */
>  		for (int p = 0; p < CMN_MAX_PORTS; p++)
>  			if (arm_cmn_device_connect_info(cmn, xp, p))
>  				xp_ports |= BIT(p);
>  
> -		if (cmn->multi_dtm && (xp_ports & 0xc))
> +		if (cmn->num_xps == 1) {
> +			xp->portid_bits = 3;
> +			xp->deviceid_bits = 2;
> +		} else if (xp_ports > 0x3) {
> +			xp->portid_bits = 2;
> +			xp->deviceid_bits = 1;
> +		} else {
> +			xp->portid_bits = 1;
> +			xp->deviceid_bits = 2;
> +		}
> +
> +		if (cmn->multi_dtm && (xp_ports > 0x3))
>  			arm_cmn_init_dtm(dtm++, xp, 1);
> -		if (cmn->multi_dtm && (xp_ports & 0x30))
> +		if (cmn->multi_dtm && (xp_ports > 0xf))
>  			arm_cmn_init_dtm(dtm++, xp, 2);
>  
>  		cmn->ports_used |= xp_ports;
> -- 
> 2.39.2.101.g768bb238c484.dirty
> 


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 2/8] perf/arm-cmn: Fix CCLA register offset
  2024-08-09 19:15 ` [PATCH 2/8] perf/arm-cmn: Fix CCLA register offset Robin Murphy
@ 2024-08-16 10:00   ` Mark Rutland
  2024-08-16 18:33     ` Robin Murphy
  0 siblings, 1 reply; 32+ messages in thread
From: Mark Rutland @ 2024-08-16 10:00 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On Fri, Aug 09, 2024 at 08:15:41PM +0100, Robin Murphy wrote:
> Apparently pmu_event_sel is offset by 8 for all CCLA nodes, not just
> the CCLA_RNI combination type.

Was there some reason we used to think that was specific to CCLA_RNI
nodes, or was that just an oversight?

Looking at the CMN-700 TRM and scanning for pmu_event_sel, we have:

	16'h2000	por_ccg_ha_pmu_event_sel
	16'h2000	por_ccg_ra_pmu_event_sel
	16'h2008	por_ccla_pmu_event_sel
	16'h2000	por_dn_pmu_event_sel
	16'h2000	cmn_hns_pmu_event_sel
	16'h2000	por_hni_pmu_event_sel
	16'h2008	por_hnp_pmu_event_sel
	16'h2000	por_mxp_pmu_event_sel
	16'h2000	por_rnd_pmu_event_sel
	16'h2000	por_rni_pmu_event_sel
	16'h2000	por_sbsx_pmu_event_sel

> Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support")
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
>  drivers/perf/arm-cmn.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index fd2122a37f22..0e2e12e2f4fb 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -2393,10 +2393,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
>  			case CMN_TYPE_CXHA:
>  			case CMN_TYPE_CCRA:
>  			case CMN_TYPE_CCHA:
> -			case CMN_TYPE_CCLA:
>  			case CMN_TYPE_HNS:
>  				dn++;
>  				break;
> +			case CMN_TYPE_CCLA:
> +				dn->pmu_base += CMN_HNP_PMU_EVENT_SEL;
> +				dn++;
> +				break;

When reading this for the first time, it looks like a copy-paste error
since CMN_HNP_PMU_EVENT_SEL doesn't have any obvious relationship with
CCLA nodes.

I reckon it'd be worth adding CMN_CCLA_PMU_EVENT_SEL, and replacing the
existing comment above the definition of CMN_HNP_PMU_EVENT_SEL, e.g.

/*
 * Some nodes place common registers at different offsets from most
 * other nodes.
 */
#define CMN_HNP_PMU_EVENT_SEL		0x008
#define CMN_CCLA_PMU_EVENT_SEL		0x008

That way the switch looks less suspicious, and the comment is a bit more
helpful to anyone trying to figure out what's going on here.

With that:

Acked-by: Mark Rutland <mark.rutland@arm.com>

Mark.

>  			/* Nothing to see here */
>  			case CMN_TYPE_MPAM_S:
>  			case CMN_TYPE_MPAM_NS:
> -- 
> 2.39.2.101.g768bb238c484.dirty
> 


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/8] perf/arm-cmn: Ensure dtm_idx is big enough
  2024-08-09 19:15 ` [PATCH 3/8] perf/arm-cmn: Ensure dtm_idx is big enough Robin Murphy
@ 2024-08-16 10:14   ` Mark Rutland
  2024-08-19 15:00     ` Robin Murphy
  0 siblings, 1 reply; 32+ messages in thread
From: Mark Rutland @ 2024-08-16 10:14 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On Fri, Aug 09, 2024 at 08:15:42PM +0100, Robin Murphy wrote:
> While CMN_MAX_DIMENSION was bumped to 12 for CMN-650, that only supports
> up to a 10x10 mesh, so bumping dtm_idx to 256 bits at the time worked
> out OK in practice. However CMN-700 did finally support up to 144 XPs,
> and thus needs a worst-case 288 bits of dtm_idx for an aggregated XP
> event on a maxed-out config. Oops.
> 
> Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support")
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
>  drivers/perf/arm-cmn.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index 0e2e12e2f4fb..c9a2b21a7aec 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -563,7 +563,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
>  
>  struct arm_cmn_hw_event {
>  	struct arm_cmn_node *dn;
> -	u64 dtm_idx[4];
> +	u64 dtm_idx[5];

Can't we size this based on CMN_MAX_DIMENSION (or CMN_MAX_XPS or
CMN_MAX_DTMS), to make that clear?

From the desciription in the commit message it sounds like you need 2 *
CMN_MAX_XPS bits, i.e.

	#define DTM_IDX_BITS	(2 * CMN_MAX_XPS)

	u64 dtm_idx[DIV_ROUND_UP(DTM_IDX_BITS, 64)];

Mark.

>  	s8 dtc_idx[CMN_MAX_DTCS];
>  	u8 num_dns;
>  	u8 dtm_offset;
> -- 
> 2.39.2.101.g768bb238c484.dirty
> 


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/8] perf/arm-cmn: Improve build-time assertions
  2024-08-09 19:15 ` [PATCH 4/8] perf/arm-cmn: Improve build-time assertions Robin Murphy
@ 2024-08-16 10:20   ` Mark Rutland
  0 siblings, 0 replies; 32+ messages in thread
From: Mark Rutland @ 2024-08-16 10:20 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On Fri, Aug 09, 2024 at 08:15:43PM +0100, Robin Murphy wrote:
> These days we can use static_assert() in the logical place rather than
> jamming a BUILD_BUG_ON() into the nearest function scope. And since it
> is now so convenient to do so, let's add an extra one to reinforce the
> dtm_idx bitmap as well.
> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
>  drivers/perf/arm-cmn.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index c9a2b21a7aec..8f7a1a6f8ab7 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -577,6 +577,9 @@ struct arm_cmn_hw_event {
>  	bool wide_sel;
>  	enum cmn_filter_select filter_sel;
>  };
> +static_assert(sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event, target));

Moving this out seems fine.

> +/* Currently XPs are the node type we can have most of; others top out at 128 */
> +static_assert(sizeof_field(struct arm_cmn_hw_event, dtm_idx) >= CMN_MAX_XPS / 4);

I reckon we can make this true by construction (as on the last patch),
and avoid the need for the assert entirely.

Mark.

>  
>  #define for_each_hw_dn(hw, dn, i) \
>  	for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
> @@ -587,7 +590,6 @@ struct arm_cmn_hw_event {
>  
>  static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
>  {
> -	BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
>  	return (struct arm_cmn_hw_event *)&event->hw;
>  }
>  
> -- 
> 2.39.2.101.g768bb238c484.dirty
> 


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/8] perf/arm-cmn: Make cycle counts less surprising
  2024-08-09 19:15 ` [PATCH 5/8] perf/arm-cmn: Make cycle counts less surprising Robin Murphy
@ 2024-08-16 10:25   ` Mark Rutland
  2024-08-19 16:35     ` Robin Murphy
  0 siblings, 1 reply; 32+ messages in thread
From: Mark Rutland @ 2024-08-16 10:25 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On Fri, Aug 09, 2024 at 08:15:44PM +0100, Robin Murphy wrote:
> By default, CMN has automatic clock-gating with the implication that a
> DTC's cycle counter may not increment while the domain is sufficiently
> idle.

Similar is true for the cycles event on the CPU side, so this has some
precedent.

> Given that we may have up to 4 DTCs to choose from when scheduling
> a cycles event, this may potentially lead to surprising results if
> trying to measure metrics based on activity in a different DTC domain
> from where cycles end up being counted. Make the reasonable assumption
> that if the user wants to count cycles, they almost certainly want to
> count all of the cycles, and disable clock gating while a DTC's cycle
> counter is in use.

As above, the default does match the CPU side behaviour, and a user
might be trying to determine how much clock gating occurs over some
period, so it's not necessarily right to always disable clock gating.
That might need to be an explicit option on the cycles event.

Do we always have the ability to disable clock gating, or can that be
locked down by system integration or FW?

Mark.

> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
>  drivers/perf/arm-cmn.c | 16 +++++++++++-----
>  1 file changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index 8f7a1a6f8ab7..4d128db2040c 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -115,6 +115,7 @@
>  /* The DTC node is where the magic happens */
>  #define CMN_DT_DTC_CTL			0x0a00
>  #define CMN_DT_DTC_CTL_DT_EN		BIT(0)
> +#define CMN_DT_DTC_CTL_CG_DISABLE	BIT(10)
>  
>  /* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */
>  #define _CMN_DT_CNT_REG(n)		((((n) / 2) * 4 + (n) % 2) * 4)
> @@ -1544,9 +1545,12 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
>  	int i;
>  
>  	if (type == CMN_TYPE_DTC) {
> -		i = hw->dtc_idx[0];
> -		writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
> -		cmn->dtc[i].cc_active = true;
> +		struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
> +
> +		writel_relaxed(CMN_DT_DTC_CTL_DT_EN | CMN_DT_DTC_CTL_CG_DISABLE,
> +			       dtc->base + CMN_DT_DTC_CTL);
> +		writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
> +		dtc->cc_active = true;
>  	} else if (type == CMN_TYPE_WP) {
>  		u64 val = CMN_EVENT_WP_VAL(event);
>  		u64 mask = CMN_EVENT_WP_MASK(event);
> @@ -1575,8 +1579,10 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
>  	int i;
>  
>  	if (type == CMN_TYPE_DTC) {
> -		i = hw->dtc_idx[0];
> -		cmn->dtc[i].cc_active = false;
> +		struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
> +
> +		dtc->cc_active = false;
> +		writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
>  	} else if (type == CMN_TYPE_WP) {
>  		for_each_hw_dn(hw, dn, i) {
>  			void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
> -- 
> 2.39.2.101.g768bb238c484.dirty
> 


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 6/8] perf/arm-cmn: Refactor DTC PMU register access
  2024-08-09 19:15 ` [PATCH 6/8] perf/arm-cmn: Refactor DTC PMU register access Robin Murphy
@ 2024-08-16 10:29   ` Mark Rutland
  2024-08-19 16:41     ` Robin Murphy
  0 siblings, 1 reply; 32+ messages in thread
From: Mark Rutland @ 2024-08-16 10:29 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On Fri, Aug 09, 2024 at 08:15:45PM +0100, Robin Murphy wrote:
> Annoyingly, we're soon going to have to cope with PMU registers moving
> about. This will mostly be straightforward, except for the hard-coding
> of CMN_PMU_OFFSET for the DTC PMU registers. As a first step, refactor
> those accessors to allow for encapsulating a variable offset without
> making a big mess all over.
> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
>  drivers/perf/arm-cmn.c | 64 ++++++++++++++++++++++++------------------
>  1 file changed, 36 insertions(+), 28 deletions(-)

Aside from a minoe comment below this looks fine to me.

>  struct arm_cmn_dtc {
>  	void __iomem *base;
> +	void __iomem *pmu_base;
>  	int irq;
> -	int irq_friend;
> +	s8 irq_friend;

Unrelated change?

AFAICT there's no reason for 'irq_friend' to change from 'int' to 's8',
and nothing in the commit message explains it.

Otherwise this all looks fine to me.

Mark.


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 8/8] perf/arm-cmn: Support CMN S3
  2024-08-09 19:15 ` [PATCH 8/8] perf/arm-cmn: Support " Robin Murphy
@ 2024-08-16 10:32   ` Mark Rutland
  2024-08-23 23:38   ` Ilkka Koskinen
  1 sibling, 0 replies; 32+ messages in thread
From: Mark Rutland @ 2024-08-16 10:32 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On Fri, Aug 09, 2024 at 08:15:47PM +0100, Robin Murphy wrote:
> CMN S3 is the latest and greatest evolution for 2024, although most of
> the new features don't impact the PMU, so from our point of view it ends
> up looking a lot like CMN-700 r3 still. We have some new device types to
> ignore, a mildly irritating rearrangement of the register layouts, and a
> scary new configuration option that makes it potentially unsafe to even
> walk the full discovery tree, let alone attempt to use the PMU.
> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>

This looks fine to me, so:

Acked-by: Mark Rutland <mark.rutland@arm.com>

Mark.

> ---
>  drivers/perf/arm-cmn.c | 117 ++++++++++++++++++++++++++---------------
>  1 file changed, 74 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index 12bbb689a1af..0d19163fba5a 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -42,24 +42,28 @@
>  #define CMN_CFGM_PERIPH_ID_23		0x0010
>  #define CMN_CFGM_PID2_REVISION		GENMASK_ULL(7, 4)
>  
> -#define CMN_CFGM_INFO_GLOBAL		0x900
> +#define CMN_CFGM_INFO_GLOBAL		0x0900
>  #define CMN_INFO_MULTIPLE_DTM_EN	BIT_ULL(63)
>  #define CMN_INFO_RSP_VC_NUM		GENMASK_ULL(53, 52)
>  #define CMN_INFO_DAT_VC_NUM		GENMASK_ULL(51, 50)
> +#define CMN_INFO_DEVICE_ISO_ENABLE	BIT_ULL(44)
>  
> -#define CMN_CFGM_INFO_GLOBAL_1		0x908
> +#define CMN_CFGM_INFO_GLOBAL_1		0x0908
>  #define CMN_INFO_SNP_VC_NUM		GENMASK_ULL(3, 2)
>  #define CMN_INFO_REQ_VC_NUM		GENMASK_ULL(1, 0)
>  
>  /* XPs also have some local topology info which has uses too */
>  #define CMN_MXP__CONNECT_INFO(p)	(0x0008 + 8 * (p))
> -#define CMN__CONNECT_INFO_DEVICE_TYPE	GENMASK_ULL(4, 0)
> +#define CMN__CONNECT_INFO_DEVICE_TYPE	GENMASK_ULL(5, 0)
>  
>  #define CMN_MAX_PORTS			6
>  #define CI700_CONNECT_INFO_P2_5_OFFSET	0x10
>  
>  /* PMU registers occupy the 3rd 4KB page of each node's region */
>  #define CMN_PMU_OFFSET			0x2000
> +/* ...except when they don't :( */
> +#define CMN_S3_DTM_OFFSET		0xa000
> +#define CMN_S3_PMU_OFFSET		0xd900
>  
>  /* For most nodes, this is all there is */
>  #define CMN_PMU_EVENT_SEL		0x000
> @@ -191,10 +195,11 @@ enum cmn_model {
>  	CMN650 = 2,
>  	CMN700 = 4,
>  	CI700 = 8,
> +	CMNS3 = 16,
>  	/* ...and then we can use bitmap tricks for commonality */
>  	CMN_ANY = -1,
>  	NOT_CMN600 = -2,
> -	CMN_650ON = CMN650 | CMN700,
> +	CMN_650ON = CMN650 | CMN700 | CMNS3,
>  };
>  
>  /* Actual part numbers and revision IDs defined by the hardware */
> @@ -203,6 +208,7 @@ enum cmn_part {
>  	PART_CMN650 = 0x436,
>  	PART_CMN700 = 0x43c,
>  	PART_CI700 = 0x43a,
> +	PART_CMN_S3 = 0x43e,
>  };
>  
>  /* CMN-600 r0px shouldn't exist in silicon, thankfully */
> @@ -254,6 +260,7 @@ enum cmn_node_type {
>  	CMN_TYPE_HNS = 0x200,
>  	CMN_TYPE_HNS_MPAM_S,
>  	CMN_TYPE_HNS_MPAM_NS,
> +	CMN_TYPE_APB = 0x1000,
>  	/* Not a real node type */
>  	CMN_TYPE_WP = 0x7770
>  };
> @@ -404,6 +411,8 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
>  		return CMN700;
>  	case PART_CI700:
>  		return CI700;
> +	case PART_CMN_S3:
> +		return CMNS3;
>  	default:
>  		return 0;
>  	};
> @@ -411,6 +420,11 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
>  
>  static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn)
>  {
> +	if (cmn->part == PART_CMN_S3) {
> +		if (dn->type == CMN_TYPE_XP)
> +			return CMN_S3_DTM_OFFSET;
> +		return CMN_S3_PMU_OFFSET;
> +	}
>  	return CMN_PMU_OFFSET;
>  }
>  
> @@ -467,6 +481,9 @@ static const char *arm_cmn_device_type(u8 type)
>  		case 0x1c: return "  MTSX  |";
>  		case 0x1d: return "  HN-V  |";
>  		case 0x1e: return "  CCG   |";
> +		case 0x20: return " RN-F_F |";
> +		case 0x21: return "RN-F_F_E|";
> +		case 0x22: return " SN-F_F |";
>  		default:   return "  ????  |";
>  	}
>  }
> @@ -777,8 +794,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
>  	CMN_EVENT_ATTR(CMN_ANY, cxha_##_name, CMN_TYPE_CXHA, _event)
>  #define CMN_EVENT_CCRA(_name, _event)				\
>  	CMN_EVENT_ATTR(CMN_ANY, ccra_##_name, CMN_TYPE_CCRA, _event)
> -#define CMN_EVENT_CCHA(_name, _event)				\
> -	CMN_EVENT_ATTR(CMN_ANY, ccha_##_name, CMN_TYPE_CCHA, _event)
> +#define CMN_EVENT_CCHA(_model, _name, _event)				\
> +	CMN_EVENT_ATTR(_model, ccha_##_name, CMN_TYPE_CCHA, _event)
>  #define CMN_EVENT_CCLA(_name, _event)				\
>  	CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
>  #define CMN_EVENT_CCLA_RNI(_name, _event)				\
> @@ -1136,42 +1153,43 @@ static struct attribute *arm_cmn_event_attrs[] = {
>  	CMN_EVENT_CCRA(wdb_alloc,			0x59),
>  	CMN_EVENT_CCRA(ssb_alloc,			0x5a),
>  
> -	CMN_EVENT_CCHA(rddatbyp,			0x61),
> -	CMN_EVENT_CCHA(chirsp_up_stall,			0x62),
> -	CMN_EVENT_CCHA(chidat_up_stall,			0x63),
> -	CMN_EVENT_CCHA(snppcrd_link0_stall,		0x64),
> -	CMN_EVENT_CCHA(snppcrd_link1_stall,		0x65),
> -	CMN_EVENT_CCHA(snppcrd_link2_stall,		0x66),
> -	CMN_EVENT_CCHA(reqtrk_occ,			0x67),
> -	CMN_EVENT_CCHA(rdb_occ,				0x68),
> -	CMN_EVENT_CCHA(rdbyp_occ,			0x69),
> -	CMN_EVENT_CCHA(wdb_occ,				0x6a),
> -	CMN_EVENT_CCHA(snptrk_occ,			0x6b),
> -	CMN_EVENT_CCHA(sdb_occ,				0x6c),
> -	CMN_EVENT_CCHA(snphaz_occ,			0x6d),
> -	CMN_EVENT_CCHA(reqtrk_alloc,			0x6e),
> -	CMN_EVENT_CCHA(rdb_alloc,			0x6f),
> -	CMN_EVENT_CCHA(rdbyp_alloc,			0x70),
> -	CMN_EVENT_CCHA(wdb_alloc,			0x71),
> -	CMN_EVENT_CCHA(snptrk_alloc,			0x72),
> -	CMN_EVENT_CCHA(sdb_alloc,			0x73),
> -	CMN_EVENT_CCHA(snphaz_alloc,			0x74),
> -	CMN_EVENT_CCHA(pb_rhu_req_occ,			0x75),
> -	CMN_EVENT_CCHA(pb_rhu_req_alloc,		0x76),
> -	CMN_EVENT_CCHA(pb_rhu_pcie_req_occ,		0x77),
> -	CMN_EVENT_CCHA(pb_rhu_pcie_req_alloc,		0x78),
> -	CMN_EVENT_CCHA(pb_pcie_wr_req_occ,		0x79),
> -	CMN_EVENT_CCHA(pb_pcie_wr_req_alloc,		0x7a),
> -	CMN_EVENT_CCHA(pb_pcie_reg_req_occ,		0x7b),
> -	CMN_EVENT_CCHA(pb_pcie_reg_req_alloc,		0x7c),
> -	CMN_EVENT_CCHA(pb_pcie_rsvd_req_occ,		0x7d),
> -	CMN_EVENT_CCHA(pb_pcie_rsvd_req_alloc,		0x7e),
> -	CMN_EVENT_CCHA(pb_rhu_dat_occ,			0x7f),
> -	CMN_EVENT_CCHA(pb_rhu_dat_alloc,		0x80),
> -	CMN_EVENT_CCHA(pb_rhu_pcie_dat_occ,		0x81),
> -	CMN_EVENT_CCHA(pb_rhu_pcie_dat_alloc,		0x82),
> -	CMN_EVENT_CCHA(pb_pcie_wr_dat_occ,		0x83),
> -	CMN_EVENT_CCHA(pb_pcie_wr_dat_alloc,		0x84),
> +	CMN_EVENT_CCHA(CMN_ANY, rddatbyp,		0x61),
> +	CMN_EVENT_CCHA(CMN_ANY, chirsp_up_stall,	0x62),
> +	CMN_EVENT_CCHA(CMN_ANY, chidat_up_stall,	0x63),
> +	CMN_EVENT_CCHA(CMN_ANY, snppcrd_link0_stall,	0x64),
> +	CMN_EVENT_CCHA(CMN_ANY, snppcrd_link1_stall,	0x65),
> +	CMN_EVENT_CCHA(CMN_ANY, snppcrd_link2_stall,	0x66),
> +	CMN_EVENT_CCHA(CMN_ANY, reqtrk_occ,		0x67),
> +	CMN_EVENT_CCHA(CMN_ANY, rdb_occ,		0x68),
> +	CMN_EVENT_CCHA(CMN_ANY, rdbyp_occ,		0x69),
> +	CMN_EVENT_CCHA(CMN_ANY, wdb_occ,		0x6a),
> +	CMN_EVENT_CCHA(CMN_ANY, snptrk_occ,		0x6b),
> +	CMN_EVENT_CCHA(CMN_ANY, sdb_occ,		0x6c),
> +	CMN_EVENT_CCHA(CMN_ANY, snphaz_occ,		0x6d),
> +	CMN_EVENT_CCHA(CMN_ANY, reqtrk_alloc,		0x6e),
> +	CMN_EVENT_CCHA(CMN_ANY, rdb_alloc,		0x6f),
> +	CMN_EVENT_CCHA(CMN_ANY, rdbyp_alloc,		0x70),
> +	CMN_EVENT_CCHA(CMN_ANY, wdb_alloc,		0x71),
> +	CMN_EVENT_CCHA(CMN_ANY, snptrk_alloc,		0x72),
> +	CMN_EVENT_CCHA(CMN_ANY, db_alloc,		0x73),
> +	CMN_EVENT_CCHA(CMN_ANY, snphaz_alloc,		0x74),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_occ,		0x75),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_alloc,	0x76),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_occ,	0x77),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_alloc,	0x78),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_occ,	0x79),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_alloc,	0x7a),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_occ,	0x7b),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_alloc,	0x7c),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_occ,	0x7d),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_alloc,	0x7e),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_occ,		0x7f),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_alloc,	0x80),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_occ,	0x81),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_alloc,	0x82),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_occ,	0x83),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_alloc,	0x84),
> +	CMN_EVENT_CCHA(CMNS3, chirsp1_up_stall,		0x85),
>  
>  	CMN_EVENT_CCLA(rx_cxs,				0x21),
>  	CMN_EVENT_CCLA(tx_cxs,				0x22),
> @@ -1777,7 +1795,8 @@ static int arm_cmn_event_init(struct perf_event *event)
>  		/* ...but the DTM may depend on which port we're watching */
>  		if (cmn->multi_dtm)
>  			hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
> -	} else if (type == CMN_TYPE_XP && cmn->part == PART_CMN700) {
> +	} else if (type == CMN_TYPE_XP &&
> +		   (cmn->part == PART_CMN700 || cmn->part == PART_CMN_S3)) {
>  		hw->wide_sel = true;
>  	}
>  
> @@ -2264,7 +2283,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
>  	reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_23);
>  	cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg);
>  
> +	/*
> +	 * With the device isolation feature, if firmware has neglected to enable
> +	 * an XP port then we risk locking up if we try to access anything behind
> +	 * it; however we also have no way to tell from Non-Secure whether any
> +	 * given port is disabled or not, so the only way to win is not to play...
> +	 */
>  	reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL);
> +	if (reg & CMN_INFO_DEVICE_ISO_ENABLE) {
> +		dev_err(cmn->dev, "Device isolation enabled, not continuing due to risk of lockup\n");
> +		return -ENODEV;
> +	}
>  	cmn->multi_dtm = reg & CMN_INFO_MULTIPLE_DTM_EN;
>  	cmn->rsp_vc_num = FIELD_GET(CMN_INFO_RSP_VC_NUM, reg);
>  	cmn->dat_vc_num = FIELD_GET(CMN_INFO_DAT_VC_NUM, reg);
> @@ -2423,6 +2452,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
>  			case CMN_TYPE_CXLA:
>  			case CMN_TYPE_HNS_MPAM_S:
>  			case CMN_TYPE_HNS_MPAM_NS:
> +			case CMN_TYPE_APB:
>  				break;
>  			/*
>  			 * Split "optimised" combination nodes into separate
> @@ -2608,6 +2638,7 @@ static const struct of_device_id arm_cmn_of_match[] = {
>  	{ .compatible = "arm,cmn-600", .data = (void *)PART_CMN600 },
>  	{ .compatible = "arm,cmn-650" },
>  	{ .compatible = "arm,cmn-700" },
> +	{ .compatible = "arm,cmn-s3" },
>  	{ .compatible = "arm,ci-700" },
>  	{}
>  };
> -- 
> 2.39.2.101.g768bb238c484.dirty
> 


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again.
  2024-08-16  9:33   ` Mark Rutland
@ 2024-08-16 11:56     ` Robin Murphy
  2024-08-16 12:45       ` Mark Rutland
  0 siblings, 1 reply; 32+ messages in thread
From: Robin Murphy @ 2024-08-16 11:56 UTC (permalink / raw)
  To: Mark Rutland; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On 2024-08-16 10:33 am, Mark Rutland wrote:
> Hi Robin,
> 
> On Fri, Aug 09, 2024 at 08:15:40PM +0100, Robin Murphy wrote:
>> It transpires that despite the explicit example to the contrary in the
>> CMN-700 TRM, the "extra device ports" config is in fact a per-XP thing
>> rather than a global one. To that end, rework node IDs yet again to
>> carry around the additional data necessary to decode them properly. At
>> this point the notion of fully decomposing an ID becomes more
>> impractical than it's worth, so unabstracting the XY mesh coordinates
>> (where 2/3 users were just debug anyway) ends up leaving things a bit
>> simpler overall.
>>
>> Fixes: 60d1504070c2 ("perf/arm-cmn: Support new IP features")
> 
> Does this fix an observable functional issue? It's difficult to tell
> what the impact of this is from the commit message.
> 
> i.e. what is the impact:
> 
> * On the CMN-700 programmers model?
> 
> * As observed by a user of the PMU driver?

Yes, there are two areas where we functionally depend on decoding the 
bottom 3 bits of a node ID to the individual port/device numbers. One is 
just for the debugfs nicety, but the other is in arm_cmn_event_add() for 
setting input_sel to the appropriate event source.

This was revealed once some hardware turned up with a mix of 3-port and 
2-port XPs, and events from a node at port 1 device 0 on a 2-porter were 
not counting. It took a while to get to the "hang on, why does that ID 
end in 0x4 not 0x2?" moment...

> ... and is there anything in the manual that spells out that this is a
> per-XP property? I'm struggling to find that in the CMN-700 TRM, as it
> seems to talk about "mesh configuration(s) with extra device ports".

That's the thing, the only suggestion is where example 3-4 strongly 
implies it's global by going out of its way to demonstrate the 3-port 
format on a 2-port XP, despite that being the opposite of reality. (And 
yes, this has been raised with the documentation folks as well.)

Thansk,
Robin.

> 
> Mark.
> 
>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>> ---
>>   drivers/perf/arm-cmn.c | 94 ++++++++++++++++++------------------------
>>   1 file changed, 40 insertions(+), 54 deletions(-)
>>
>> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
>> index c932d9d355cf..fd2122a37f22 100644
>> --- a/drivers/perf/arm-cmn.c
>> +++ b/drivers/perf/arm-cmn.c
>> @@ -24,14 +24,6 @@
>>   #define CMN_NI_NODE_ID			GENMASK_ULL(31, 16)
>>   #define CMN_NI_LOGICAL_ID		GENMASK_ULL(47, 32)
>>   
>> -#define CMN_NODEID_DEVID(reg)		((reg) & 3)
>> -#define CMN_NODEID_EXT_DEVID(reg)	((reg) & 1)
>> -#define CMN_NODEID_PID(reg)		(((reg) >> 2) & 1)
>> -#define CMN_NODEID_EXT_PID(reg)		(((reg) >> 1) & 3)
>> -#define CMN_NODEID_1x1_PID(reg)		(((reg) >> 2) & 7)
>> -#define CMN_NODEID_X(reg, bits)		((reg) >> (3 + (bits)))
>> -#define CMN_NODEID_Y(reg, bits)		(((reg) >> 3) & ((1U << (bits)) - 1))
>> -
>>   #define CMN_CHILD_INFO			0x0080
>>   #define CMN_CI_CHILD_COUNT		GENMASK_ULL(15, 0)
>>   #define CMN_CI_CHILD_PTR_OFFSET		GENMASK_ULL(31, 16)
>> @@ -280,8 +272,11 @@ struct arm_cmn_node {
>>   	u16 id, logid;
>>   	enum cmn_node_type type;
>>   
>> +	/* XP properties really, but replicated to children for convenience */
>>   	u8 dtm;
>>   	s8 dtc;
>> +	u8 portid_bits:4;
>> +	u8 deviceid_bits:4;
>>   	/* DN/HN-F/CXHA */
>>   	struct {
>>   		u8 val : 4;
>> @@ -357,49 +352,33 @@ struct arm_cmn {
>>   static int arm_cmn_hp_state;
>>   
>>   struct arm_cmn_nodeid {
>> -	u8 x;
>> -	u8 y;
>>   	u8 port;
>>   	u8 dev;
>>   };
>>   
>>   static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
>>   {
>> -	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2);
>> +	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1));
>>   }
>>   
>> -static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
>> +static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn_node *dn)
>>   {
>>   	struct arm_cmn_nodeid nid;
>>   
>> -	if (cmn->num_xps == 1) {
>> -		nid.x = 0;
>> -		nid.y = 0;
>> -		nid.port = CMN_NODEID_1x1_PID(id);
>> -		nid.dev = CMN_NODEID_DEVID(id);
>> -	} else {
>> -		int bits = arm_cmn_xyidbits(cmn);
>> -
>> -		nid.x = CMN_NODEID_X(id, bits);
>> -		nid.y = CMN_NODEID_Y(id, bits);
>> -		if (cmn->ports_used & 0xc) {
>> -			nid.port = CMN_NODEID_EXT_PID(id);
>> -			nid.dev = CMN_NODEID_EXT_DEVID(id);
>> -		} else {
>> -			nid.port = CMN_NODEID_PID(id);
>> -			nid.dev = CMN_NODEID_DEVID(id);
>> -		}
>> -	}
>> +	nid.dev = dn->id & ((1U << dn->deviceid_bits) - 1);
>> +	nid.port = (dn->id >> dn->deviceid_bits) & ((1U << dn->portid_bits) - 1);
>>   	return nid;
>>   }
>>   
>>   static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
>>   					       const struct arm_cmn_node *dn)
>>   {
>> -	struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
>> -	int xp_idx = cmn->mesh_x * nid.y + nid.x;
>> +	int id = dn->id >> (dn->portid_bits + dn->deviceid_bits);
>> +	int bits = arm_cmn_xyidbits(cmn);
>> +	int x = id > bits;
>> +	int y = id & ((1U << bits) - 1);
>>   
>> -	return cmn->xps + xp_idx;
>> +	return cmn->xps + cmn->mesh_x * y + x;
>>   }
>>   static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
>>   					 enum cmn_node_type type)
>> @@ -485,13 +464,13 @@ static const char *arm_cmn_device_type(u8 type)
>>   	}
>>   }
>>   
>> -static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
>> +static void arm_cmn_show_logid(struct seq_file *s, const struct arm_cmn_node *xp, int p, int d)
>>   {
>>   	struct arm_cmn *cmn = s->private;
>>   	struct arm_cmn_node *dn;
>> +	u16 id = xp->id | d | (p << xp->deviceid_bits);
>>   
>>   	for (dn = cmn->dns; dn->type; dn++) {
>> -		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
>>   		int pad = dn->logid < 10;
>>   
>>   		if (dn->type == CMN_TYPE_XP)
>> @@ -500,7 +479,7 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
>>   		if (dn->type < CMN_TYPE_HNI)
>>   			continue;
>>   
>> -		if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d)
>> +		if (dn->id != id)
>>   			continue;
>>   
>>   		seq_printf(s, " %*c#%-*d  |", pad + 1, ' ', 3 - pad, dn->logid);
>> @@ -521,6 +500,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
>>   	y = cmn->mesh_y;
>>   	while (y--) {
>>   		int xp_base = cmn->mesh_x * y;
>> +		struct arm_cmn_node *xp = cmn->xps + xp_base;
>>   		u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION];
>>   
>>   		for (x = 0; x < cmn->mesh_x; x++)
>> @@ -528,16 +508,14 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
>>   
>>   		seq_printf(s, "\n%-2d   |", y);
>>   		for (x = 0; x < cmn->mesh_x; x++) {
>> -			struct arm_cmn_node *xp = cmn->xps + xp_base + x;
>> -
>>   			for (p = 0; p < CMN_MAX_PORTS; p++)
>> -				port[p][x] = arm_cmn_device_connect_info(cmn, xp, p);
>> +				port[p][x] = arm_cmn_device_connect_info(cmn, xp + x, p);
>>   			seq_printf(s, " XP #%-3d|", xp_base + x);
>>   		}
>>   
>>   		seq_puts(s, "\n     |");
>>   		for (x = 0; x < cmn->mesh_x; x++) {
>> -			s8 dtc = cmn->xps[xp_base + x].dtc;
>> +			s8 dtc = xp[x].dtc;
>>   
>>   			if (dtc < 0)
>>   				seq_puts(s, " DTC ?? |");
>> @@ -554,10 +532,10 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
>>   				seq_puts(s, arm_cmn_device_type(port[p][x]));
>>   			seq_puts(s, "\n    0|");
>>   			for (x = 0; x < cmn->mesh_x; x++)
>> -				arm_cmn_show_logid(s, x, y, p, 0);
>> +				arm_cmn_show_logid(s, xp + x, p, 0);
>>   			seq_puts(s, "\n    1|");
>>   			for (x = 0; x < cmn->mesh_x; x++)
>> -				arm_cmn_show_logid(s, x, y, p, 1);
>> +				arm_cmn_show_logid(s, xp + x, p, 1);
>>   		}
>>   		seq_puts(s, "\n-----+");
>>   	}
>> @@ -1815,10 +1793,7 @@ static int arm_cmn_event_init(struct perf_event *event)
>>   	}
>>   
>>   	if (!hw->num_dns) {
>> -		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid);
>> -
>> -		dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n",
>> -			nodeid, nid.x, nid.y, nid.port, nid.dev, type);
>> +		dev_dbg(cmn->dev, "invalid node 0x%x type 0x%x\n", nodeid, type);
>>   		return -EINVAL;
>>   	}
>>   
>> @@ -1921,7 +1896,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
>>   			arm_cmn_claim_wp_idx(dtm, event, d, wp_idx, i);
>>   			writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
>>   		} else {
>> -			struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
>> +			struct arm_cmn_nodeid nid = arm_cmn_nid(dn);
>>   
>>   			if (cmn->multi_dtm)
>>   				nid.port %= 2;
>> @@ -2168,10 +2143,12 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
>>   			continue;
>>   
>>   		xp = arm_cmn_node_to_xp(cmn, dn);
>> +		dn->portid_bits = xp->portid_bits;
>> +		dn->deviceid_bits = xp->deviceid_bits;
>>   		dn->dtc = xp->dtc;
>>   		dn->dtm = xp->dtm;
>>   		if (cmn->multi_dtm)
>> -			dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
>> +			dn->dtm += arm_cmn_nid(dn).port / 2;
>>   
>>   		if (dn->type == CMN_TYPE_DTC) {
>>   			int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
>> @@ -2341,18 +2318,27 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
>>   		arm_cmn_init_dtm(dtm++, xp, 0);
>>   		/*
>>   		 * Keeping track of connected ports will let us filter out
>> -		 * unnecessary XP events easily. We can also reliably infer the
>> -		 * "extra device ports" configuration for the node ID format
>> -		 * from this, since in that case we will see at least one XP
>> -		 * with port 2 connected, for the HN-D.
>> +		 * unnecessary XP events easily, and also infer the per-XP
>> +		 * part of the node ID format.
>>   		 */
>>   		for (int p = 0; p < CMN_MAX_PORTS; p++)
>>   			if (arm_cmn_device_connect_info(cmn, xp, p))
>>   				xp_ports |= BIT(p);
>>   
>> -		if (cmn->multi_dtm && (xp_ports & 0xc))
>> +		if (cmn->num_xps == 1) {
>> +			xp->portid_bits = 3;
>> +			xp->deviceid_bits = 2;
>> +		} else if (xp_ports > 0x3) {
>> +			xp->portid_bits = 2;
>> +			xp->deviceid_bits = 1;
>> +		} else {
>> +			xp->portid_bits = 1;
>> +			xp->deviceid_bits = 2;
>> +		}
>> +
>> +		if (cmn->multi_dtm && (xp_ports > 0x3))
>>   			arm_cmn_init_dtm(dtm++, xp, 1);
>> -		if (cmn->multi_dtm && (xp_ports & 0x30))
>> +		if (cmn->multi_dtm && (xp_ports > 0xf))
>>   			arm_cmn_init_dtm(dtm++, xp, 2);
>>   
>>   		cmn->ports_used |= xp_ports;
>> -- 
>> 2.39.2.101.g768bb238c484.dirty
>>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again.
  2024-08-16 11:56     ` Robin Murphy
@ 2024-08-16 12:45       ` Mark Rutland
  2024-08-16 13:21         ` Robin Murphy
  0 siblings, 1 reply; 32+ messages in thread
From: Mark Rutland @ 2024-08-16 12:45 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On Fri, Aug 16, 2024 at 12:56:48PM +0100, Robin Murphy wrote:
> On 2024-08-16 10:33 am, Mark Rutland wrote:
> > Hi Robin,
> > 
> > On Fri, Aug 09, 2024 at 08:15:40PM +0100, Robin Murphy wrote:
> > > It transpires that despite the explicit example to the contrary in the
> > > CMN-700 TRM, the "extra device ports" config is in fact a per-XP thing
> > > rather than a global one. To that end, rework node IDs yet again to
> > > carry around the additional data necessary to decode them properly. At
> > > this point the notion of fully decomposing an ID becomes more
> > > impractical than it's worth, so unabstracting the XY mesh coordinates
> > > (where 2/3 users were just debug anyway) ends up leaving things a bit
> > > simpler overall.
> > > 
> > > Fixes: 60d1504070c2 ("perf/arm-cmn: Support new IP features")
> > 
> > Does this fix an observable functional issue? It's difficult to tell
> > what the impact of this is from the commit message.
> > 
> > i.e. what is the impact:
> > 
> > * On the CMN-700 programmers model?
> > 
> > * As observed by a user of the PMU driver?
> 
> Yes, there are two areas where we functionally depend on decoding the bottom
> 3 bits of a node ID to the individual port/device numbers. One is just for
> the debugfs nicety, but the other is in arm_cmn_event_add() for setting
> input_sel to the appropriate event source.

Cool; and does treating this wrong just result in getting bad values out
in sysfs/perf, or is it possible that we have something like memory
corruption?

> This was revealed once some hardware turned up with a mix of 3-port and
> 2-port XPs, and events from a node at port 1 device 0 on a 2-porter were not
> counting. It took a while to get to the "hang on, why does that ID end in
> 0x4 not 0x2?" moment...

I see; so we were mislead by the documentation, then saw HW which
demonstrated this.

It'd be nice to say something like:

  The CMN-700 TRM implies that the "extra device ports" configuration is
  global to a CMN instance and the CMN PMU driver currently assumes
  this. Unfortunately this is not correct as this configuration is
  per-XP, and hardware exists where some XPs have extra device ports
  while others do not.

  The presence of extra decice ports affects how the bottom 3 bits of a
  node ID map to individual port/device numbers, and when the driver
  misinterprets this, it may expose incorrect information in syfs, and
  arm_cmn_event_add() may configure input_sel incorrectly, resulting in
  incorrect performance counter numbers.

  Fix this by reworking node IDs to carry the additional data necessary
  to decode them properly. At this point the notion of fully decomposing
  an ID becomes more impractical than it's worth, so unabstracting the
  XY mesh coordinates (where 2/3 users were just debug anyway) ends up
  leaving things a bit simpler overall.

With that wording (or something very similar), this looks good to me,
and with that:

Acked-by: Mark Rutland <mark.rutland@arm.com>

Mark.

> > ... and is there anything in the manual that spells out that this is a
> > per-XP property? I'm struggling to find that in the CMN-700 TRM, as it
> > seems to talk about "mesh configuration(s) with extra device ports".
> 
> That's the thing, the only suggestion is where example 3-4 strongly implies
> it's global by going out of its way to demonstrate the 3-port format on a
> 2-port XP, despite that being the opposite of reality. (And yes, this has
> been raised with the documentation folks as well.)
> 
> Thansk,
> Robin.
> 
> > 
> > Mark.
> > 
> > > Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> > > ---
> > >   drivers/perf/arm-cmn.c | 94 ++++++++++++++++++------------------------
> > >   1 file changed, 40 insertions(+), 54 deletions(-)
> > > 
> > > diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> > > index c932d9d355cf..fd2122a37f22 100644
> > > --- a/drivers/perf/arm-cmn.c
> > > +++ b/drivers/perf/arm-cmn.c
> > > @@ -24,14 +24,6 @@
> > >   #define CMN_NI_NODE_ID			GENMASK_ULL(31, 16)
> > >   #define CMN_NI_LOGICAL_ID		GENMASK_ULL(47, 32)
> > > -#define CMN_NODEID_DEVID(reg)		((reg) & 3)
> > > -#define CMN_NODEID_EXT_DEVID(reg)	((reg) & 1)
> > > -#define CMN_NODEID_PID(reg)		(((reg) >> 2) & 1)
> > > -#define CMN_NODEID_EXT_PID(reg)		(((reg) >> 1) & 3)
> > > -#define CMN_NODEID_1x1_PID(reg)		(((reg) >> 2) & 7)
> > > -#define CMN_NODEID_X(reg, bits)		((reg) >> (3 + (bits)))
> > > -#define CMN_NODEID_Y(reg, bits)		(((reg) >> 3) & ((1U << (bits)) - 1))
> > > -
> > >   #define CMN_CHILD_INFO			0x0080
> > >   #define CMN_CI_CHILD_COUNT		GENMASK_ULL(15, 0)
> > >   #define CMN_CI_CHILD_PTR_OFFSET		GENMASK_ULL(31, 16)
> > > @@ -280,8 +272,11 @@ struct arm_cmn_node {
> > >   	u16 id, logid;
> > >   	enum cmn_node_type type;
> > > +	/* XP properties really, but replicated to children for convenience */
> > >   	u8 dtm;
> > >   	s8 dtc;
> > > +	u8 portid_bits:4;
> > > +	u8 deviceid_bits:4;
> > >   	/* DN/HN-F/CXHA */
> > >   	struct {
> > >   		u8 val : 4;
> > > @@ -357,49 +352,33 @@ struct arm_cmn {
> > >   static int arm_cmn_hp_state;
> > >   struct arm_cmn_nodeid {
> > > -	u8 x;
> > > -	u8 y;
> > >   	u8 port;
> > >   	u8 dev;
> > >   };
> > >   static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
> > >   {
> > > -	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2);
> > > +	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1));
> > >   }
> > > -static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
> > > +static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn_node *dn)
> > >   {
> > >   	struct arm_cmn_nodeid nid;
> > > -	if (cmn->num_xps == 1) {
> > > -		nid.x = 0;
> > > -		nid.y = 0;
> > > -		nid.port = CMN_NODEID_1x1_PID(id);
> > > -		nid.dev = CMN_NODEID_DEVID(id);
> > > -	} else {
> > > -		int bits = arm_cmn_xyidbits(cmn);
> > > -
> > > -		nid.x = CMN_NODEID_X(id, bits);
> > > -		nid.y = CMN_NODEID_Y(id, bits);
> > > -		if (cmn->ports_used & 0xc) {
> > > -			nid.port = CMN_NODEID_EXT_PID(id);
> > > -			nid.dev = CMN_NODEID_EXT_DEVID(id);
> > > -		} else {
> > > -			nid.port = CMN_NODEID_PID(id);
> > > -			nid.dev = CMN_NODEID_DEVID(id);
> > > -		}
> > > -	}
> > > +	nid.dev = dn->id & ((1U << dn->deviceid_bits) - 1);
> > > +	nid.port = (dn->id >> dn->deviceid_bits) & ((1U << dn->portid_bits) - 1);
> > >   	return nid;
> > >   }
> > >   static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
> > >   					       const struct arm_cmn_node *dn)
> > >   {
> > > -	struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
> > > -	int xp_idx = cmn->mesh_x * nid.y + nid.x;
> > > +	int id = dn->id >> (dn->portid_bits + dn->deviceid_bits);
> > > +	int bits = arm_cmn_xyidbits(cmn);
> > > +	int x = id > bits;
> > > +	int y = id & ((1U << bits) - 1);
> > > -	return cmn->xps + xp_idx;
> > > +	return cmn->xps + cmn->mesh_x * y + x;
> > >   }
> > >   static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
> > >   					 enum cmn_node_type type)
> > > @@ -485,13 +464,13 @@ static const char *arm_cmn_device_type(u8 type)
> > >   	}
> > >   }
> > > -static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
> > > +static void arm_cmn_show_logid(struct seq_file *s, const struct arm_cmn_node *xp, int p, int d)
> > >   {
> > >   	struct arm_cmn *cmn = s->private;
> > >   	struct arm_cmn_node *dn;
> > > +	u16 id = xp->id | d | (p << xp->deviceid_bits);
> > >   	for (dn = cmn->dns; dn->type; dn++) {
> > > -		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
> > >   		int pad = dn->logid < 10;
> > >   		if (dn->type == CMN_TYPE_XP)
> > > @@ -500,7 +479,7 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
> > >   		if (dn->type < CMN_TYPE_HNI)
> > >   			continue;
> > > -		if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d)
> > > +		if (dn->id != id)
> > >   			continue;
> > >   		seq_printf(s, " %*c#%-*d  |", pad + 1, ' ', 3 - pad, dn->logid);
> > > @@ -521,6 +500,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
> > >   	y = cmn->mesh_y;
> > >   	while (y--) {
> > >   		int xp_base = cmn->mesh_x * y;
> > > +		struct arm_cmn_node *xp = cmn->xps + xp_base;
> > >   		u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION];
> > >   		for (x = 0; x < cmn->mesh_x; x++)
> > > @@ -528,16 +508,14 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
> > >   		seq_printf(s, "\n%-2d   |", y);
> > >   		for (x = 0; x < cmn->mesh_x; x++) {
> > > -			struct arm_cmn_node *xp = cmn->xps + xp_base + x;
> > > -
> > >   			for (p = 0; p < CMN_MAX_PORTS; p++)
> > > -				port[p][x] = arm_cmn_device_connect_info(cmn, xp, p);
> > > +				port[p][x] = arm_cmn_device_connect_info(cmn, xp + x, p);
> > >   			seq_printf(s, " XP #%-3d|", xp_base + x);
> > >   		}
> > >   		seq_puts(s, "\n     |");
> > >   		for (x = 0; x < cmn->mesh_x; x++) {
> > > -			s8 dtc = cmn->xps[xp_base + x].dtc;
> > > +			s8 dtc = xp[x].dtc;
> > >   			if (dtc < 0)
> > >   				seq_puts(s, " DTC ?? |");
> > > @@ -554,10 +532,10 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
> > >   				seq_puts(s, arm_cmn_device_type(port[p][x]));
> > >   			seq_puts(s, "\n    0|");
> > >   			for (x = 0; x < cmn->mesh_x; x++)
> > > -				arm_cmn_show_logid(s, x, y, p, 0);
> > > +				arm_cmn_show_logid(s, xp + x, p, 0);
> > >   			seq_puts(s, "\n    1|");
> > >   			for (x = 0; x < cmn->mesh_x; x++)
> > > -				arm_cmn_show_logid(s, x, y, p, 1);
> > > +				arm_cmn_show_logid(s, xp + x, p, 1);
> > >   		}
> > >   		seq_puts(s, "\n-----+");
> > >   	}
> > > @@ -1815,10 +1793,7 @@ static int arm_cmn_event_init(struct perf_event *event)
> > >   	}
> > >   	if (!hw->num_dns) {
> > > -		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid);
> > > -
> > > -		dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n",
> > > -			nodeid, nid.x, nid.y, nid.port, nid.dev, type);
> > > +		dev_dbg(cmn->dev, "invalid node 0x%x type 0x%x\n", nodeid, type);
> > >   		return -EINVAL;
> > >   	}
> > > @@ -1921,7 +1896,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
> > >   			arm_cmn_claim_wp_idx(dtm, event, d, wp_idx, i);
> > >   			writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
> > >   		} else {
> > > -			struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
> > > +			struct arm_cmn_nodeid nid = arm_cmn_nid(dn);
> > >   			if (cmn->multi_dtm)
> > >   				nid.port %= 2;
> > > @@ -2168,10 +2143,12 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
> > >   			continue;
> > >   		xp = arm_cmn_node_to_xp(cmn, dn);
> > > +		dn->portid_bits = xp->portid_bits;
> > > +		dn->deviceid_bits = xp->deviceid_bits;
> > >   		dn->dtc = xp->dtc;
> > >   		dn->dtm = xp->dtm;
> > >   		if (cmn->multi_dtm)
> > > -			dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
> > > +			dn->dtm += arm_cmn_nid(dn).port / 2;
> > >   		if (dn->type == CMN_TYPE_DTC) {
> > >   			int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
> > > @@ -2341,18 +2318,27 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
> > >   		arm_cmn_init_dtm(dtm++, xp, 0);
> > >   		/*
> > >   		 * Keeping track of connected ports will let us filter out
> > > -		 * unnecessary XP events easily. We can also reliably infer the
> > > -		 * "extra device ports" configuration for the node ID format
> > > -		 * from this, since in that case we will see at least one XP
> > > -		 * with port 2 connected, for the HN-D.
> > > +		 * unnecessary XP events easily, and also infer the per-XP
> > > +		 * part of the node ID format.
> > >   		 */
> > >   		for (int p = 0; p < CMN_MAX_PORTS; p++)
> > >   			if (arm_cmn_device_connect_info(cmn, xp, p))
> > >   				xp_ports |= BIT(p);
> > > -		if (cmn->multi_dtm && (xp_ports & 0xc))
> > > +		if (cmn->num_xps == 1) {
> > > +			xp->portid_bits = 3;
> > > +			xp->deviceid_bits = 2;
> > > +		} else if (xp_ports > 0x3) {
> > > +			xp->portid_bits = 2;
> > > +			xp->deviceid_bits = 1;
> > > +		} else {
> > > +			xp->portid_bits = 1;
> > > +			xp->deviceid_bits = 2;
> > > +		}
> > > +
> > > +		if (cmn->multi_dtm && (xp_ports > 0x3))
> > >   			arm_cmn_init_dtm(dtm++, xp, 1);
> > > -		if (cmn->multi_dtm && (xp_ports & 0x30))
> > > +		if (cmn->multi_dtm && (xp_ports > 0xf))
> > >   			arm_cmn_init_dtm(dtm++, xp, 2);
> > >   		cmn->ports_used |= xp_ports;
> > > -- 
> > > 2.39.2.101.g768bb238c484.dirty
> > > 


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again.
  2024-08-16 12:45       ` Mark Rutland
@ 2024-08-16 13:21         ` Robin Murphy
  0 siblings, 0 replies; 32+ messages in thread
From: Robin Murphy @ 2024-08-16 13:21 UTC (permalink / raw)
  To: Mark Rutland; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On 16/08/2024 1:45 pm, Mark Rutland wrote:
> On Fri, Aug 16, 2024 at 12:56:48PM +0100, Robin Murphy wrote:
>> On 2024-08-16 10:33 am, Mark Rutland wrote:
>>> Hi Robin,
>>>
>>> On Fri, Aug 09, 2024 at 08:15:40PM +0100, Robin Murphy wrote:
>>>> It transpires that despite the explicit example to the contrary in the
>>>> CMN-700 TRM, the "extra device ports" config is in fact a per-XP thing
>>>> rather than a global one. To that end, rework node IDs yet again to
>>>> carry around the additional data necessary to decode them properly. At
>>>> this point the notion of fully decomposing an ID becomes more
>>>> impractical than it's worth, so unabstracting the XY mesh coordinates
>>>> (where 2/3 users were just debug anyway) ends up leaving things a bit
>>>> simpler overall.
>>>>
>>>> Fixes: 60d1504070c2 ("perf/arm-cmn: Support new IP features")
>>>
>>> Does this fix an observable functional issue? It's difficult to tell
>>> what the impact of this is from the commit message.
>>>
>>> i.e. what is the impact:
>>>
>>> * On the CMN-700 programmers model?
>>>
>>> * As observed by a user of the PMU driver?
>>
>> Yes, there are two areas where we functionally depend on decoding the bottom
>> 3 bits of a node ID to the individual port/device numbers. One is just for
>> the debugfs nicety, but the other is in arm_cmn_event_add() for setting
>> input_sel to the appropriate event source.
> 
> Cool; and does treating this wrong just result in getting bad values out
> in sysfs/perf, or is it possible that we have something like memory
> corruption?

Yup, it just means we'll configure the DTM to count an event input which 
most likely doesn't exist, and thus get 0, plus debugfs looks a little 
wonky, e.g.:

     1|        |
-----+--------+--------
0    | XP #0  | XP #1
      | DTC 0  | DTC 0
      |........|........
   0  |  RN-I  |  RN-D
     0|   #0   |   #0
     1|        |
   1  |  HN-T  |
     0|        |
     1|        |
   2  |        |
     0|   #1   |
     1|        |
   3  |        |
     0|        |
     1|        |
-----+--------+--------

(the HN-T node type still shows up in the right place since that comes 
directly from the per-port connect_info registers, but its corresponding 
logical ID is looked up via the node ID, thus mistakenly decoded as 
belonging to port 2 and output on the wrong line)

>> This was revealed once some hardware turned up with a mix of 3-port and
>> 2-port XPs, and events from a node at port 1 device 0 on a 2-porter were not
>> counting. It took a while to get to the "hang on, why does that ID end in
>> 0x4 not 0x2?" moment...
> 
> I see; so we were mislead by the documentation, then saw HW which
> demonstrated this.
> 
> It'd be nice to say something like:
> 
>    The CMN-700 TRM implies that the "extra device ports" configuration is
>    global to a CMN instance and the CMN PMU driver currently assumes
>    this. Unfortunately this is not correct as this configuration is
>    per-XP, and hardware exists where some XPs have extra device ports
>    while others do not.
> 
>    The presence of extra decice ports affects how the bottom 3 bits of a
>    node ID map to individual port/device numbers, and when the driver
>    misinterprets this, it may expose incorrect information in syfs, and
>    arm_cmn_event_add() may configure input_sel incorrectly, resulting in
>    incorrect performance counter numbers.
> 
>    Fix this by reworking node IDs to carry the additional data necessary
>    to decode them properly. At this point the notion of fully decomposing
>    an ID becomes more impractical than it's worth, so unabstracting the
>    XY mesh coordinates (where 2/3 users were just debug anyway) ends up
>    leaving things a bit simpler overall.
> 
> With that wording (or something very similar), this looks good to me,
> and with that:

Sure, I'll spell out the implications properly - I do tend to forget 
that the people reading these won't just be those working on the driver 
itself, but also those assessing for backports in general.

Cheers,
Robin.

> 
> Acked-by: Mark Rutland <mark.rutland@arm.com>
> 
> Mark.
> 
>>> ... and is there anything in the manual that spells out that this is a
>>> per-XP property? I'm struggling to find that in the CMN-700 TRM, as it
>>> seems to talk about "mesh configuration(s) with extra device ports".
>>
>> That's the thing, the only suggestion is where example 3-4 strongly implies
>> it's global by going out of its way to demonstrate the 3-port format on a
>> 2-port XP, despite that being the opposite of reality. (And yes, this has
>> been raised with the documentation folks as well.)
>>
>> Thansk,
>> Robin.
>>
>>>
>>> Mark.
>>>
>>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>>>> ---
>>>>    drivers/perf/arm-cmn.c | 94 ++++++++++++++++++------------------------
>>>>    1 file changed, 40 insertions(+), 54 deletions(-)
>>>>
>>>> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
>>>> index c932d9d355cf..fd2122a37f22 100644
>>>> --- a/drivers/perf/arm-cmn.c
>>>> +++ b/drivers/perf/arm-cmn.c
>>>> @@ -24,14 +24,6 @@
>>>>    #define CMN_NI_NODE_ID			GENMASK_ULL(31, 16)
>>>>    #define CMN_NI_LOGICAL_ID		GENMASK_ULL(47, 32)
>>>> -#define CMN_NODEID_DEVID(reg)		((reg) & 3)
>>>> -#define CMN_NODEID_EXT_DEVID(reg)	((reg) & 1)
>>>> -#define CMN_NODEID_PID(reg)		(((reg) >> 2) & 1)
>>>> -#define CMN_NODEID_EXT_PID(reg)		(((reg) >> 1) & 3)
>>>> -#define CMN_NODEID_1x1_PID(reg)		(((reg) >> 2) & 7)
>>>> -#define CMN_NODEID_X(reg, bits)		((reg) >> (3 + (bits)))
>>>> -#define CMN_NODEID_Y(reg, bits)		(((reg) >> 3) & ((1U << (bits)) - 1))
>>>> -
>>>>    #define CMN_CHILD_INFO			0x0080
>>>>    #define CMN_CI_CHILD_COUNT		GENMASK_ULL(15, 0)
>>>>    #define CMN_CI_CHILD_PTR_OFFSET		GENMASK_ULL(31, 16)
>>>> @@ -280,8 +272,11 @@ struct arm_cmn_node {
>>>>    	u16 id, logid;
>>>>    	enum cmn_node_type type;
>>>> +	/* XP properties really, but replicated to children for convenience */
>>>>    	u8 dtm;
>>>>    	s8 dtc;
>>>> +	u8 portid_bits:4;
>>>> +	u8 deviceid_bits:4;
>>>>    	/* DN/HN-F/CXHA */
>>>>    	struct {
>>>>    		u8 val : 4;
>>>> @@ -357,49 +352,33 @@ struct arm_cmn {
>>>>    static int arm_cmn_hp_state;
>>>>    struct arm_cmn_nodeid {
>>>> -	u8 x;
>>>> -	u8 y;
>>>>    	u8 port;
>>>>    	u8 dev;
>>>>    };
>>>>    static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
>>>>    {
>>>> -	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2);
>>>> +	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1));
>>>>    }
>>>> -static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
>>>> +static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn_node *dn)
>>>>    {
>>>>    	struct arm_cmn_nodeid nid;
>>>> -	if (cmn->num_xps == 1) {
>>>> -		nid.x = 0;
>>>> -		nid.y = 0;
>>>> -		nid.port = CMN_NODEID_1x1_PID(id);
>>>> -		nid.dev = CMN_NODEID_DEVID(id);
>>>> -	} else {
>>>> -		int bits = arm_cmn_xyidbits(cmn);
>>>> -
>>>> -		nid.x = CMN_NODEID_X(id, bits);
>>>> -		nid.y = CMN_NODEID_Y(id, bits);
>>>> -		if (cmn->ports_used & 0xc) {
>>>> -			nid.port = CMN_NODEID_EXT_PID(id);
>>>> -			nid.dev = CMN_NODEID_EXT_DEVID(id);
>>>> -		} else {
>>>> -			nid.port = CMN_NODEID_PID(id);
>>>> -			nid.dev = CMN_NODEID_DEVID(id);
>>>> -		}
>>>> -	}
>>>> +	nid.dev = dn->id & ((1U << dn->deviceid_bits) - 1);
>>>> +	nid.port = (dn->id >> dn->deviceid_bits) & ((1U << dn->portid_bits) - 1);
>>>>    	return nid;
>>>>    }
>>>>    static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
>>>>    					       const struct arm_cmn_node *dn)
>>>>    {
>>>> -	struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
>>>> -	int xp_idx = cmn->mesh_x * nid.y + nid.x;
>>>> +	int id = dn->id >> (dn->portid_bits + dn->deviceid_bits);
>>>> +	int bits = arm_cmn_xyidbits(cmn);
>>>> +	int x = id > bits;
>>>> +	int y = id & ((1U << bits) - 1);
>>>> -	return cmn->xps + xp_idx;
>>>> +	return cmn->xps + cmn->mesh_x * y + x;
>>>>    }
>>>>    static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
>>>>    					 enum cmn_node_type type)
>>>> @@ -485,13 +464,13 @@ static const char *arm_cmn_device_type(u8 type)
>>>>    	}
>>>>    }
>>>> -static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
>>>> +static void arm_cmn_show_logid(struct seq_file *s, const struct arm_cmn_node *xp, int p, int d)
>>>>    {
>>>>    	struct arm_cmn *cmn = s->private;
>>>>    	struct arm_cmn_node *dn;
>>>> +	u16 id = xp->id | d | (p << xp->deviceid_bits);
>>>>    	for (dn = cmn->dns; dn->type; dn++) {
>>>> -		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
>>>>    		int pad = dn->logid < 10;
>>>>    		if (dn->type == CMN_TYPE_XP)
>>>> @@ -500,7 +479,7 @@ static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
>>>>    		if (dn->type < CMN_TYPE_HNI)
>>>>    			continue;
>>>> -		if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d)
>>>> +		if (dn->id != id)
>>>>    			continue;
>>>>    		seq_printf(s, " %*c#%-*d  |", pad + 1, ' ', 3 - pad, dn->logid);
>>>> @@ -521,6 +500,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
>>>>    	y = cmn->mesh_y;
>>>>    	while (y--) {
>>>>    		int xp_base = cmn->mesh_x * y;
>>>> +		struct arm_cmn_node *xp = cmn->xps + xp_base;
>>>>    		u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION];
>>>>    		for (x = 0; x < cmn->mesh_x; x++)
>>>> @@ -528,16 +508,14 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
>>>>    		seq_printf(s, "\n%-2d   |", y);
>>>>    		for (x = 0; x < cmn->mesh_x; x++) {
>>>> -			struct arm_cmn_node *xp = cmn->xps + xp_base + x;
>>>> -
>>>>    			for (p = 0; p < CMN_MAX_PORTS; p++)
>>>> -				port[p][x] = arm_cmn_device_connect_info(cmn, xp, p);
>>>> +				port[p][x] = arm_cmn_device_connect_info(cmn, xp + x, p);
>>>>    			seq_printf(s, " XP #%-3d|", xp_base + x);
>>>>    		}
>>>>    		seq_puts(s, "\n     |");
>>>>    		for (x = 0; x < cmn->mesh_x; x++) {
>>>> -			s8 dtc = cmn->xps[xp_base + x].dtc;
>>>> +			s8 dtc = xp[x].dtc;
>>>>    			if (dtc < 0)
>>>>    				seq_puts(s, " DTC ?? |");
>>>> @@ -554,10 +532,10 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
>>>>    				seq_puts(s, arm_cmn_device_type(port[p][x]));
>>>>    			seq_puts(s, "\n    0|");
>>>>    			for (x = 0; x < cmn->mesh_x; x++)
>>>> -				arm_cmn_show_logid(s, x, y, p, 0);
>>>> +				arm_cmn_show_logid(s, xp + x, p, 0);
>>>>    			seq_puts(s, "\n    1|");
>>>>    			for (x = 0; x < cmn->mesh_x; x++)
>>>> -				arm_cmn_show_logid(s, x, y, p, 1);
>>>> +				arm_cmn_show_logid(s, xp + x, p, 1);
>>>>    		}
>>>>    		seq_puts(s, "\n-----+");
>>>>    	}
>>>> @@ -1815,10 +1793,7 @@ static int arm_cmn_event_init(struct perf_event *event)
>>>>    	}
>>>>    	if (!hw->num_dns) {
>>>> -		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid);
>>>> -
>>>> -		dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n",
>>>> -			nodeid, nid.x, nid.y, nid.port, nid.dev, type);
>>>> +		dev_dbg(cmn->dev, "invalid node 0x%x type 0x%x\n", nodeid, type);
>>>>    		return -EINVAL;
>>>>    	}
>>>> @@ -1921,7 +1896,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
>>>>    			arm_cmn_claim_wp_idx(dtm, event, d, wp_idx, i);
>>>>    			writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
>>>>    		} else {
>>>> -			struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
>>>> +			struct arm_cmn_nodeid nid = arm_cmn_nid(dn);
>>>>    			if (cmn->multi_dtm)
>>>>    				nid.port %= 2;
>>>> @@ -2168,10 +2143,12 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
>>>>    			continue;
>>>>    		xp = arm_cmn_node_to_xp(cmn, dn);
>>>> +		dn->portid_bits = xp->portid_bits;
>>>> +		dn->deviceid_bits = xp->deviceid_bits;
>>>>    		dn->dtc = xp->dtc;
>>>>    		dn->dtm = xp->dtm;
>>>>    		if (cmn->multi_dtm)
>>>> -			dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
>>>> +			dn->dtm += arm_cmn_nid(dn).port / 2;
>>>>    		if (dn->type == CMN_TYPE_DTC) {
>>>>    			int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
>>>> @@ -2341,18 +2318,27 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
>>>>    		arm_cmn_init_dtm(dtm++, xp, 0);
>>>>    		/*
>>>>    		 * Keeping track of connected ports will let us filter out
>>>> -		 * unnecessary XP events easily. We can also reliably infer the
>>>> -		 * "extra device ports" configuration for the node ID format
>>>> -		 * from this, since in that case we will see at least one XP
>>>> -		 * with port 2 connected, for the HN-D.
>>>> +		 * unnecessary XP events easily, and also infer the per-XP
>>>> +		 * part of the node ID format.
>>>>    		 */
>>>>    		for (int p = 0; p < CMN_MAX_PORTS; p++)
>>>>    			if (arm_cmn_device_connect_info(cmn, xp, p))
>>>>    				xp_ports |= BIT(p);
>>>> -		if (cmn->multi_dtm && (xp_ports & 0xc))
>>>> +		if (cmn->num_xps == 1) {
>>>> +			xp->portid_bits = 3;
>>>> +			xp->deviceid_bits = 2;
>>>> +		} else if (xp_ports > 0x3) {
>>>> +			xp->portid_bits = 2;
>>>> +			xp->deviceid_bits = 1;
>>>> +		} else {
>>>> +			xp->portid_bits = 1;
>>>> +			xp->deviceid_bits = 2;
>>>> +		}
>>>> +
>>>> +		if (cmn->multi_dtm && (xp_ports > 0x3))
>>>>    			arm_cmn_init_dtm(dtm++, xp, 1);
>>>> -		if (cmn->multi_dtm && (xp_ports & 0x30))
>>>> +		if (cmn->multi_dtm && (xp_ports > 0xf))
>>>>    			arm_cmn_init_dtm(dtm++, xp, 2);
>>>>    		cmn->ports_used |= xp_ports;
>>>> -- 
>>>> 2.39.2.101.g768bb238c484.dirty
>>>>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 2/8] perf/arm-cmn: Fix CCLA register offset
  2024-08-16 10:00   ` Mark Rutland
@ 2024-08-16 18:33     ` Robin Murphy
  2024-08-23 23:18       ` Ilkka Koskinen
  0 siblings, 1 reply; 32+ messages in thread
From: Robin Murphy @ 2024-08-16 18:33 UTC (permalink / raw)
  To: Mark Rutland; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On 16/08/2024 11:00 am, Mark Rutland wrote:
> On Fri, Aug 09, 2024 at 08:15:41PM +0100, Robin Murphy wrote:
>> Apparently pmu_event_sel is offset by 8 for all CCLA nodes, not just
>> the CCLA_RNI combination type.
> 
> Was there some reason we used to think that was specific to CCLA_RNI
> nodes, or was that just an oversight?

I imagine it was just oversight/confusion helped by the original r0p0 
TRM listing both a por_ccla_pmu_event_sel and a 
por_ccla_rni_pmu_event_sel as CCLA registers, which I could well believe 
I misread at a glance while scrolling up and down.

> Looking at the CMN-700 TRM and scanning for pmu_event_sel, we have:
> 
> 	16'h2000	por_ccg_ha_pmu_event_sel
> 	16'h2000	por_ccg_ra_pmu_event_sel
> 	16'h2008	por_ccla_pmu_event_sel
> 	16'h2000	por_dn_pmu_event_sel
> 	16'h2000	cmn_hns_pmu_event_sel
> 	16'h2000	por_hni_pmu_event_sel
> 	16'h2008	por_hnp_pmu_event_sel
> 	16'h2000	por_mxp_pmu_event_sel
> 	16'h2000	por_rnd_pmu_event_sel
> 	16'h2000	por_rni_pmu_event_sel
> 	16'h2000	por_sbsx_pmu_event_sel
> 
>> Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support")
>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>> ---
>>   drivers/perf/arm-cmn.c | 5 ++++-
>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
>> index fd2122a37f22..0e2e12e2f4fb 100644
>> --- a/drivers/perf/arm-cmn.c
>> +++ b/drivers/perf/arm-cmn.c
>> @@ -2393,10 +2393,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
>>   			case CMN_TYPE_CXHA:
>>   			case CMN_TYPE_CCRA:
>>   			case CMN_TYPE_CCHA:
>> -			case CMN_TYPE_CCLA:
>>   			case CMN_TYPE_HNS:
>>   				dn++;
>>   				break;
>> +			case CMN_TYPE_CCLA:
>> +				dn->pmu_base += CMN_HNP_PMU_EVENT_SEL;
>> +				dn++;
>> +				break;
> 
> When reading this for the first time, it looks like a copy-paste error
> since CMN_HNP_PMU_EVENT_SEL doesn't have any obvious relationship with
> CCLA nodes.
> 
> I reckon it'd be worth adding CMN_CCLA_PMU_EVENT_SEL, and replacing the
> existing comment above the definition of CMN_HNP_PMU_EVENT_SEL, e.g.
> 
> /*
>   * Some nodes place common registers at different offsets from most
>   * other nodes.
>   */
> #define CMN_HNP_PMU_EVENT_SEL		0x008
> #define CMN_CCLA_PMU_EVENT_SEL		0x008
> 
> That way the switch looks less suspicious, and the comment is a bit more
> helpful to anyone trying to figure out what's going on here.

Sure, that's a reasonable argument.

> With that:
> 
> Acked-by: Mark Rutland <mark.rutland@arm.com>

Thanks,
Robin.

> 
> Mark.
> 
>>   			/* Nothing to see here */
>>   			case CMN_TYPE_MPAM_S:
>>   			case CMN_TYPE_MPAM_NS:
>> -- 
>> 2.39.2.101.g768bb238c484.dirty
>>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/8] perf/arm-cmn: Ensure dtm_idx is big enough
  2024-08-16 10:14   ` Mark Rutland
@ 2024-08-19 15:00     ` Robin Murphy
  2024-08-20  9:27       ` Mark Rutland
  2024-08-23 23:58       ` Ilkka Koskinen
  0 siblings, 2 replies; 32+ messages in thread
From: Robin Murphy @ 2024-08-19 15:00 UTC (permalink / raw)
  To: Mark Rutland; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On 16/08/2024 11:14 am, Mark Rutland wrote:
> On Fri, Aug 09, 2024 at 08:15:42PM +0100, Robin Murphy wrote:
>> While CMN_MAX_DIMENSION was bumped to 12 for CMN-650, that only supports
>> up to a 10x10 mesh, so bumping dtm_idx to 256 bits at the time worked
>> out OK in practice. However CMN-700 did finally support up to 144 XPs,
>> and thus needs a worst-case 288 bits of dtm_idx for an aggregated XP
>> event on a maxed-out config. Oops.
>>
>> Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support")
>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>> ---
>>   drivers/perf/arm-cmn.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
>> index 0e2e12e2f4fb..c9a2b21a7aec 100644
>> --- a/drivers/perf/arm-cmn.c
>> +++ b/drivers/perf/arm-cmn.c
>> @@ -563,7 +563,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
>>   
>>   struct arm_cmn_hw_event {
>>   	struct arm_cmn_node *dn;
>> -	u64 dtm_idx[4];
>> +	u64 dtm_idx[5];
> 
> Can't we size this based on CMN_MAX_DIMENSION (or CMN_MAX_XPS or
> CMN_MAX_DTMS), to make that clear?

Fair enough, I did go back and forth a little on that idea, but reached 
the opposite conclusion that documenting this with the assert to 
deliberately make it *not* look straightforward was nicer than wrestling 
with an accurate name for the logical quantity here, which strictly 
would be something like CMN_MAX_NODES_PER_TYPE_WE_CARE_ABOUT (there can 
already be up to 256 RN-Fs, but those aren't visible to the PMU).

I'll have another think on that approach - I do concur that the assert 
isn't *functionally* any better than automatically sizing the array.

Thanks,
Robin.

>  From the desciription in the commit message it sounds like you need 2 *
> CMN_MAX_XPS bits, i.e.
> 
> 	#define DTM_IDX_BITS	(2 * CMN_MAX_XPS)
> 
> 	u64 dtm_idx[DIV_ROUND_UP(DTM_IDX_BITS, 64)];
> 
> Mark.
> 
>>   	s8 dtc_idx[CMN_MAX_DTCS];
>>   	u8 num_dns;
>>   	u8 dtm_offset;
>> -- 
>> 2.39.2.101.g768bb238c484.dirty
>>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 5/8] perf/arm-cmn: Make cycle counts less surprising
  2024-08-16 10:25   ` Mark Rutland
@ 2024-08-19 16:35     ` Robin Murphy
  0 siblings, 0 replies; 32+ messages in thread
From: Robin Murphy @ 2024-08-19 16:35 UTC (permalink / raw)
  To: Mark Rutland; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On 16/08/2024 11:25 am, Mark Rutland wrote:
> On Fri, Aug 09, 2024 at 08:15:44PM +0100, Robin Murphy wrote:
>> By default, CMN has automatic clock-gating with the implication that a
>> DTC's cycle counter may not increment while the domain is sufficiently
>> idle.
> 
> Similar is true for the cycles event on the CPU side, so this has some
> precedent.
> 
>> Given that we may have up to 4 DTCs to choose from when scheduling
>> a cycles event, this may potentially lead to surprising results if
>> trying to measure metrics based on activity in a different DTC domain
>> from where cycles end up being counted. Make the reasonable assumption
>> that if the user wants to count cycles, they almost certainly want to
>> count all of the cycles, and disable clock gating while a DTC's cycle
>> counter is in use.
> 
> As above, the default does match the CPU side behaviour,

It may end up looking equivalent with only a single DTC domain, but the 
effect with multiple DTCs is as if any cpu_cycles event always counted 
on CPU0 (or more recently, any random CPU) - that's the real issue here.

> and a user
> might be trying to determine how much clock gating occurs over some
> period, so it's not necessarily right to always disable clock gating.
> That might need to be an explicit option on the cycles event.

I have also been wondering whether there would be any use-case for a 
"dtc_active_cycles" event, but it would would have to be targeted at a 
specific DTC node ID or domain number to be meaningful, so would still 
be a new thing either way, but even then it seems it would be incredibly 
niche - you couldn't get an actual measure of idleness without a second 
DTC to count ungated cycles over the same period, and it could only be 
correlated with targeted single-node events, since we don't have support 
or a syntax for aggregating only within a specific DTC domain.

Yes, some of that could be achieved, but it would represent significant 
further complication to the driver and interface, in aid of something 
which to the best of my knowledge nobody's actually asking for. What 
users *have* asked me, though, is why they were seeing inconsistent 
dtc_cycles counts, hence this patch.

> Do we always have the ability to disable clock gating, or can that be
> locked down by system integration or FW?

Yes, the entire PMU may be locked down by a secure override, but 
otherwise if we do have use of the DTC then there is no limitation on 
cg_disable itself.

Thanks,
Robin.

> 
> Mark.
> 
>>
>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>> ---
>>   drivers/perf/arm-cmn.c | 16 +++++++++++-----
>>   1 file changed, 11 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
>> index 8f7a1a6f8ab7..4d128db2040c 100644
>> --- a/drivers/perf/arm-cmn.c
>> +++ b/drivers/perf/arm-cmn.c
>> @@ -115,6 +115,7 @@
>>   /* The DTC node is where the magic happens */
>>   #define CMN_DT_DTC_CTL			0x0a00
>>   #define CMN_DT_DTC_CTL_DT_EN		BIT(0)
>> +#define CMN_DT_DTC_CTL_CG_DISABLE	BIT(10)
>>   
>>   /* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */
>>   #define _CMN_DT_CNT_REG(n)		((((n) / 2) * 4 + (n) % 2) * 4)
>> @@ -1544,9 +1545,12 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
>>   	int i;
>>   
>>   	if (type == CMN_TYPE_DTC) {
>> -		i = hw->dtc_idx[0];
>> -		writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
>> -		cmn->dtc[i].cc_active = true;
>> +		struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
>> +
>> +		writel_relaxed(CMN_DT_DTC_CTL_DT_EN | CMN_DT_DTC_CTL_CG_DISABLE,
>> +			       dtc->base + CMN_DT_DTC_CTL);
>> +		writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
>> +		dtc->cc_active = true;
>>   	} else if (type == CMN_TYPE_WP) {
>>   		u64 val = CMN_EVENT_WP_VAL(event);
>>   		u64 mask = CMN_EVENT_WP_MASK(event);
>> @@ -1575,8 +1579,10 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
>>   	int i;
>>   
>>   	if (type == CMN_TYPE_DTC) {
>> -		i = hw->dtc_idx[0];
>> -		cmn->dtc[i].cc_active = false;
>> +		struct arm_cmn_dtc *dtc = cmn->dtc + hw->dtc_idx[0];
>> +
>> +		dtc->cc_active = false;
>> +		writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
>>   	} else if (type == CMN_TYPE_WP) {
>>   		for_each_hw_dn(hw, dn, i) {
>>   			void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
>> -- 
>> 2.39.2.101.g768bb238c484.dirty
>>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 6/8] perf/arm-cmn: Refactor DTC PMU register access
  2024-08-16 10:29   ` Mark Rutland
@ 2024-08-19 16:41     ` Robin Murphy
  2024-08-20  9:23       ` Mark Rutland
  0 siblings, 1 reply; 32+ messages in thread
From: Robin Murphy @ 2024-08-19 16:41 UTC (permalink / raw)
  To: Mark Rutland; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On 16/08/2024 11:29 am, Mark Rutland wrote:
> On Fri, Aug 09, 2024 at 08:15:45PM +0100, Robin Murphy wrote:
>> Annoyingly, we're soon going to have to cope with PMU registers moving
>> about. This will mostly be straightforward, except for the hard-coding
>> of CMN_PMU_OFFSET for the DTC PMU registers. As a first step, refactor
>> those accessors to allow for encapsulating a variable offset without
>> making a big mess all over.
>>
>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>> ---
>>   drivers/perf/arm-cmn.c | 64 ++++++++++++++++++++++++------------------
>>   1 file changed, 36 insertions(+), 28 deletions(-)
> 
> Aside from a minoe comment below this looks fine to me.
> 
>>   struct arm_cmn_dtc {
>>   	void __iomem *base;
>> +	void __iomem *pmu_base;
>>   	int irq;
>> -	int irq_friend;
>> +	s8 irq_friend;
> 
> Unrelated change?
> 
> AFAICT there's no reason for 'irq_friend' to change from 'int' to 's8',
> and nothing in the commit message explains it.

Oops, I had meant to note in the commit message that this is a little 
structure repacking where there was a hole already, to compensate for 
adding the new member... I shall un-forget that for v2.

> Otherwise this all looks fine to me.

Thanks,
Robin.

> 
> Mark.


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 6/8] perf/arm-cmn: Refactor DTC PMU register access
  2024-08-19 16:41     ` Robin Murphy
@ 2024-08-20  9:23       ` Mark Rutland
  2024-08-23 23:46         ` Ilkka Koskinen
  0 siblings, 1 reply; 32+ messages in thread
From: Mark Rutland @ 2024-08-20  9:23 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On Mon, Aug 19, 2024 at 05:41:30PM +0100, Robin Murphy wrote:
> On 16/08/2024 11:29 am, Mark Rutland wrote:
> > On Fri, Aug 09, 2024 at 08:15:45PM +0100, Robin Murphy wrote:
> > > Annoyingly, we're soon going to have to cope with PMU registers moving
> > > about. This will mostly be straightforward, except for the hard-coding
> > > of CMN_PMU_OFFSET for the DTC PMU registers. As a first step, refactor
> > > those accessors to allow for encapsulating a variable offset without
> > > making a big mess all over.
> > > 
> > > Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> > > ---
> > >   drivers/perf/arm-cmn.c | 64 ++++++++++++++++++++++++------------------
> > >   1 file changed, 36 insertions(+), 28 deletions(-)
> > 
> > Aside from a minoe comment below this looks fine to me.
> > 
> > >   struct arm_cmn_dtc {
> > >   	void __iomem *base;
> > > +	void __iomem *pmu_base;
> > >   	int irq;
> > > -	int irq_friend;
> > > +	s8 irq_friend;
> > 
> > Unrelated change?
> > 
> > AFAICT there's no reason for 'irq_friend' to change from 'int' to 's8',
> > and nothing in the commit message explains it.
> 
> Oops, I had meant to note in the commit message that this is a little
> structure repacking where there was a hole already, to compensate for adding
> the new member... I shall un-forget that for v2.

Cool, with that:

Acked-by: Mark Rutland <mark.rutland@arm.com>

Mark.


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/8] perf/arm-cmn: Ensure dtm_idx is big enough
  2024-08-19 15:00     ` Robin Murphy
@ 2024-08-20  9:27       ` Mark Rutland
  2024-08-23 23:58       ` Ilkka Koskinen
  1 sibling, 0 replies; 32+ messages in thread
From: Mark Rutland @ 2024-08-20  9:27 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, linux-arm-kernel, linux-kernel, ilkka

On Mon, Aug 19, 2024 at 04:00:06PM +0100, Robin Murphy wrote:
> On 16/08/2024 11:14 am, Mark Rutland wrote:
> > On Fri, Aug 09, 2024 at 08:15:42PM +0100, Robin Murphy wrote:
> > > While CMN_MAX_DIMENSION was bumped to 12 for CMN-650, that only supports
> > > up to a 10x10 mesh, so bumping dtm_idx to 256 bits at the time worked
> > > out OK in practice. However CMN-700 did finally support up to 144 XPs,
> > > and thus needs a worst-case 288 bits of dtm_idx for an aggregated XP
> > > event on a maxed-out config. Oops.
> > > 
> > > Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support")
> > > Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> > > ---
> > >   drivers/perf/arm-cmn.c | 2 +-
> > >   1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> > > index 0e2e12e2f4fb..c9a2b21a7aec 100644
> > > --- a/drivers/perf/arm-cmn.c
> > > +++ b/drivers/perf/arm-cmn.c
> > > @@ -563,7 +563,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
> > >   struct arm_cmn_hw_event {
> > >   	struct arm_cmn_node *dn;
> > > -	u64 dtm_idx[4];
> > > +	u64 dtm_idx[5];
> > 
> > Can't we size this based on CMN_MAX_DIMENSION (or CMN_MAX_XPS or
> > CMN_MAX_DTMS), to make that clear?
> 
> Fair enough, I did go back and forth a little on that idea, but reached the
> opposite conclusion that documenting this with the assert to deliberately
> make it *not* look straightforward was nicer than wrestling with an accurate
> name for the logical quantity here, which strictly would be something like
> CMN_MAX_NODES_PER_TYPE_WE_CARE_ABOUT (there can already be up to 256 RN-Fs,
> but those aren't visible to the PMU).
> 
> I'll have another think on that approach - I do concur that the assert isn't
> *functionally* any better than automatically sizing the array.

FWIW, I'm happy with the value being an over-estimate, and with needing
a comment. What I'm really after is that the sizing of dtm_idx is clear
at the definition of dtm_idx, without an arbitrary-looking number.

Mark.


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 2/8] perf/arm-cmn: Fix CCLA register offset
  2024-08-16 18:33     ` Robin Murphy
@ 2024-08-23 23:18       ` Ilkka Koskinen
  0 siblings, 0 replies; 32+ messages in thread
From: Ilkka Koskinen @ 2024-08-23 23:18 UTC (permalink / raw)
  To: Robin Murphy; +Cc: Mark Rutland, will, linux-arm-kernel, linux-kernel, ilkka


Hi Robin,

Sorry for replying late but I just got back from my vacation


On Fri, 16 Aug 2024, Robin Murphy wrote:
> On 16/08/2024 11:00 am, Mark Rutland wrote:
>> On Fri, Aug 09, 2024 at 08:15:41PM +0100, Robin Murphy wrote:
>>> Apparently pmu_event_sel is offset by 8 for all CCLA nodes, not just
>>> the CCLA_RNI combination type.
>> 
>> Was there some reason we used to think that was specific to CCLA_RNI
>> nodes, or was that just an oversight?
>
> I imagine it was just oversight/confusion helped by the original r0p0 TRM 
> listing both a por_ccla_pmu_event_sel and a por_ccla_rni_pmu_event_sel as 
> CCLA registers, which I could well believe I misread at a glance while 
> scrolling up and down.
>
>> Looking at the CMN-700 TRM and scanning for pmu_event_sel, we have:
>>
>> 	16'h2000	por_ccg_ha_pmu_event_sel
>> 	16'h2000	por_ccg_ra_pmu_event_sel
>> 	16'h2008	por_ccla_pmu_event_sel
>> 	16'h2000	por_dn_pmu_event_sel
>> 	16'h2000	cmn_hns_pmu_event_sel
>> 	16'h2000	por_hni_pmu_event_sel
>> 	16'h2008	por_hnp_pmu_event_sel
>> 	16'h2000	por_mxp_pmu_event_sel
>> 	16'h2000	por_rnd_pmu_event_sel
>> 	16'h2000	por_rni_pmu_event_sel
>> 	16'h2000	por_sbsx_pmu_event_sel
>> 
>>> Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support")
>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>>> ---
>>>   drivers/perf/arm-cmn.c | 5 ++++-
>>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>> 
>>> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
>>> index fd2122a37f22..0e2e12e2f4fb 100644
>>> --- a/drivers/perf/arm-cmn.c
>>> +++ b/drivers/perf/arm-cmn.c
>>> @@ -2393,10 +2393,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, 
>>> unsigned int rgn_offset)
>>>   			case CMN_TYPE_CXHA:
>>>   			case CMN_TYPE_CCRA:
>>>   			case CMN_TYPE_CCHA:
>>> -			case CMN_TYPE_CCLA:
>>>   			case CMN_TYPE_HNS:
>>>   				dn++;
>>>   				break;
>>> +			case CMN_TYPE_CCLA:
>>> +				dn->pmu_base += CMN_HNP_PMU_EVENT_SEL;
>>> +				dn++;
>>> +				break;
>> 
>> When reading this for the first time, it looks like a copy-paste error
>> since CMN_HNP_PMU_EVENT_SEL doesn't have any obvious relationship with
>> CCLA nodes.
>> 
>> I reckon it'd be worth adding CMN_CCLA_PMU_EVENT_SEL, and replacing the
>> existing comment above the definition of CMN_HNP_PMU_EVENT_SEL, e.g.
>> 
>> /*
>>   * Some nodes place common registers at different offsets from most
>>   * other nodes.
>>   */
>> #define CMN_HNP_PMU_EVENT_SEL		0x008
>> #define CMN_CCLA_PMU_EVENT_SEL		0x008
>> 
>> That way the switch looks less suspicious, and the comment is a bit more
>> helpful to anyone trying to figure out what's going on here.
>
> Sure, that's a reasonable argument.
>
>> With that:
>> 
>> Acked-by: Mark Rutland <mark.rutland@arm.com>
>
> Thanks,
> Robin.


I like Mark's idea. With the change,

Reviewed-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>

Cheers, Ilkka

>
>> 
>> Mark.
>>
>>>   			/* Nothing to see here */
>>>   			case CMN_TYPE_MPAM_S:
>>>   			case CMN_TYPE_MPAM_NS:
>>> -- 
>>> 2.39.2.101.g768bb238c484.dirty
>>> 
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 8/8] perf/arm-cmn: Support CMN S3
  2024-08-09 19:15 ` [PATCH 8/8] perf/arm-cmn: Support " Robin Murphy
  2024-08-16 10:32   ` Mark Rutland
@ 2024-08-23 23:38   ` Ilkka Koskinen
  1 sibling, 0 replies; 32+ messages in thread
From: Ilkka Koskinen @ 2024-08-23 23:38 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, mark.rutland, linux-arm-kernel, linux-kernel, ilkka


Hi Robin,

On Fri, 9 Aug 2024, Robin Murphy wrote:
> CMN S3 is the latest and greatest evolution for 2024, although most of
> the new features don't impact the PMU, so from our point of view it ends
> up looking a lot like CMN-700 r3 still. We have some new device types to
> ignore, a mildly irritating rearrangement of the register layouts, and a
> scary new configuration option that makes it potentially unsafe to even
> walk the full discovery tree, let alone attempt to use the PMU.
>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>

I didn't look at CMN S3 spec but the patch looks good to me.

Reviewed-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>


Cheers, Ilkka

> ---
> drivers/perf/arm-cmn.c | 117 ++++++++++++++++++++++++++---------------
> 1 file changed, 74 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index 12bbb689a1af..0d19163fba5a 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -42,24 +42,28 @@
> #define CMN_CFGM_PERIPH_ID_23		0x0010
> #define CMN_CFGM_PID2_REVISION		GENMASK_ULL(7, 4)
>
> -#define CMN_CFGM_INFO_GLOBAL		0x900
> +#define CMN_CFGM_INFO_GLOBAL		0x0900
> #define CMN_INFO_MULTIPLE_DTM_EN	BIT_ULL(63)
> #define CMN_INFO_RSP_VC_NUM		GENMASK_ULL(53, 52)
> #define CMN_INFO_DAT_VC_NUM		GENMASK_ULL(51, 50)
> +#define CMN_INFO_DEVICE_ISO_ENABLE	BIT_ULL(44)
>
> -#define CMN_CFGM_INFO_GLOBAL_1		0x908
> +#define CMN_CFGM_INFO_GLOBAL_1		0x0908
> #define CMN_INFO_SNP_VC_NUM		GENMASK_ULL(3, 2)
> #define CMN_INFO_REQ_VC_NUM		GENMASK_ULL(1, 0)
>
> /* XPs also have some local topology info which has uses too */
> #define CMN_MXP__CONNECT_INFO(p)	(0x0008 + 8 * (p))
> -#define CMN__CONNECT_INFO_DEVICE_TYPE	GENMASK_ULL(4, 0)
> +#define CMN__CONNECT_INFO_DEVICE_TYPE	GENMASK_ULL(5, 0)
>
> #define CMN_MAX_PORTS			6
> #define CI700_CONNECT_INFO_P2_5_OFFSET	0x10
>
> /* PMU registers occupy the 3rd 4KB page of each node's region */
> #define CMN_PMU_OFFSET			0x2000
> +/* ...except when they don't :( */
> +#define CMN_S3_DTM_OFFSET		0xa000
> +#define CMN_S3_PMU_OFFSET		0xd900
>
> /* For most nodes, this is all there is */
> #define CMN_PMU_EVENT_SEL		0x000
> @@ -191,10 +195,11 @@ enum cmn_model {
> 	CMN650 = 2,
> 	CMN700 = 4,
> 	CI700 = 8,
> +	CMNS3 = 16,
> 	/* ...and then we can use bitmap tricks for commonality */
> 	CMN_ANY = -1,
> 	NOT_CMN600 = -2,
> -	CMN_650ON = CMN650 | CMN700,
> +	CMN_650ON = CMN650 | CMN700 | CMNS3,
> };
>
> /* Actual part numbers and revision IDs defined by the hardware */
> @@ -203,6 +208,7 @@ enum cmn_part {
> 	PART_CMN650 = 0x436,
> 	PART_CMN700 = 0x43c,
> 	PART_CI700 = 0x43a,
> +	PART_CMN_S3 = 0x43e,
> };
>
> /* CMN-600 r0px shouldn't exist in silicon, thankfully */
> @@ -254,6 +260,7 @@ enum cmn_node_type {
> 	CMN_TYPE_HNS = 0x200,
> 	CMN_TYPE_HNS_MPAM_S,
> 	CMN_TYPE_HNS_MPAM_NS,
> +	CMN_TYPE_APB = 0x1000,
> 	/* Not a real node type */
> 	CMN_TYPE_WP = 0x7770
> };
> @@ -404,6 +411,8 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
> 		return CMN700;
> 	case PART_CI700:
> 		return CI700;
> +	case PART_CMN_S3:
> +		return CMNS3;
> 	default:
> 		return 0;
> 	};
> @@ -411,6 +420,11 @@ static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
>
> static int arm_cmn_pmu_offset(const struct arm_cmn *cmn, const struct arm_cmn_node *dn)
> {
> +	if (cmn->part == PART_CMN_S3) {
> +		if (dn->type == CMN_TYPE_XP)
> +			return CMN_S3_DTM_OFFSET;
> +		return CMN_S3_PMU_OFFSET;
> +	}
> 	return CMN_PMU_OFFSET;
> }
>
> @@ -467,6 +481,9 @@ static const char *arm_cmn_device_type(u8 type)
> 		case 0x1c: return "  MTSX  |";
> 		case 0x1d: return "  HN-V  |";
> 		case 0x1e: return "  CCG   |";
> +		case 0x20: return " RN-F_F |";
> +		case 0x21: return "RN-F_F_E|";
> +		case 0x22: return " SN-F_F |";
> 		default:   return "  ????  |";
> 	}
> }
> @@ -777,8 +794,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
> 	CMN_EVENT_ATTR(CMN_ANY, cxha_##_name, CMN_TYPE_CXHA, _event)
> #define CMN_EVENT_CCRA(_name, _event)				\
> 	CMN_EVENT_ATTR(CMN_ANY, ccra_##_name, CMN_TYPE_CCRA, _event)
> -#define CMN_EVENT_CCHA(_name, _event)				\
> -	CMN_EVENT_ATTR(CMN_ANY, ccha_##_name, CMN_TYPE_CCHA, _event)
> +#define CMN_EVENT_CCHA(_model, _name, _event)				\
> +	CMN_EVENT_ATTR(_model, ccha_##_name, CMN_TYPE_CCHA, _event)
> #define CMN_EVENT_CCLA(_name, _event)				\
> 	CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
> #define CMN_EVENT_CCLA_RNI(_name, _event)				\
> @@ -1136,42 +1153,43 @@ static struct attribute *arm_cmn_event_attrs[] = {
> 	CMN_EVENT_CCRA(wdb_alloc,			0x59),
> 	CMN_EVENT_CCRA(ssb_alloc,			0x5a),
>
> -	CMN_EVENT_CCHA(rddatbyp,			0x61),
> -	CMN_EVENT_CCHA(chirsp_up_stall,			0x62),
> -	CMN_EVENT_CCHA(chidat_up_stall,			0x63),
> -	CMN_EVENT_CCHA(snppcrd_link0_stall,		0x64),
> -	CMN_EVENT_CCHA(snppcrd_link1_stall,		0x65),
> -	CMN_EVENT_CCHA(snppcrd_link2_stall,		0x66),
> -	CMN_EVENT_CCHA(reqtrk_occ,			0x67),
> -	CMN_EVENT_CCHA(rdb_occ,				0x68),
> -	CMN_EVENT_CCHA(rdbyp_occ,			0x69),
> -	CMN_EVENT_CCHA(wdb_occ,				0x6a),
> -	CMN_EVENT_CCHA(snptrk_occ,			0x6b),
> -	CMN_EVENT_CCHA(sdb_occ,				0x6c),
> -	CMN_EVENT_CCHA(snphaz_occ,			0x6d),
> -	CMN_EVENT_CCHA(reqtrk_alloc,			0x6e),
> -	CMN_EVENT_CCHA(rdb_alloc,			0x6f),
> -	CMN_EVENT_CCHA(rdbyp_alloc,			0x70),
> -	CMN_EVENT_CCHA(wdb_alloc,			0x71),
> -	CMN_EVENT_CCHA(snptrk_alloc,			0x72),
> -	CMN_EVENT_CCHA(sdb_alloc,			0x73),
> -	CMN_EVENT_CCHA(snphaz_alloc,			0x74),
> -	CMN_EVENT_CCHA(pb_rhu_req_occ,			0x75),
> -	CMN_EVENT_CCHA(pb_rhu_req_alloc,		0x76),
> -	CMN_EVENT_CCHA(pb_rhu_pcie_req_occ,		0x77),
> -	CMN_EVENT_CCHA(pb_rhu_pcie_req_alloc,		0x78),
> -	CMN_EVENT_CCHA(pb_pcie_wr_req_occ,		0x79),
> -	CMN_EVENT_CCHA(pb_pcie_wr_req_alloc,		0x7a),
> -	CMN_EVENT_CCHA(pb_pcie_reg_req_occ,		0x7b),
> -	CMN_EVENT_CCHA(pb_pcie_reg_req_alloc,		0x7c),
> -	CMN_EVENT_CCHA(pb_pcie_rsvd_req_occ,		0x7d),
> -	CMN_EVENT_CCHA(pb_pcie_rsvd_req_alloc,		0x7e),
> -	CMN_EVENT_CCHA(pb_rhu_dat_occ,			0x7f),
> -	CMN_EVENT_CCHA(pb_rhu_dat_alloc,		0x80),
> -	CMN_EVENT_CCHA(pb_rhu_pcie_dat_occ,		0x81),
> -	CMN_EVENT_CCHA(pb_rhu_pcie_dat_alloc,		0x82),
> -	CMN_EVENT_CCHA(pb_pcie_wr_dat_occ,		0x83),
> -	CMN_EVENT_CCHA(pb_pcie_wr_dat_alloc,		0x84),
> +	CMN_EVENT_CCHA(CMN_ANY, rddatbyp,		0x61),
> +	CMN_EVENT_CCHA(CMN_ANY, chirsp_up_stall,	0x62),
> +	CMN_EVENT_CCHA(CMN_ANY, chidat_up_stall,	0x63),
> +	CMN_EVENT_CCHA(CMN_ANY, snppcrd_link0_stall,	0x64),
> +	CMN_EVENT_CCHA(CMN_ANY, snppcrd_link1_stall,	0x65),
> +	CMN_EVENT_CCHA(CMN_ANY, snppcrd_link2_stall,	0x66),
> +	CMN_EVENT_CCHA(CMN_ANY, reqtrk_occ,		0x67),
> +	CMN_EVENT_CCHA(CMN_ANY, rdb_occ,		0x68),
> +	CMN_EVENT_CCHA(CMN_ANY, rdbyp_occ,		0x69),
> +	CMN_EVENT_CCHA(CMN_ANY, wdb_occ,		0x6a),
> +	CMN_EVENT_CCHA(CMN_ANY, snptrk_occ,		0x6b),
> +	CMN_EVENT_CCHA(CMN_ANY, sdb_occ,		0x6c),
> +	CMN_EVENT_CCHA(CMN_ANY, snphaz_occ,		0x6d),
> +	CMN_EVENT_CCHA(CMN_ANY, reqtrk_alloc,		0x6e),
> +	CMN_EVENT_CCHA(CMN_ANY, rdb_alloc,		0x6f),
> +	CMN_EVENT_CCHA(CMN_ANY, rdbyp_alloc,		0x70),
> +	CMN_EVENT_CCHA(CMN_ANY, wdb_alloc,		0x71),
> +	CMN_EVENT_CCHA(CMN_ANY, snptrk_alloc,		0x72),
> +	CMN_EVENT_CCHA(CMN_ANY, db_alloc,		0x73),
> +	CMN_EVENT_CCHA(CMN_ANY, snphaz_alloc,		0x74),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_occ,		0x75),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_req_alloc,	0x76),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_occ,	0x77),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_req_alloc,	0x78),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_occ,	0x79),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_req_alloc,	0x7a),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_occ,	0x7b),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_reg_req_alloc,	0x7c),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_occ,	0x7d),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_rsvd_req_alloc,	0x7e),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_occ,		0x7f),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_dat_alloc,	0x80),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_occ,	0x81),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_rhu_pcie_dat_alloc,	0x82),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_occ,	0x83),
> +	CMN_EVENT_CCHA(CMN_ANY, pb_pcie_wr_dat_alloc,	0x84),
> +	CMN_EVENT_CCHA(CMNS3, chirsp1_up_stall,		0x85),
>
> 	CMN_EVENT_CCLA(rx_cxs,				0x21),
> 	CMN_EVENT_CCLA(tx_cxs,				0x22),
> @@ -1777,7 +1795,8 @@ static int arm_cmn_event_init(struct perf_event *event)
> 		/* ...but the DTM may depend on which port we're watching */
> 		if (cmn->multi_dtm)
> 			hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
> -	} else if (type == CMN_TYPE_XP && cmn->part == PART_CMN700) {
> +	} else if (type == CMN_TYPE_XP &&
> +		   (cmn->part == PART_CMN700 || cmn->part == PART_CMN_S3)) {
> 		hw->wide_sel = true;
> 	}
>
> @@ -2264,7 +2283,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
> 	reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_23);
> 	cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg);
>
> +	/*
> +	 * With the device isolation feature, if firmware has neglected to enable
> +	 * an XP port then we risk locking up if we try to access anything behind
> +	 * it; however we also have no way to tell from Non-Secure whether any
> +	 * given port is disabled or not, so the only way to win is not to play...
> +	 */
> 	reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL);
> +	if (reg & CMN_INFO_DEVICE_ISO_ENABLE) {
> +		dev_err(cmn->dev, "Device isolation enabled, not continuing due to risk of lockup\n");
> +		return -ENODEV;
> +	}
> 	cmn->multi_dtm = reg & CMN_INFO_MULTIPLE_DTM_EN;
> 	cmn->rsp_vc_num = FIELD_GET(CMN_INFO_RSP_VC_NUM, reg);
> 	cmn->dat_vc_num = FIELD_GET(CMN_INFO_DAT_VC_NUM, reg);
> @@ -2423,6 +2452,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
> 			case CMN_TYPE_CXLA:
> 			case CMN_TYPE_HNS_MPAM_S:
> 			case CMN_TYPE_HNS_MPAM_NS:
> +			case CMN_TYPE_APB:
> 				break;
> 			/*
> 			 * Split "optimised" combination nodes into separate
> @@ -2608,6 +2638,7 @@ static const struct of_device_id arm_cmn_of_match[] = {
> 	{ .compatible = "arm,cmn-600", .data = (void *)PART_CMN600 },
> 	{ .compatible = "arm,cmn-650" },
> 	{ .compatible = "arm,cmn-700" },
> +	{ .compatible = "arm,cmn-s3" },
> 	{ .compatible = "arm,ci-700" },
> 	{}
> };
> -- 
> 2.39.2.101.g768bb238c484.dirty
>
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 6/8] perf/arm-cmn: Refactor DTC PMU register access
  2024-08-20  9:23       ` Mark Rutland
@ 2024-08-23 23:46         ` Ilkka Koskinen
  0 siblings, 0 replies; 32+ messages in thread
From: Ilkka Koskinen @ 2024-08-23 23:46 UTC (permalink / raw)
  To: Mark Rutland; +Cc: Robin Murphy, will, linux-arm-kernel, linux-kernel, ilkka



On Tue, 20 Aug 2024, Mark Rutland wrote:

> On Mon, Aug 19, 2024 at 05:41:30PM +0100, Robin Murphy wrote:
>> On 16/08/2024 11:29 am, Mark Rutland wrote:
>>> On Fri, Aug 09, 2024 at 08:15:45PM +0100, Robin Murphy wrote:
>>>> Annoyingly, we're soon going to have to cope with PMU registers moving
>>>> about. This will mostly be straightforward, except for the hard-coding
>>>> of CMN_PMU_OFFSET for the DTC PMU registers. As a first step, refactor
>>>> those accessors to allow for encapsulating a variable offset without
>>>> making a big mess all over.
>>>>
>>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>>>> ---
>>>>   drivers/perf/arm-cmn.c | 64 ++++++++++++++++++++++++------------------
>>>>   1 file changed, 36 insertions(+), 28 deletions(-)
>>>
>>> Aside from a minoe comment below this looks fine to me.
>>>
>>>>   struct arm_cmn_dtc {
>>>>   	void __iomem *base;
>>>> +	void __iomem *pmu_base;
>>>>   	int irq;
>>>> -	int irq_friend;
>>>> +	s8 irq_friend;
>>>
>>> Unrelated change?
>>>
>>> AFAICT there's no reason for 'irq_friend' to change from 'int' to 's8',
>>> and nothing in the commit message explains it.
>>
>> Oops, I had meant to note in the commit message that this is a little
>> structure repacking where there was a hole already, to compensate for adding
>> the new member... I shall un-forget that for v2.
>
> Cool, with that:
>
> Acked-by: Mark Rutland <mark.rutland@arm.com>
>
> Mark.
>

With the change, the patch looks good to me too

Reviewed-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>

Cheers, Ilkka



^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 3/8] perf/arm-cmn: Ensure dtm_idx is big enough
  2024-08-19 15:00     ` Robin Murphy
  2024-08-20  9:27       ` Mark Rutland
@ 2024-08-23 23:58       ` Ilkka Koskinen
  1 sibling, 0 replies; 32+ messages in thread
From: Ilkka Koskinen @ 2024-08-23 23:58 UTC (permalink / raw)
  To: Robin Murphy; +Cc: Mark Rutland, will, linux-arm-kernel, linux-kernel, ilkka



On Mon, 19 Aug 2024, Robin Murphy wrote:

> On 16/08/2024 11:14 am, Mark Rutland wrote:
>> On Fri, Aug 09, 2024 at 08:15:42PM +0100, Robin Murphy wrote:
>>> While CMN_MAX_DIMENSION was bumped to 12 for CMN-650, that only supports
>>> up to a 10x10 mesh, so bumping dtm_idx to 256 bits at the time worked
>>> out OK in practice. However CMN-700 did finally support up to 144 XPs,
>>> and thus needs a worst-case 288 bits of dtm_idx for an aggregated XP
>>> event on a maxed-out config. Oops.
>>> 
>>> Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support")
>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>>> ---
>>>   drivers/perf/arm-cmn.c | 2 +-
>>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>> 
>>> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
>>> index 0e2e12e2f4fb..c9a2b21a7aec 100644
>>> --- a/drivers/perf/arm-cmn.c
>>> +++ b/drivers/perf/arm-cmn.c
>>> @@ -563,7 +563,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, 
>>> int id) {}
>>>     struct arm_cmn_hw_event {
>>>   	struct arm_cmn_node *dn;
>>> -	u64 dtm_idx[4];
>>> +	u64 dtm_idx[5];
>> 
>> Can't we size this based on CMN_MAX_DIMENSION (or CMN_MAX_XPS or
>> CMN_MAX_DTMS), to make that clear?
>
> Fair enough, I did go back and forth a little on that idea, but reached the 
> opposite conclusion that documenting this with the assert to deliberately 
> make it *not* look straightforward was nicer than wrestling with an accurate 
> name for the logical quantity here, which strictly would be something like 
> CMN_MAX_NODES_PER_TYPE_WE_CARE_ABOUT (there can already be up to 256 RN-Fs, 
> but those aren't visible to the PMU).
>
> I'll have another think on that approach - I do concur that the assert isn't 
> *functionally* any better than automatically sizing the array.

I'm ok with the patch but automatic sizing would be nice as there would be 
one less thing to verify when the mesh size is increased again.

Cheers, Ilkka

>
> Thanks,
> Robin.
>
>>  From the desciription in the commit message it sounds like you need 2 *
>> CMN_MAX_XPS bits, i.e.
>>
>> 	#define DTM_IDX_BITS	(2 * CMN_MAX_XPS)
>>
>> 	u64 dtm_idx[DIV_ROUND_UP(DTM_IDX_BITS, 64)];
>> 
>> Mark.
>>
>>>   	s8 dtc_idx[CMN_MAX_DTCS];
>>>   	u8 num_dns;
>>>   	u8 dtm_offset;
>>> -- 
>>> 2.39.2.101.g768bb238c484.dirty
>>> 
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again.
  2024-08-09 19:15 ` [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again Robin Murphy
  2024-08-16  9:33   ` Mark Rutland
@ 2024-08-24  1:00   ` Ilkka Koskinen
  2024-08-27 13:51     ` Robin Murphy
  1 sibling, 1 reply; 32+ messages in thread
From: Ilkka Koskinen @ 2024-08-24  1:00 UTC (permalink / raw)
  To: Robin Murphy; +Cc: will, mark.rutland, linux-arm-kernel, linux-kernel, ilkka



On Fri, 9 Aug 2024, Robin Murphy wrote:
> It transpires that despite the explicit example to the contrary in the
> CMN-700 TRM, the "extra device ports" config is in fact a per-XP thing
> rather than a global one. To that end, rework node IDs yet again to
> carry around the additional data necessary to decode them properly. At
> this point the notion of fully decomposing an ID becomes more
> impractical than it's worth, so unabstracting the XY mesh coordinates
> (where 2/3 users were just debug anyway) ends up leaving things a bit
> simpler overall.
>
> Fixes: 60d1504070c2 ("perf/arm-cmn: Support new IP features")
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> drivers/perf/arm-cmn.c | 94 ++++++++++++++++++------------------------
> 1 file changed, 40 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index c932d9d355cf..fd2122a37f22 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c

> @@ -357,49 +352,33 @@ struct arm_cmn {
> static int arm_cmn_hp_state;
>
> struct arm_cmn_nodeid {
> -	u8 x;
> -	u8 y;
> 	u8 port;
> 	u8 dev;
> };
>
> static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
> {
> -	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2);
> +	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1));
> }
>
> -static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
> +static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn_node *dn)
> {
> 	struct arm_cmn_nodeid nid;
>
> -	if (cmn->num_xps == 1) {
> -		nid.x = 0;
> -		nid.y = 0;
> -		nid.port = CMN_NODEID_1x1_PID(id);
> -		nid.dev = CMN_NODEID_DEVID(id);
> -	} else {
> -		int bits = arm_cmn_xyidbits(cmn);
> -
> -		nid.x = CMN_NODEID_X(id, bits);
> -		nid.y = CMN_NODEID_Y(id, bits);
> -		if (cmn->ports_used & 0xc) {
> -			nid.port = CMN_NODEID_EXT_PID(id);
> -			nid.dev = CMN_NODEID_EXT_DEVID(id);
> -		} else {
> -			nid.port = CMN_NODEID_PID(id);
> -			nid.dev = CMN_NODEID_DEVID(id);
> -		}
> -	}
> +	nid.dev = dn->id & ((1U << dn->deviceid_bits) - 1);
> +	nid.port = (dn->id >> dn->deviceid_bits) & ((1U << dn->portid_bits) - 1);
> 	return nid;
> }
>
> static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
> 					       const struct arm_cmn_node *dn)
> {
> -	struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
> -	int xp_idx = cmn->mesh_x * nid.y + nid.x;
> +	int id = dn->id >> (dn->portid_bits + dn->deviceid_bits);
> +	int bits = arm_cmn_xyidbits(cmn);
> +	int x = id > bits;
                   ^^^

Instead of comparison, shouldn't that be bit shift?

Cheers, Ilkka

> +	int y = id & ((1U << bits) - 1);
>
> -	return cmn->xps + xp_idx;
> +	return cmn->xps + cmn->mesh_x * y + x;
> }
> static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
> 					 enum cmn_node_type type)
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again.
  2024-08-24  1:00   ` Ilkka Koskinen
@ 2024-08-27 13:51     ` Robin Murphy
  0 siblings, 0 replies; 32+ messages in thread
From: Robin Murphy @ 2024-08-27 13:51 UTC (permalink / raw)
  To: Ilkka Koskinen; +Cc: will, mark.rutland, linux-arm-kernel, linux-kernel

On 24/08/2024 2:00 am, Ilkka Koskinen wrote:
[...]
>> static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
>>                            const struct arm_cmn_node *dn)
>> {
>> -    struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
>> -    int xp_idx = cmn->mesh_x * nid.y + nid.x;
>> +    int id = dn->id >> (dn->portid_bits + dn->deviceid_bits);
>> +    int bits = arm_cmn_xyidbits(cmn);
>> +    int x = id > bits;
>                    ^^^
> 
> Instead of comparison, shouldn't that be bit shift?

Oh dear indeed... and of course the effects of that would only show up 
on hardware configurations I don't have. Good catch, thanks!

Cheers,
Robin.


^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2024-08-27 13:52 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-08-09 19:15 [PATCH 0/8] perf/arm-cmn: Fixes and updates Robin Murphy
2024-08-09 19:15 ` [PATCH 1/8] perf/arm-cmn: Refactor node ID handling. Again Robin Murphy
2024-08-16  9:33   ` Mark Rutland
2024-08-16 11:56     ` Robin Murphy
2024-08-16 12:45       ` Mark Rutland
2024-08-16 13:21         ` Robin Murphy
2024-08-24  1:00   ` Ilkka Koskinen
2024-08-27 13:51     ` Robin Murphy
2024-08-09 19:15 ` [PATCH 2/8] perf/arm-cmn: Fix CCLA register offset Robin Murphy
2024-08-16 10:00   ` Mark Rutland
2024-08-16 18:33     ` Robin Murphy
2024-08-23 23:18       ` Ilkka Koskinen
2024-08-09 19:15 ` [PATCH 3/8] perf/arm-cmn: Ensure dtm_idx is big enough Robin Murphy
2024-08-16 10:14   ` Mark Rutland
2024-08-19 15:00     ` Robin Murphy
2024-08-20  9:27       ` Mark Rutland
2024-08-23 23:58       ` Ilkka Koskinen
2024-08-09 19:15 ` [PATCH 4/8] perf/arm-cmn: Improve build-time assertions Robin Murphy
2024-08-16 10:20   ` Mark Rutland
2024-08-09 19:15 ` [PATCH 5/8] perf/arm-cmn: Make cycle counts less surprising Robin Murphy
2024-08-16 10:25   ` Mark Rutland
2024-08-19 16:35     ` Robin Murphy
2024-08-09 19:15 ` [PATCH 6/8] perf/arm-cmn: Refactor DTC PMU register access Robin Murphy
2024-08-16 10:29   ` Mark Rutland
2024-08-19 16:41     ` Robin Murphy
2024-08-20  9:23       ` Mark Rutland
2024-08-23 23:46         ` Ilkka Koskinen
2024-08-09 19:15 ` [PATCH 7/8] dt-bindings: perf: arm-cmn: Add CMN S3 Robin Murphy
2024-08-15 15:20   ` Rob Herring (Arm)
2024-08-09 19:15 ` [PATCH 8/8] perf/arm-cmn: Support " Robin Murphy
2024-08-16 10:32   ` Mark Rutland
2024-08-23 23:38   ` Ilkka Koskinen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).