Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5
@ 2026-05-03 15:51 Aviv Bakal
  2026-05-04 13:39 ` [PATCH v2] " Aviv Bakal
  0 siblings, 1 reply; 7+ messages in thread
From: Aviv Bakal @ 2026-05-03 15:51 UTC (permalink / raw)
  To: robin.murphy, will, mark.rutland
  Cc: linux-arm-kernel, linux-perf-users, linux-kernel, avivb, zeev,
	blakgeof

Graviton5 uses a customised CMN-S3 implementation where certain
discovery registers report zeroed fields. Add the following workarounds:

 - Introduce a dedicated ACPI HID to identify the Graviton5 CMN variant.
 - Derive the DTC domain from the XP node ID, since the unit info
   register reports it as zero.
 - Set the DTC logical ID from the XP's logical ID, since the node info
   register's logical ID field is also zeroed.

Signed-off-by: Aviv Bakal <avivb@amazon.com>
---
 drivers/perf/arm-cmn.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index f5305c8fdca4..368fe1a86bfb 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -31,7 +31,8 @@
 #define CMN_CHILD_NODE_ADDR		GENMASK(29, 0)
 #define CMN_CHILD_NODE_EXTERNAL		BIT(31)
 
-#define CMN_MAX_DIMENSION		12
+/* Some implementations use a mesh larger than the architectural max of 12 */
+#define CMN_MAX_DIMENSION		14
 #define CMN_MAX_XPS			(CMN_MAX_DIMENSION * CMN_MAX_DIMENSION)
 #define CMN_MAX_DTMS			(CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4)
 
@@ -214,6 +215,8 @@ enum cmn_part {
 	PART_CMN700 = 0x43c,
 	PART_CI700 = 0x43a,
 	PART_CMN_S3 = 0x43e,
+	/* Synthetic part number, overridden to PART_CMN_S3 during discovery */
+	PART_GRAVITON5 = 0xa5,
 };
 
 /* CMN-600 r0px shouldn't exist in silicon, thankfully */
@@ -2221,6 +2224,18 @@ static unsigned int arm_cmn_dtc_domain(struct arm_cmn *cmn, void __iomem *xp_reg
 	return FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN, readl_relaxed(xp_region + offset));
 }
 
+static unsigned int arm_cmn_graviton5_dtc_domain(u16 xp_id)
+{
+	unsigned int x = (xp_id >> 7) & 0xf;
+	unsigned int y = (xp_id >> 3) & 0xf;
+
+	/*
+	 * The unit info register reads as zero; derive the DTC domain from
+	 * the XP's mesh coordinates over the 10x14 mesh.
+	 */
+	return (x / 5) + (y / 7) * 2;
+}
+
 static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node)
 {
 	int level;
@@ -2266,6 +2281,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 	u64 reg;
 	int i, j;
 	size_t sz;
+	bool graviton5_workaround = false;
 
 	arm_cmn_init_node_info(cmn, rgn_offset, &cfg);
 	if (cfg.type != CMN_TYPE_CFG)
@@ -2276,6 +2292,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 	reg = readq_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_01);
 	part = FIELD_GET(CMN_CFGM_PID0_PART_0, reg);
 	part |= FIELD_GET(CMN_CFGM_PID1_PART_1, reg) << 8;
+
+	/* Graviton5 has a customised CMN-S3 which needs some fixups */
+	if (cmn->part == PART_GRAVITON5) {
+		cmn->part = PART_CMN_S3;
+		graviton5_workaround = true;
+	}
+
 	/* 600AE is close enough that it's not really worth more complexity */
 	if (part == PART_CMN600AE)
 		part = PART_CMN600;
@@ -2365,6 +2388,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 
 		if (cmn->part == PART_CMN600)
 			xp->dtc = -1;
+		else if (graviton5_workaround)
+			xp->dtc = arm_cmn_graviton5_dtc_domain(xp->id);
 		else
 			xp->dtc = arm_cmn_dtc_domain(cmn, xp_region);
 
@@ -2443,6 +2468,10 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 
 			switch (dn->type) {
 			case CMN_TYPE_DTC:
+				if (graviton5_workaround) {
+					/* Node info logical ID is zeroed; use the XP's */
+					dn->logid = xp->logid;
+				}
 				cmn->num_dtcs++;
 				dn++;
 				break;
@@ -2658,6 +2687,7 @@ static const struct acpi_device_id arm_cmn_acpi_match[] = {
 	{ "ARMHC650" },
 	{ "ARMHC700" },
 	{ "ARMHC003" },
+	{ "AMZN0070", PART_GRAVITON5 },
 	{}
 };
 MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match);
-- 
2.47.3



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5
  2026-05-03 15:51 [PATCH] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5 Aviv Bakal
@ 2026-05-04 13:39 ` Aviv Bakal
  2026-05-05  2:31   ` kernel test robot
                     ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Aviv Bakal @ 2026-05-04 13:39 UTC (permalink / raw)
  To: robin.murphy, will, mark.rutland
  Cc: linux-arm-kernel, linux-perf-users, linux-kernel, zeev, blakgeof

Graviton5 uses a customised CMN-S3 implementation where certain
discovery registers report zeroed fields. Add the following workarounds:

 - Introduce a dedicated ACPI HID to identify the Graviton5 CMN variant.
 - Derive the DTC domain from the XP node ID, since the unit info
   register reports it as zero.
 - Set the DTC logical ID from the computed domain ID, since the node
   info register's logical ID field is also zeroed.

Signed-off-by: Aviv Bakal <avivb@amazon.com>
---
v2:
 - Use computed domain ID (xp->dtc) instead of XP logical ID for DTC
   logical ID assignment.

 drivers/perf/arm-cmn.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index f5305c8fdca4..8ee3f8638602 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -31,7 +31,8 @@
 #define CMN_CHILD_NODE_ADDR		GENMASK(29, 0)
 #define CMN_CHILD_NODE_EXTERNAL		BIT(31)
 
-#define CMN_MAX_DIMENSION		12
+/* Some implementations use a mesh larger than the architectural max of 12 */
+#define CMN_MAX_DIMENSION		14
 #define CMN_MAX_XPS			(CMN_MAX_DIMENSION * CMN_MAX_DIMENSION)
 #define CMN_MAX_DTMS			(CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4)
 
@@ -214,6 +215,8 @@ enum cmn_part {
 	PART_CMN700 = 0x43c,
 	PART_CI700 = 0x43a,
 	PART_CMN_S3 = 0x43e,
+	/* Synthetic part number, overridden to PART_CMN_S3 during discovery */
+	PART_GRAVITON5 = 0xa5,
 };
 
 /* CMN-600 r0px shouldn't exist in silicon, thankfully */
@@ -2221,6 +2224,18 @@ static unsigned int arm_cmn_dtc_domain(struct arm_cmn *cmn, void __iomem *xp_reg
 	return FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN, readl_relaxed(xp_region + offset));
 }
 
+static unsigned int arm_cmn_graviton5_dtc_domain(u16 xp_id)
+{
+	unsigned int x = (xp_id >> 7) & 0xf;
+	unsigned int y = (xp_id >> 3) & 0xf;
+
+	/*
+	 * The unit info register reads as zero; derive the DTC domain from
+	 * the XP's mesh coordinates over the 10x14 mesh.
+	 */
+	return (x / 5) + (y / 7) * 2;
+}
+
 static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node)
 {
 	int level;
@@ -2266,6 +2281,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 	u64 reg;
 	int i, j;
 	size_t sz;
+	bool graviton5_workaround = false;
 
 	arm_cmn_init_node_info(cmn, rgn_offset, &cfg);
 	if (cfg.type != CMN_TYPE_CFG)
@@ -2276,6 +2292,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 	reg = readq_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_01);
 	part = FIELD_GET(CMN_CFGM_PID0_PART_0, reg);
 	part |= FIELD_GET(CMN_CFGM_PID1_PART_1, reg) << 8;
+
+	/* Graviton5 has a customised CMN-S3 which needs some fixups */
+	if (cmn->part == PART_GRAVITON5) {
+		cmn->part = PART_CMN_S3;
+		graviton5_workaround = true;
+	}
+
 	/* 600AE is close enough that it's not really worth more complexity */
 	if (part == PART_CMN600AE)
 		part = PART_CMN600;
@@ -2365,6 +2388,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 
 		if (cmn->part == PART_CMN600)
 			xp->dtc = -1;
+		else if (graviton5_workaround)
+			xp->dtc = arm_cmn_graviton5_dtc_domain(xp->id);
 		else
 			xp->dtc = arm_cmn_dtc_domain(cmn, xp_region);
 
@@ -2443,6 +2468,10 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 
 			switch (dn->type) {
 			case CMN_TYPE_DTC:
+				if (graviton5_workaround) {
+					/* Node info logical ID is zeroed; use the domain ID */
+					dn->logid = xp->dtc;
+				}
 				cmn->num_dtcs++;
 				dn++;
 				break;
@@ -2658,6 +2687,7 @@ static const struct acpi_device_id arm_cmn_acpi_match[] = {
 	{ "ARMHC650" },
 	{ "ARMHC700" },
 	{ "ARMHC003" },
+	{ "AMZN0070", PART_GRAVITON5 },
 	{}
 };
 MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match);
-- 
2.47.3



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5
  2026-05-04 13:39 ` [PATCH v2] " Aviv Bakal
@ 2026-05-05  2:31   ` kernel test robot
  2026-05-21 16:02   ` Robin Murphy
  2026-05-24 15:38   ` [PATCH v3 0/2] " Aviv Bakal
  2 siblings, 0 replies; 7+ messages in thread
From: kernel test robot @ 2026-05-05  2:31 UTC (permalink / raw)
  To: Aviv Bakal, robin.murphy, will, mark.rutland
  Cc: llvm, oe-kbuild-all, linux-arm-kernel, linux-perf-users,
	linux-kernel, zeev, blakgeof

Hi Aviv,

kernel test robot noticed the following build errors:

[auto build test ERROR on arm-perf/for-next/perf]
[also build test ERROR on soc/for-next linus/master v7.1-rc2 next-20260430]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Aviv-Bakal/perf-arm-cmn-Add-workarounds-for-CMN-S3-on-Graviton5/20260505-011858
base:   https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git for-next/perf
patch link:    https://lore.kernel.org/r/20260504133923.23373-1-avivb%40amazon.com
patch subject: [PATCH v2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5
config: i386-buildonly-randconfig-004-20260505 (https://download.01.org/0day-ci/archive/20260505/202605051052.zOemYJY9-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260505/202605051052.zOemYJY9-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags:
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202605051052.zOemYJY9-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/perf/arm-cmn.c:617:15: error: static assertion failed due to requirement 'sizeof(struct arm_cmn_hw_event) <= __builtin_offsetof(struct hw_perf_event, target)': sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event, target)
     617 | static_assert(sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event, target));
         | ~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
      77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
         |                                  ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   drivers/perf/arm-cmn.c:617:47: note: expression evaluates to '104 <= 96'
     617 | static_assert(sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event, target));
         | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
      77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
         |                                  ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   1 error generated.


vim +617 drivers/perf/arm-cmn.c

a88fa6c28b867a Robin Murphy   2021-12-03  600  
0ba64770a2f2e5 Robin Murphy   2020-09-18  601  struct arm_cmn_hw_event {
0ba64770a2f2e5 Robin Murphy   2020-09-18  602  	struct arm_cmn_node *dn;
359414b33e00ba Robin Murphy   2024-09-02  603  	u64 dtm_idx[DIV_ROUND_UP(CMN_MAX_NODES_PER_EVENT * 2, 64)];
7633ec2c262fab Robin Murphy   2023-10-20  604  	s8 dtc_idx[CMN_MAX_DTCS];
0ba64770a2f2e5 Robin Murphy   2020-09-18  605  	u8 num_dns;
60d1504070c22c Robin Murphy   2021-12-03  606  	u8 dtm_offset;
4a112585ebe8cb Ilkka Koskinen 2024-06-17  607  
4a112585ebe8cb Ilkka Koskinen 2024-06-17  608  	/*
4a112585ebe8cb Ilkka Koskinen 2024-06-17  609  	 * WP config registers are divided to UP and DOWN events. We need to
4a112585ebe8cb Ilkka Koskinen 2024-06-17  610  	 * keep to track only one of them.
4a112585ebe8cb Ilkka Koskinen 2024-06-17  611  	 */
4a112585ebe8cb Ilkka Koskinen 2024-06-17  612  	DECLARE_BITMAP(wp_idx, CMN_MAX_XPS);
4a112585ebe8cb Ilkka Koskinen 2024-06-17  613  
23760a0144173e Robin Murphy   2022-04-18  614  	bool wide_sel;
65adf71398f5af Robin Murphy   2022-04-18  615  	enum cmn_filter_select filter_sel;
0ba64770a2f2e5 Robin Murphy   2020-09-18  616  };
ff436cee694ee8 Robin Murphy   2024-09-02 @617  static_assert(sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event, target));
0ba64770a2f2e5 Robin Murphy   2020-09-18  618  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5
  2026-05-04 13:39 ` [PATCH v2] " Aviv Bakal
  2026-05-05  2:31   ` kernel test robot
@ 2026-05-21 16:02   ` Robin Murphy
  2026-05-24 15:38   ` [PATCH v3 0/2] " Aviv Bakal
  2 siblings, 0 replies; 7+ messages in thread
From: Robin Murphy @ 2026-05-21 16:02 UTC (permalink / raw)
  To: Aviv Bakal, will, mark.rutland
  Cc: linux-arm-kernel, linux-perf-users, linux-kernel, zeev, blakgeof

On 2026-05-04 2:39 pm, Aviv Bakal wrote:
> Graviton5 uses a customised CMN-S3 implementation where certain
> discovery registers report zeroed fields. Add the following workarounds:
> 
>   - Introduce a dedicated ACPI HID to identify the Graviton5 CMN variant.
>   - Derive the DTC domain from the XP node ID, since the unit info
>     register reports it as zero.
>   - Set the DTC logical ID from the computed domain ID, since the node
>     info register's logical ID field is also zeroed.
> 
> Signed-off-by: Aviv Bakal <avivb@amazon.com>
> ---
> v2:
>   - Use computed domain ID (xp->dtc) instead of XP logical ID for DTC
>     logical ID assignment.
> 
>   drivers/perf/arm-cmn.c | 32 +++++++++++++++++++++++++++++++-
>   1 file changed, 31 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index f5305c8fdca4..8ee3f8638602 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -31,7 +31,8 @@
>   #define CMN_CHILD_NODE_ADDR		GENMASK(29, 0)
>   #define CMN_CHILD_NODE_EXTERNAL		BIT(31)
>   
> -#define CMN_MAX_DIMENSION		12
> +/* Some implementations use a mesh larger than the architectural max of 12 */
> +#define CMN_MAX_DIMENSION		14
>   #define CMN_MAX_XPS			(CMN_MAX_DIMENSION * CMN_MAX_DIMENSION)
>   #define CMN_MAX_DTMS			(CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4)
>   
> @@ -214,6 +215,8 @@ enum cmn_part {
>   	PART_CMN700 = 0x43c,
>   	PART_CI700 = 0x43a,
>   	PART_CMN_S3 = 0x43e,
> +	/* Synthetic part number, overridden to PART_CMN_S3 during discovery */
> +	PART_GRAVITON5 = 0xa5,
>   };
>   
>   /* CMN-600 r0px shouldn't exist in silicon, thankfully */
> @@ -2221,6 +2224,18 @@ static unsigned int arm_cmn_dtc_domain(struct arm_cmn *cmn, void __iomem *xp_reg
>   	return FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN, readl_relaxed(xp_region + offset));
>   }
>   
> +static unsigned int arm_cmn_graviton5_dtc_domain(u16 xp_id)
> +{
> +	unsigned int x = (xp_id >> 7) & 0xf;
> +	unsigned int y = (xp_id >> 3) & 0xf;
> +
> +	/*
> +	 * The unit info register reads as zero; derive the DTC domain from
> +	 * the XP's mesh coordinates over the 10x14 mesh.
> +	 */
> +	return (x / 5) + (y / 7) * 2;
> +}
> +
>   static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node)
>   {
>   	int level;
> @@ -2266,6 +2281,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
>   	u64 reg;
>   	int i, j;
>   	size_t sz;
> +	bool graviton5_workaround = false;
>   
>   	arm_cmn_init_node_info(cmn, rgn_offset, &cfg);
>   	if (cfg.type != CMN_TYPE_CFG)
> @@ -2276,6 +2292,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
>   	reg = readq_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_01);
>   	part = FIELD_GET(CMN_CFGM_PID0_PART_0, reg);
>   	part |= FIELD_GET(CMN_CFGM_PID1_PART_1, reg) << 8;
> +
> +	/* Graviton5 has a customised CMN-S3 which needs some fixups */
> +	if (cmn->part == PART_GRAVITON5) {
> +		cmn->part = PART_CMN_S3;
> +		graviton5_workaround = true;
> +	}
> +
>   	/* 600AE is close enough that it's not really worth more complexity */
>   	if (part == PART_CMN600AE)
>   		part = PART_CMN600;
> @@ -2365,6 +2388,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
>   
>   		if (cmn->part == PART_CMN600)
>   			xp->dtc = -1;
> +		else if (graviton5_workaround)
> +			xp->dtc = arm_cmn_graviton5_dtc_domain(xp->id);
>   		else
>   			xp->dtc = arm_cmn_dtc_domain(cmn, xp_region);
>   
> @@ -2443,6 +2468,10 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
>   
>   			switch (dn->type) {
>   			case CMN_TYPE_DTC:
> +				if (graviton5_workaround) {
> +					/* Node info logical ID is zeroed; use the domain ID */
> +					dn->logid = xp->dtc;

No, this really should be xp->logid - other than DTC0 whose logical ID 
is always forced to 0, but if it naturally lines up that way anyway then 
all the better - since that is consistent with how the tooling generates 
a regular hardware configuration. The cmn->dtc array ends up sorted by 
logical ID as that is a guaranteed stable order for all CMN versions (as 
the domain numbers themselves aren't always known), and it is the 
interrupt order defined by the firmware bindings which we need to match. 
If it's not guaranteed that the actual domain numbers are in the same 
order then we have an existing bug in general (I'll have to check...)

Otherwise, this looks OK to me - in fact surprisingly pleasant and 
unobtrusive given that it's a pretty horrible hardware issue to work 
around. I guess we get lucky that it's an easy topology to compute.

Thanks,
Robin.

> +				}
>   				cmn->num_dtcs++;
>   				dn++;
>   				break;
> @@ -2658,6 +2687,7 @@ static const struct acpi_device_id arm_cmn_acpi_match[] = {
>   	{ "ARMHC650" },
>   	{ "ARMHC700" },
>   	{ "ARMHC003" },
> +	{ "AMZN0070", PART_GRAVITON5 },
>   	{}
>   };
>   MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match);



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v3 0/2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5
  2026-05-04 13:39 ` [PATCH v2] " Aviv Bakal
  2026-05-05  2:31   ` kernel test robot
  2026-05-21 16:02   ` Robin Murphy
@ 2026-05-24 15:38   ` Aviv Bakal
  2026-05-24 15:38     ` [PATCH v3 1/2] perf/arm-cmn: Move struct arm_cmn_hw_event into struct hw_perf_event Aviv Bakal
  2026-05-24 15:38     ` [PATCH v3 2/2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5 Aviv Bakal
  2 siblings, 2 replies; 7+ messages in thread
From: Aviv Bakal @ 2026-05-24 15:38 UTC (permalink / raw)
  To: robin.murphy, will, mark.rutland
  Cc: linux-arm-kernel, linux-perf-users, linux-kernel, avivb, zeev,
	blakgeof

This series adds support for Graviton5's customised CMN-S3 which has
zeroed discovery registers.

Robin, I understand moving driver state into the core perf header isn't
ideal, but I couldn't find another way to grow the struct. The v2
submission failed the kernel test robot build on i386 (COMPILE_TEST) due
to arm_cmn_hw_event exceeding the static_assert against the 'target'
field offset when CMN_MAX_DIMENSION is increased beyond 12.

Patch 1 moves struct arm_cmn_hw_event into the hw_perf_event union to
resolve this. I'd appreciate your feedback on this approach, or any
alternative you'd suggest.

Patch 2 adds the Graviton5 workarounds themselves (unchanged from v2
except for the DTC logid fix below).

Changes since v2:
 - Revert DTC logical ID assignment back to xp->logid (per Robin's
   review)
 - Add patch 1/2 to move arm_cmn_hw_event into hw_perf_event union
   to resolve 32-bit build failure

Aviv Bakal (2):
  perf/arm-cmn: Move struct arm_cmn_hw_event into struct hw_perf_event
  perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5

 drivers/perf/arm-cmn.c     | 55 +++++++++++++++++++++-----------------
 include/linux/perf_event.h | 22 +++++++++++++++
 2 files changed, 52 insertions(+), 25 deletions(-)

-- 
2.47.3



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v3 1/2] perf/arm-cmn: Move struct arm_cmn_hw_event into struct hw_perf_event
  2026-05-24 15:38   ` [PATCH v3 0/2] " Aviv Bakal
@ 2026-05-24 15:38     ` Aviv Bakal
  2026-05-24 15:38     ` [PATCH v3 2/2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5 Aviv Bakal
  1 sibling, 0 replies; 7+ messages in thread
From: Aviv Bakal @ 2026-05-24 15:38 UTC (permalink / raw)
  To: robin.murphy, will, mark.rutland
  Cc: linux-arm-kernel, linux-perf-users, linux-kernel, avivb, zeev,
	blakgeof

In order to increase CMN_MAX_DIMENSION beyond 12 (required for meshes
larger than 12x12, such as Graviton5), the arm_cmn_hw_event struct must
grow. Since it is overlaid on the beginning of hw_perf_event via an
unsafe cast, increasing its size would violate the static_assert that
guards against overflowing into the 'target' field.

Resolve this by moving struct arm_cmn_hw_event into the hw_perf_event
union as a proper named member, eliminating the cast in to_cmn_hw() and
making the size reservation explicit. Set CMN_MAX_DIMENSION to 14 to
accommodate larger mesh topologies.

Signed-off-by: Aviv Bakal <avivb@amazon.com>
---
 drivers/perf/arm-cmn.c     | 26 +-------------------------
 include/linux/perf_event.h | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index f5305c8fdca4..3443b819afed 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -31,13 +31,8 @@
 #define CMN_CHILD_NODE_ADDR		GENMASK(29, 0)
 #define CMN_CHILD_NODE_EXTERNAL		BIT(31)
 
-#define CMN_MAX_DIMENSION		12
-#define CMN_MAX_XPS			(CMN_MAX_DIMENSION * CMN_MAX_DIMENSION)
 #define CMN_MAX_DTMS			(CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4)
 
-/* Currently XPs are the node type we can have most of; others top out at 128 */
-#define CMN_MAX_NODES_PER_EVENT		CMN_MAX_XPS
-
 /* The CFG node has various info besides the discovery tree */
 #define CMN_CFGM_PERIPH_ID_01		0x0008
 #define CMN_CFGM_PID0_PART_0		GENMASK_ULL(7, 0)
@@ -148,7 +143,6 @@
 #define CMN_DT_PMSRR_SS_REQ		BIT(0)
 
 #define CMN_DT_NUM_COUNTERS		8
-#define CMN_MAX_DTCS			4
 
 /*
  * Even in the worst case a DTC counter can't wrap in fewer than 2^42 cycles,
@@ -595,24 +589,6 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id)
 static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
 #endif
 
-struct arm_cmn_hw_event {
-	struct arm_cmn_node *dn;
-	u64 dtm_idx[DIV_ROUND_UP(CMN_MAX_NODES_PER_EVENT * 2, 64)];
-	s8 dtc_idx[CMN_MAX_DTCS];
-	u8 num_dns;
-	u8 dtm_offset;
-
-	/*
-	 * WP config registers are divided to UP and DOWN events. We need to
-	 * keep to track only one of them.
-	 */
-	DECLARE_BITMAP(wp_idx, CMN_MAX_XPS);
-
-	bool wide_sel;
-	enum cmn_filter_select filter_sel;
-};
-static_assert(sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event, target));
-
 #define for_each_hw_dn(hw, dn, i) \
 	for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
 
@@ -622,7 +598,7 @@ static_assert(sizeof(struct arm_cmn_hw_event) <= offsetof(struct hw_perf_event,
 
 static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
 {
-	return (struct arm_cmn_hw_event *)&event->hw;
+	return &event->hw.cmn;
 }
 
 static void arm_cmn_set_index(u64 x[], unsigned int pos, unsigned int val)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 48d851fbd8ea..c38576a8e338 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -119,6 +119,7 @@ struct perf_branch_stack {
 };
 
 struct task_struct;
+struct arm_cmn_node;
 
 /*
  * extra PMU register associated with an event
@@ -200,6 +201,27 @@ struct hw_perf_event {
 			u64	conf;
 			u64	conf1;
 		};
+#ifdef CONFIG_ARM_CMN
+/* Some implementations use a mesh larger than the architectural max of 12 */
+#define CMN_MAX_DIMENSION		14
+#define CMN_MAX_XPS			(CMN_MAX_DIMENSION * CMN_MAX_DIMENSION)
+#define CMN_MAX_NODES_PER_EVENT		CMN_MAX_XPS
+#define CMN_MAX_DTCS			4
+		struct arm_cmn_hw_event { /* arm_cmn */
+			/*
+			 * CMN PMU event state overlaid on hw_perf_event.
+			 * Must fit before the 'target' field.
+			 */
+			struct arm_cmn_node	*dn;
+			u64			dtm_idx[DIV_ROUND_UP(CMN_MAX_NODES_PER_EVENT * 2, 64)];
+			s8			dtc_idx[CMN_MAX_DTCS];
+			u8			num_dns;
+			u8			dtm_offset;
+			DECLARE_BITMAP(wp_idx, CMN_MAX_XPS);
+			bool			wide_sel;
+			int			filter_sel;
+		} cmn;
+#endif
 	};
 	/*
 	 * If the event is a per task event, this will point to the task in
-- 
2.47.3



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 2/2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5
  2026-05-24 15:38   ` [PATCH v3 0/2] " Aviv Bakal
  2026-05-24 15:38     ` [PATCH v3 1/2] perf/arm-cmn: Move struct arm_cmn_hw_event into struct hw_perf_event Aviv Bakal
@ 2026-05-24 15:38     ` Aviv Bakal
  1 sibling, 0 replies; 7+ messages in thread
From: Aviv Bakal @ 2026-05-24 15:38 UTC (permalink / raw)
  To: robin.murphy, will, mark.rutland
  Cc: linux-arm-kernel, linux-perf-users, linux-kernel, avivb, zeev,
	blakgeof

Graviton5 uses a customised CMN-S3 implementation where certain
discovery registers report zeroed fields. Add the following workarounds:

 - Introduce a dedicated ACPI HID to identify the Graviton5 CMN variant.
 - Derive the DTC domain from the XP node ID, since the unit info
   register reports it as zero.
 - Set the DTC logical ID from the XP's logical ID, since the node info
   register's logical ID field is also zeroed.

Signed-off-by: Aviv Bakal <avivb@amazon.com>
---
 drivers/perf/arm-cmn.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 3443b819afed..0184e598777a 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -208,6 +208,8 @@ enum cmn_part {
 	PART_CMN700 = 0x43c,
 	PART_CI700 = 0x43a,
 	PART_CMN_S3 = 0x43e,
+	/* Synthetic part number, overridden to PART_CMN_S3 during discovery */
+	PART_GRAVITON5 = 0xa5,
 };
 
 /* CMN-600 r0px shouldn't exist in silicon, thankfully */
@@ -2197,6 +2199,18 @@ static unsigned int arm_cmn_dtc_domain(struct arm_cmn *cmn, void __iomem *xp_reg
 	return FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN, readl_relaxed(xp_region + offset));
 }
 
+static unsigned int arm_cmn_graviton5_dtc_domain(u16 xp_id)
+{
+	unsigned int x = (xp_id >> 7) & 0xf;
+	unsigned int y = (xp_id >> 3) & 0xf;
+
+	/*
+	 * The unit info register reads as zero; derive the DTC domain from
+	 * the XP's mesh coordinates over the 10x14 mesh.
+	 */
+	return (x / 5) + (y / 7) * 2;
+}
+
 static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node)
 {
 	int level;
@@ -2242,6 +2256,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 	u64 reg;
 	int i, j;
 	size_t sz;
+	bool graviton5_workaround = false;
 
 	arm_cmn_init_node_info(cmn, rgn_offset, &cfg);
 	if (cfg.type != CMN_TYPE_CFG)
@@ -2252,6 +2267,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 	reg = readq_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_01);
 	part = FIELD_GET(CMN_CFGM_PID0_PART_0, reg);
 	part |= FIELD_GET(CMN_CFGM_PID1_PART_1, reg) << 8;
+
+	/* Graviton5 has a customised CMN-S3 which needs some fixups */
+	if (cmn->part == PART_GRAVITON5) {
+		cmn->part = PART_CMN_S3;
+		graviton5_workaround = true;
+	}
+
 	/* 600AE is close enough that it's not really worth more complexity */
 	if (part == PART_CMN600AE)
 		part = PART_CMN600;
@@ -2341,6 +2363,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 
 		if (cmn->part == PART_CMN600)
 			xp->dtc = -1;
+		else if (graviton5_workaround)
+			xp->dtc = arm_cmn_graviton5_dtc_domain(xp->id);
 		else
 			xp->dtc = arm_cmn_dtc_domain(cmn, xp_region);
 
@@ -2419,6 +2443,10 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 
 			switch (dn->type) {
 			case CMN_TYPE_DTC:
+				if (graviton5_workaround) {
+					/* Node info logical ID is zeroed; use the XP's */
+					dn->logid = xp->logid;
+				}
 				cmn->num_dtcs++;
 				dn++;
 				break;
@@ -2634,6 +2662,7 @@ static const struct acpi_device_id arm_cmn_acpi_match[] = {
 	{ "ARMHC650" },
 	{ "ARMHC700" },
 	{ "ARMHC003" },
+	{ "AMZN0070", PART_GRAVITON5 },
 	{}
 };
 MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match);
-- 
2.47.3



^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2026-05-24 15:39 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-03 15:51 [PATCH] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5 Aviv Bakal
2026-05-04 13:39 ` [PATCH v2] " Aviv Bakal
2026-05-05  2:31   ` kernel test robot
2026-05-21 16:02   ` Robin Murphy
2026-05-24 15:38   ` [PATCH v3 0/2] " Aviv Bakal
2026-05-24 15:38     ` [PATCH v3 1/2] perf/arm-cmn: Move struct arm_cmn_hw_event into struct hw_perf_event Aviv Bakal
2026-05-24 15:38     ` [PATCH v3 2/2] perf/arm-cmn: Add workarounds for CMN-S3 on Graviton5 Aviv Bakal

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.