Linux-ARM-Kernel Archive on lore.kernel.org

Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] arm64: dts: imx93-9x9-qsb: Add tianma,tm050rdh03 panel
From: Liu Ying @ 2026-04-07  9:15 UTC (permalink / raw)
  To: Frank Li, Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	Rob Herring, Krzysztof Kozlowski, Conor Dooley
  Cc: imx, linux-arm-kernel, devicetree, linux-kernel, Liu Ying

Support tianma,tm050rdh03 DPI panel on i.MX93 9x9 QSB.

The panel connects with the QSB board through an adapter board[1]
designed by NXP.

Link: https://www.nxp.com/design/design-center/development-boards-and-designs/parallel-lcd-display:TM050RDH03-41 [1]
Signed-off-by: Liu Ying <victor.liu@nxp.com>
---
 arch/arm64/boot/dts/freescale/Makefile             |   2 +
 .../imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtsi       | 110 +++++++++++++++++++++
 .../imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtso       | 106 +-------------------
 .../freescale/imx93-9x9-qsb-tianma-tm050rdh03.dtso |  14 +++
 4 files changed, 127 insertions(+), 105 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/Makefile b/arch/arm64/boot/dts/freescale/Makefile
index 711e36cc2c99..6315fb8390ff 100644
--- a/arch/arm64/boot/dts/freescale/Makefile
+++ b/arch/arm64/boot/dts/freescale/Makefile
@@ -455,9 +455,11 @@ dtb-$(CONFIG_ARCH_MXC) += imx93-9x9-qsb.dtb
 imx93-9x9-qsb-can1-dtbs += imx93-9x9-qsb.dtb imx93-9x9-qsb-can1.dtbo
 imx93-9x9-qsb-i3c-dtbs += imx93-9x9-qsb.dtb imx93-9x9-qsb-i3c.dtbo
 imx93-9x9-qsb-ontat-kd50g21-40nt-a1-dtbs += imx93-9x9-qsb.dtb imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtbo
+imx93-9x9-qsb-tianma-tm050rdh03-dtbs += imx93-9x9-qsb.dtb imx93-9x9-qsb-tianma-tm050rdh03.dtbo
 dtb-$(CONFIG_ARCH_MXC) += imx93-9x9-qsb-can1.dtb
 dtb-$(CONFIG_ARCH_MXC) += imx93-9x9-qsb-i3c.dtb
 dtb-$(CONFIG_ARCH_MXC) += imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtb
+dtb-$(CONFIG_ARCH_MXC) += imx93-9x9-qsb-tianma-tm050rdh03.dtb
 
 dtb-$(CONFIG_ARCH_MXC) += imx93-11x11-evk.dtb
 dtb-$(CONFIG_ARCH_MXC) += imx93-11x11-frdm.dtb
diff --git a/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtsi b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtsi
new file mode 100644
index 000000000000..d167c9fc3b8f
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtsi
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright 2026 NXP
+ */
+
+/dts-v1/;
+/plugin/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include "imx93-pinfunc.h"
+
+&{/} {
+	backlight: backlight {
+		compatible = "gpio-backlight";
+		gpios = <&pcal6524 2 GPIO_ACTIVE_HIGH>;
+	};
+
+	panel {
+		compatible = "ontat,kd50g21-40nt-a1";
+		backlight = <&backlight>;
+		power-supply = <&reg_rpi_3v3>;
+
+		port {
+			panel_in: endpoint {
+				remote-endpoint = <&dpi_to_panel>;
+			};
+		};
+	};
+};
+
+&dpi_bridge {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_lcdif>;
+	status = "okay";
+
+	ports {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		port@1 {
+			reg = <1>;
+
+			dpi_to_panel: endpoint {
+				remote-endpoint = <&panel_in>;
+				bus-width = <18>;
+			};
+		};
+	};
+};
+
+&iomuxc {
+	pinctrl_lcdif: lcdifgrp {
+		fsl,pins = <
+			MX93_PAD_GPIO_IO00__MEDIAMIX_DISP_CLK		0x31e
+			MX93_PAD_GPIO_IO01__MEDIAMIX_DISP_DE		0x31e
+			MX93_PAD_GPIO_IO02__MEDIAMIX_DISP_VSYNC		0x31e
+			MX93_PAD_GPIO_IO03__MEDIAMIX_DISP_HSYNC		0x31e
+			MX93_PAD_GPIO_IO04__MEDIAMIX_DISP_DATA00	0x31e
+			MX93_PAD_GPIO_IO05__MEDIAMIX_DISP_DATA01	0x31e
+			MX93_PAD_GPIO_IO06__MEDIAMIX_DISP_DATA02	0x31e
+			MX93_PAD_GPIO_IO07__MEDIAMIX_DISP_DATA03	0x31e
+			MX93_PAD_GPIO_IO08__MEDIAMIX_DISP_DATA04	0x31e
+			MX93_PAD_GPIO_IO09__MEDIAMIX_DISP_DATA05	0x31e
+			MX93_PAD_GPIO_IO10__MEDIAMIX_DISP_DATA06	0x31e
+			MX93_PAD_GPIO_IO11__MEDIAMIX_DISP_DATA07	0x31e
+			MX93_PAD_GPIO_IO12__MEDIAMIX_DISP_DATA08	0x31e
+			MX93_PAD_GPIO_IO13__MEDIAMIX_DISP_DATA09	0x31e
+			MX93_PAD_GPIO_IO14__MEDIAMIX_DISP_DATA10	0x31e
+			MX93_PAD_GPIO_IO15__MEDIAMIX_DISP_DATA11	0x31e
+			MX93_PAD_GPIO_IO16__MEDIAMIX_DISP_DATA12	0x31e
+			MX93_PAD_GPIO_IO17__MEDIAMIX_DISP_DATA13	0x31e
+			MX93_PAD_GPIO_IO18__MEDIAMIX_DISP_DATA14	0x31e
+			MX93_PAD_GPIO_IO19__MEDIAMIX_DISP_DATA15	0x31e
+			MX93_PAD_GPIO_IO20__MEDIAMIX_DISP_DATA16	0x31e
+			MX93_PAD_GPIO_IO21__MEDIAMIX_DISP_DATA17	0x31e
+		>;
+	};
+};
+
+&lcdif {
+	status = "okay";
+};
+
+&media_blk_ctrl {
+	status = "okay";
+};
+
+&pcal6524 {
+	/*
+	 * exp-sel-hog has property 'output-low' while DT overlay doesn't
+	 * support /delete-property/. Both 'output-low' and 'output-high'
+	 * will exist under hog nodes if DT overlay file sets 'output-high'.
+	 * Workaround is to disable this hog and create new hog with
+	 * 'output-high'.
+	 */
+	exp-sel-hog {
+		status = "disabled";
+	};
+
+	exp-high-sel-hog {
+		gpio-hog;
+		gpios = <22 GPIO_ACTIVE_HIGH>;
+		output-high;
+	};
+};
+
+&sai3 {
+	/* disable due to GPIO12 and GPIO17~20 pin conflicts with LCDIF */
+	status = "disabled";
+};
diff --git a/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtso b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtso
index d167c9fc3b8f..356533a7b513 100644
--- a/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtso
+++ b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtso
@@ -3,108 +3,4 @@
  * Copyright 2026 NXP
  */
 
-/dts-v1/;
-/plugin/;
-
-#include <dt-bindings/gpio/gpio.h>
-#include "imx93-pinfunc.h"
-
-&{/} {
-	backlight: backlight {
-		compatible = "gpio-backlight";
-		gpios = <&pcal6524 2 GPIO_ACTIVE_HIGH>;
-	};
-
-	panel {
-		compatible = "ontat,kd50g21-40nt-a1";
-		backlight = <&backlight>;
-		power-supply = <&reg_rpi_3v3>;
-
-		port {
-			panel_in: endpoint {
-				remote-endpoint = <&dpi_to_panel>;
-			};
-		};
-	};
-};
-
-&dpi_bridge {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_lcdif>;
-	status = "okay";
-
-	ports {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		port@1 {
-			reg = <1>;
-
-			dpi_to_panel: endpoint {
-				remote-endpoint = <&panel_in>;
-				bus-width = <18>;
-			};
-		};
-	};
-};
-
-&iomuxc {
-	pinctrl_lcdif: lcdifgrp {
-		fsl,pins = <
-			MX93_PAD_GPIO_IO00__MEDIAMIX_DISP_CLK		0x31e
-			MX93_PAD_GPIO_IO01__MEDIAMIX_DISP_DE		0x31e
-			MX93_PAD_GPIO_IO02__MEDIAMIX_DISP_VSYNC		0x31e
-			MX93_PAD_GPIO_IO03__MEDIAMIX_DISP_HSYNC		0x31e
-			MX93_PAD_GPIO_IO04__MEDIAMIX_DISP_DATA00	0x31e
-			MX93_PAD_GPIO_IO05__MEDIAMIX_DISP_DATA01	0x31e
-			MX93_PAD_GPIO_IO06__MEDIAMIX_DISP_DATA02	0x31e
-			MX93_PAD_GPIO_IO07__MEDIAMIX_DISP_DATA03	0x31e
-			MX93_PAD_GPIO_IO08__MEDIAMIX_DISP_DATA04	0x31e
-			MX93_PAD_GPIO_IO09__MEDIAMIX_DISP_DATA05	0x31e
-			MX93_PAD_GPIO_IO10__MEDIAMIX_DISP_DATA06	0x31e
-			MX93_PAD_GPIO_IO11__MEDIAMIX_DISP_DATA07	0x31e
-			MX93_PAD_GPIO_IO12__MEDIAMIX_DISP_DATA08	0x31e
-			MX93_PAD_GPIO_IO13__MEDIAMIX_DISP_DATA09	0x31e
-			MX93_PAD_GPIO_IO14__MEDIAMIX_DISP_DATA10	0x31e
-			MX93_PAD_GPIO_IO15__MEDIAMIX_DISP_DATA11	0x31e
-			MX93_PAD_GPIO_IO16__MEDIAMIX_DISP_DATA12	0x31e
-			MX93_PAD_GPIO_IO17__MEDIAMIX_DISP_DATA13	0x31e
-			MX93_PAD_GPIO_IO18__MEDIAMIX_DISP_DATA14	0x31e
-			MX93_PAD_GPIO_IO19__MEDIAMIX_DISP_DATA15	0x31e
-			MX93_PAD_GPIO_IO20__MEDIAMIX_DISP_DATA16	0x31e
-			MX93_PAD_GPIO_IO21__MEDIAMIX_DISP_DATA17	0x31e
-		>;
-	};
-};
-
-&lcdif {
-	status = "okay";
-};
-
-&media_blk_ctrl {
-	status = "okay";
-};
-
-&pcal6524 {
-	/*
-	 * exp-sel-hog has property 'output-low' while DT overlay doesn't
-	 * support /delete-property/. Both 'output-low' and 'output-high'
-	 * will exist under hog nodes if DT overlay file sets 'output-high'.
-	 * Workaround is to disable this hog and create new hog with
-	 * 'output-high'.
-	 */
-	exp-sel-hog {
-		status = "disabled";
-	};
-
-	exp-high-sel-hog {
-		gpio-hog;
-		gpios = <22 GPIO_ACTIVE_HIGH>;
-		output-high;
-	};
-};
-
-&sai3 {
-	/* disable due to GPIO12 and GPIO17~20 pin conflicts with LCDIF */
-	status = "disabled";
-};
+#include "imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtsi"
diff --git a/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-tianma-tm050rdh03.dtso b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-tianma-tm050rdh03.dtso
new file mode 100644
index 000000000000..c233797ec28c
--- /dev/null
+++ b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-tianma-tm050rdh03.dtso
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright 2026 NXP
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+#include "imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtsi"
+
+&{/} {
+	panel {
+		compatible = "tianma,tm050rdh03";
+		enable-gpios = <&pcal6524 8 GPIO_ACTIVE_HIGH>;
+	};
+};

---
base-commit: 816f193dd0d95246f208590924dd962b192def78
change-id: 20260407-tianma-tm050rdh03-imx93-9x9-qsb-6e4bbbde3d08

Best regards,
-- 
Liu Ying <victor.liu@nxp.com>



^ permalink raw reply related

* RE: [PATCH V10 03/13] PCI: dwc: Parse Root Port nodes in dw_pcie_host_init()
From: Sherry Sun @ 2026-04-07  9:18 UTC (permalink / raw)
  To: Manivannan Sadhasivam
  Cc: robh@kernel.org, krzk+dt@kernel.org, conor+dt@kernel.org,
	Frank Li, s.hauer@pengutronix.de, kernel@pengutronix.de,
	festevam@gmail.com, lpieralisi@kernel.org, kwilczynski@kernel.org,
	bhelgaas@google.com, Hongxing Zhu, l.stach@pengutronix.de,
	imx@lists.linux.dev, linux-pci@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org, devicetree@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <gxqcmujdzlzcoawn4rkttasftuyusqtvycu7oagogxaw4yggeo@ww6rjdwbyj2w>

> On Tue, Apr 07, 2026 at 03:21:30AM +0000, Sherry Sun wrote:
> > > On Thu, Apr 02, 2026 at 05:50:57PM +0800, Sherry Sun wrote:
> > > > Add support for parsing Root Port child nodes in
> > > > dw_pcie_host_init() using pci_host_common_parse_ports(). This
> > > > allows DWC-based drivers to specify Root Port properties (like
> > > > reset GPIOs) in individual Root Port nodes rather than in the host bridge
> node.
> > > >
> > > > Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
> > > > ---
> > > >  drivers/pci/controller/dwc/pcie-designware-host.c | 8 ++++++++
> > > >  1 file changed, 8 insertions(+)
> > > >
> > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > index da152c31bb2e..f6fca984fb34 100644
> > > > --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > @@ -20,6 +20,7 @@
> > > >  #include <linux/platform_device.h>
> > > >
> > > >  #include "../../pci.h"
> > > > +#include "../pci-host-common.h"
> > > >  #include "pcie-designware.h"
> > > >
> > > >  static struct pci_ops dw_pcie_ops; @@ -581,6 +582,13 @@ int
> > > > dw_pcie_host_init(struct dw_pcie_rp *pp)
> > > >
> > > >         pp->bridge = bridge;
> > > >
> > > > +       /* Parse Root Port nodes if present */
> > > > +       ret = pci_host_common_parse_ports(dev, bridge);
> > > > +       if (ret && ret != -ENOENT) {
> > > > +               dev_err(dev, "Failed to parse Root Port nodes: %d\n", ret);
> > > > +               return ret;
> > >
> > > Won't this change break drivers that parse Root Ports on their own?
> > > Either you need to modify them also in this change or call this API
> > > from imx6 driver and let other drivers switch to it in a phased manner.
> > >
> > > I perfer the latter.
> >
> > Hi Mani, sorry I didn't fully get your point here, there are no
> > changes to this part V10, for drivers that parse Root Ports on their
> > own, here pci_host_common_parse_ports() will return -ENOENT, so
> > nothing break as we discussed this in V8
> https://lore.ke/
> rnel.org%2Fall%2Fdcl3bdljrdzgeaybrg3dc5uaxkebkjns7pajix6mxxftao5g4m%40
> vm3ywyyp4ujh%2F&data=05%7C02%7Csherry.sun%40nxp.com%7Cd9faef64b
> 8154bdbc6ee08de94724b22%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%
> 7C0%7C639111415791802118%7CUnknown%7CTWFpbGZsb3d8eyJFbXB0eU1
> hcGkiOnRydWUsIlYiOiIwLjAuMDAwMCIsIlAiOiJXaW4zMiIsIkFOIjoiTWFpbCIsIl
> dUIjoyfQ%3D%3D%7C0%7C%7C%7C&sdata=POsurqr9RqBCnaQyeXDK2HQTN
> a4Nc0tfl7thSiM9qHA%3D&reserved=0.
> >
>
> So if this API gets called first, it will acquire PERST# from the Root Port node
> and if the controller drivers try to do the same in their own parsing code,
> PERST# request will return -EBUSY and the probe will fail.
>
> On the other hand, if the controller drivers parse PERST# first, this API will
> return -EBUSY and will result in probe failure.
>
> Only way to fix this issue would be to call this API from imx6 driver for now
> and start migrating other drivers later.
>

Ok, get your point here. Your assumption is based on the premise that the controller
driver parse the reset-gpios in the Root Port node, not that most controller drivers
now use reset under the host bridge node. For reset-gpios in the Root Port node,
they should eventually switch to this common API.

Anyway, I will call this API in imx6 driver at this stage to avoid impact other platforms.

Best Regards
Sherry


^ permalink raw reply

* Re: [PATCH v2 1/3] arm64: mm: Fix rodata=full block mapping support for realm guests
From: Catalin Marinas @ 2026-04-07  9:19 UTC (permalink / raw)
  To: Ryan Roberts
  Cc: Will Deacon, David Hildenbrand (Arm), Dev Jain, Yang Shi,
	Suzuki K Poulose, Jinjiang Tu, Kevin Brodsky, linux-arm-kernel,
	linux-kernel, stable
In-Reply-To: <160ec79a-f842-421a-bfde-5b4da32b3b4e@arm.com>

On Tue, Apr 07, 2026 at 09:33:50AM +0100, Ryan Roberts wrote:
> On 02/04/2026 21:43, Catalin Marinas wrote:
> > On Mon, Mar 30, 2026 at 05:17:02PM +0100, Ryan Roberts wrote:
> >>  int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
> >>  {
> >>  	int ret;
> >>  
> >> -	/*
> >> -	 * !BBML2_NOABORT systems should not be trying to change permissions on
> >> -	 * anything that is not pte-mapped in the first place. Just return early
> >> -	 * and let the permission change code raise a warning if not already
> >> -	 * pte-mapped.
> >> -	 */
> >> -	if (!system_supports_bbml2_noabort())
> >> -		return 0;
> >> -
> >>  	/*
> >>  	 * If the region is within a pte-mapped area, there is no need to try to
> >>  	 * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may
> >>  	 * change permissions from atomic context so for those cases (which are
> >>  	 * always pte-mapped), we must not go any further because taking the
> >> -	 * mutex below may sleep.
> >> +	 * mutex below may sleep. Do not call force_pte_mapping() here because
> >> +	 * it could return a confusing result if called from a secondary cpu
> >> +	 * prior to finalizing caps. Instead, linear_map_requires_bbml2 gives us
> >> +	 * what we need.
> >>  	 */
> >> -	if (force_pte_mapping() || is_kfence_address((void *)start))
> >> +	if (!linear_map_requires_bbml2 || is_kfence_address((void *)start))
> >>  		return 0;
> >>  
> >> +	if (!system_supports_bbml2_noabort()) {
> >> +		/*
> >> +		 * !BBML2_NOABORT systems should not be trying to change
> >> +		 * permissions on anything that is not pte-mapped in the first
> >> +		 * place. Just return early and let the permission change code
> >> +		 * raise a warning if not already pte-mapped.
> >> +		 */
> >> +		if (system_capabilities_finalized())
> >> +			return 0;
> >> +
> >> +		/*
> >> +		 * Boot-time: split_kernel_leaf_mapping_locked() allocates from
> >> +		 * page allocator. Can't split until it's available.
> >> +		 */
> >> +		if (WARN_ON(!page_alloc_available))
> >> +			return -EBUSY;
> >> +
> >> +		/*
> >> +		 * Boot-time: Started secondary cpus but don't know if they
> >> +		 * support BBML2_NOABORT yet. Can't allow splitting in this
> >> +		 * window in case they don't.
> >> +		 */
> >> +		if (WARN_ON(num_online_cpus() > 1))
> >> +			return -EBUSY;
> >> +	}
> > 
> > I think sashiko is over cautions here
> > (https://sashiko.dev/#/patchset/20260330161705.3349825-1-ryan.roberts@arm.com)
> > but it has a somewhat valid point from the perspective of
> > num_online_cpus() semantics. We have have num_online_cpus() == 1 while
> > having a secondary CPU just booted and with its MMU enabled. I don't
> > think we can have any asynchronous tasks running at that point to
> > trigger a spit though. Even async_init() is called after smp_init().
> 
> Yes I saw the Sashiko report, but we had previously had a (private) discussion
> where I thought we had already concluded that this approach is safe in practice
> due to the way that the boot cpu brings the secondaries online.

Yes, I thought I'd mention it. I don't see how this could go wrong in
practice as we don't expect any page splitting on the primary CPU while
it waits for the secondaries to come up.

> > An option may be to attempt cpus_read_trylock() as this lock is taken by
> > _cpu_up(). If it fails, return -EBUSY, otherwise check num_online_cpus()
> > and unlock (and return -EBUSY if secondaries already started).
> 
> That sounds neat; I could dig deeper and have a go at something like this if you
> want?

I don't think it's worth it. We'll probably need to keep the lock for
the duration of the splitting (though that's only before the cpu
features are fully initialised). It's something we can look into if we
ever see an issue here. For now, the num_online_cpus() test will do.

> > Another thing I couldn't get my head around - IIUC is_realm_world()
> > won't return true for map_mem() yet (if in a realm). Can we have realms
> > on hardware that does not support BBML2_NOABORT? We may not have
> > configuration with rodata_full set (it should be complementary to realm
> > support).
> 
> My understanding is that this is a pre-existing (and known) bug. It's not
> related to the "map linear map by large leaves and split dynamically" feature so
> wasn't attempting to fix it.

Yes, it's an existing bug I noticed while trying to understand the code
paths. It doesn't need any action on this series but we should try to
fix it separately.

-- 
Catalin


^ permalink raw reply

* Re: [PATCH] arm64: clear_page[s] using memset
From: Linus Walleij @ 2026-04-07  9:25 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: Will Deacon, Marc Zyngier, Oliver Upton, Joey Gouly,
	Suzuki K Poulose, Zenghui Yu, linux-arm-kernel, kvmarm
In-Reply-To: <ac7YJPo4Np2zxxEp@arm.com>

On Thu, Apr 2, 2026 at 10:57 PM Catalin Marinas <catalin.marinas@arm.com> wrote:
> On Fri, Mar 06, 2026 at 09:57:50AM +0100, Linus Walleij wrote:
> > There is no need to try to second-guess the compiler when
> > clearing memory. Just call memset() like everyone else.
>
> Hmm, that "like everyone else" made me think - why not move this to
> generic code and only the 1-2 platforms that need their own should
> override it? Could we do the same with copy_page()?
>
> Sorry, more work all of a sudden ;).

I actually had that planned, I just wanted to know if this would
be fine for arm64 first so that there is a user.

It seems you are on board so I will send a 2-patch series
next.

> > Since memset() already has an architecture-local MOPS
> > optimization, we do not need to do anything else to preserve
> > the MOPS optimization.
>
> The custom clear_page() had the (very small) advantage that it can skip
> the length/alignment checks as they are always page-size.

Yeah, it seems that is so fast that it doesn't really matter.

> > While at it, implement the shorthand for directly calling
> > the new prototype clear_pages() for larger page chunks.
> >
> > No performance regressions can be seen, the fastpath
> > benchmarks differences are in the noise.
>
> I assume the benchmarks ran on real hardware (had to ask, last time you
> mentioned qemu ;)).

Quoting my own commit message hehe:

> No performance regressions can be seen, the fastpath
> benchmarks differences are in the noise.

This was tested on hardware with Ryan Robert's fastpath tool.

Yours,
Linus Walleij


^ permalink raw reply

* Re: [PATCH v2 1/3] arm64: mm: Fix rodata=full block mapping support for realm guests
From: Catalin Marinas @ 2026-04-07  9:32 UTC (permalink / raw)
  To: Ryan Roberts
  Cc: Will Deacon, David Hildenbrand (Arm), Dev Jain, Yang Shi,
	Suzuki K Poulose, Jinjiang Tu, Kevin Brodsky, linux-arm-kernel,
	linux-kernel, stable
In-Reply-To: <beacee23-c177-47a1-b8b5-743844b617a8@arm.com>

On Tue, Apr 07, 2026 at 09:43:42AM +0100, Ryan Roberts wrote:
> On 03/04/2026 11:31, Catalin Marinas wrote:
> > On Thu, Apr 02, 2026 at 09:43:59PM +0100, Catalin Marinas wrote:
> >> Another thing I couldn't get my head around - IIUC is_realm_world()
> >> won't return true for map_mem() yet (if in a realm). Can we have realms
> >> on hardware that does not support BBML2_NOABORT? We may not have
> >> configuration with rodata_full set (it should be complementary to realm
> >> support).
> > 
> > With rodata_full==false, can_set_direct_map() returns false initially
> > but after arm64_rsi_init() it starts returning true if is_realm_world().
> > The side-effect is that map_mem() goes for block mappings and
> > linear_map_requires_bbml2 set to false. Later on,
> > linear_map_maybe_split_to_ptes() will skip the splitting.
> > 
> > Unless I'm missing something, is_realm_world() calls in
> > force_pte_mapping() and can_set_direct_map() are useless. I'd remove
> > them and either require BBML2_NOABORT with CCA or get the user to force
> > rodata_full when running in realms. Or move arm64_rsi_init() even
> > earlier?
> 
> I'd need Suzuki to comment on this. As I said in the other mail, I was treating
> this like a pre-existing bug. But I guess linear_map_requires_bbml2 ending up
> wrong is a problem here. I'm not sure it's quite as simple as requiring
> BBML2_NOABORT with CCA as we still need can_set_direct_map() to return true if
> we are in a realm.

can_set_direct_map() == true is not a property of the realm but rather a
requirement. In the absence of BBML2_NOABORT, I guess the test was added
under the assumption that force_pte_mapping() also returns true if
is_realm_world(). We might as well add a variable or static label to
track whether can_set_direct_map() is possible and avoid tests that
duplicate force_pte_mapping().

This won't solve the is_realm_world() changing polarity during boot but
at least we know it won't suddenly make can_set_direct_map() return
true when it shouldn't.

-- 
Catalin


^ permalink raw reply

* RE: [PATCH 0/7] soc: aspeed: Add AST2600 eSPI controller support
From: YH Chung @ 2026-04-07  9:36 UTC (permalink / raw)
  To: Arnd Bergmann, Andrew Jeffery, Conor Dooley
  Cc: Rob Herring, Krzysztof Kozlowski, Conor Dooley, Joel Stanley,
	Ryan Chen, Philipp Zabel, devicetree@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-aspeed@lists.ozlabs.org, linux-kernel@vger.kernel.org,
	openbmc@lists.ozlabs.org, maciej.lawniczak@intel.com, Mark Brown
In-Reply-To: <KL1PR0601MB42763DAD359305DEBA4B769D9057A@KL1PR0601MB4276.apcprd06.prod.outlook.com>

Hi Arnd,

Thanks for the comments and questions.

> These all seem to be viable options, but I still think we should focus on
> agreeing on a design for the low-level hardware interface and whether this
> can or should be abstracted between SoC vendor specific drivers before
> trying to solve the user interface side.
 
Could you share your thoughts on whether it would make sense to accept our
eSPI driver as is, and whether it should live under the SoC vendor-specific
directories? Any comment would be greatly appreciated.

Thanks,
YunHsuan

^ permalink raw reply

* Re: [Upstream] Re: [PATCH] arm64: dts: imx{91,93}-phyboard-segin: Add peb-av-18 overlay
From: Frank Li @ 2026-04-07  9:37 UTC (permalink / raw)
  To: Primoz Fiser
  Cc: Florijan Plohl, Sascha Hauer, Pengutronix Kernel Team,
	Fabio Estevam, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	imx, linux-arm-kernel, devicetree, linux-kernel, upstream
In-Reply-To: <707e106b-f444-4c26-9816-c53c8eda8cbb@norik.com>

On Tue, Apr 07, 2026 at 08:14:08AM +0200, Primoz Fiser wrote:
> Hi Frank, Florijan,
>
> On 4/6/26 04:56, Frank Li wrote:
> > On Fri, Apr 03, 2026 at 10:29:00AM +0200, Florijan Plohl wrote:
> >> Hello,
> >>
> >> On 4/2/26 15:50, Frank Li wrote:
> >>> On Thu, Apr 02, 2026 at 09:08:26AM +0200, Florijan Plohl wrote:
> >>>> Add overlay for the PEB-AV-18 adapter on phyBOARD-Segin-i.MX91/93.
> >>> what's means PEB-AV-18? Is it random board name?
> >> The PEB-AV-18 is PHYTEC designation for Audio/Video adapter modules that can
> >> be used to connect displays on their boards.
> >>
> >> I will improve commit message to add more such information in v2.
> >>
> >>>
> >>>
> >>>> The supported LCD is Powertip PH800480T032-ZHC19 panel (AC220).
> >>>>
> >>>> Signed-off-by: Florijan Plohl <florijan.plohl@norik.com>
> >>>> ---
> >>>>   arch/arm64/boot/dts/freescale/Makefile        |   4 +
> >>>>   .../imx91-phyboard-segin-peb-av-18.dtso       | 142 ++++++++++++++++++
> >>>>   .../imx93-phyboard-segin-peb-av-18.dtso       | 142 ++++++++++++++++++
> >>> Any difference between 91 and 93, can use one overlay file?
> >>>
> >>> Frank
> >>
> >> Can you suggest how to do so?
> >>
> >> There are imx93-pinfunc.h and imx91-pinfunc.h which are not unified
> >> between imx91 and imx93.
> >
> > I suggest move pinmux setting to mainboard's dts files, which provide
> > plug adaptor header, signal should be descripted in mainboard's dts file,
> > which provide an unified label to overlay file.
>
> Yeah, that would be one way of doing it.
>
> However, the phycore dtsi and phyboard dts are kept simple by design
> choice. This way, all optional pinctrls and peripherals are kept
> separate from the board device-tree to maintain clutter low.
>
> For v2 I would prefer to keep as is (current downstream implementation)
> or at least use this approach:
>
> imx91-93-phyboard-segin-peb-av-18.dtsi
> |
>  -> imx91-phyboard-segin-peb-av-18.dtso
> |
>  -> imx93-phyboard-segin-peb-av-18.dtso

It is better than v1's method.

Frank
>
> BR,
> Primoz
>
> >
> > Frank
> >
> >>
> >> So we can only create common dtsi like so:
> >>
> >> imx91-93-phyboard-segin-peb-av-18.dtsi
> >>
> >> and still use separate dtsos:
> >>
> >> imx91-phyboard-segin-peb-av-18.dtso
> >> imx93-phyboard-segin-peb-av-18.dtso
> >>
> >> Is that your idea?
> >>
> >> BR,
> >>
> >> Florijan Plohl
> >>
> >>>> --
> >>>> 2.43.0
> >>>>
> > _______________________________________________
> > upstream mailing list -- upstream@lists.phytec.de
> > To unsubscribe send an email to upstream-leave@lists.phytec.de
>
> --
> Primoz Fiser
> phone: +386-41-390-545
> email: primoz.fiser@norik.com
> --
> Norik systems d.o.o.
> Your embedded software partner
> Slovenia, EU
> phone: +386-41-540-545
> email: info@norik.com
>


^ permalink raw reply

* Re: [PATCH v4 3/4] PCI: tegra: Add Tegra264 support
From: Thierry Reding @ 2026-04-07  9:38 UTC (permalink / raw)
  To: Manivannan Sadhasivam
  Cc: Bjorn Helgaas, Lorenzo Pieralisi, Krzysztof Wilczyński,
	Rob Herring, Krzysztof Kozlowski, Conor Dooley, Thierry Reding,
	Jonathan Hunter, Karthikeyan Mitran, Hou Zhiqiang,
	Thomas Petazzoni, Pali Rohár, Michal Simek, Kevin Xie,
	linux-pci, devicetree, linux-tegra, linux-kernel,
	linux-arm-kernel, Thierry Reding, Manikanta Maddireddy
In-Reply-To: <iaoee5r5e2w52fap7ex23wdikbuvpjpesinedgjkehsedszhzo@64yoo2avmxle>

[-- Attachment #1: Type: text/plain, Size: 12894 bytes --]

On Thu, Apr 02, 2026 at 11:02:02PM +0530, Manivannan Sadhasivam wrote:
> On Thu, Apr 02, 2026 at 04:27:37PM +0200, Thierry Reding wrote:
> > From: Thierry Reding <treding@nvidia.com>
> > 
> > Add a driver for the PCIe controller found on NVIDIA Tegra264 SoCs. The
> > driver is very small, with its main purpose being to set up the address
> > translation registers and then creating a standard PCI host using ECAM.
> > 
> > Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
> > Signed-off-by: Thierry Reding <treding@nvidia.com>
> 
> What is the rationale for adding a new driver? Can't you reuse the existing one?
> If so, that should be mentioned in the description.

Which existing one? Tegra PCI controllers for previou generations
(Tegra194 and Tegra234) were DesignWare IP, but Tegra264 is an internal
IP, so the programming is entirely different. I'll add something to that
effect to the commit message.

> > diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
> > index 5aaed8ac6e44..6ead04f7bd6e 100644
> > --- a/drivers/pci/controller/Kconfig
> > +++ b/drivers/pci/controller/Kconfig
> > @@ -254,7 +254,15 @@ config PCI_TEGRA
> >  	select IRQ_MSI_LIB
> >  	help
> >  	  Say Y here if you want support for the PCIe host controller found
> > -	  on NVIDIA Tegra SoCs.
> > +	  on NVIDIA Tegra SoCs (Tegra20 through Tegra186).
> > +
> > +config PCIE_TEGRA264
> > +	bool "NVIDIA Tegra264 PCIe controller"
> 
> This driver seems to be using external MSI controller. So it can be built as a
> module. Also, you have the remove() callback for some reason.

Okay, I can turn this into a tristate symbol.

> > +	depends on ARCH_TEGRA || COMPILE_TEST
> > +	depends on PCI_MSI
> 
> Why?

I suppose it's not necessary in the sense of it being a build
dependency. At runtime, however, the root complex is not useful if PCI
MSI is not enabled. We can drop this dependency and rely on .config to
have it enabled as needed.

> > diff --git a/drivers/pci/controller/pcie-tegra264.c b/drivers/pci/controller/pcie-tegra264.c
> > new file mode 100644
> > index 000000000000..3ce1ad971bdb
> > --- /dev/null
> > +++ b/drivers/pci/controller/pcie-tegra264.c
[...]
> > +struct tegra264_pcie {
> > +	struct device *dev;
> > +	bool link_up;
> 
> Keep bool types at the end to avoid holes.

Done.

> > +static int tegra264_pcie_parse_dt(struct tegra264_pcie *pcie)
> > +{
> > +	int err;
> > +
> > +	pcie->wake_gpio = devm_gpiod_get_optional(pcie->dev, "nvidia,pex-wake",
> 
> You should switch to standard 'wake-gpios' property.

Will do.

> > +						  GPIOD_IN);
> > +	if (IS_ERR(pcie->wake_gpio))
> > +		return PTR_ERR(pcie->wake_gpio);
> > +
> > +	if (pcie->wake_gpio) {
> 
> Since you are bailing out above, you don't need this check.

I think we still want to have this check to handle the case of optional
wake GPIOs. Not all controllers may have this wired up and
devm_gpiod_get_optional() will return NULL (not an ERR_PTR()-encoded
error) if the wake-gpios property is missing.

> > +static void tegra264_pcie_bpmp_set_rp_state(struct tegra264_pcie *pcie)
> 
> I don't think this function name is self explanatory. Looks like it is turning
> off the PCIe controller, so how about tegra264_pcie_power_off()?

Agreed. The name is a relic from when this was potentially being used to
toggle on and off the controller. But it's only used for disabling, so
tegra264_pcie__power_off() sounds much better.

> > +{
> > +	struct tegra_bpmp_message msg = {};
> > +	struct mrq_pcie_request req = {};
> > +	int err;
> > +
> > +	req.cmd = CMD_PCIE_RP_CONTROLLER_OFF;
> > +	req.rp_ctrlr_off.rp_controller = pcie->ctl_id;
> > +
> > +	msg.mrq = MRQ_PCIE;
> > +	msg.tx.data = &req;
> > +	msg.tx.size = sizeof(req);
> > +
> > +	err = tegra_bpmp_transfer(pcie->bpmp, &msg);
> > +	if (err)
> > +		dev_info(pcie->dev, "failed to turn off PCIe #%u: %pe\n",
> 
> Why not dev_err()?
> 
> > +			 pcie->ctl_id, ERR_PTR(err));
> > +
> > +	if (msg.rx.ret)
> > +		dev_info(pcie->dev, "failed to turn off PCIe #%u: %d\n",
> 
> Same here.

These are not fatal errors and are safe to ignore. dev_err() seemed too
strong for this. They also really shouldn't happen. Though I now realize
that's a bad argument, or rather, actually an argument for making them
dev_err() so that they do stand out if they really should happen.

> 
> > +			 pcie->ctl_id, msg.rx.ret);
> > +}
> > +
> > +static void tegra264_pcie_icc_set(struct tegra264_pcie *pcie)
> > +{
> > +	u32 value, speed, width, bw;
> > +	int err;
> > +
> > +	value = readw(pcie->ecam + XTL_RC_PCIE_CFG_LINK_STATUS);
> > +	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, value);
> > +	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, value);
> > +
> > +	bw = width * (PCIE_SPEED2MBS_ENC(speed) / BITS_PER_BYTE);
> > +	value = MBps_to_icc(bw);
> 
> So this becomes, 'width * (PCIE_SPEED2MBS_ENC(speed) / 8) * 1000 / 8'. But don't
> you want, 'width * (PCIE_SPEED2MBS_ENC(speed)) * 1000 / 8'?

This is M*B*ps_to_icc(), not M*b*ps_to_icc(), so we do in fact get the
latter. I almost fell for this as well because I got confused by some of
these macros being all-caps and other times the case actually mattering.

> > +	err = icc_set_bw(pcie->icc_path, bw, bw);
> > +	if (err < 0)
> > +		dev_err(pcie->dev,
> > +			"failed to request bandwidth (%u MBps): %pe\n",
> > +			bw, ERR_PTR(err));
> 
> So you don't want to error out if this fails?

No. This is not a fatal error and the system will continue to work,
albeit perhaps at suboptimal performance. Given that Ethernet and mass
storage are connected to these, a failure to set the bandwidth and
erroring out here may leave the system unusable, but continuing on would
let the system boot and update firmware, kernel or whatever to recover.

I'll add a comment explaining this.

[...]
> > +static void tegra264_pcie_init(struct tegra264_pcie *pcie)
> > +{
> > +	enum pci_bus_speed speed;
> > +	unsigned int i;
> > +	u32 value;
> > +
> > +	/* bring the link out of reset */
> 
> s/link/controller or endpoint?

This controls the PERST# signal, so I guess "endpoint" would be more
correct.

> > +	value = readl(pcie->xtl + XTL_RC_MGMT_PERST_CONTROL);
> > +	value |= XTL_RC_MGMT_PERST_CONTROL_PERST_O_N;
> > +	writel(value, pcie->xtl + XTL_RC_MGMT_PERST_CONTROL);
> > +
> > +	if (!tegra_is_silicon()) {
> 
> This looks like some pre-silicon validation thing. Do you really want it to be
> present in the upstream driver?

At this point there is silicon for this chip, but we've been trying to
get some of the pre-silicon code merged upstream as well because
occasionally people will want to run upstream on simulation, even after
silicon is available. At other times we may want to reuse these drivers
on future chips during pre-silicon validation.

Obviously there needs to be a balance. We don't want to have excessive
amounts of code specifically for pre-silicon validation, but in
relatively simple cases like this it is useful.

> 
> > +		dev_info(pcie->dev,
> > +			 "skipping link state for PCIe #%u in simulation\n",
> > +			 pcie->ctl_id);
> > +		pcie->link_up = true;
> > +		return;
> > +	}
> > +
> > +	for (i = 0; i < PCIE_LINK_WAIT_MAX_RETRIES; i++) {
> > +		if (tegra264_pcie_link_up(pcie, NULL))
> > +			break;
> > +
> > +		usleep_range(PCIE_LINK_WAIT_US_MIN, PCIE_LINK_WAIT_US_MAX);
> > +	}
> > +
> > +	if (tegra264_pcie_link_up(pcie, &speed)) {
> 
> Why are you doing it for the second time?

It's just a last-resort check to see if it's really not come up after
the retries. Also, in this call we're actually interested in retrieving
the detected link speed.

> 
> > +		/* Per PCIe r5.0, 6.6.1 wait for 100ms after DLL up */
> 
> No need of this comment.

Fair enough. This was perhaps more useful in earlier versions of the
patch before the line below used the standardize wait time.

[...]
> > +static int tegra264_pcie_probe(struct platform_device *pdev)
> > +{
> > +	struct device *dev = &pdev->dev;
> > +	struct pci_host_bridge *bridge;
> > +	struct tegra264_pcie *pcie;
> > +	struct resource_entry *bus;
> > +	struct resource *res;
> > +	int err;
> > +
> > +	bridge = devm_pci_alloc_host_bridge(dev, sizeof(struct tegra264_pcie));
> > +	if (!bridge)
> > +		return dev_err_probe(dev, -ENOMEM,
> > +				     "failed to allocate host bridge\n");
> > +
> > +	pcie = pci_host_bridge_priv(bridge);
> > +	platform_set_drvdata(pdev, pcie);
> > +	pcie->bridge = bridge;
> > +	pcie->dev = dev;
> > +
> > +	err = pinctrl_pm_select_default_state(dev);
> 
> I questioned this before:
> https://lore.kernel.org/linux-pci/o5sxxdikdjwd76zsedvkpsl54nw6wrhopwsflt43y5st67mrub@uuw3yfjfqthd/

I'll remove this. Looks like we should be fine with just relying on the
default state being set by the pinctrl core. We might need to move it
into the resume callback.

> > +	if (err < 0)
> > +		return dev_err_probe(dev, err,
> > +				     "failed to configure sideband pins\n");
> > +
> > +	err = tegra264_pcie_parse_dt(pcie);
> > +	if (err < 0)
> > +		return dev_err_probe(dev, err, "failed to parse device tree");
> > +
> > +	pcie->xal = devm_platform_ioremap_resource_byname(pdev, "xal");
> > +	if (IS_ERR(pcie->xal))
> > +		return dev_err_probe(dev, PTR_ERR(pcie->xal),
> > +				     "failed to map XAL memory\n");
> > +
> > +	pcie->xtl = devm_platform_ioremap_resource_byname(pdev, "xtl-pri");
> > +	if (IS_ERR(pcie->xtl))
> > +		return dev_err_probe(dev, PTR_ERR(pcie->xtl),
> > +				     "failed to map XTL-PRI memory\n");
> > +
> > +	bus = resource_list_first_type(&bridge->windows, IORESOURCE_BUS);
> > +	if (!bus)
> > +		return dev_err_probe(dev, -ENODEV,
> > +				     "failed to get bus resources\n");
> > +
> > +	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ecam");
> > +	if (!res)
> > +		return dev_err_probe(dev, -ENXIO,
> > +				     "failed to get ECAM resource\n");
> > +
> > +	pcie->icc_path = devm_of_icc_get(&pdev->dev, "write");
> > +	if (IS_ERR(pcie->icc_path))
> > +		return dev_err_probe(&pdev->dev, PTR_ERR(pcie->icc_path),
> > +				     "failed to get ICC");
> > +
> > +	/*
> > +	 * Parse BPMP property only for silicon, as interaction with BPMP is
> > +	 * not needed for other platforms.
> > +	 */
> > +	if (tegra_is_silicon()) {
> > +		pcie->bpmp = tegra_bpmp_get_with_id(dev, &pcie->ctl_id);
> > +		if (IS_ERR(pcie->bpmp))
> > +			return dev_err_probe(dev, PTR_ERR(pcie->bpmp),
> > +					     "failed to get BPMP\n");
> > +	}
> > +
> 
> pm_runtime_set_active()
> 
> > +	pm_runtime_enable(dev);
> 
> devm_pm_runtime_enable()?

Looks like I can even use devm_pm_runtime_set_active_enabled() to
combine the two.

> 
> > +	pm_runtime_get_sync(dev);
> > +
> > +	/* sanity check that programmed ranges match what's in DT */
> > +	if (!tegra264_pcie_check_ranges(pdev)) {
> > +		err = -EINVAL;
> > +		goto put_pm;
> > +	}
> > +
> > +	pcie->cfg = pci_ecam_create(dev, res, bus->res, &pci_generic_ecam_ops);
> > +	if (IS_ERR(pcie->cfg)) {
> > +		err = dev_err_probe(dev, PTR_ERR(pcie->cfg),
> > +				    "failed to create ECAM\n");
> > +		goto put_pm;
> > +	}
> > +
> > +	bridge->ops = (struct pci_ops *)&pci_generic_ecam_ops.pci_ops;
> > +	bridge->sysdata = pcie->cfg;
> > +	pcie->ecam = pcie->cfg->win;
> > +
> > +	tegra264_pcie_init(pcie);
> > +
> > +	if (!pcie->link_up)
> > +		goto free;
> 
> goto free_ecam;

It's not clear to me, but are you suggesting to rename the existing
"free" label to "free_ecam"? I can do that.

> > +	err = pci_host_probe(bridge);
> > +	if (err < 0) {
> > +		dev_err(dev, "failed to register host: %pe\n", ERR_PTR(err));
> 
> dev_err_probe()

Okay.

> 
> > +		goto free;
> > +	}
> > +
> > +	return err;
> 
> return 0;

Done.

[...]
> > +static int tegra264_pcie_resume_noirq(struct device *dev)
> > +{
> > +	struct tegra264_pcie *pcie = dev_get_drvdata(dev);
> > +	int err;
> > +
> > +	if (pcie->wake_gpio && device_may_wakeup(dev)) {
> > +		err = disable_irq_wake(pcie->wake_irq);
> > +		if (err < 0)
> > +			dev_err(dev, "failed to disable wake IRQ: %pe\n",
> > +				ERR_PTR(err));
> > +	}
> > +
> > +	if (pcie->link_up == false)
> > +		return 0;
> > +
> > +	tegra264_pcie_init(pcie);
> > +
> 
> Why do you need init() here without deinit() in tegra264_pcie_suspend_noirq()?

That's because when we come out of suspend the link may have gone down
again, so we need to take the endpoint out of reset to retrigger the
link training. I think we could possibly explicitly clear that PERST_O_N
bit in the PERST_CONTROL register in a new tegra264_pcie_deinit() to
mirror what tegra264_pcie_init() does, but it's automatically done by
firmware anyway, so not needed.

Thierry

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH] arm64: clear_page[s] using memset
From: Catalin Marinas @ 2026-04-07  9:42 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Will Deacon, Marc Zyngier, Oliver Upton, Joey Gouly,
	Suzuki K Poulose, Zenghui Yu, linux-arm-kernel, kvmarm
In-Reply-To: <CAD++jL=G81=ueo7djqUETToE459kpFGniWvBwnYbJtmXrm8MFw@mail.gmail.com>

On Tue, Apr 07, 2026 at 11:25:55AM +0200, Linus Walleij wrote:
> On Thu, Apr 2, 2026 at 10:57 PM Catalin Marinas <catalin.marinas@arm.com> wrote:
> > On Fri, Mar 06, 2026 at 09:57:50AM +0100, Linus Walleij wrote:
> > > There is no need to try to second-guess the compiler when
> > > clearing memory. Just call memset() like everyone else.
> >
> > Hmm, that "like everyone else" made me think - why not move this to
> > generic code and only the 1-2 platforms that need their own should
> > override it? Could we do the same with copy_page()?
> >
> > Sorry, more work all of a sudden ;).
> 
> I actually had that planned, I just wanted to know if this would
> be fine for arm64 first so that there is a user.
> 
> It seems you are on board so I will send a 2-patch series
> next.

We might as well merge the arm64 patch first and do the cross-arch
cleanup separately, queued via akpm's tree.

> > > While at it, implement the shorthand for directly calling
> > > the new prototype clear_pages() for larger page chunks.
> > >
> > > No performance regressions can be seen, the fastpath
> > > benchmarks differences are in the noise.
> >
> > I assume the benchmarks ran on real hardware (had to ask, last time you
> > mentioned qemu ;)).
> 
> Quoting my own commit message hehe:
> 
> > No performance regressions can be seen, the fastpath
> > benchmarks differences are in the noise.
> 
> This was tested on hardware with Ryan Robert's fastpath tool.

It wasn't clear to me where the fastpath tool ran ;).

Thanks.

-- 
Catalin


^ permalink raw reply

* Re: [PATCH 3/3] arm64: dts: imx8mp-ab2: Correct interrupt flags
From: Shengjiu Wang @ 2026-04-07  9:49 UTC (permalink / raw)
  To: Krzysztof Kozlowski
  Cc: Rob Herring, Krzysztof Kozlowski, Conor Dooley, Frank Li,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam, Marek Vasut,
	Peng Fan, Fedor Ross, Shawn Guo, Shengjiu Wang, Viorel Suman,
	devicetree, imx, linux-arm-kernel, linux-kernel
In-Reply-To: <20260406063810.25531-6-krzysztof.kozlowski@oss.qualcomm.com>

On Mon, Apr 6, 2026 at 2:39 PM Krzysztof Kozlowski
<krzysztof.kozlowski@oss.qualcomm.com> wrote:
>
> GPIO_ACTIVE_x flags are not correct in the context of interrupt flags.
> These are simple defines so they could be used in DTS but they will not
> have the same meaning:
> 1. GPIO_ACTIVE_HIGH = 0 => IRQ_TYPE_NONE
> 2. GPIO_ACTIVE_LOW  = 1 => IRQ_TYPE_EDGE_RISING
>
> Correct the interrupt flags, assuming the author of the code wanted the
> same logical behavior behind the name "ACTIVE_xxx", this is:
> ACTIVE_LOW  => IRQ_TYPE_LEVEL_LOW
>
> Fixes: bf68c18150ef ("arm64: dts: imx8mp-ab2: add support for NXP i.MX8MP audio board (version 2)")
> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>

Thanks for the fix.

Reviewed-by: Shengjiu Wang <shengjiu.wang@nxp.com>

Best regards
Shengjiu Wang
> ---
>  arch/arm64/boot/dts/freescale/imx8mp-ab2.dts | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/boot/dts/freescale/imx8mp-ab2.dts b/arch/arm64/boot/dts/freescale/imx8mp-ab2.dts
> index dbbc0df0e3d1..443e4fd5b9bf 100644
> --- a/arch/arm64/boot/dts/freescale/imx8mp-ab2.dts
> +++ b/arch/arm64/boot/dts/freescale/imx8mp-ab2.dts
> @@ -281,7 +281,7 @@ pca9450: pmic@25 {
>                 compatible = "nxp,pca9450c";
>                 reg = <0x25>;
>                 interrupt-parent = <&gpio1>;
> -               interrupts = <3 GPIO_ACTIVE_LOW>;
> +               interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
>                 pinctrl-0 = <&pinctrl_pmic>;
>
>                 regulators {
> --
> 2.51.0
>
>


^ permalink raw reply

* [PATCH V2] spi: zynq-qspi: Simplify clock handling with devm_clk_get_enabled()
From: Pei Xiao @ 2026-04-07  9:55 UTC (permalink / raw)
  To: michal.simek, broonie, linux-spi, linux-kernel, linux-arm-kernel; +Cc: Pei Xiao

Replace devm_clk_get() followed by clk_prepare_enable() with
devm_clk_get_enabled() for both "pclk" and "ref_clk". This removes
the need for explicit clock enable and disable calls, as the managed
API automatically disables the clocks on device removal or probe
failure.

Remove the now-unnecessary clk_disable_unprepare() calls from the
probe error paths and the remove callback. Simplify error handling
by jumping directly to the remove_ctlr label.

Signed-off-by: Pei Xiao <xiaopei01@kylinos.cn>
---
changlog in v2: remove clk enable in setup_op function
---
 drivers/spi/spi-zynq-qspi.c | 42 ++++++-------------------------------
 1 file changed, 6 insertions(+), 36 deletions(-)

diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
index 5232483c4a3a..af252500195c 100644
--- a/drivers/spi/spi-zynq-qspi.c
+++ b/drivers/spi/spi-zynq-qspi.c
@@ -381,21 +381,10 @@ static int zynq_qspi_setup_op(struct spi_device *spi)
 {
 	struct spi_controller *ctlr = spi->controller;
 	struct zynq_qspi *qspi = spi_controller_get_devdata(ctlr);
-	int ret;
 
 	if (ctlr->busy)
 		return -EBUSY;
 
-	ret = clk_enable(qspi->refclk);
-	if (ret)
-		return ret;
-
-	ret = clk_enable(qspi->pclk);
-	if (ret) {
-		clk_disable(qspi->refclk);
-		return ret;
-	}
-
 	zynq_qspi_write(qspi, ZYNQ_QSPI_ENABLE_OFFSET,
 			ZYNQ_QSPI_ENABLE_ENABLE_MASK);
 
@@ -661,7 +650,7 @@ static int zynq_qspi_probe(struct platform_device *pdev)
 		goto remove_ctlr;
 	}
 
-	xqspi->pclk = devm_clk_get(&pdev->dev, "pclk");
+	xqspi->pclk = devm_clk_get_enabled(&pdev->dev, "pclk");
 	if (IS_ERR(xqspi->pclk)) {
 		dev_err(&pdev->dev, "pclk clock not found.\n");
 		ret = PTR_ERR(xqspi->pclk);
@@ -670,36 +659,24 @@ static int zynq_qspi_probe(struct platform_device *pdev)
 
 	init_completion(&xqspi->data_completion);
 
-	xqspi->refclk = devm_clk_get(&pdev->dev, "ref_clk");
+	xqspi->refclk = devm_clk_get_enabled(&pdev->dev, "ref_clk");
 	if (IS_ERR(xqspi->refclk)) {
 		dev_err(&pdev->dev, "ref_clk clock not found.\n");
 		ret = PTR_ERR(xqspi->refclk);
 		goto remove_ctlr;
 	}
 
-	ret = clk_prepare_enable(xqspi->pclk);
-	if (ret) {
-		dev_err(&pdev->dev, "Unable to enable APB clock.\n");
-		goto remove_ctlr;
-	}
-
-	ret = clk_prepare_enable(xqspi->refclk);
-	if (ret) {
-		dev_err(&pdev->dev, "Unable to enable device clock.\n");
-		goto clk_dis_pclk;
-	}
-
 	xqspi->irq = platform_get_irq(pdev, 0);
 	if (xqspi->irq < 0) {
 		ret = xqspi->irq;
-		goto clk_dis_all;
+		goto remove_ctlr;
 	}
 	ret = devm_request_irq(&pdev->dev, xqspi->irq, zynq_qspi_irq,
 			       0, pdev->name, xqspi);
 	if (ret != 0) {
 		ret = -ENXIO;
 		dev_err(&pdev->dev, "request_irq failed\n");
-		goto clk_dis_all;
+		goto remove_ctlr;
 	}
 
 	ret = of_property_read_u32(np, "num-cs",
@@ -709,7 +686,7 @@ static int zynq_qspi_probe(struct platform_device *pdev)
 	} else if (num_cs > ZYNQ_QSPI_MAX_NUM_CS) {
 		ret = -EINVAL;
 		dev_err(&pdev->dev, "only 2 chip selects are available\n");
-		goto clk_dis_all;
+		goto remove_ctlr;
 	} else {
 		ctlr->num_chipselect = num_cs;
 	}
@@ -728,15 +705,11 @@ static int zynq_qspi_probe(struct platform_device *pdev)
 	ret = devm_spi_register_controller(&pdev->dev, ctlr);
 	if (ret) {
 		dev_err(&pdev->dev, "devm_spi_register_controller failed\n");
-		goto clk_dis_all;
+		goto remove_ctlr;
 	}
 
 	return ret;
 
-clk_dis_all:
-	clk_disable_unprepare(xqspi->refclk);
-clk_dis_pclk:
-	clk_disable_unprepare(xqspi->pclk);
 remove_ctlr:
 	spi_controller_put(ctlr);
 
@@ -758,9 +731,6 @@ static void zynq_qspi_remove(struct platform_device *pdev)
 	struct zynq_qspi *xqspi = platform_get_drvdata(pdev);
 
 	zynq_qspi_write(xqspi, ZYNQ_QSPI_ENABLE_OFFSET, 0);
-
-	clk_disable_unprepare(xqspi->refclk);
-	clk_disable_unprepare(xqspi->pclk);
 }
 
 static const struct of_device_id zynq_qspi_of_match[] = {
-- 
2.25.1



^ permalink raw reply related

* Re: [PATCH] arm64: dts: imx93-9x9-qsb: Add tianma,tm050rdh03 panel
From: Frank Li @ 2026-04-07  9:55 UTC (permalink / raw)
  To: Liu Ying
  Cc: Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, imx, linux-arm-kernel,
	devicetree, linux-kernel
In-Reply-To: <20260407-tianma-tm050rdh03-imx93-9x9-qsb-v1-1-24d514a62fdc@nxp.com>

On Tue, Apr 07, 2026 at 05:15:31PM +0800, Liu Ying wrote:
> Support tianma,tm050rdh03 DPI panel on i.MX93 9x9 QSB.
>
> The panel connects with the QSB board through an adapter board[1]
> designed by NXP.
>
> Link: https://www.nxp.com/design/design-center/development-boards-and-designs/parallel-lcd-display:TM050RDH03-41 [1]
> Signed-off-by: Liu Ying <victor.liu@nxp.com>
> ---
>  arch/arm64/boot/dts/freescale/Makefile             |   2 +
>  .../imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtsi       | 110 +++++++++++++++++++++
>  .../imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtso       | 106 +-------------------

Can you add some description about raname in commit message?
Use -C option to create patch.

...
> diff --git a/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-tianma-tm050rdh03.dtso b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-tianma-tm050rdh03.dtso
> new file mode 100644
> index 000000000000..c233797ec28c
> --- /dev/null
> +++ b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb-tianma-tm050rdh03.dtso
> @@ -0,0 +1,14 @@
> +// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
> +/*
> + * Copyright 2026 NXP
> + */
> +
> +#include <dt-bindings/gpio/gpio.h>
> +#include "imx93-9x9-qsb-ontat-kd50g21-40nt-a1.dtsi"
> +
> +&{/} {
> +	panel {
> +		compatible = "tianma,tm050rdh03";
> +		enable-gpios = <&pcal6524 8 GPIO_ACTIVE_HIGH>;
> +	};
> +};

Is it possible to appply this overlay file and kd50g21-40nt-a1 overlay file

to imx93-9x9-qsb.dtb, so needn't create dtsi.

Frank
>
> ---
> base-commit: 816f193dd0d95246f208590924dd962b192def78
> change-id: 20260407-tianma-tm050rdh03-imx93-9x9-qsb-6e4bbbde3d08
>
> Best regards,
> --
> Liu Ying <victor.liu@nxp.com>
>


^ permalink raw reply

* Re: [PATCH v2 1/3] arm64: mm: Fix rodata=full block mapping support for realm guests
From: Suzuki K Poulose @ 2026-04-07  9:57 UTC (permalink / raw)
  To: Catalin Marinas, Ryan Roberts
  Cc: Will Deacon, David Hildenbrand (Arm), Dev Jain, Yang Shi,
	Jinjiang Tu, Kevin Brodsky, linux-arm-kernel, linux-kernel,
	stable
In-Reply-To: <ac7VD4Z85nS30GCp@arm.com>

On 02/04/2026 21:43, Catalin Marinas wrote:
> On Mon, Mar 30, 2026 at 05:17:02PM +0100, Ryan Roberts wrote:
>>   int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
>>   {
>>   	int ret;
>>   
>> -	/*
>> -	 * !BBML2_NOABORT systems should not be trying to change permissions on
>> -	 * anything that is not pte-mapped in the first place. Just return early
>> -	 * and let the permission change code raise a warning if not already
>> -	 * pte-mapped.
>> -	 */
>> -	if (!system_supports_bbml2_noabort())
>> -		return 0;
>> -
>>   	/*
>>   	 * If the region is within a pte-mapped area, there is no need to try to
>>   	 * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may
>>   	 * change permissions from atomic context so for those cases (which are
>>   	 * always pte-mapped), we must not go any further because taking the
>> -	 * mutex below may sleep.
>> +	 * mutex below may sleep. Do not call force_pte_mapping() here because
>> +	 * it could return a confusing result if called from a secondary cpu
>> +	 * prior to finalizing caps. Instead, linear_map_requires_bbml2 gives us
>> +	 * what we need.
>>   	 */
>> -	if (force_pte_mapping() || is_kfence_address((void *)start))
>> +	if (!linear_map_requires_bbml2 || is_kfence_address((void *)start))
>>   		return 0;
>>   
>> +	if (!system_supports_bbml2_noabort()) {
>> +		/*
>> +		 * !BBML2_NOABORT systems should not be trying to change
>> +		 * permissions on anything that is not pte-mapped in the first
>> +		 * place. Just return early and let the permission change code
>> +		 * raise a warning if not already pte-mapped.
>> +		 */
>> +		if (system_capabilities_finalized())
>> +			return 0;
>> +
>> +		/*
>> +		 * Boot-time: split_kernel_leaf_mapping_locked() allocates from
>> +		 * page allocator. Can't split until it's available.
>> +		 */
>> +		if (WARN_ON(!page_alloc_available))
>> +			return -EBUSY;
>> +
>> +		/*
>> +		 * Boot-time: Started secondary cpus but don't know if they
>> +		 * support BBML2_NOABORT yet. Can't allow splitting in this
>> +		 * window in case they don't.
>> +		 */
>> +		if (WARN_ON(num_online_cpus() > 1))
>> +			return -EBUSY;
>> +	}
> 
> I think sashiko is over cautions here
> (https://sashiko.dev/#/patchset/20260330161705.3349825-1-ryan.roberts@arm.com)
> but it has a somewhat valid point from the perspective of
> num_online_cpus() semantics. We have have num_online_cpus() == 1 while
> having a secondary CPU just booted and with its MMU enabled. I don't
> think we can have any asynchronous tasks running at that point to
> trigger a spit though. Even async_init() is called after smp_init().
> 
> An option may be to attempt cpus_read_trylock() as this lock is taken by
> _cpu_up(). If it fails, return -EBUSY, otherwise check num_online_cpus()
> and unlock (and return -EBUSY if secondaries already started).
> 
> Another thing I couldn't get my head around - IIUC is_realm_world()
> won't return true for map_mem() yet (if in a realm). 
That is correct. map_mem() comes from paginig_init(), which gets called
before arm64_rsi_init(). Realm check was delayed until psci_xx_init().
We had a version which parsed the DT for PSCI conduit early enough
to be able to make the SMC calls to detect the Realm. But there
were concerns around it.


> Can we have realms on hardware that does not support BBML2_NOABORT? 

I can get this checked. I expect that they all will have BBML2 (v8.4
extension). But NOABORT is something that may need to be checked.

We may not have


> configuration with rodata_full set (it should be complementary to realm
> support).



> 
> I'll add the patches to for-next/core to give them a bit of time in
> -next but let's see next week if we ignore this (with an updated
> comment) or we try to avoid the issue altogether.
> 

Thanks

Suzuki



^ permalink raw reply

* Re: [PATCH v2] firmware: arm_ffa: Use the correct buffer size during RXTX_MAP
From: Sudeep Holla @ 2026-04-07 10:06 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, android-kvm, Sebastian Ene; +Cc: Sudeep Holla
In-Reply-To: <20260402113939.930221-1-sebastianene@google.com>

On Thu, 02 Apr 2026 11:39:39 +0000, Sebastian Ene wrote:
> Don't use the discovered buffer size from an FFA_FEATURES call directly
> since we can run on a system that has the PAGE_SIZE larger than the
> returned size which makes the alloc_pages_exact for the buffer to be
> rounded up.

Applied to sudeep.holla/linux (for-next/ffa/fixes), thanks!

[1/1] firmware: arm_ffa: Use the correct buffer size during RXTX_MAP
      https://git.kernel.org/sudeep.holla/c/83210251fd70
--
Regards,
Sudeep



^ permalink raw reply

* [GIT PULL] firmware: arm_ffa: Fix for v7.1
From: Sudeep Holla @ 2026-04-07 10:08 UTC (permalink / raw)
  To: ARM SoC Team, SoC Team, ALKML; +Cc: Sudeep Holla, Arnd Bergmann

Hi ARM SoC Team,

Please pull ! This is the only fix/update I have at the moment for v7.1
So, I am sending it early as fix but late as an update for v7.1.

Regards,
Sudeep

-->8

The following changes since commit 6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f:

  Linux 7.0-rc1 (2026-02-22 13:18:59 -0800)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux.git tags/ffa-fix-7.1

for you to fetch changes up to 83210251fd70d5f96bcdc8911e15f7411a6b2463:

  firmware: arm_ffa: Use the correct buffer size during RXTX_MAP (2026-04-07 10:47:42 +0100)

----------------------------------------------------------------
Arm FF-A fix for v7.1

Use the page aligned backing allocation size when computing the RXTX_MAP
page count. This fixes FF-A RX/TX buffer registration on kernels built
with 16K/64K PAGE_SIZE, where alloc_pages_exact() backs the buffer with a
larger aligned span than the discovered minimum buffer size.

----------------------------------------------------------------
Sebastian Ene (1):
      firmware: arm_ffa: Use the correct buffer size during RXTX_MAP

 drivers/firmware/arm_ffa/driver.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

^ permalink raw reply

* Re: [PATCH v2 1/3] arm64: mm: Fix rodata=full block mapping support for realm guests
From: Ryan Roberts @ 2026-04-07 10:13 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: Will Deacon, David Hildenbrand (Arm), Dev Jain, Yang Shi,
	Suzuki K Poulose, Jinjiang Tu, Kevin Brodsky, linux-arm-kernel,
	linux-kernel, stable
In-Reply-To: <adTPFrlVCEt-hioX@arm.com>

On 07/04/2026 10:32, Catalin Marinas wrote:
> On Tue, Apr 07, 2026 at 09:43:42AM +0100, Ryan Roberts wrote:
>> On 03/04/2026 11:31, Catalin Marinas wrote:
>>> On Thu, Apr 02, 2026 at 09:43:59PM +0100, Catalin Marinas wrote:
>>>> Another thing I couldn't get my head around - IIUC is_realm_world()
>>>> won't return true for map_mem() yet (if in a realm). Can we have realms
>>>> on hardware that does not support BBML2_NOABORT? We may not have
>>>> configuration with rodata_full set (it should be complementary to realm
>>>> support).
>>>
>>> With rodata_full==false, can_set_direct_map() returns false initially
>>> but after arm64_rsi_init() it starts returning true if is_realm_world().
>>> The side-effect is that map_mem() goes for block mappings and
>>> linear_map_requires_bbml2 set to false. Later on,
>>> linear_map_maybe_split_to_ptes() will skip the splitting.
>>>
>>> Unless I'm missing something, is_realm_world() calls in
>>> force_pte_mapping() and can_set_direct_map() are useless. I'd remove
>>> them and either require BBML2_NOABORT with CCA or get the user to force
>>> rodata_full when running in realms. Or move arm64_rsi_init() even
>>> earlier?
>>
>> I'd need Suzuki to comment on this. As I said in the other mail, I was treating
>> this like a pre-existing bug. But I guess linear_map_requires_bbml2 ending up
>> wrong is a problem here. I'm not sure it's quite as simple as requiring
>> BBML2_NOABORT with CCA as we still need can_set_direct_map() to return true if
>> we are in a realm.
> 
> can_set_direct_map() == true is not a property of the realm but rather a
> requirement. 

Yes indeed. It would be better to call it might_set_direct_map() or something
like that...

> In the absence of BBML2_NOABORT, I guess the test was added
> under the assumption that force_pte_mapping() also returns true if
> is_realm_world(). We might as well add a variable or static label to
> track whether can_set_direct_map() is possible and avoid tests that
> duplicate force_pte_mapping().

I'm not sure I follow. We have linear_map_requires_bbml2 which is inteded to
track this shape of thing; if we have forced pte mapping then the value of
can_set_direct_map() is irrelevant - we will never need to split because we are
already pte-mapped. But if can_set_direct_map() initially returns false because
is_realm_world() incorrectly returns false in the early boot environment, then
linear_map_requires_bbml2 will be set to false, and we will incorrectly
short-circuit splitting any block mappings in split_kernel_leaf_mapping().

I think we are agreed on the problem. But I don't understand how tracking
can_set_direct_map() in a cached variable helps with that.

> 
> This won't solve the is_realm_world() changing polarity during boot but
> at least we know it won't suddenly make can_set_direct_map() return
> true when it shouldn't.

But is_real_world() _should_ make can_set_direct_map() return true, shouldn't
it? If we are in realm-world, we need to be able to flip the NS_SHARED bit on
parts of the linear map. So if we are in realm-world, we _might_ need to update
he direct map and that's what can_set_direct_map() is supposed to tell us.

^ permalink raw reply

* Re: [PATCH v2 1/4] ARM: dts: qcom: msm8974pro-htc-m8: add status LEDs
From: Konrad Dybcio @ 2026-04-07 10:27 UTC (permalink / raw)
  To: alex, Bjorn Andersson, Konrad Dybcio, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley
  Cc: Luca Weiss, linux-arm-kernel, linux-arm-msm,
	~postmarketos/upstreaming, phone-devel, devicetree, linux-kernel,
	Lee Jones, Pavel Machek, linux-leds
In-Reply-To: <20260406-m8-dts-additions-v2-1-c4c4bd50af48@me.ssier.org>

On 4/6/26 7:16 AM, Alexandre Messier via B4 Relay wrote:
> From: Alexandre Messier <alex@me.ssier.org>
> 
> Add support for the notification LEDs on the HTC One M8.
> 
> Two LEDs are available, one orange and one green. Together,
> they both form a single notification source, so use a
> multicolor LED node to describe this arrangement.
> 
> Cc: Lee Jones <lee@kernel.org>
> Cc: Pavel Machek <pavel@kernel.org>
> Cc: linux-leds@vger.kernel.org
> Signed-off-by: Alexandre Messier <alex@me.ssier.org>
> ---

Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>

Konrad


^ permalink raw reply

* Re: [PATCH v2 2/4] ARM: dts: qcom: msm8974pro-htc-m8: add NFC support
From: Konrad Dybcio @ 2026-04-07 10:27 UTC (permalink / raw)
  To: alex, Bjorn Andersson, Konrad Dybcio, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley
  Cc: Luca Weiss, linux-arm-kernel, linux-arm-msm,
	~postmarketos/upstreaming, phone-devel, devicetree, linux-kernel
In-Reply-To: <20260406-m8-dts-additions-v2-2-c4c4bd50af48@me.ssier.org>

On 4/6/26 7:16 AM, Alexandre Messier via B4 Relay wrote:
> From: Alexandre Messier <alex@me.ssier.org>
> 
> Add the NFC chip used in the HTC One M8 to its device tree.
> 
> The downstream vendor kernel used an I2C frequency of 384 kHz
> for this bus. Use the same value as the vendor.
> 
> Signed-off-by: Alexandre Messier <alex@me.ssier.org>
> ---

Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>

Konrad


^ permalink raw reply

* Re: [PATCH v2 4/4] ARM: dts: qcom: msm8974pro-htc-m8: add touchscreen
From: Konrad Dybcio @ 2026-04-07 10:28 UTC (permalink / raw)
  To: alex, Bjorn Andersson, Konrad Dybcio, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley
  Cc: Luca Weiss, linux-arm-kernel, linux-arm-msm,
	~postmarketos/upstreaming, phone-devel, devicetree, linux-kernel
In-Reply-To: <20260406-m8-dts-additions-v2-4-c4c4bd50af48@me.ssier.org>

On 4/6/26 7:17 AM, Alexandre Messier via B4 Relay wrote:
> From: Alexandre Messier <alex@me.ssier.org>
> 
> Add the touchscreen device node for the HTC One (M8).
> 
> The downstream vendor kernel used an I2C frequency of 384 kHz
> for this bus. Use the same value as the vendor.
> 
> Signed-off-by: Alexandre Messier <alex@me.ssier.org>
> ---

Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>

Konrad


^ permalink raw reply

* [PATCH v5 1/4] arm64: tlb: Introduce __tlbi_sync_s1ish_{kernel,batch}() for TLB maintenance
From: Catalin Marinas @ 2026-04-07 10:28 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: Will Deacon, James Morse, Mark Rutland, Mark Brown
In-Reply-To: <20260407102848.2266988-1-catalin.marinas@arm.com>

Add __tlbi_sync_s1ish_kernel() similar to __tlbi_sync_s1ish() and use it
for kernel TLB maintenance. Also use this function in flush_tlb_all()
which is only used in relation to kernel mappings. Subsequent patches
can differentiate between workarounds that apply to user only or both
user and kernel.

A subsequent patch will add mm_struct to __tlbi_sync_s1ish(). Since
arch_tlbbatch_flush() is not specific to an mm, add a corresponding
__tlbi_sync_s1ish_batch() helper.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/tlbflush.h | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 1416e652612b..f41eebf00990 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -191,6 +191,18 @@ static inline void __tlbi_sync_s1ish(void)
 	__repeat_tlbi_sync(vale1is, 0);
 }
 
+static inline void __tlbi_sync_s1ish_batch(void)
+{
+	dsb(ish);
+	__repeat_tlbi_sync(vale1is, 0);
+}
+
+static inline void __tlbi_sync_s1ish_kernel(void)
+{
+	dsb(ish);
+	__repeat_tlbi_sync(vale1is, 0);
+}
+
 /*
  * Complete broadcast TLB maintenance issued by hyp code which invalidates
  * stage 1 translation information in any translation regime.
@@ -299,7 +311,7 @@ static inline void flush_tlb_all(void)
 {
 	dsb(ishst);
 	__tlbi(vmalle1is);
-	__tlbi_sync_s1ish();
+	__tlbi_sync_s1ish_kernel();
 	isb();
 }
 
@@ -385,7 +397,7 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
  */
 static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
-	__tlbi_sync_s1ish();
+	__tlbi_sync_s1ish_batch();
 }
 
 /*
@@ -568,7 +580,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
 	dsb(ishst);
 	__flush_tlb_range_op(vaale1is, start, pages, stride, 0,
 			     TLBI_TTL_UNKNOWN, false, lpa2_is_enabled());
-	__tlbi_sync_s1ish();
+	__tlbi_sync_s1ish_kernel();
 	isb();
 }
 
@@ -582,7 +594,7 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
 
 	dsb(ishst);
 	__tlbi(vaae1is, addr);
-	__tlbi_sync_s1ish();
+	__tlbi_sync_s1ish_kernel();
 	isb();
 }
 


^ permalink raw reply related

* [PATCH v5 0/4] arm64: Work around C1-Pro erratum 4193714 (CVE-2026-0995)
From: Catalin Marinas @ 2026-04-07 10:28 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: Will Deacon, James Morse, Mark Rutland, Mark Brown

That's version 5 of the C1-Pro workaround. Version 3 here:

https://lore.kernel.org/r/20260402101246.3870036-1-catalin.marinas@arm.com

Changes since v4:

- Static sme_dvmsync_cpus mask to avoid allocating under stop_machine().
  Sashiko was right, even GFP_ATOMIC won't work under a raw_spin_lock()

- batch->cpumask allocated lazily on arch_tlbbatch_add_pending() only to
  avoid a potential leak in case of task cloning failure. That's mostly
  theoretical as the (mobile) systems affected are expected to be built
  with CPUMASK_OFFSTACK=n (the GKI kernel defaults to NR_CPUS=32)

The other Sashiko report about the DSB in arch_tlbbatch_add_pending() is
correct but it's not much we can do about it. I don't expect this to be
noticeable at all on systems with a small number of CPUs.

As with v4, there's no longer a global sme_active_cpus mask, reducing
the risk DoS from a malicious app using SME.

Erratum description:

Arm C1-Pro prior to r1p3 has an erratum (4193714) where a TLBI+DSB
sequence might fail to ensure the completion of all outstanding SME
(Scalable Matrix Extension) memory accesses. The DVMSync message is
acknowledged before the SME accesses have fully completed, potentially
allowing pages to be reused before all in-flight accesses are done.

The workaround consists of executing a DSB locally (via IPI)
on all affected CPUs running with SME enabled, after the TLB
invalidation. This ensures the SME accesses have completed before the
IPI is acknowledged.

This has been assigned CVE-2026-0995:

https://developer.arm.com/documentation/111823/latest/

Catalin Marinas (4):
  arm64: tlb: Introduce __tlbi_sync_s1ish_{kernel,batch}() for TLB
    maintenance
  arm64: tlb: Pass the corresponding mm to __tlbi_sync_s1ish()
  arm64: cputype: Add C1-Pro definitions
  arm64: errata: Work around early CME DVMSync acknowledgement

 Documentation/arch/arm64/silicon-errata.rst |  2 +
 arch/arm64/Kconfig                          | 12 +++
 arch/arm64/include/asm/cpucaps.h            |  2 +
 arch/arm64/include/asm/cputype.h            |  2 +
 arch/arm64/include/asm/fpsimd.h             | 21 +++++
 arch/arm64/include/asm/tlbbatch.h           | 10 ++-
 arch/arm64/include/asm/tlbflush.h           | 96 +++++++++++++++++++--
 arch/arm64/kernel/cpu_errata.c              | 30 +++++++
 arch/arm64/kernel/entry-common.c            |  3 +
 arch/arm64/kernel/fpsimd.c                  | 79 +++++++++++++++++
 arch/arm64/kernel/process.c                 | 36 ++++++++
 arch/arm64/kernel/sys_compat.c              |  2 +-
 arch/arm64/tools/cpucaps                    |  1 +
 13 files changed, 285 insertions(+), 11 deletions(-)

^ permalink raw reply

* [PATCH v5 2/4] arm64: tlb: Pass the corresponding mm to __tlbi_sync_s1ish()
From: Catalin Marinas @ 2026-04-07 10:28 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: Will Deacon, James Morse, Mark Rutland, Mark Brown
In-Reply-To: <20260407102848.2266988-1-catalin.marinas@arm.com>

The mm structure will be used for workarounds that need limiting to
specific tasks.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/tlbflush.h | 8 ++++----
 arch/arm64/kernel/sys_compat.c    | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index f41eebf00990..262791191935 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -185,7 +185,7 @@ do {										\
  * Complete broadcast TLB maintenance issued by the host which invalidates
  * stage 1 information in the host's own translation regime.
  */
-static inline void __tlbi_sync_s1ish(void)
+static inline void __tlbi_sync_s1ish(struct mm_struct *mm)
 {
 	dsb(ish);
 	__repeat_tlbi_sync(vale1is, 0);
@@ -323,7 +323,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 	asid = __TLBI_VADDR(0, ASID(mm));
 	__tlbi(aside1is, asid);
 	__tlbi_user(aside1is, asid);
-	__tlbi_sync_s1ish();
+	__tlbi_sync_s1ish(mm);
 	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
 }
 
@@ -377,7 +377,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 				  unsigned long uaddr)
 {
 	flush_tlb_page_nosync(vma, uaddr);
-	__tlbi_sync_s1ish();
+	__tlbi_sync_s1ish(vma->vm_mm);
 }
 
 static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
@@ -532,7 +532,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 {
 	__flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
 				 last_level, tlb_level);
-	__tlbi_sync_s1ish();
+	__tlbi_sync_s1ish(vma->vm_mm);
 }
 
 static inline void local_flush_tlb_contpte(struct vm_area_struct *vma,
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index b9d4998c97ef..03fde2677d5b 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -37,7 +37,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
 			 * We pick the reserved-ASID to minimise the impact.
 			 */
 			__tlbi(aside1is, __TLBI_VADDR(0, 0));
-			__tlbi_sync_s1ish();
+			__tlbi_sync_s1ish(current->mm);
 		}
 
 		ret = caches_clean_inval_user_pou(start, start + chunk);


^ permalink raw reply related

* [PATCH v5 3/4] arm64: cputype: Add C1-Pro definitions
From: Catalin Marinas @ 2026-04-07 10:28 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: Will Deacon, James Morse, Mark Rutland, Mark Brown
In-Reply-To: <20260407102848.2266988-1-catalin.marinas@arm.com>

Add cputype definitions for C1-Pro. These will be used for errata
detection in subsequent patches.

These values can be found in "Table A-303: MIDR_EL1 bit descriptions" in
issue 07 of the C1-Pro TRM:

  https://documentation-service.arm.com/static/6930126730f8f55a656570af

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: James Morse <james.morse@arm.com>
---
 arch/arm64/include/asm/cputype.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 08860d482e60..7b518e81dd15 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -98,6 +98,7 @@
 #define ARM_CPU_PART_CORTEX_A725	0xD87
 #define ARM_CPU_PART_CORTEX_A720AE	0xD89
 #define ARM_CPU_PART_NEOVERSE_N3	0xD8E
+#define ARM_CPU_PART_C1_PRO		0xD8B
 
 #define APM_CPU_PART_XGENE		0x000
 #define APM_CPU_VAR_POTENZA		0x00
@@ -189,6 +190,7 @@
 #define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725)
 #define MIDR_CORTEX_A720AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720AE)
 #define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3)
+#define MIDR_C1_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_C1_PRO)
 #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)


^ permalink raw reply related

* [PATCH v5 4/4] arm64: errata: Work around early CME DVMSync acknowledgement
From: Catalin Marinas @ 2026-04-07 10:28 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: Will Deacon, James Morse, Mark Rutland, Mark Brown
In-Reply-To: <20260407102848.2266988-1-catalin.marinas@arm.com>

C1-Pro acknowledges DVMSync messages before completing the SME/CME
memory accesses. Work around this by issuing an IPI to the affected CPUs
if they are running in EL0 with SME enabled.

Note that we avoid the local DSB in the IPI handler as the kernel runs
with SCTLR_EL1.IESB=1. This is sufficient to complete SME memory
accesses at EL0 on taking an exception to EL1. On the return to user
path, no barrier is necessary either. See the comment in
sme_set_active() and the more detailed explanation in the link below.

To avoid a potential IPI flood from malicious applications (e.g.
madvise(MADV_PAGEOUT) in a tight loop), track where a process is active
via mm_cpumask() and only interrupt those CPUs.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/ablEXwhfKyJW1i7l@J2N7QTR9R3
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Mark Brown <broonie@kernel.org>
---
 Documentation/arch/arm64/silicon-errata.rst |  2 +
 arch/arm64/Kconfig                          | 12 ++++
 arch/arm64/include/asm/cpucaps.h            |  2 +
 arch/arm64/include/asm/fpsimd.h             | 21 ++++++
 arch/arm64/include/asm/tlbbatch.h           | 10 ++-
 arch/arm64/include/asm/tlbflush.h           | 72 ++++++++++++++++++-
 arch/arm64/kernel/cpu_errata.c              | 30 ++++++++
 arch/arm64/kernel/entry-common.c            |  3 +
 arch/arm64/kernel/fpsimd.c                  | 79 +++++++++++++++++++++
 arch/arm64/kernel/process.c                 | 36 ++++++++++
 arch/arm64/tools/cpucaps                    |  1 +
 11 files changed, 264 insertions(+), 4 deletions(-)

diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index 4c300caad901..282ad4257983 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -202,6 +202,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Neoverse-V3AE   | #3312417        | ARM64_ERRATUM_3194386       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | C1-Pro          | #4193714        | ARM64_ERRATUM_4193714       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | MMU-500         | #841119,826419  | ARM_SMMU_MMU_500_CPRE_ERRATA|
 |                |                 | #562869,1047329 |                             |
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 38dba5f7e4d2..9b419f1a9ae6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1175,6 +1175,18 @@ config ARM64_ERRATUM_4311569
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_4193714
+	bool "C1-Pro: 4193714: SME DVMSync early acknowledgement"
+	depends on ARM64_SME
+	default y
+	help
+	  Enable workaround for C1-Pro acknowledging the DVMSync before
+	  the SME memory accesses are complete. This will cause TLB
+	  maintenance for processes using SME to also issue an IPI to
+	  the affected CPUs.
+
+	  If unsure, say Y.
+
 config CAVIUM_ERRATUM_22375
 	bool "Cavium erratum 22375, 24313"
 	default y
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 177c691914f8..0b1b78a4c03e 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -64,6 +64,8 @@ cpucap_is_possible(const unsigned int cap)
 		return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI);
 	case ARM64_WORKAROUND_SPECULATIVE_SSBS:
 		return IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386);
+	case ARM64_WORKAROUND_4193714:
+		return IS_ENABLED(CONFIG_ARM64_ERRATUM_4193714);
 	case ARM64_MPAM:
 		/*
 		 * KVM MPAM support doesn't rely on the host kernel supporting MPAM.
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 1d2e33559bd5..d9d00b45ab11 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -428,6 +428,24 @@ static inline size_t sme_state_size(struct task_struct const *task)
 	return __sme_state_size(task_get_sme_vl(task));
 }
 
+void sme_enable_dvmsync(void);
+void sme_set_active(void);
+void sme_clear_active(void);
+
+static inline void sme_enter_from_user_mode(void)
+{
+	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714) &&
+	    test_thread_flag(TIF_SME))
+		sme_clear_active();
+}
+
+static inline void sme_exit_to_user_mode(void)
+{
+	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714) &&
+	    test_thread_flag(TIF_SME))
+		sme_set_active();
+}
+
 #else
 
 static inline void sme_user_disable(void) { BUILD_BUG(); }
@@ -456,6 +474,9 @@ static inline size_t sme_state_size(struct task_struct const *task)
 	return 0;
 }
 
+static inline void sme_enter_from_user_mode(void) { }
+static inline void sme_exit_to_user_mode(void) { }
+
 #endif /* ! CONFIG_ARM64_SME */
 
 /* For use by EFI runtime services calls only */
diff --git a/arch/arm64/include/asm/tlbbatch.h b/arch/arm64/include/asm/tlbbatch.h
index fedb0b87b8db..6297631532e5 100644
--- a/arch/arm64/include/asm/tlbbatch.h
+++ b/arch/arm64/include/asm/tlbbatch.h
@@ -2,11 +2,17 @@
 #ifndef _ARCH_ARM64_TLBBATCH_H
 #define _ARCH_ARM64_TLBBATCH_H
 
+#include <linux/cpumask.h>
+
 struct arch_tlbflush_unmap_batch {
+#ifdef CONFIG_ARM64_ERRATUM_4193714
 	/*
-	 * For arm64, HW can do tlb shootdown, so we don't
-	 * need to record cpumask for sending IPI
+	 * Track CPUs that need SME DVMSync on completion of this batch.
+	 * Otherwise, the arm64 HW can do tlb shootdown, so we don't need to
+	 * record cpumask for sending IPI
 	 */
+	cpumask_var_t cpumask;
+#endif
 };
 
 #endif /* _ARCH_ARM64_TLBBATCH_H */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 262791191935..4aae42b83049 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -80,6 +80,71 @@ static inline unsigned long get_trans_granule(void)
 	}
 }
 
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+
+void sme_do_dvmsync(const struct cpumask *mask);
+
+static inline void sme_dvmsync(struct mm_struct *mm)
+{
+	if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+		return;
+
+	sme_do_dvmsync(mm_cpumask(mm));
+}
+
+static inline void sme_dvmsync_add_pending(struct arch_tlbflush_unmap_batch *batch,
+					   struct mm_struct *mm)
+{
+	if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+		return;
+
+	/*
+	 * Order the mm_cpumask() read after the hardware DVMSync.
+	 */
+	dsb(ish);
+	if (cpumask_empty(mm_cpumask(mm)))
+		return;
+
+	/*
+	 * Allocate the batch cpumask on first use. Fall back to an immediate
+	 * IPI for this mm in case of failure.
+	 */
+	if (!cpumask_available(batch->cpumask) &&
+	    !zalloc_cpumask_var(&batch->cpumask, GFP_ATOMIC)) {
+		sme_do_dvmsync(mm_cpumask(mm));
+		return;
+	}
+
+	cpumask_or(batch->cpumask, batch->cpumask, mm_cpumask(mm));
+}
+
+static inline void sme_dvmsync_batch(struct arch_tlbflush_unmap_batch *batch)
+{
+	if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+		return;
+
+	if (!cpumask_available(batch->cpumask))
+		return;
+
+	sme_do_dvmsync(batch->cpumask);
+	cpumask_clear(batch->cpumask);
+}
+
+#else
+
+static inline void sme_dvmsync(struct mm_struct *mm)
+{
+}
+static inline void sme_dvmsync_add_pending(struct arch_tlbflush_unmap_batch *batch,
+					   struct mm_struct *mm)
+{
+}
+static inline void sme_dvmsync_batch(struct arch_tlbflush_unmap_batch *batch)
+{
+}
+
+#endif /* CONFIG_ARM64_ERRATUM_4193714 */
+
 /*
  * Level-based TLBI operations.
  *
@@ -189,12 +254,14 @@ static inline void __tlbi_sync_s1ish(struct mm_struct *mm)
 {
 	dsb(ish);
 	__repeat_tlbi_sync(vale1is, 0);
+	sme_dvmsync(mm);
 }
 
-static inline void __tlbi_sync_s1ish_batch(void)
+static inline void __tlbi_sync_s1ish_batch(struct arch_tlbflush_unmap_batch *batch)
 {
 	dsb(ish);
 	__repeat_tlbi_sync(vale1is, 0);
+	sme_dvmsync_batch(batch);
 }
 
 static inline void __tlbi_sync_s1ish_kernel(void)
@@ -397,7 +464,7 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
  */
 static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
-	__tlbi_sync_s1ish_batch();
+	__tlbi_sync_s1ish_batch(batch);
 }
 
 /*
@@ -602,6 +669,7 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
 		struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	__flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3);
+	sme_dvmsync_add_pending(batch, mm);
 }
 
 static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 5c0ab6bfd44a..5377e4c2eba2 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -11,6 +11,7 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
 #include <asm/kvm_asm.h>
 #include <asm/smp_plat.h>
 
@@ -575,6 +576,23 @@ static const struct midr_range erratum_spec_ssbs_list[] = {
 };
 #endif
 
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+static bool has_sme_dvmsync_erratum(const struct arm64_cpu_capabilities *entry,
+				    int scope)
+{
+	if (!id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)))
+		return false;
+
+	return is_affected_midr_range(entry, scope);
+}
+
+static void cpu_enable_sme_dvmsync(const struct arm64_cpu_capabilities *__unused)
+{
+	if (this_cpu_has_cap(ARM64_WORKAROUND_4193714))
+		sme_enable_dvmsync();
+}
+#endif
+
 #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
 static const struct midr_range erratum_ac03_cpu_38_list[] = {
 	MIDR_ALL_VERSIONS(MIDR_AMPERE1),
@@ -901,6 +919,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.matches = need_arm_si_l1_workaround_4311569,
 	},
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+	{
+		.desc = "C1-Pro SME DVMSync early acknowledgement",
+		.capability = ARM64_WORKAROUND_4193714,
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		.matches = has_sme_dvmsync_erratum,
+		.cpu_enable = cpu_enable_sme_dvmsync,
+		/* C1-Pro r0p0 - r1p2 (the latter only when REVIDR_EL1[0]==0) */
+		.midr_range = MIDR_RANGE(MIDR_C1_PRO, 0, 0, 1, 2),
+		MIDR_FIXED(MIDR_CPU_VAR_REV(1, 2), BIT(0)),
+	},
+#endif
 #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
 	{
 		.desc = "ARM errata 2966298, 3117295",
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 3625797e9ee8..fb1e374af622 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -21,6 +21,7 @@
 #include <asm/daifflags.h>
 #include <asm/esr.h>
 #include <asm/exception.h>
+#include <asm/fpsimd.h>
 #include <asm/irq_regs.h>
 #include <asm/kprobes.h>
 #include <asm/mmu.h>
@@ -67,6 +68,7 @@ static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs)
 {
 	enter_from_user_mode(regs);
 	mte_disable_tco_entry(current);
+	sme_enter_from_user_mode();
 }
 
 /*
@@ -80,6 +82,7 @@ static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
 	local_irq_disable();
 	exit_to_user_mode_prepare_legacy(regs);
 	local_daif_mask();
+	sme_exit_to_user_mode();
 	mte_check_tfsr_exit();
 	exit_to_user_mode();
 }
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 9de1d8a604cb..60a45d600b46 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -15,6 +15,7 @@
 #include <linux/compiler.h>
 #include <linux/cpu.h>
 #include <linux/cpu_pm.h>
+#include <linux/cpumask.h>
 #include <linux/ctype.h>
 #include <linux/kernel.h>
 #include <linux/linkage.h>
@@ -28,6 +29,7 @@
 #include <linux/sched/task_stack.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/smp.h>
 #include <linux/stddef.h>
 #include <linux/sysctl.h>
 #include <linux/swab.h>
@@ -1358,6 +1360,83 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs)
 	put_cpu_fpsimd_context();
 }
 
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+
+/*
+ * SME/CME erratum handling.
+ */
+static cpumask_t sme_dvmsync_cpus;
+
+/*
+ * These helpers are only called from non-preemptible contexts, so
+ * smp_processor_id() is safe here.
+ */
+void sme_set_active(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	if (!cpumask_test_cpu(cpu, &sme_dvmsync_cpus))
+		return;
+
+	cpumask_set_cpu(cpu, mm_cpumask(current->mm));
+
+	/*
+	 * A subsequent (post ERET) SME access may use a stale address
+	 * translation. On C1-Pro, a TLBI+DSB on a different CPU will wait for
+	 * the completion of cpumask_set_cpu() above as it appears in program
+	 * order before the SME access. The post-TLBI+DSB read of mm_cpumask()
+	 * will lead to the IPI being issued.
+	 *
+	 * https://lore.kernel.org/r/ablEXwhfKyJW1i7l@J2N7QTR9R3
+	 */
+}
+
+void sme_clear_active(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	if (!cpumask_test_cpu(cpu, &sme_dvmsync_cpus))
+		return;
+
+	/*
+	 * With SCTLR_EL1.IESB enabled, the SME memory transactions are
+	 * completed on entering EL1.
+	 */
+	cpumask_clear_cpu(cpu, mm_cpumask(current->mm));
+}
+
+static void sme_dvmsync_ipi(void *unused)
+{
+	/*
+	 * With SCTLR_EL1.IESB on, taking an exception is sufficient to ensure
+	 * the completion of the SME memory accesses, so no need for an
+	 * explicit DSB.
+	 */
+}
+
+void sme_do_dvmsync(const struct cpumask *mask)
+{
+	/*
+	 * This is called from the TLB maintenance functions after the DSB ISH
+	 * to send the hardware DVMSync message. If this CPU sees the mask as
+	 * empty, the remote CPU executing sme_set_active() would have seen
+	 * the DVMSync and no IPI required.
+	 */
+	if (cpumask_empty(mask))
+		return;
+
+	preempt_disable();
+	smp_call_function_many(mask, sme_dvmsync_ipi, NULL, true);
+	preempt_enable();
+}
+
+void sme_enable_dvmsync(void)
+{
+	cpumask_set_cpu(smp_processor_id(), &sme_dvmsync_cpus);
+}
+
+#endif /* CONFIG_ARM64_ERRATUM_4193714 */
+
 /*
  * Trapped SME access
  *
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 489554931231..4c328b7c79ba 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -26,6 +26,7 @@
 #include <linux/reboot.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/cpumask.h>
 #include <linux/cpu.h>
 #include <linux/elfcore.h>
 #include <linux/pm.h>
@@ -339,8 +340,41 @@ void flush_thread(void)
 	flush_gcs();
 }
 
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+
+static void arch_dup_tlbbatch_mask(struct task_struct *dst)
+{
+	/*
+	 * Clear the inherited cpumask with memset() to cover both cases where
+	 * cpumask_var_t is a pointer or an array. It will be allocated lazily
+	 * in sme_dvmsync_add_pending() if CPUMASK_OFFSTACK=y.
+	 */
+	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+		memset(&dst->tlb_ubc.arch.cpumask, 0,
+		       sizeof(dst->tlb_ubc.arch.cpumask));
+}
+
+static void arch_release_tlbbatch_mask(struct task_struct *tsk)
+{
+	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+		free_cpumask_var(tsk->tlb_ubc.arch.cpumask);
+}
+
+#else
+
+static void arch_dup_tlbbatch_mask(struct task_struct *dst)
+{
+}
+
+static void arch_release_tlbbatch_mask(struct task_struct *tsk)
+{
+}
+
+#endif /* CONFIG_ARM64_ERRATUM_4193714 */
+
 void arch_release_task_struct(struct task_struct *tsk)
 {
+	arch_release_tlbbatch_mask(tsk);
 	fpsimd_release_task(tsk);
 }
 
@@ -356,6 +390,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
 	*dst = *src;
 
+	arch_dup_tlbbatch_mask(dst);
+
 	/*
 	 * Drop stale reference to src's sve_state and convert dst to
 	 * non-streaming FPSIMD mode.
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 7261553b644b..8946be60a409 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -105,6 +105,7 @@ WORKAROUND_2077057
 WORKAROUND_2457168
 WORKAROUND_2645198
 WORKAROUND_2658417
+WORKAROUND_4193714
 WORKAROUND_4311569
 WORKAROUND_AMPERE_AC03_CPU_38
 WORKAROUND_AMPERE_AC04_CPU_23


^ permalink raw reply related

* Re: [PATCH] coresight: tpdm: fix invalid MMIO access issue
From: Leo Yan @ 2026-04-07 10:35 UTC (permalink / raw)
  To: Jie Gan
  Cc: Suzuki K Poulose, Mike Leach, James Clark, Alexander Shishkin,
	Tingwei Zhang, coresight, linux-arm-kernel, linux-kernel
In-Reply-To: <0a4911f4-168b-48c4-9554-3962d6e96efa@oss.qualcomm.com>

Hi Jie,

On Tue, Apr 07, 2026 at 04:33:22PM +0800, Jie Gan wrote:
> On 4/7/2026 4:10 PM, Leo Yan wrote:
> > On Tue, Apr 07, 2026 at 12:47:11PM +0800, Jie Gan wrote:
> > > Create the csdev_access struct only when a valid MMIO resource is
> > > available. In tpdm_probe(), base is uninitialized for static TPDM
> > > instances that lack an MMIO resource, causing csdev_access to be
> > > created with a garbage address and potentially leading to
> > > unexpected issues.
> > 
> > This patch itself is fine for me.  However, I am wandering if this
> > is sufficient.
> > 
> > As mentioned "potentially leading to unexpected issues", can I
> > understand some code pieces access register with uninitialized base?
> > If so, you would also explictly add coresight_is_static_tpdm() to
> > prevent register access.
> > 
> 
> Actually, we havent MMIO access for the static TPDM device, So no issues are
> observed. The commit message here may be misleading. do I need rephrase the
> commit message?

Yes, good to clarify a bit in commit log:

  "So far there has no register access for static instance, but this
   change helps mitigate potential risks in the future."

With this:

Reviewed-by: Leo Yan <leo.yan@arm.com>


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox