Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 4/8] ARM: zte: Add support for zx29 low level debug
From: Stefan Dösinger @ 2026-04-16 20:19 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Russell King, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Arnd Bergmann,
	Krzysztof Kozlowski, Alexandre Belloni, Linus Walleij,
	Drew Fustini, Greg Kroah-Hartman, Jiri Slaby
  Cc: linux-doc, linux-kernel, linux-arm-kernel, devicetree, soc,
	linux-serial, Stefan Dösinger
In-Reply-To: <20260416-send-v4-0-e19d02b944ec@gmail.com>

This is based on the removed zx29 code. A separate (more complicated)
patch will re-add the register map to the pl011 serial driver.

Signed-off-by: Stefan Dösinger <stefandoesinger@gmail.com>

---

I am unsure about the virtual address. It doesn't seem to matter, as
long as it is a valid address. This address is based on the old removed
code. Is there a rule-of-thumb physical to virtual mapping I can use to
give a sensible default value?
---
 arch/arm/Kconfig.debug         | 12 ++++++++++++
 arch/arm/include/debug/pl01x.S |  7 +++++++
 2 files changed, 19 insertions(+)

diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 366f162e147d..98d8a5a60048 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -1331,6 +1331,16 @@ choice
 		  This option selects UART0 on VIA/Wondermedia System-on-a-chip
 		  devices, including VT8500, WM8505, WM8650 and WM8850.
 
+	config DEBUG_ZTE_ZX
+		bool "Kernel low-level debugging via zx29 UART"
+		select DEBUG_UART_PL01X
+		depends on ARCH_ZTE
+		help
+		  Say Y here if you are enabling ZTE zx297520v3 SOC and need
+		  debug UART support. This UART is a PL011 with different
+		  register addresses. The UART for boot messages on zx29 boards
+		  is usually UART1 and is operating at 921600 8N1.
+
 	config DEBUG_ZYNQ_UART0
 		bool "Kernel low-level debugging on Xilinx Zynq using UART0"
 		depends on ARCH_ZYNQ
@@ -1545,6 +1555,7 @@ config DEBUG_UART_8250
 
 config DEBUG_UART_PHYS
 	hex "Physical base address of debug UART"
+	default 0x01408000 if DEBUG_ZTE_ZX
 	default 0x01c28000 if DEBUG_SUNXI_UART0
 	default 0x01c28400 if DEBUG_SUNXI_UART1
 	default 0x01d0c000 if DEBUG_DAVINCI_DA8XX_UART1
@@ -1701,6 +1712,7 @@ config DEBUG_UART_VIRT
 	default 0xf31004c0 if DEBUG_MESON_UARTAO
 	default 0xf4090000 if DEBUG_LPC32XX
 	default 0xf4200000 if DEBUG_GEMINI
+	default 0xf4708000 if DEBUG_ZTE_ZX
 	default 0xf6200000 if DEBUG_PXA_UART1
 	default 0xf7000000 if DEBUG_SUN9I_UART0
 	default 0xf7000000 if DEBUG_S3C64XX_UART && DEBUG_S3C_UART0
diff --git a/arch/arm/include/debug/pl01x.S b/arch/arm/include/debug/pl01x.S
index c7e02d0628bf..0c7bfa4c10db 100644
--- a/arch/arm/include/debug/pl01x.S
+++ b/arch/arm/include/debug/pl01x.S
@@ -8,6 +8,13 @@
 */
 #include <linux/amba/serial.h>
 
+#ifdef CONFIG_DEBUG_ZTE_ZX
+#undef UART01x_DR
+#undef UART01x_FR
+#define UART01x_DR     0x04
+#define UART01x_FR     0x14
+#endif
+
 #ifdef CONFIG_DEBUG_UART_PHYS
 		.macro	addruart, rp, rv, tmp
 		ldr	\rp, =CONFIG_DEBUG_UART_PHYS

-- 
2.52.0



^ permalink raw reply related

* [PATCH v4 3/8] ARM: dts: Add D-Link DWR-932M support
From: Stefan Dösinger @ 2026-04-16 20:19 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Russell King, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Arnd Bergmann,
	Krzysztof Kozlowski, Alexandre Belloni, Linus Walleij,
	Drew Fustini, Greg Kroah-Hartman, Jiri Slaby
  Cc: linux-doc, linux-kernel, linux-arm-kernel, devicetree, soc,
	linux-serial, Stefan Dösinger
In-Reply-To: <20260416-send-v4-0-e19d02b944ec@gmail.com>

This adds DT bindings for zx297520v3 and one board that consumes it.

Signed-off-by: Stefan Dösinger <stefandoesinger@gmail.com>
---
 MAINTAINERS                              |  1 +
 arch/arm/boot/dts/Makefile               |  1 +
 arch/arm/boot/dts/zte/Makefile           |  3 +++
 arch/arm/boot/dts/zte/dlink-dwr-932m.dts | 21 ++++++++++++++++++
 arch/arm/boot/dts/zte/zx297520v3.dtsi    | 37 ++++++++++++++++++++++++++++++++
 5 files changed, 63 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bcade90ca14e..f7ca0d478e81 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -29203,6 +29203,7 @@ F:	tools/testing/selftests/cgroup/test_zswap.c
 ZX29
 M:	Stefan Dösinger <stefandoesinger@gmail.com>
 F:	Documentation/devicetree/bindings/arm/zte.yaml
+F:	arch/arm/boot/dts/zte
 F:	arch/arm/mach-zte/
 
 SENARYTECH AUDIO CODEC DRIVER
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index efe38eb25301..28fba538d552 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -39,3 +39,4 @@ subdir-y += unisoc
 subdir-y += vt8500
 subdir-y += xen
 subdir-y += xilinx
+subdir-y += zte
diff --git a/arch/arm/boot/dts/zte/Makefile b/arch/arm/boot/dts/zte/Makefile
new file mode 100644
index 000000000000..416c24a489cd
--- /dev/null
+++ b/arch/arm/boot/dts/zte/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_SOC_ZX297520V3) += \
+	dlink-dwr-932m.dtb
diff --git a/arch/arm/boot/dts/zte/dlink-dwr-932m.dts b/arch/arm/boot/dts/zte/dlink-dwr-932m.dts
new file mode 100644
index 000000000000..7b2a26aaaecb
--- /dev/null
+++ b/arch/arm/boot/dts/zte/dlink-dwr-932m.dts
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * D-Link DWR-932M Board
+ *
+ * (C) Copyright 2026 Stefan Dösinger
+ *
+ */
+
+/dts-v1/;
+
+#include "zx297520v3.dtsi"
+
+/ {
+	model = "D-Link DWR-932M";
+	compatible = "dlink,dwr932m", "zte,zx297520v3";
+
+	memory@20000000 {
+		device_type = "memory";
+		reg = <0x20000000 0x04000000>;
+	};
+};
diff --git a/arch/arm/boot/dts/zte/zx297520v3.dtsi b/arch/arm/boot/dts/zte/zx297520v3.dtsi
new file mode 100644
index 000000000000..d6c71d52b26c
--- /dev/null
+++ b/arch/arm/boot/dts/zte/zx297520v3.dtsi
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0>;
+		};
+	};
+
+	soc {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "simple-bus";
+		interrupt-parent = <&gic>;
+		ranges;
+
+		gic: interrupt-controller@f2000000 {
+			compatible = "arm,gic-v3";
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0xf2000000 0x10000>,
+			      <0xf2040000 0x20000>;
+		};
+	};
+};

-- 
2.52.0



^ permalink raw reply related

* [PATCH v4 2/8] dt-bindings: arm: Add zx297520v3 board binding
From: Stefan Dösinger @ 2026-04-16 20:19 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Russell King, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Arnd Bergmann,
	Krzysztof Kozlowski, Alexandre Belloni, Linus Walleij,
	Drew Fustini, Greg Kroah-Hartman, Jiri Slaby
  Cc: linux-doc, linux-kernel, linux-arm-kernel, devicetree, soc,
	linux-serial, Stefan Dösinger
In-Reply-To: <20260416-send-v4-0-e19d02b944ec@gmail.com>

Add a compatible for boards based on the ZTE zx297520v3 SoC.

Signed-off-by: Stefan Dösinger <stefandoesinger@gmail.com>

---

The list of devices is the devices I have access to for testing. There
are many more devices based on this board and it is not always easy to
identify them. Often they are sold without any branding ("4G home
router") or with mobile carrier branding.
---
 Documentation/devicetree/bindings/arm/zte.yaml | 25 +++++++++++++++++++++++++
 MAINTAINERS                                    |  1 +
 2 files changed, 26 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/zte.yaml b/Documentation/devicetree/bindings/arm/zte.yaml
new file mode 100644
index 000000000000..6eba09edd2c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/zte.yaml
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/zte.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ZTE zx29
+
+maintainers:
+  - Stefan Dösinger <stefandoesinger@gmail.com>
+
+properties:
+  $nodename:
+    const: "/"
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+            - dlink,dwr932m
+            - hgsd,r310
+            - tecno,tr118
+            - zte,k10
+          - const: zte,zx297520v3
+
+additionalProperties: true
diff --git a/MAINTAINERS b/MAINTAINERS
index 974d7a98956a..bcade90ca14e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -29202,6 +29202,7 @@ F:	tools/testing/selftests/cgroup/test_zswap.c
 
 ZX29
 M:	Stefan Dösinger <stefandoesinger@gmail.com>
+F:	Documentation/devicetree/bindings/arm/zte.yaml
 F:	arch/arm/mach-zte/
 
 SENARYTECH AUDIO CODEC DRIVER

-- 
2.52.0



^ permalink raw reply related

* [PATCH v4 1/8] ARM: zte: Add zx297520v3 platform support
From: Stefan Dösinger @ 2026-04-16 20:19 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Russell King, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Arnd Bergmann,
	Krzysztof Kozlowski, Alexandre Belloni, Linus Walleij,
	Drew Fustini, Greg Kroah-Hartman, Jiri Slaby
  Cc: linux-doc, linux-kernel, linux-arm-kernel, devicetree, soc,
	linux-serial, Stefan Dösinger
In-Reply-To: <20260416-send-v4-0-e19d02b944ec@gmail.com>

This SoC is used in low end LTE-to-WiFi routers, for example some D-Link
DWR 932 revisions, ZTE K10, ZLT S10 4G, but also models that are branded
and sold by ISPs themselves. They are widespread in Africa, China,
Russia and Eastern Europe.

This SoC is a relative of the zx296702 and zx296718 that had some
upstream support until commit 89d4f98ae90d ("ARM: remove zte zx
platform"). My eventual goal is to enable OpenWRT to run on these
devices.

Signed-off-by: Stefan Dösinger <stefandoesinger@gmail.com>
---
 Documentation/arch/arm/zte/zx297520v3.rst | 158 ++++++++++++++++++++++++++++++
 MAINTAINERS                               |   4 +
 arch/arm/Kconfig                          |   2 +
 arch/arm/Makefile                         |   1 +
 arch/arm/mach-zte/Kconfig                 |  24 +++++
 arch/arm/mach-zte/Makefile                |   2 +
 arch/arm/mach-zte/zx297520v3.c            |  19 ++++
 7 files changed, 210 insertions(+)

diff --git a/Documentation/arch/arm/zte/zx297520v3.rst b/Documentation/arch/arm/zte/zx297520v3.rst
new file mode 100644
index 000000000000..a0f25ade0a3d
--- /dev/null
+++ b/Documentation/arch/arm/zte/zx297520v3.rst
@@ -0,0 +1,158 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================
+Booting Linux on ZTE zx297520v3 SoCs
+====================================
+
+...............................................................................
+
+Author:	Stefan Dösinger
+
+Date  : 27 Jan 2026
+
+1. Hardware description
+---------------------------
+Zx297520v3 SoCs use a 64 bit capable Cortex-A53 CPU and GICv3, although they
+run in aarch32 mode only. The CPU supports EL3, but has no hypervisor (EL2)
+and it seems to lack VFP and NEON.
+
+The SoC is used in a number of cheap LTE to Wifi routers, both battery powered
+MiFis and stationary CPEs. In addition to the CPU these devices usually have
+64 MB RAM (although some is shared with the LTE chip), 128 MB NAND flash, an
+SDIO connected RTL8192-type Wifi chip limited to 2.4 GHz operation, USB 2,
+and buttons. Devices with as little as 32 MB or as much as 128 MB RAM exist,
+as do devices with 8 or 16 MB of NOR flash.
+
+Some devices, especially the stationary ones, have 100 Mbit Ethernet and an
+Ethernet switch.
+
+Usually the devices have LEDs for status indication, although some have SPI or
+I2C connected displays.
+
+Some have an SD card slot. If it exists, it is a better choice for the root
+file system because it easily outperforms the built-in NAND.
+
+The LTE interface runs on a separate DSP called ZSP880. It is probably derived
+from LSI ZSPs and has an undocumented instruction set. The ZSP communicates
+with the main CPU via SRAM and DRAM and a mailbox hardware that can generate
+IRQs on either end.
+
+There is also a Cortex M0 CPU, which is responsible for early HW initialization
+and starting the Cortex A53 CPU. It does not have any essential purpose once
+U-Boot is started. A SRAM-Based handover protocol exists to run custom code on
+this CPU.
+
+2. Booting via USB
+---------------------------
+
+The Boot ROM has support for booting custom code via USB. This mode can be
+entered by connecting a Boot PIN to GND or by modifying the third byte on NAND
+(set it to anything other than 0x5A aka 'Z'). A free software tool to start
+custom U-Boot and kernels can be found here:
+
+https://github.com/zx297520v3-mainline/zx297520v3-loader
+
+If USB download mode is entered but no boot commands are sent through USB, the
+device will proceed to boot normally after a few seconds. It is therefore
+possible to enable USB boot permanently and still leave the default boot files
+in place.
+
+3. Building for built-in U-Boot
+---------------------------
+The devices come with an ancient U-Boot that loads legacy uImages from NAND and
+boots them without a chance for the user to interrupt. The images are stored in
+files ap_cpuap.bin and ap_recovery.bin on a jffs2 partition named imagefs,
+usually mtd4. A file named "fotaflag" switches between the two modes.
+
+In addition to the uImage header, those files have a 384 byte signature header,
+which is used for authenticating the images on some devices. Most devices have
+this authentication disabled and it is enough to pad the uImage files with 384
+zero bytes.
+
+Builtin U-Boot also poorly sets up the CPU. Read the next section for details
+on this. It has no support for loading DTBs, so CONFIG_ARM_APPENDED_DTB is
+needed.
+
+So to build an image that boots from NAND the following steps are necessary:
+
+1) Patch the assembly code from section 4 into arch/arm/kernel/head.S.
+2) make zx29_defconfig
+3) make [-j x]
+4) cat arch/arm/boot/zImage arch/arm/boot/dts/zte/[device].dtb > kernel+dtb
+5) mkimage -A arm -O linux -T kernel -C none -a 0x20008000 -d kernel+dtb uimg
+6) dd if=/dev/zero bs=1 count=384 of=ap_recovery.bin
+7) cat uimg >> ap_recovery.bin
+8) Place this file onto imagefs on the device. Delete ap_cpuap.bin if the
+free space is not enough.
+9) Create the file fotaflag: echo -n FOTA-RECOVERY > fotaflag
+
+For development, booting ap_recovery.bin is recommended because the normal boot
+mode arms the watchdog before starting the kernel.
+
+4. CPU and GIC Setup
+---------------------------
+
+Generally CPU and GICv3 need to be set up according to the requirements spelled
+out in Documentation/arch/arm64/booting.rst. For zx297520v3 this means:
+
+1. GICD_CTLR.DS=1 to disable GIC security
+2. Enable access to ICC_SRE
+3. Disable trapping IRQs into monitor mode
+4. Configure EL2 and below to run in non-secure mode.
+5. Configure timer PPIs to active-low.
+
+The kernel sources provided by ZTE do not boot either (interrupts do not work
+at all). They are incomplete in other aspects too, so it is assumed that there
+is some workaround similar to the one described in this document somewhere in
+the binary blobs.
+
+The assembly code below is given as an example of how to achieve this:
+
+```
+#include <linux/irqchip/arm-gic-v3.h>
+#include <asm/assembler.h>
+#include <asm/cp15.h>
+
+@ This allows EL1 to handle interrupts that are normally handled by EL2/3.
+ldr     r3, =0xf2000000
+ldr     r4, =#(GICD_CTLR_ARE_NS | GICD_CTLR_DS)
+str     r4, [r3]
+
+cps     #MON_MODE
+
+@ Work in non-secure physical address space: SCR_EL3.NS = 1. At least the UART
+@ seems to respond only to non-secure addresses. I have taken inspiration from
+@ Raspberry pi's armstub7.S here.
+@
+@ ARM docs say modify this bit in monitor mode only...
+mov	r3, #0x131			@ non-secure, Make F, A bits in CPSR writeable
+					@ Allow hypervisor call.
+mcr     p15, 0, r3, c1, c1, 0
+
+@ AP_PPI_MODE_REG: Configure timer PPIs (10, 11, 13, 14) to active-low.
+ldr	r3, =0xF22020a8
+ldr	r4, =0x50
+str	r4, [r3]
+ldr	r3, =0xF22020ac
+ldr	r4, =0x14
+str	r4, [r3]
+
+@ Enable EL2 access to ICC_SRE (bit 3, ICC_SRE_EL3.Enable). Enable system reg
+@ access to GICv3 registers (bit 0, ICC_SRE_EL3.SRE) for EL1 and EL3.
+mrc     p15, 6, r3, c12, c12, 5         @ ICC_SRE_EL3
+orr     r3, #0x9                        @ FIXME: No defines for SRE_EL3 values?
+mcr     p15, 6, r3, c12, c12, 5
+mrc     p15, 0, r3, c12, c12, 5         @ ICC_SRE_EL1
+orr     r3, #(ICC_SRE_EL1_SRE)
+mcr     p15, 0, r3, c12, c12, 5
+
+@ Like ICC_SRE_EL3, enable EL1 access to ICC_SRE and system register access
+@ for EL2.
+mrc     p15, 4, r3, c12, c9, 5          @ ICC_SRE_EL2 aka ICC_HSRE
+orr     r3, r3, #(ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE)
+mcr     p15, 4, r3, c12, c9, 5
+isb
+
+@ Back to SVC mode. TODO: Doesn't safe_svcmode_maskall do this for us anyway?
+cps     #SVC_MODE
+```
diff --git a/MAINTAINERS b/MAINTAINERS
index d1cc0e12fe1f..974d7a98956a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -29200,6 +29200,10 @@ F:	include/linux/zswap.h
 F:	mm/zswap.c
 F:	tools/testing/selftests/cgroup/test_zswap.c
 
+ZX29
+M:	Stefan Dösinger <stefandoesinger@gmail.com>
+F:	arch/arm/mach-zte/
+
 SENARYTECH AUDIO CODEC DRIVER
 M:	bo liu <bo.liu@senarytech.com>
 S:	Maintained
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ec33376f8e2b..4217ed704e48 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -464,6 +464,8 @@ source "arch/arm/mach-versatile/Kconfig"
 
 source "arch/arm/mach-vt8500/Kconfig"
 
+source "arch/arm/mach-zte/Kconfig"
+
 source "arch/arm/mach-zynq/Kconfig"
 
 # ARMv7-M architecture
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index b7de4b6b284c..573813ef5e77 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -223,6 +223,7 @@ machine-$(CONFIG_ARCH_SUNXI)		+= sunxi
 machine-$(CONFIG_ARCH_TEGRA)		+= tegra
 machine-$(CONFIG_ARCH_U8500)		+= ux500
 machine-$(CONFIG_ARCH_VT8500)		+= vt8500
+machine-$(CONFIG_ARCH_ZTE)		+= zte
 machine-$(CONFIG_ARCH_ZYNQ)		+= zynq
 machine-$(CONFIG_PLAT_VERSATILE)	+= versatile
 machine-$(CONFIG_PLAT_SPEAR)		+= spear
diff --git a/arch/arm/mach-zte/Kconfig b/arch/arm/mach-zte/Kconfig
new file mode 100644
index 000000000000..24699256863b
--- /dev/null
+++ b/arch/arm/mach-zte/Kconfig
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+menuconfig ARCH_ZTE
+	bool "ZTE zx family"
+	depends on ARCH_MULTI_V7
+	help
+	  Support for ZTE zx-based family of processors.
+
+if ARCH_ZTE
+
+config SOC_ZX297520V3
+	default y if ARCH_ZTE
+	bool "ZX297520v3"
+	select ARM_GIC_V3
+	select ARM_AMBA
+	select HAVE_ARM_ARCH_TIMER
+	select PM_GENERIC_DOMAINS if PM
+	help
+	  Support for ZTE zx297520v3 SoC. It is a single core SoC used in cheap LTE to WiFi routers.
+	  These devices can be identified by the occurrence of the string "zx297520v3" in the boot
+	  output and /proc/cpuinfo of their stock firmware.
+
+	  Please read Documentation/arch/arm/zte/zx297520v3.rst on how to boot the kernel.
+
+endif
diff --git a/arch/arm/mach-zte/Makefile b/arch/arm/mach-zte/Makefile
new file mode 100644
index 000000000000..1bfe4fddd6af
--- /dev/null
+++ b/arch/arm/mach-zte/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_SOC_ZX297520V3) += zx297520v3.o
diff --git a/arch/arm/mach-zte/zx297520v3.c b/arch/arm/mach-zte/zx297520v3.c
new file mode 100644
index 000000000000..c11c7e836f91
--- /dev/null
+++ b/arch/arm/mach-zte/zx297520v3.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2026 Stefan Dösinger
+ */
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+
+static const char *const zx297520v3_dt_compat[] __initconst = {
+	"zte,zx297520v3",
+	NULL,
+};
+
+DT_MACHINE_START(ZX, "ZTE zx297520v3 (Device Tree)")
+	.dt_compat	= zx297520v3_dt_compat,
+MACHINE_END

-- 
2.52.0



^ permalink raw reply related

* [PATCH v4 0/8] Add support for ZTE zx297520v3
From: Stefan Dösinger @ 2026-04-16 20:19 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Russell King, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Arnd Bergmann,
	Krzysztof Kozlowski, Alexandre Belloni, Linus Walleij,
	Drew Fustini, Greg Kroah-Hartman, Jiri Slaby
  Cc: linux-doc, linux-kernel, linux-arm-kernel, devicetree, soc,
	linux-serial, Stefan Dösinger

Hi,

This is a follow-up on my RFC patches from January [0] for ZTE's 
zx297520v3 chipset. This chipset is popular in cheap LTE-to-wifi routers 
sold in developing countries. My goal is to run OpenWRT on them. I have 
made further progress on this SoC and it is time to get serious 
about code review and upstreaming.

Since my version in January I managed to get more hardware running: SPI, 
I2C, PMIC with real time clock and voltage regulators, Watchdog. LTE is 
not working yet, but I am able to start the coprocessor that handles it 
and talk to it via mailbox + shared memory. Wifi is working on a few 
more devices. Since WiFi, USB and Ethernet are working, the devices can 
have actual use with OpenWRT even without LTE.

Another hacker created a free software program to talk to the USB loader 
[1] and boot U-Boot and Linux without modifying the on disk files. At 
the moment it needs a proprietary blob, so my documentation is 
emphasising booting with the on-device U-Boot.

This patchset here is mostly unmodified from the version I sent in 
January. It is the bare minimum to get an interactive shell working on 
the UART. Future patches can be found on my git repository [2] for those 
curious to peek ahead. The first 30 patches are in reasonable shape, but 
the further you go the more cleanup is necessary. I expect all of the 
patches to require a few rounds of feedback though.

My plan for upstreaming is largely this:

1) This bare minimum boot patchset
2) Add clock and pinctrl drivers
3) Add standard hardware to the device tree
4) Add zx29 specific drivers one by one: Watchdog, spi, i2c, DMA, PMIC, 
battery
5) SDIO backend for rtl8xxxu
6) rproc, mailbox and rpmsg

I am willing to maintain support for the SoC within reason. My patches 
add myself as maintainer. This is a hobby project for me though, keep 
that in mind if you want to ship a commercial product with these SoCs 
and upstream Linux.

Cheers,
Stefan

0: https://lists.infradead.org/pipermail/linux-arm-kernel/2026-January/1099306.html
1: https://github.com/zx297520v3-mainline/zx297520v3-loader
2: https://gitlab.com/stefandoesinger/zx297520-kernel/

Patch changelog:

v4: rename zx29.yaml to zte.yaml and add board enums
v3: Remove [RFC] tag, add defconfig
v2: checkpatch.pl fixes

Signed-off-by: Stefan Dösinger <stefandoesinger@gmail.com>
---
Stefan Dösinger (8):
      ARM: zte: Add zx297520v3 platform support
      dt-bindings: arm: Add zx297520v3 board binding
      ARM: dts: Add D-Link DWR-932M support
      ARM: zte: Add support for zx29 low level debug
      ARM: dts: Add an armv7 timer for zx297520v3
      ARM: zte: Bring back zx29 UART support
      ARM: dts: Declare UART1 on zx297520v3 boards
      ARM: defconfig: Add a zx29 defconfig file

 Documentation/arch/arm/zte/zx297520v3.rst      | 158 +++++++++++++++++++++++++
 Documentation/devicetree/bindings/arm/zte.yaml |  25 ++++
 MAINTAINERS                                    |   6 +
 arch/arm/Kconfig                               |   2 +
 arch/arm/Kconfig.debug                         |  12 ++
 arch/arm/Makefile                              |   1 +
 arch/arm/boot/dts/Makefile                     |   1 +
 arch/arm/boot/dts/zte/Makefile                 |   3 +
 arch/arm/boot/dts/zte/dlink-dwr-932m.dts       |  21 ++++
 arch/arm/boot/dts/zte/zx297520v3.dtsi          |  83 +++++++++++++
 arch/arm/configs/zx29_defconfig                |  90 ++++++++++++++
 arch/arm/include/debug/pl01x.S                 |   7 ++
 arch/arm/mach-zte/Kconfig                      |  24 ++++
 arch/arm/mach-zte/Makefile                     |   2 +
 arch/arm/mach-zte/zx297520v3.c                 |  19 +++
 drivers/tty/serial/amba-pl011.c                |  37 ++++++
 include/linux/amba/bus.h                       |   6 +
 17 files changed, 497 insertions(+)
---
base-commit: 028ef9c96e96197026887c0f092424679298aae8
change-id: 20260416-send-5c08e095e5c9

Best regards,
-- 
Stefan Dösinger <stefandoesinger@gmail.com>



^ permalink raw reply

* Re: [PATCH net-next 5/6] net: stmmac: move PHY handling out of __stmmac_open()/release()
From: Jakub Kicinski @ 2026-04-16 20:16 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Alexander Stein, Andrew Lunn, Heiner Kallweit, Alexandre Torgue,
	Andrew Lunn, David S. Miller, Eric Dumazet, linux-arm-kernel,
	linux-stm32, Maxime Coquelin, netdev, Paolo Abeni
In-Reply-To: <aeE8mpXy9FRHvN9q@shell.armlinux.org.uk>

On Thu, 16 Apr 2026 20:46:34 +0100 Russell King (Oracle) wrote:
> On Thu, Apr 16, 2026 at 09:08:26AM -0700, Jakub Kicinski wrote:
> > On Thu, 16 Apr 2026 14:47:57 +0100 Russell King (Oracle) wrote:  
> > > The next problem will be netdev's policy over reviews vs patches
> > > balance which I'm already in deficit, and I have *NO* *TIME*
> > > what so ever to review patches - let alone propose patches to
> > > fix people's problems.
> > > 
> > > So I'm going to say this plainly: if netdev wants to enforce that
> > > rule, then I won't be fixing people's problems.
> > 
> > Do you have a better proposal?
> > I'm under the same pressure of million stupid projects from my employer
> > as you are. Do y'all think that upstream maintainers have time given by
> > their employers to do the reviews? SMH.  
> 
> Are you really under the same pressure? I have one of my parents in
> hospital right now, and was in A&E yesterday afternoon through into
> the evening. I've been down at the hospital since 2pm today, only
> just come back to feed the other parent and head back down for what
> could be a long night. Then there's supposed to be an appointment
> that will take up to 3 hours tomorrow morning...
> 
> Yea, I'm sure you have the same pressures and worry from your
> employer - except my pressures are medical, looking after my parents.
> 
> Thank you for your lack of understanding.

Not my point. Sorry to hear about the issues you're facing.

I don't think making vague complaints about the development process
is going to make anything better.


^ permalink raw reply

* Re: [PATCH net-next 5/6] net: stmmac: move PHY handling out of __stmmac_open()/release()
From: Russell King (Oracle) @ 2026-04-16 19:46 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Alexander Stein, Andrew Lunn, Heiner Kallweit, Alexandre Torgue,
	Andrew Lunn, David S. Miller, Eric Dumazet, linux-arm-kernel,
	linux-stm32, Maxime Coquelin, netdev, Paolo Abeni
In-Reply-To: <20260416090826.1c5ca018@kernel.org>

On Thu, Apr 16, 2026 at 09:08:26AM -0700, Jakub Kicinski wrote:
> On Thu, 16 Apr 2026 14:47:57 +0100 Russell King (Oracle) wrote:
> > The next problem will be netdev's policy over reviews vs patches
> > balance which I'm already in deficit, and I have *NO* *TIME*
> > what so ever to review patches - let alone propose patches to
> > fix people's problems.
> > 
> > So I'm going to say this plainly: if netdev wants to enforce that
> > rule, then I won't be fixing people's problems.
> 
> Do you have a better proposal?
> I'm under the same pressure of million stupid projects from my employer
> as you are. Do y'all think that upstream maintainers have time given by
> their employers to do the reviews? SMH.

Are you really under the same pressure? I have one of my parents in
hospital right now, and was in A&E yesterday afternoon through into
the evening. I've been down at the hospital since 2pm today, only
just come back to feed the other parent and head back down for what
could be a long night. Then there's supposed to be an appointment
that will take up to 3 hours tomorrow morning...

Yea, I'm sure you have the same pressures and worry from your
employer - except my pressures are medical, looking after my parents.

Thank you for your lack of understanding.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!


^ permalink raw reply

* Re: [patch 07/38] treewide: Consolidate cycles_t
From: Thomas Gleixner @ 2026-04-16 19:32 UTC (permalink / raw)
  To: Christophe Leroy (CS GROUP), LKML
  Cc: Arnd Bergmann, x86, Lu Baolu, iommu, Michael Grzeschik, netdev,
	linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
	linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
	Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
	Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
	Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
	linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
	Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
	Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
	linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
	linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
	sparclinux
In-Reply-To: <0758843e-8f75-4c82-b9c0-25fab502e62f@kernel.org>

On Wed, Apr 15 2026 at 08:43, Christophe Leroy wrote:
>> -typedef unsigned long cycles_t;
>> -
>> -static inline cycles_t get_cycles(void)
>> +ostatic inline cycles_t get_cycles(void)
>
> What is 'ostatic' ?

That's a really good question :)


^ permalink raw reply

* Re: [patch 35/38] s390: Select ARCH_HAS_RANDOM_ENTROPY
From: Thomas Gleixner @ 2026-04-16 19:29 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: LKML, linux-s390, Arnd Bergmann, x86, Lu Baolu, iommu,
	Michael Grzeschik, netdev, linux-wireless, Herbert Xu,
	linux-crypto, Vlastimil Babka, linux-mm, David Woodhouse,
	Bernie Thompson, linux-fbdev, Theodore Tso, linux-ext4,
	Andrew Morton, Uladzislau Rezki, Marco Elver, Dmitry Vyukov,
	kasan-dev, Andrey Ryabinin, Thomas Sailer, linux-hams,
	Jason A. Donenfeld, Richard Henderson, linux-alpha, Russell King,
	linux-arm-kernel, Catalin Marinas, Huacai Chen, loongarch,
	Geert Uytterhoeven, linux-m68k, Dinh Nguyen, Jonas Bonn,
	linux-openrisc, Helge Deller, linux-parisc, Michael Ellerman,
	linuxppc-dev, Paul Walmsley, linux-riscv, David S. Miller,
	sparclinux
In-Reply-To: <20260416134238.9230Ba6-hca@linux.ibm.com>

On Thu, Apr 16 2026 at 15:42, Heiko Carstens wrote:
> On Fri, Apr 10, 2026 at 02:21:19PM +0200, Thomas Gleixner wrote:
>> The only remaining non-architecture usage of get_cycles() is to provide
>> random_get_entropy().
>> 
>> Switch s390 over to the new scheme of selecting ARCH_HAS_RANDOM_ENTROPY and
>> providing random_get_entropy() in asm/random.h.
>> 
>> Add 'asm/timex.h' includes to the relevant files, so the global include can
>> be removed once all architectures are converted over.
>> 
>> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
>> Cc: Heiko Carstens <hca@linux.ibm.com>
>> Cc: linux-s390@vger.kernel.org
>> ---
>>  arch/s390/Kconfig              |    1 +
>>  arch/s390/include/asm/random.h |   12 ++++++++++++
>>  arch/s390/include/asm/timex.h  |    6 ------
>>  arch/s390/kernel/time.c        |    1 +
>>  arch/s390/kernel/vtime.c       |    1 +
>>  5 files changed, 15 insertions(+), 6 deletions(-)
>
> Acked-by: Heiko Carstens <hca@linux.ibm.com>
>
> Thomas, would you mind adding the below as minor improvement to this
> series?

Sure. I'll respin it next week.


^ permalink raw reply

* Re: [PATCH ath-next] wifi: ath9k: owl: move name into owl_nvmem_probe
From: Toke Høiland-Jørgensen @ 2026-04-16 19:21 UTC (permalink / raw)
  To: Rosen Penev, linux-wireless
  Cc: Andreas Färber, Manivannan Sadhasivam,
	moderated list:ARM/ACTIONS SEMI ARCHITECTURE,
	moderated list:ARM/ACTIONS SEMI ARCHITECTURE, open list
In-Reply-To: <20260223224254.27081-1-rosenp@gmail.com>

Rosen Penev <rosenp@gmail.com> writes:

> There is no need for dynamic allocation for a simple string.
> request_firmware_nowait copies the string internally anyway.
>
> The error message on failure is also wrong. It's an allocation failure,
> not a find failure.
>
> Signed-off-by: Rosen Penev <rosenp@gmail.com>

Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>


^ permalink raw reply

* Re: [PATCH] MAINTAINERS: Move Peter De Schrijver to CREDITS
From: Aaro Koskinen @ 2026-04-16 18:13 UTC (permalink / raw)
  To: Thierry Reding
  Cc: linux-tegra, linux-arm-kernel, linux-pm, linux-omap, linux-kernel,
	Paul Walmsley, Geert Uytterhoeven
In-Reply-To: <20260416131810.3116408-1-thierry.reding@kernel.org>

Hello,

On Thu, Apr 16, 2026 at 03:18:10PM +0200, Thierry Reding wrote:
> From: Thierry Reding <treding@nvidia.com>
> 
> Peter sadly passed away a while back. Paul did a much better job at
> finding the right words to mourn this loss than I ever could, so I will
> leave this link here:
> 
>   https://lore.kernel.org/lkml/alpine.DEB.2.21.999.2407240345480.11116@utopia.booyaka.com/T/#u
> 
> Co-developed-by: Paul Walmsley <pjw@kernel.org>
> Signed-off-by: Thierry Reding <treding@nvidia.com>

Thanks for doing this. I think also the m68k work should be mentioned?

A.

> ---
>  CREDITS     | 6 ++++++
>  MAINTAINERS | 1 -
>  2 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/CREDITS b/CREDITS
> index 885fb05d8816..29fcfa679430 100644
> --- a/CREDITS
> +++ b/CREDITS
> @@ -3645,7 +3645,13 @@ D: Macintosh IDE Driver
>  
>  N: Peter De Schrijver
>  E: stud11@cc4.kuleuven.ac.be
> +E: p2@mind.be
> +E: peter.de-schrijver@nokia.com
> +E: pdeschrijver@nvidia.com
> +E: p2@psychaos.be
>  D: Mitsumi CD-ROM driver patches March version
> +D: OMAP power management
> +D: NVIDIA Tegra clock and BPMP drivers, among many other things
>  S: Molenbaan 29
>  S: B2240 Zandhoven
>  S: Belgium
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ef978bfca514..ffe20d770249 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -26145,7 +26145,6 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux.git
>  N:	[^a-z]tegra
>  
>  TEGRA CLOCK DRIVER
> -M:	Peter De Schrijver <pdeschrijver@nvidia.com>
>  M:	Prashant Gaikwad <pgaikwad@nvidia.com>
>  S:	Supported
>  F:	drivers/clk/tegra/
> -- 
> 2.52.0
> 
> 


^ permalink raw reply

* Re: [PATCH v13 00/48] arm64: Support for Arm CCA in KVM
From: Alper Gun @ 2026-04-16 17:44 UTC (permalink / raw)
  To: Suzuki K Poulose
  Cc: Steven Price, kvm, kvmarm, Catalin Marinas, Marc Zyngier,
	Will Deacon, James Morse, Oliver Upton, Zenghui Yu,
	linux-arm-kernel, linux-kernel, Joey Gouly, Alexandru Elisei,
	Christoffer Dall, Fuad Tabba, linux-coco, Ganapatrao Kulkarni,
	Gavin Shan, Shanker Donthineni, Aneesh Kumar K . V, Emi Kisanuki,
	Vishal Annapurve
In-Reply-To: <97d26e6e-b565-447f-95eb-2ece0755fe57@arm.com>

On Thu, Apr 16, 2026 at 4:05 AM Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
>
> On 16/04/2026 00:27, Alper Gun wrote:
> > On Wed, Apr 15, 2026 at 4:01 AM Steven Price <steven.price@arm.com> wrote:
> >>
> >> On 14/04/2026 22:40, Alper Gun wrote:
> >>> On Wed, Mar 18, 2026 at 8:54 AM Steven Price <steven.price@arm.com> wrote:
> >>>>
> >>>> This series adds support for running protected VMs using KVM under the
> >>>> Arm Confidential Compute Architecture (CCA).
> >>>>
> >>>> New major version number! This now targets RMM v2.0-bet0[1]. And unlike
> >>>> for Linux this represents a significant change.
> >>>>
> >>>> RMM v2.0 brings with it the ability to configure the RMM to have the
> >>>> same page size as the host (so no more RMM_PAGE_SIZE and dealing with
> >>>> granules being different from host pages). It also introduces range
> >>>> based APIs for many operations which should be more efficient and
> >>>> simplifies the code in places.
> >>>>
> >>>> The handling of the GIC has changed, so the system registers are used to
> >>>> pass the GIC state rather than memory. This means fewer changes to the
> >>>> KVM code as it looks much like a normal VM in this respect.
> >>>>
> >>>> And of course the new uAPI introduced in the previous v12 posting is
> >>>> retained so that also remains simplified compared to earlier postings.
> >>>>
> >>>> The RMM support for v2.0 is still early and so this series includes a
> >>>> few hacks to ease the integration. Of note are that there are some RMM
> >>>> v1.0 SMCs added to paper over areas where the RMM implementation isn't
> >>>> quite ready for v2.0, and "SROs" (see below) are deferred to the final
> >>>> patch in the series.
> >>>>
> >>>> The PMU in RMM v2.0 requires more handling on the RMM-side (and
> >>>> therefore simplifies the implementation on Linux), but this isn't quite
> >>>> ready yet. The Linux side is implemented (but untested).
> >>>>
> >>>> PSCI still requires the VMM to provide the "target" REC for operations
> >>>> that affect another vCPU. This is likely to change in a future version
> >>>> of the specification. There's also a desire to force PSCI to be handled
> >>>> in the VMM for realm guests - this isn't implemented yet as I'm waiting
> >>>> for the dust to settle on the RMM interface first.
> >>>>
> >>>> Stateful RMI Operations
> >>>> -----------------------
> >>>>
> >>>> The RMM v2.0 spec brings a new concept of Stateful RMI Operations (SROs)
> >>>> which allow the RMM to complete an operation over several SMC calls and
> >>>> requesting/returning memory to the host. This has the benefit of
> >>>> allowing interrupts to be handled in the middle of an operation (by
> >>>> returning to the host to handle the interrupt without completing the
> >>>> operation) and enables the RMM to dynamically allocate memory for
> >>>> internal tracking purposes. One example of this is RMI_REC_CREATE no
> >>>> longer needs "auxiliary granules" provided upfront but can request the
> >>>> memory needed during the RMI_REC_CREATE operation.
> >>>>
> >>>> There are a fairly large number of operations that are defined as SROs
> >>>> in the specification, but currently both Linux and RMM only have support
> >>>> for RMI_REC_CREATE and RMI_REC_DESTROY. There are a number of TODOs/FIXMEs
> >>>> in the code where support is missing.
> >>>>
> >>>> Given the early stage support for this, the SRO handling is all confined
> >>>> to the final patch. This patch can be dropped to return to a pre-SRO
> >>>> state (albeit a mixture of RMM v1.0 and v2.0 APIs) for testing purposes.
> >>>>
> >>>> A future posting will reorder the series to move the generic SRO support
> >>>> to an early patch and will implement the proper support for this in all
> >>>> RMI SMCs.
> >>>>
> >>>> One aspect of SROs which is not yet well captured is that in some
> >>>> circumstances the Linux kernel will need to call an SRO call in a
> >>>> context where memory allocation is restricted (e.g. because a spinlock
> >>>> is held). In this case the intention is that the SRO will be cancelled,
> >>>> the spinlock dropped so the memory allocation can be completed, and then
> >>>> the SRO restarted (obviously after rechecking the state that the
> >>>> spinlock was protecting). For this reason the code stores the memory
> >>>> allocations within a struct rmi_sro_state object - see the final patch
> >>>> for more details.
> >>>>
> >>>> This series is based on v7.0-rc1. It is also available as a git
> >>>> repository:
> >>>>
> >>>> https://gitlab.arm.com/linux-arm/linux-cca cca-host/v13
> >>>>
> >>>>
> >>>
> >>> Hi Steven,
> >>>
> >>> I have a question regarding host kexec and kdump scenarios, and
> >>> whether there is any plan to make them work in this initial series.
> >>>
> >>> Intel TDX and AMD SEV-SNP both have a firmware shutdown command that
> >>> is invoked during the kexec or panic code paths to safely bypass
> >>> hardware memory protections and boot into the new kernel. As far as
> >>> I know, there is no similar global teardown command available for
> >>> the RMM.
> >>
> >> Correct, the RMM specification as it stands doesn't provide a mechanism
> >> for the host to do this. The host would have to identify all the realm
> >> guests in the system: specifically the address of the RDs (Realm
> >> Descriptors) and RECs (Realm Execution Contexts). It needs this to tear
> >> down the guests and be able to undelegate the memory.
> >>
> >> It's an interesting point and I'll raise the idea of a "firmware
> >> shutdown command" to make this more possible.
> >>
> >>> What is the roadmap for supporting both general kexec and
> >>> more specifically kdump (panic) scenarios with CCA?
> >>
> >> I don't have a roadmap I'm afraid for these. kexec in theory would be
> >> possible with KVM gracefully terminating all realms. For kdump/panic
> >> that sort of graceful shutdown isn't really appropriate (or likely to
> >> succeed).
> >>
> >
> > Thanks Steven for the clarification.
> >
> > For us, kdump is highly critical as it is our primary diagnostic tool
> > for host crashes. Without it, monitoring and debugging at fleet scale
> > would become unmanageable.
> >
> > To confirm my understanding of the current architecture: if a host
> > panics while no Realms are actively running (and therefore no pages
> > are currently in the delegated state), the standard kdump extraction
> > should work perfectly fine without any modifications, correct?
>
> This may not be true. We could have pages donated to RMM for GPT,
> Tracking etc. So, unless Linux keeps track of them, it may be
> unsafe for a crash kernel to access them.
>
> >
> > Regarding the KVM tracking structures (RDs, RECs, RTTs, etc.) when VMs
> > are running, perhaps we could use `vmcoreinfo` to export the physical
> > addresses of these delegated pages. This would allow tools like
>
> Thinking of this, do we really need to ? We could access the pages from
> "vmcore" read and handle the GPFs for such accesses and give out 0s
> for the Granules. Anyways, we can't get access to the data on those
> pages that are still in Realm PAS.
>

I like the idea of handling the GPFs directly during vmcore reads for
the kdump case. That's a much simpler/cleaner solution.

> > `makedumpfile` to explicitly filter them out. I assume these pages must
> > remain hardware-locked while the VMs are active.
>
>
>
> >
> > Long-term, having an architectural shutdown command - similar to the
> > TDH.SYS.DISABLE command in Intel TDX - would be incredibly useful. It
> > would allow the kdump kernel to safely bypass these hardware security
> > checks, especially when extracting host-side KVM state.
>

> For kexec, maybe we could do this. Alternatively we could try to
> reclaim everything back, (GPTs, Tracking) before kexec-reboot.
>

Agreed. Reclaiming all delegated memory prior to the kexec reboot
makes perfect sense.

> >
> > As for the protected realm memory, I assume that is an easier problem.
> > We naturally want to exclude guest pages from a host dump regardless
> > of whether they are Realm pages or not. However, accidental touches
> > are still fatal.
> >
> >> There is also some RMM configuration which cannot be repeated (see
> >> RMI_RMM_CONFIG_SET) - which implies that the kexec kernel must be
> >> similar to the first kernel (i.e. same page size).
>
> That is true, the page sizes must match. RMM spec is updated to probe
> the state of the RMM and detect if it can do the CONFIG_SET
>
> Suzuki
>
> >>
> >> Thanks,
> >> Steve
>


^ permalink raw reply

* Re: [PATCH rc v2 0/5] iommu/arm-smmu-v3: Fix device crash on kdump kernel
From: Jason Gunthorpe @ 2026-04-16 17:20 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Nicolin Chen, will, kevin.tian, joro, praan, baolu.lu,
	miko.lenczewski, smostafa, linux-arm-kernel, iommu, linux-kernel,
	stable, jamien
In-Reply-To: <3eaf217f-8e1e-4d64-983a-6b888886f157@arm.com>

On Thu, Apr 16, 2026 at 05:49:24PM +0100, Robin Murphy wrote:
> On 15/04/2026 10:17 pm, Nicolin Chen wrote:
> > When transitioning to a kdump kernel, the primary kernel might have crashed
> > while endpoint devices were actively bus-mastering DMA. Currently, the SMMU
> > driver aggressively resets the hardware during probe by clearing CR0_SMMUEN
> > and setting the Global Bypass Attribute (GBPA) to ABORT.
> > 
> > In a kdump scenario, this aggressive reset is highly destructive:
> > a) If GBPA is set to ABORT, in-flight DMA will be aborted, generating fatal
> >     PCIe AER or SErrors that may panic the kdump kernel
> > b) If GBPA is set to BYPASS, in-flight DMA targeting some IOVAs will bypass
> >     the SMMU and corrupt the physical memory at those 1:1 mapped IOVAs.
> 
> But wasn't that rather the point? The kdump kernel doesn't know the scope of
> how much could have gone wrong (including potentially the SMMU configuration
> itself), so it just blocks everything, resets and reenables the devices it
> cares about, and ignores whatever else might be on fire.

The purpose of kdump is to have the maximum chance to capture a dump
from the blown up kernel.

Yes, on a perfect platform aborting the entire SMMU should improve the
chance of getting that dump.

But sadly there are so many busted up platforms where if you start
messing with the IOMMU they will explode and blow up the kdump. x86
and "firmware first" error handling systems are particularly notorious
for nasty behavior like this.

Seems like there are now ARM systems too. :(

So, the iommu drivers have been preserving the IOMMU and not
disrupting the DMAs on x86 for a long time. This is established kdump
practice.

> If AER can panic a kdump kernel, that seems like a failing of the kdump
> kernel itself more than anything else (especially given the likelihood that
> additional AER events could follow from whatever initial crash/failure
> triggered kdump to begin with).

Probably the kdump wasn't triggered by AER. You want kdump to not
trigger more RAS events that might blow up the kdump while it is
trying to run.. That increases the chance of success

> And frankly if some device getting a
> translation fault could directly SError the whole system, then I'd say that
> system is pretty doomed in general, kdump or not.

Aborting the SMMU while ATS is enabled also fails all ATS and
translated requests which is a catastrophic event for a CXL type
device that a correct OS should never trigger. The catastrophic
explosion of the CXL device also unplugs all its RAM from the system
and the kdump kernel just cannot handle the resulting cascade of RAS
failures. Plus you lose all that CXL RAM you may have wanted to dump..

Regardless, the platform has this flaw and to make kdump work it has
to avoid triggering these errors like x86 does.

Jason


^ permalink raw reply

* Re: [PATCH v5 02/12] coresight: etm4x: fix underflow for nrseqstate
From: Yeoreum Yun @ 2026-04-16 17:07 UTC (permalink / raw)
  To: Leo Yan
  Cc: coresight, linux-arm-kernel, linux-kernel, suzuki.poulose,
	mike.leach, james.clark, alexander.shishkin, jie.gan
In-Reply-To: <20260416151102.GL356832@e132581.arm.com>

Hi Leo,

> On Wed, Apr 15, 2026 at 05:55:18PM +0100, Yeoreum Yun wrote:
> > TCRSEQEVR<n> is implemented only when TCRIDR5.NUMSEQSTATE is 0b100,
> > in which case n ranges from 0 to 2; otherwise, TCRIDR5.NUMSEQSTATE is 0b000.
>
> My suggestion in previous version is not quite right, thanks for
> making this correct.
>
> [...]
>
> > @@ -1395,6 +1395,8 @@ static ssize_t seq_idx_store(struct device *dev,
> >  	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
> >  	struct etmv4_config *config = &drvdata->config;
> >
> > +	if (!drvdata->nrseqstate)
> > +		return -EINVAL;
>
> For "nrseqstate = 0" case, would it return -EOPNOTSUPP instead?
>
> Otherwise, LGTM:
>
> Reviewed-by: Leo Yan <leo.yan@arm.com>

Yeap. It's much better to return -EOPNOTSUPP here.
I'll change it.

--
Sincerely,
Yeoreum Yun


^ permalink raw reply

* Re: [PATCH v5 06/12] coresight: etm4x: fix leaked trace id
From: Yeoreum Yun @ 2026-04-16 17:06 UTC (permalink / raw)
  To: Leo Yan
  Cc: coresight, linux-arm-kernel, linux-kernel, suzuki.poulose,
	mike.leach, james.clark, alexander.shishkin, jie.gan
In-Reply-To: <20260416165541.GN356832@e132581.arm.com>

Hi Leo,
> On Wed, Apr 15, 2026 at 05:55:22PM +0100, Yeoreum Yun wrote:
> > If etm4_enable_sysfs() fails in cscfg_csdev_enable_active_config(),
> > the trace ID may be leaked because it is not released.
> >
> > To address this, call etm4_release_trace_id() when etm4_enable_sysfs()
> > fails in cscfg_csdev_enable_active_config().
> >
> > Reviewed-by: Jie Gan <jie.gan@oss.qualcomm.com>
> > Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
> > ---
> >  drivers/hwtracing/coresight/coresight-etm4x-core.c | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> > index f55338a4989d..b199aebbdb60 100644
> > --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
> > +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> > @@ -920,8 +920,10 @@ static int etm4_enable_sysfs(struct coresight_device *csdev, struct coresight_pa
> >  	cscfg_config_sysfs_get_active_cfg(&cfg_hash, &preset);
> >  	if (cfg_hash) {
> >  		ret = cscfg_csdev_enable_active_config(csdev, cfg_hash, preset);
> > -		if (ret)
> > +		if (ret) {
> > +			etm4_release_trace_id(drvdata);
> >  			return ret;
> > +		}
>
> LGTM:
>
> Reviewed-by: Leo Yan <leo.yan@arm.com>

Thanks.

>
> Just recording a few thoughts.  As Suzuki mentioned, it would be better
> to allocate trace IDs within a session.  We might consider maintaining
> the trace ID map in the sink driver data, since the sink driver is
> unique within a session so it is a central place to allocate trace ID.
>
> We should use paired way for allocation and release. For example:
>
>   coresight_enable_sysfs()
>   {
>       ...
>       coresight_path_assign_trace_id(path);
>
>   failed:
>       coresight_path_unassign_trace_id(path);
>   }
>
>   coresight_disable_sysfs()
>   {
>       coresight_path_unassign_trace_id(path);
>   }
>
> But this requires broader refactoring.  E.g., the STM driver currently
> allocates system trace IDs statically during probe, we might need to
> consolidate for all modules to use dynamic allocation.

So IIUC, Do we want to "map" per "session" and save this map information
in the "sink" driver? or just use "global" map but locate it in sink
driver?

I totally agree with the above suggestion -- unassign the trace id
in the coresight_XXX function -- (but we need to add another callback
for this) but I think we don't need to sustain map per session
and it seems enough to use current storage for trace_id not move to
sink driver.

Anyway, it would be better to do the refactoring with another patchset...

Thanks.

--
Sincerely,
Yeoreum Yun


^ permalink raw reply

* Re: [PATCH v2] raid6: arm64: add SVE optimized implementation for syndrome generation
From: Robin Murphy @ 2026-04-16 17:03 UTC (permalink / raw)
  To: Mark Brown
  Cc: Demian Shulhan, Ard Biesheuvel, Christoph Hellwig, Mark Rutland,
	Song Liu, Yu Kuai, Will Deacon, Catalin Marinas, linux-arm-kernel,
	Li Nan, linux-raid, linux-kernel
In-Reply-To: <acc15515-4c43-402c-9800-c9b5e3da9891@sirena.org.uk>

On 16/04/2026 5:47 pm, Mark Brown wrote:
> On Thu, Apr 16, 2026 at 05:26:08PM +0100, Robin Murphy wrote:
> 
>> Unless you've got a CPU with truly big wide vector units that _can't_ be
>> fully utilised by ASIMD ops, then SVE is only really offering whatever
>> incidental benefits fall out of smaller code size. However, if you do have
>> those wider vectors, then the cost of correctly saving/restoring the SVE
>> state - of which a userspace benchmark isn't likely to be very
>> representative - is also going to scale up significantly.
> 
> The other case will be when there's some SVE only extension that
> accelerates something that's relevant for the algorithm.  That's not
> really a thing at present but I imagine that we'll run into that at some
> point.

Indeed - I was implicitly thinking in terms of things that _are_ just 
transliterated from NEON to SVE, where the primary gain is stuff like 
predicate loops, but even that _could_ potentially be enough to justify 
an argument for in-kernel SVE (using a 128-bit VL to keep the additional 
state/cost to a minimum).

Cheers,
Robin.


^ permalink raw reply

* Re: [PATCH v5 06/12] coresight: etm4x: fix leaked trace id
From: Leo Yan @ 2026-04-16 16:55 UTC (permalink / raw)
  To: Yeoreum Yun
  Cc: coresight, linux-arm-kernel, linux-kernel, suzuki.poulose,
	mike.leach, james.clark, alexander.shishkin, jie.gan
In-Reply-To: <20260415165528.3369607-7-yeoreum.yun@arm.com>

On Wed, Apr 15, 2026 at 05:55:22PM +0100, Yeoreum Yun wrote:
> If etm4_enable_sysfs() fails in cscfg_csdev_enable_active_config(),
> the trace ID may be leaked because it is not released.
> 
> To address this, call etm4_release_trace_id() when etm4_enable_sysfs()
> fails in cscfg_csdev_enable_active_config().
> 
> Reviewed-by: Jie Gan <jie.gan@oss.qualcomm.com>
> Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
> ---
>  drivers/hwtracing/coresight/coresight-etm4x-core.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> index f55338a4989d..b199aebbdb60 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> @@ -920,8 +920,10 @@ static int etm4_enable_sysfs(struct coresight_device *csdev, struct coresight_pa
>  	cscfg_config_sysfs_get_active_cfg(&cfg_hash, &preset);
>  	if (cfg_hash) {
>  		ret = cscfg_csdev_enable_active_config(csdev, cfg_hash, preset);
> -		if (ret)
> +		if (ret) {
> +			etm4_release_trace_id(drvdata);
>  			return ret;
> +		}

LGTM:

Reviewed-by: Leo Yan <leo.yan@arm.com>

Just recording a few thoughts.  As Suzuki mentioned, it would be better
to allocate trace IDs within a session.  We might consider maintaining
the trace ID map in the sink driver data, since the sink driver is
unique within a session so it is a central place to allocate trace ID.

We should use paired way for allocation and release. For example:

  coresight_enable_sysfs()
  {
      ...
      coresight_path_assign_trace_id(path);

  failed:
      coresight_path_unassign_trace_id(path);
  }

  coresight_disable_sysfs()
  {
      coresight_path_unassign_trace_id(path);
  }

But this requires broader refactoring.  E.g., the STM driver currently
allocates system trace IDs statically during probe, we might need to
consolidate for all modules to use dynamic allocation.

Thanks,
Leo


^ permalink raw reply

* Re: [PATCH rc v2 0/5] iommu/arm-smmu-v3: Fix device crash on kdump kernel
From: Robin Murphy @ 2026-04-16 16:49 UTC (permalink / raw)
  To: Nicolin Chen, will, jgg, kevin.tian
  Cc: joro, praan, baolu.lu, miko.lenczewski, smostafa,
	linux-arm-kernel, iommu, linux-kernel, stable, jamien
In-Reply-To: <cover.1776286352.git.nicolinc@nvidia.com>

On 15/04/2026 10:17 pm, Nicolin Chen wrote:
> When transitioning to a kdump kernel, the primary kernel might have crashed
> while endpoint devices were actively bus-mastering DMA. Currently, the SMMU
> driver aggressively resets the hardware during probe by clearing CR0_SMMUEN
> and setting the Global Bypass Attribute (GBPA) to ABORT.
> 
> In a kdump scenario, this aggressive reset is highly destructive:
> a) If GBPA is set to ABORT, in-flight DMA will be aborted, generating fatal
>     PCIe AER or SErrors that may panic the kdump kernel
> b) If GBPA is set to BYPASS, in-flight DMA targeting some IOVAs will bypass
>     the SMMU and corrupt the physical memory at those 1:1 mapped IOVAs.

But wasn't that rather the point? The kdump kernel doesn't know the scope 
of how much could have gone wrong (including potentially the SMMU 
configuration itself), so it just blocks everything, resets and 
reenables the devices it cares about, and ignores whatever else might be 
on fire.

If AER can panic a kdump kernel, that seems like a failing of the kdump 
kernel itself more than anything else (especially given the likelihood 
that additional AER events could follow from whatever initial 
crash/failure triggered kdump to begin with). And frankly if some device 
getting a translation fault could directly SError the whole system, then 
I'd say that system is pretty doomed in general, kdump or not.

Thanks,
Robin.

> To safely absorb in-flight DMA, the kdump kernel must leave SMMUEN=1 intact
> and avoid modifying STRTAB_BASE. This allows HW to continue translating in-
> flight DMA using the crashed kernel's page tables until the endpoint device
> drivers probe and quiesce their respective hardware.
> 
> However, the ARM SMMUv3 architecture specification states that updating the
> SMMU_STRTAB_BASE register while SMMUEN == 1 is UNPREDICTABLE or ignored.
> 
> This leaves a kdump kernel no choice but to adopt the stream table from the
> crashed kernel.
> 
> In this series:
>   - Introduce an ARM_SMMU_OPT_KDUMP
>   - Skip SMMUEN and STRTAB_BASE resets in arm_smmu_device_reset()
>   - Map the crashed kernel's stream tables into the kdump kernel [*]
>   - Defer any default domain attachment to retain STEs until device drivers
>     explicitly request it.
> 
> [*] This is implemented via memremap, which only works on a coherent SMMU.
> 
> Note that the entire series requires Jason's work that was merged in v6.12:
> 85196f54743d ("iommu/arm-smmu-v3: Reorganize struct arm_smmu_strtab_cfg").
> I have a backported version that is verified with a v6.8 kernel. I can send
> if we see a strong need after this version is accepted.
> 
> This is on Github:
> https://github.com/nicolinc/iommufd/commits/smmuv3_kdump-v2
> 
> Changelog
> v2
>   * Add warning in non-coherent SMMU cases
>   * Keep eventq/priq disabled v.s. enabling-and-disabling-later
>   * Check KDUMP option in the beginning of arm_smmu_device_reset()
>   * Validate STRTAB format matches HW capability instead of forcing flags
> v1:
>   https://lore.kernel.org/all/cover.1775763475.git.nicolinc@nvidia.com/
> 
> Nicolin Chen (5):
>    iommu/arm-smmu-v3: Add arm_smmu_adopt_strtab() for kdump
>    iommu/arm-smmu-v3: Implement is_attach_deferred() for kdump
>    iommu/arm-smmu-v3: Retain CR0_SMMUEN during kdump device reset
>    iommu/arm-smmu-v3: Skip EVTQ/PRIQ setup in kdump kernel
>    iommu/arm-smmu-v3: Detect ARM_SMMU_OPT_KDUMP in
>      arm_smmu_device_hw_probe()
> 
>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |   1 +
>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 225 ++++++++++++++++++--
>   2 files changed, 207 insertions(+), 19 deletions(-)
> 



^ permalink raw reply

* Re: [PATCH v3 3/8] arm64: entry: add unwind info for various kernel entries
From: Jens Remus @ 2026-04-16 16:49 UTC (permalink / raw)
  To: Dylan Hatch, Roman Gushchin, Weinan Liu, Will Deacon,
	Josh Poimboeuf, Indu Bhagat, Peter Zijlstra, Steven Rostedt,
	Catalin Marinas, Jiri Kosina
  Cc: Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
	joe.lawrence, linux-toolchains, linux-kernel, live-patching,
	linux-arm-kernel, Heiko Carstens
In-Reply-To: <20260406185000.1378082-4-dylanbhatch@google.com>

On 4/6/2026 8:49 PM, Dylan Hatch wrote:
> From: Weinan Liu <wnliu@google.com>
> 
> DWARF CFI (Call Frame Information) specifies how to recover the return
> address and callee-saved registers at each PC in a given function.
> Compilers are able to generate the CFI annotations when they compile
> the code to assembly language. For handcrafted assembly, we need to
> annotate them by hand.
> 
> Annotate CFI unwind info for assembly for interrupt and exception
> handlers.
> 
> Signed-off-by: Weinan Liu <wnliu@google.com>
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> ---
>  arch/arm64/kernel/entry.S | 10 ++++++++++
>  1 file changed, 10 insertions(+)

The added CFI directives somehow cause .eh_frame (instead of
.debug_frame) to be generated in addition to .sframe.  This causes the
following warning when linking vmlinux (.tmp_vmlinux1, .tmp_vmlinux2,
and vmlinux.unstripped):

  LD      vmlinux.unstripped
aarch64-linux-gnu-ld: warning: orphan section `.eh_frame' from `arch/arm64/kernel/entry.o' being placed in section `.eh_frame'

I don't think this can be controlled using compiler options
-fno-asynchronous-unwind-tables -fno-unwind-tables, as entry.S is
only preprocessed and then fed into the assembler.

The following at the top of entry.S would resolve the issue:

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
@@ -30,6 +30,12 @@
 #include <asm/asm-uaccess.h>
 #include <asm/unistd.h>

+/*
+ * Do not generate .eh_frame.  Only generate .debug_frame and optionally
+ * .sframe (via assembler option --gsframe[-N]).
+ */
+	.cfi_sections .debug_frame
+
	.macro	clear_gp_regs
	.irp	n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
	mov	x\n, xzr

Regards,
Jens
-- 
Jens Remus
Linux on Z Development (D3303)
jremus@de.ibm.com / jremus@linux.ibm.com

IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Ehningen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/



^ permalink raw reply

* Re: [PATCH v2] raid6: arm64: add SVE optimized implementation for syndrome generation
From: Mark Brown @ 2026-04-16 16:47 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Demian Shulhan, Ard Biesheuvel, Christoph Hellwig, Mark Rutland,
	Song Liu, Yu Kuai, Will Deacon, Catalin Marinas, linux-arm-kernel,
	Li Nan, linux-raid, linux-kernel
In-Reply-To: <8db4defe-8b5e-4cc3-880b-72d46510b034@arm.com>

[-- Attachment #1: Type: text/plain, Size: 703 bytes --]

On Thu, Apr 16, 2026 at 05:26:08PM +0100, Robin Murphy wrote:

> Unless you've got a CPU with truly big wide vector units that _can't_ be
> fully utilised by ASIMD ops, then SVE is only really offering whatever
> incidental benefits fall out of smaller code size. However, if you do have
> those wider vectors, then the cost of correctly saving/restoring the SVE
> state - of which a userspace benchmark isn't likely to be very
> representative - is also going to scale up significantly.

The other case will be when there's some SVE only extension that
accelerates something that's relevant for the algorithm.  That's not
really a thing at present but I imagine that we'll run into that at some
point.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH v2] raid6: arm64: add SVE optimized implementation for syndrome generation
From: Robin Murphy @ 2026-04-16 16:26 UTC (permalink / raw)
  To: Demian Shulhan, Ard Biesheuvel
  Cc: Christoph Hellwig, Mark Rutland, Song Liu, Yu Kuai, Will Deacon,
	Catalin Marinas, Mark Brown, linux-arm-kernel, Li Nan, linux-raid,
	linux-kernel
In-Reply-To: <CAOLeWCtf2rZyPeJH-LuZ2A+c7mC9M2r-Ya0VjyOJFpun3TFMnw@mail.gmail.com>

On 16/04/2026 3:59 pm, Demian Shulhan wrote:
> Hi Ard!
> 
>> So what exactly did you fix in your test case?
> 
> I just added the missing memset. You're right, "aliasing" was the
> wrong term for PIP.
> 
>> This is the result where all data buffer pointers point to the same
>> memory, right? I.e., the zero page? So this is an unrealistic use
>> case that we can disregard.
> 
> Yes, that's right. It was a flaw in my previous test setup.
> 
>> Sorry but your result that SVE is 2x faster does not remain fully intact,
>> right? Given that the speedup is now 5.5%?
>> Should we just disregard the above results (and explanations) and focus
>> on the stuff below?
> 
> Yes, it's better to focus on the data from SnapRAID. It was made on
> larger blocks and a wider range of disks, providing more realistic
> metrics.
> 
>> OK, so the takeaway here is that SVE is only worth the hassle if the vector
>> length is at least 256 bits. This is not entirely surprising, but given that
>> Graviton4 went back to 128 bit vectors from 256, I wonder what the future
>> expectation is here.
> 
> I agree. The results from the SnapRAID tests are not as impressive as
> I hoped, and the fact that Neoverse-V2 went back to 128-bit is a red
> flag. It suggests that wide SVE registers might not be a priority in
> future architecture versions.

If you look at the Neoverse V1 software optimisation guide[1], the SVE 
instructions generally have half the throughput of their ASIMD 
equivalents (i.e. presumably the vector pipes are still only 128 bits 
wide and SVE is just using them in pairs), so indeed the total 
instruction count is largely meaningless - IPC might be somewhat more 
relevant, but I'd say the only performance number that's really 
meaningful is the end-to-end MB/s measure of how fast the function 
implementation as a whole can process data.

Unless you've got a CPU with truly big wide vector units that _can't_ be 
fully utilised by ASIMD ops, then SVE is only really offering whatever 
incidental benefits fall out of smaller code size. However, if you do 
have those wider vectors, then the cost of correctly saving/restoring 
the SVE state - of which a userspace benchmark isn't likely to be very 
representative - is also going to scale up significantly.

>> These results seem very relevant - perhaps Christoph can give some guidance
>> on how we might use these to improve the built-in benchmarks to be more
>> accurate.
> 
> This is the most important part of this report, I think. SVE looks
> good only on paper, as in my first idea, but in a real scenario it
> brings more problems than benefits.
> 
> I’m happy to drop the SVE implementation for now and instead focus on
> modernizing the built-in benchmarks to ensure the kernel chooses the
> best available NEON path for actual storage workloads.

It's probably also worth checking whether the current NEON routines 
themselves are actually optimal for modern big CPUs - things have moved 
on quite a bit since Cortex-A57 (whose ASIMD performance could also be 
described as "esoteric" at the best of times...)

Thanks,
Robin.

[1] https://developer.arm.com/documentation/110659/

> 
> If you give me the green flag for it, I can start working on improving
> these built-in tests.
> 
> Best regards,
> Demian
> 
> 
> чт, 16 квіт. 2026 р. о 16:40 Ard Biesheuvel <ardb@kernel.org> пише:
>>
>> Hi Demian,
>>
>> On Thu, 16 Apr 2026, at 14:40, Demian Shulhan wrote:
>>> Hi all,
>>>
>>> Sorry for the delay. The tests became more complex than I initially
>>> thought, so I needed to gather more data and properly validate the
>>> results across different hardware configurations.
>>>
>>> Firstly, I want to clarify the results from my March 29 tests. I found
>>> a flaw in my initial custom benchmark. The massive 2x throughput gap on
>>> 24 disks wasn't solely due to SVE's superiority, but rather a severe L1
>>> D-Cache thrashing issue that disproportionately penalized NEON.
>>>
>>> My custom test lacked memset() initialization, causing all data buffers
>>> to map to the Linux Zero Page (Virtually Indexed, Physically Tagged
>>> cache aliasing).
>>
>> D-caches always behave as PIPT on arm64. This is complex stuff, so please
>> don't present conjecture as fact.
>>
>>> Furthermore, even with memset(), allocating contiguous
>>> page-aligned buffers can cause severe Cache Address Sharing (a known
>>> issue that Andrea Mazzoleni solved in SnapRAID 13 years ago using
>>> RAID_MALLOC_DISPLACEMENT).
>>>
>>> Because SVE (svex4) uses 256-bit registers on Neoverse-V1, it performs
>>> exactly half the number of memory load instructions compared to 128-bit
>>> NEON. This dramatically reduced the L1 cache alias thrashing, allowing
>>> SVE to survive the memory bottleneck while NEON choked:
>>>
>>
>> You are drawing some conclusions here without disclosing the actual
>> information that you based this on. D-caches are non-aliasing on arm64.
>>
>> So what exactly did you fix in your test case?
>>
>>> Custom test without memset (4kb block):
>>>   | algo=neonx4 ndisks=24 iterations=1M time=11.014s MB/s=7802.57
>>>   | algo=svex4  ndisks=24 iterations=1M time=5.719s  MB/s=15026.92
>>>
>>
>> This is the result where all data buffer pointers point to the same
>> memory, right? I.e., the zero page? So this is an unrealistic use
>> case that we can disregard.
>>
>>> Custom test with memset (4kb block):
>>>   | algo=neonx4 ndisks=24 iterations=1M time=6.165s  MB/s=13939.08
>>>   | algo=svex4  ndisks=24 iterations=1M time=5.839s  MB/s=14718.23
>>>
>>> Even with the corrected memory setup, the throughput gap narrowed, but
>>> the fundamental CPU-efficiency result remained fully intact.
>>>
>>
>> Sorry but your result that SVE is 2x faster does not remain fully intact,
>> right? Given that the speedup is now 5.5%?
>>
>> Should we just disregard the above results (and explanations) and focus
>> on the stuff below?
>>
>>> To completely isolate these variables and provide accurate real-world
>>> data, the following test campaigns were done based on the SnapRAID
>>> project (https://github.com/amadvance/snapraid) using its
>>> perf_bench.c tool with proper memory displacement and a 256 KiB block
>>> size.
>>>
>>> Test configurations:
>>> - c7g.medium (AWS Graviton3, 1 vCPU): Neoverse-V1, 256-bit SVE
>>> - c7g.xlarge (AWS Graviton3, 4 vCPUs): Neoverse-V1, 256-bit SVE
>>> - c8g.xlarge (AWS Graviton4, 4 vCPUs): Neoverse-V2, 128-bit SVE
>>>
>>>
>>> =========================================================
>>> Section 1: SnapRAID Validation on Graviton3 / Neoverse-V1
>>> =========================================================
>>>
>> ...
>>>
>>> 1.3 Main Graviton3 Conclusions
>>>   - On 256-bit SVE hardware, svex4 consistently retires about ~34% fewer
>>>     instructions and ~10-15% fewer CPU cycles than neonx4.
>>>
>>> =========================================================
>>> Section 2: SnapRAID Validation on Graviton4 / Neoverse-V2
>>> =========================================================
>>>
>> ...
>>>
>>> 2.3 Main Graviton4 Conclusions
>>>   - On Neoverse-V2, SVE vector length is 128-bit (same as NEON).
>>>   - Without the 256-bit width, NEON outperforms SVE.
>>>   - svex4 retires ~32% MORE instructions here and is consistently slower.
>>>
>>> =========================================================
>>> Section 3: Validation on c7g.medium (1 vCPU)
>>> =========================================================
>>>
>> ...
>>> 3.3 Main c7g.medium Conclusions
>>>   - The instruction count reduction (~34%) perfectly matches the 4-vCPU
>>>     instance.
>>>   - The single vCPU is heavily memory-bandwidth constrained (cycle counts
>>>     are much higher waiting for RAM).
>>>
>>
>> OK, so the takeaway here is that SVE is only worth the hassle if the vector
>> length is at least 256 bits. This is not entirely surprising, but given that
>> Graviton4 went back to 128 bit vectors from 256, I wonder what the future
>> expectation is here.
>>
>> But having these numbers is definitely a good first step. Now we need to
>> quantify the overhead associated with having kernel mode SVE state that
>> needs to be preserved/restored.
>>
>> However, 10%-15% speedup that can only be achieved on SVE implementations
>> with 256 bit vectors or more may not be that enticing in the end. (The
>> fact that you are retiring 34% instructions less does not really matter
>> here unless there is some meaningful SMT-like sharing of functional units
>> going on in the meantime, which seems unlikely on a CPU that is maxed out
>> on the data side)
>>
>>
>>> =========================================================
>>> Section 4: The Pitfalls of the Current Kernel Benchmark
>>> =========================================================
>>>
>>
>> These results seem very relevant - perhaps Christoph can give some guidance
>> on how we might use these to improve the built-in benchmarks to be more
>> accurate.
>>
>>
>> Thanks,
>>



^ permalink raw reply

* Re: [PATCH v9 05/11] drm/fourcc: Add DRM_FORMAT_X403
From: Simon Ser @ 2026-04-16 16:24 UTC (permalink / raw)
  To: Tomi Valkeinen
  Cc: Vishal Sagar, Anatoliy Klymenko, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Laurent Pinchart,
	Michal Simek, dri-devel, linux-kernel, linux-arm-kernel,
	Geert Uytterhoeven, Dmitry Baryshkov, Pekka Paalanen
In-Reply-To: <6abc165b-4220-422c-8190-283a3dea1f62@ideasonboard.com>

On Friday, April 10th, 2026 at 08:54, Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> wrote:

> On 10/04/2026 09:07, Tomi Valkeinen wrote:
> > Hi,
> >
> > On 26/03/2026 16:43, Simon Ser wrote:
> >> On Wednesday, March 25th, 2026 at 15:02, Tomi Valkeinen
> >> <tomi.valkeinen@ideasonboard.com> wrote:
> >>
> >>> +/*
> >>> + * 3 plane non-subsampled (444) YCbCr
> >>> + * 10 bpc, 30 bits per sample image data in a single contiguous buffer.
> >>> + * index 0: Y plane,  [31:0] x:Y2:Y1:Y0    [2:10:10:10] little endian
> >>> + * index 1: Cb plane, [31:0] x:Cb2:Cb1:Cb0 [2:10:10:10] little endian
> >>> + * index 2: Cr plane, [31:0] x:Cr2:Cr1:Cr0 [2:10:10:10] little endian
> >>> + */
> >>> +#define DRM_FORMAT_X403        fourcc_code('X', '4', '0', '3')
> >>
> >> So, this one is different from the Q family, because Q has padding in
> >
> > Any idea where the letters (P, Q, S) come from?
> >
> >> LSB rather than MSB. Speaking of, maybe we should add "LSB aligned" to
> >> the doc comment to make that clear?
> >
> > Yes, I can add that.
> >
> >> Re-reading the sibling thread about DRM_FORMAT_XV20, sounds like the
> >> first digit matches my expectations for sub-sampling. How did you pick
> >
> > I just used the name in Xilinx's BSP kernel.
> >
> >> the last two digits? I think I would've expected "30" here rather than
> >> "03", since the last two planes are Cb Cr rather than Cr Cb.
> >
> > Hmm, but X403 is Cb:Cr, and P030 is Cr:Cb, so doesn't 03 make sense
> > here? Oh, but Q401 is Cr:Cb, and it's 01...
> >
> > Now that I look at this... I think I have to go back and do more
> > testing. From the Xilinx docs, it looks to me that the XV15/XV20 should
> > have the same CrCb order as X403. But the comments in these patches
> > say otherwise. I'm pretty sure my tests conformed to the comments here,
> > but now I don't feel so sure anymore. It's been more than a year since I
> > wrote the tests and properly tested these, so I have to spend a bit time
> > to get everything up again.
> 
> I think the comments are correct. I guess it depends on which way you
> look at this: for P030 etc, starting from the lowest bit, the order is
> Cb:Cr. For X403, starting from the lowest plane, the order is Cb:Cr. And
> that's probably how Xilinx HW "sees" it and thus they use the same Cb:Cr
> order.
> 
> But in the comments we describe P030's components starting from the
> highest bit, and thus it's Cr:Cb.

Thanks for checking!

> >> Has the first "X" letter been picked arbitrarily? It's already used to
> >> denote padding in other formats so I wonder if we should pick that
> >> instead of, say, "T".
> > I didn't invent the name, I just took the naming Xilinx used. I don't
> > know the history behind it. I assume the "X" is for Xilinx, but I could
> > be wrong here. What would "T" be for? "Tomi"? =)
> So... While the Cb:Cr order can be seen both ways, perhaps the Q formats
> are a good reference here to follow, and thus it should be "430", not
> "403", as you suggest. As for the letter... Anything that's not
> currently in use is fine for me =).

P, Q, S are used for other YCbCr layouts, R is used for red, and T was the next
letter in the alphabet… But I really like the "Tomi" format layout
explanation! :)

I'd also be fine with pretty much any letter not currently in use in other
formats.


^ permalink raw reply

* Re: [PATCH] arm64/hwcap: Include kernel-hwcap.h in list of generated files
From: Geert Uytterhoeven @ 2026-04-16 16:14 UTC (permalink / raw)
  To: Mark Brown
  Cc: Catalin Marinas, Will Deacon, Marek Vasut, linux-arm-kernel,
	linux-kernel
In-Reply-To: <20260413-arm64-hwcap-gen-fix-v1-1-26c56aed6908@kernel.org>

On Mon, 13 Apr 2026 at 17:46, Mark Brown <broonie@kernel.org> wrote:
> When adding generation for the kernel internal constants for hwcaps the
> generated file was not explicitly flagged as such in the build system,
> causing it to be regenerated on each build. This wasn't obvious when the
> series the change was included in was developed since it was all about
> changes that trigger rebuilds anyway.
>
> Fixes: abed23c3c44f5 (arm64/hwcap: Generate the KERNEL_HWCAP_ definitions for the hwcaps)
> Reported-by: Marek Vasut <marex@nabladev.com>
> Signed-off-by: Mark Brown <broonie@kernel.org>

Just noticed the issue, bisected it, and found a link to the fix...

Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>

Gr{oetje,eeting}s,

                        Geert


--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds


^ permalink raw reply

* Re: [PATCH net-next 5/6] net: stmmac: move PHY handling out of __stmmac_open()/release()
From: Jakub Kicinski @ 2026-04-16 16:08 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Alexander Stein, Andrew Lunn, Heiner Kallweit, Alexandre Torgue,
	Andrew Lunn, David S. Miller, Eric Dumazet, linux-arm-kernel,
	linux-stm32, Maxime Coquelin, netdev, Paolo Abeni
In-Reply-To: <aeDojTdDTELfpT0X@shell.armlinux.org.uk>

On Thu, 16 Apr 2026 14:47:57 +0100 Russell King (Oracle) wrote:
> The next problem will be netdev's policy over reviews vs patches
> balance which I'm already in deficit, and I have *NO* *TIME*
> whatsoever to review patches - let alone propose patches to
> fix people's problems.
> 
> So I'm going to say this plainly: if netdev wants to enforce that
> rule, then I won't be fixing people's problems.

Do you have a better proposal?
I'm under the same pressure of a million stupid projects from my employer
as you are. Do y'all think that upstream maintainers have time given by
their employers to do the reviews? SMH.


^ permalink raw reply

* Re: [PATCH v7 1/3] dt-bindings: pinctrl: Add aspeed,ast2700-soc0-pinctrl
From: Conor Dooley @ 2026-04-16 15:54 UTC (permalink / raw)
  To: Billy Tsai
  Cc: Lee Jones, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Joel Stanley, Andrew Jeffery, Linus Walleij, Bartosz Golaszewski,
	Ryan Chen, Andrew Jeffery, devicetree, linux-arm-kernel,
	linux-aspeed, linux-kernel, openbmc, linux-gpio, linux-clk
In-Reply-To: <20260416-upstream_pinctrl-v7-1-d72762253163@aspeedtech.com>

[-- Attachment #1: Type: text/plain, Size: 5099 bytes --]

On Thu, Apr 16, 2026 at 03:29:43PM +0800, Billy Tsai wrote:
> Add a device tree binding for the pin controller found in the
> ASPEED AST2700 SoC0.
> 
> The controller manages various peripheral functions such as eMMC, USB,
> VGA DDC, JTAG, and PCIe root complex signals.
> 
> Describe the AST2700 SoC0 pin controller using standard pin multiplexing
> and configuration properties.
> 
> Signed-off-by: Billy Tsai <billy_tsai@aspeedtech.com>
> ---
>  .../pinctrl/aspeed,ast2700-soc0-pinctrl.yaml       | 162 +++++++++++++++++++++
>  1 file changed, 162 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2700-soc0-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2700-soc0-pinctrl.yaml
> new file mode 100644
> index 000000000000..947f3cd09fcc
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2700-soc0-pinctrl.yaml
> @@ -0,0 +1,162 @@
> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/pinctrl/aspeed,ast2700-soc0-pinctrl.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: ASPEED AST2700 SoC0 Pin Controller
> +
> +maintainers:
> +  - Billy Tsai <billy_tsai@aspeedtech.com>
> +
> +description:
> +  The AST2700 features a dual-SoC architecture with two interconnected SoCs,
> +  each having its own System Control Unit (SCU) for independent pin control.
> +  This pin controller manages the pin multiplexing for SoC0.
> +
> +  The SoC0 pin controller manages pin functions including eMMC, VGA DDC,
> +  dual USB3/USB2 ports (A and B), JTAG, and PCIe root complex interfaces.
> +
> +properties:
> +  compatible:
> +    const: aspeed,ast2700-soc0-pinctrl
> +  reg:
> +    maxItems: 1
> +
> +patternProperties:
> +  '-state$':
> +    type: object
> +    allOf:
> +      - $ref: pinmux-node.yaml#
> +      - $ref: pincfg-node.yaml#
> +
> +    additionalProperties: false
> +
> +    properties:
> +      function:
> +        enum:
> +          - EMMC
> +          - JTAGDDR
> +          - JTAGM0
> +          - JTAGPCIEA
> +          - JTAGPCIEB
> +          - JTAGPSP
> +          - JTAGSSP
> +          - JTAGTSP
> +          - JTAGUSB3A
> +          - JTAGUSB3B
> +          - PCIERC0PERST
> +          - PCIERC1PERST
> +          - TSPRSTN
> +          - UFSCLKI
> +          - USB2AD0
> +          - USB2AD1
> +          - USB2AH
> +          - USB2AHP
> +          - USB2AHPD0
> +          - USB2AXH
> +          - USB2AXH2B
> +          - USB2AXHD1
> +          - USB2AXHP
> +          - USB2AXHP2B
> +          - USB2AXHPD1
> +          - USB2BD0
> +          - USB2BD1
> +          - USB2BH
> +          - USB2BHP
> +          - USB2BHPD0
> +          - USB2BXH
> +          - USB2BXH2A
> +          - USB2BXHD1
> +          - USB2BXHP
> +          - USB2BXHP2A
> +          - USB2BXHPD1
> +          - USB3AXH
> +          - USB3AXH2B
> +          - USB3AXHD
> +          - USB3AXHP
> +          - USB3AXHP2B
> +          - USB3AXHPD
> +          - USB3BXH
> +          - USB3BXH2A
> +          - USB3BXHD
> +          - USB3BXHP
> +          - USB3BXHP2A
> +          - USB3BXHPD
> +          - VB
> +          - VGADDC
> +
> +      groups:
> +        enum:
> +          - EMMCCDN
> +          - EMMCG1
> +          - EMMCG4
> +          - EMMCG8
> +          - EMMCWPN
> +          - JTAG0
> +          - PCIERC0PERST
> +          - PCIERC1PERST
> +          - TSPRSTN
> +          - UFSCLKI
> +          - USB2A
> +          - USB2AAP
> +          - USB2ABP
> +          - USB2ADAP
> +          - USB2AH
> +          - USB2AHAP
> +          - USB2B
> +          - USB2BAP
> +          - USB2BBP
> +          - USB2BDBP
> +          - USB2BH
> +          - USB2BHBP
> +          - USB3A
> +          - USB3AAP
> +          - USB3ABP
> +          - USB3B
> +          - USB3BAP
> +          - USB3BBP
> +          - VB0
> +          - VB1
> +          - VGADDC
> +      pins:
> +        enum:
> +          - AB13
> +          - AB14
> +          - AC13
> +          - AC14
> +          - AD13
> +          - AD14
> +          - AE13
> +          - AE14
> +          - AE15
> +          - AF13
> +          - AF14
> +          - AF15

Why do you have groups and pins?

Is it valid in your device to have groups and pins in the same node?

> +
> +      drive-strength:
> +        enum: [3, 6, 8, 11, 16, 18, 20, 23, 30, 32, 33, 35, 37, 38, 39, 41]
> +
> +      bias-disable: true
> +      bias-pull-up: true
> +      bias-pull-down: true
> +
> +required:
> +  - compatible
> +  - reg
> +
> +allOf:
> +  - $ref: pinctrl.yaml#
> +
> +additionalProperties: false
> +
> +examples:
> +  - |
> +    pinctrl@400 {
> +        compatible = "aspeed,ast2700-soc0-pinctrl";
> +        reg = <0x400 0x318>;
> +        emmc-state {
> +            function = "EMMC";
> +            groups = "EMMCG1";
> +        };
> +    };
> 
> -- 
> 2.34.1
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox