Linux-ARM-Kernel Archive on lore.kernel.org

Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v7 4/6] clk: en7523: Add support for selecting the Serdes port in SCU
From: Christian Marangi @ 2026-05-19 22:08 UTC (permalink / raw)
  To: Michael Turquette, Stephen Boyd, Brian Masney, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Christian Marangi, Vinod Koul,
	Neil Armstrong, Lorenzo Bianconi, Felix Fietkau, linux-clk,
	devicetree, linux-kernel, linux-arm-kernel, linux-phy
In-Reply-To: <20260519220813.28468-1-ansuelsmth@gmail.com>

In the SCU register for clock and reset, there are also some register to
select the Serdes port mode. The Airoha AN7581 SoC have 4 different Serdes
that can switch between PCIe, USB or Ethernet mode.

Add a simple PHY provider that expose the .set_mode OP to toggle the
requested mode for the Serdes port.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
---
 drivers/clk/Kconfig      |   1 +
 drivers/clk/clk-en7523.c | 207 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 205 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index b2efbe9f6acb..e60a824b5117 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -221,6 +221,7 @@ config COMMON_CLK_EN7523
 	bool "Clock driver for Airoha/EcoNet SoC system clocks"
 	depends on OF
 	depends on ARCH_AIROHA || ECONET || COMPILE_TEST
+	select GENERIC_PHY
 	default ARCH_AIROHA
 	help
 	  This driver provides the fixed clocks and gates present on Airoha
diff --git a/drivers/clk/clk-en7523.c b/drivers/clk/clk-en7523.c
index 1ab0e2eca5d3..58ec071388a4 100644
--- a/drivers/clk/clk-en7523.c
+++ b/drivers/clk/clk-en7523.c
@@ -6,6 +6,8 @@
 #include <linux/io.h>
 #include <linux/mfd/syscon.h>
 #include <linux/platform_device.h>
+#include <linux/phy.h>
+#include <linux/phy/phy.h>
 #include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/reset-controller.h>
@@ -14,6 +16,7 @@
 #include <dt-bindings/reset/airoha,en7581-reset.h>
 #include <dt-bindings/clock/econet,en751221-scu.h>
 #include <dt-bindings/reset/econet,en751221-scu.h>
+#include <dt-bindings/soc/airoha,scu-ssr.h>
 
 #define RST_NR_PER_BANK			32
 
@@ -40,9 +43,22 @@
 #define   REG_HIR_MASK			GENMASK(31, 16)
 /* EN7581 */
 #define REG_NP_SCU_PCIC			0x88
+#define REG_NP_SCU_SSR3			0x94
+#define REG_SSUSB_HSGMII_SEL_MASK	BIT(29)
+#define REG_SSUSB_HSGMII_SEL_HSGMII	FIELD_PREP_CONST(REG_SSUSB_HSGMII_SEL_MASK, 0x0)
+#define REG_SSUSB_HSGMII_SEL_USB	FIELD_PREP_CONST(REG_SSUSB_HSGMII_SEL_MASK, 0x1)
 #define REG_NP_SCU_SSTR			0x9c
 #define REG_PCIE_XSI0_SEL_MASK		GENMASK(14, 13)
+#define REG_PCIE_XSI0_SEL_PCIE		FIELD_PREP_CONST(REG_PCIE_XSI0_SEL_MASK, 0x0)
+#define REG_PCIE_XSI0_SEL_XFI		FIELD_PREP_CONST(REG_PCIE_XSI0_SEL_MASK, 0x1)
+#define REG_PCIE_XSI0_SEL_HSGMII	FIELD_PREP_CONST(REG_PCIE_XSI0_SEL_MASK, 0x2)
 #define REG_PCIE_XSI1_SEL_MASK		GENMASK(12, 11)
+#define REG_PCIE_XSI1_SEL_PCIE		FIELD_PREP_CONST(REG_PCIE_XSI1_SEL_MASK, 0x0)
+#define REG_PCIE_XSI1_SEL_XFI		FIELD_PREP_CONST(REG_PCIE_XSI1_SEL_MASK, 0x1)
+#define REG_PCIE_XSI1_SEL_HSGMII	FIELD_PREP_CONST(REG_PCIE_XSI1_SEL_MASK, 0x2)
+#define REG_USB_PCIE_SEL_MASK		BIT(3)
+#define REG_USB_PCIE_SEL_PCIE		FIELD_PREP_CONST(REG_USB_PCIE_SEL_MASK, 0x0)
+#define REG_USB_PCIE_SEL_USB		FIELD_PREP_CONST(REG_USB_PCIE_SEL_MASK, 0x1)
 #define REG_CRYPTO_CLKSRC2		0x20c
 /* EN751221 */
 #define EN751221_REG_SPI_DIV		0x0cc
@@ -81,6 +97,8 @@ enum en_hir {
 	HIR_MAX		= 14,
 };
 
+#define EN_SERDES_PHY_NUM		4
+
 struct en_clk_desc {
 	int id;
 	const char *name;
@@ -113,6 +131,16 @@ struct en_rst_data {
 	struct reset_controller_dev rcdev;
 };
 
+struct en_serdes_phy_instance {
+	struct phy *phy;
+	unsigned int serdes_port;
+};
+
+struct en_clk_priv {
+	void __iomem *base;
+	struct en_serdes_phy_instance *serdes_phys[EN_SERDES_PHY_NUM];
+};
+
 struct en_clk_soc_data {
 	u32 num_clocks;
 	const struct clk_ops pcie_ops;
@@ -830,12 +858,173 @@ static int en7581_reset_register(struct device *dev, void __iomem *base,
 	return devm_reset_controller_register(dev, &rst_data->rcdev);
 }
 
+static int en7581_serdes_phy_set_mode(struct phy *phy, enum phy_mode mode,
+				      int submode)
+{
+	struct en_serdes_phy_instance *instance = phy_get_drvdata(phy);
+	struct en_clk_priv *priv = dev_get_drvdata(phy->dev.parent);
+	u32 reg, mask, sel, val;
+
+	switch (instance->serdes_port) {
+	case AIROHA_SCU_SERDES_PCIE1:
+		reg = REG_NP_SCU_SSTR;
+		mask = REG_PCIE_XSI0_SEL_MASK;
+
+		if (mode != PHY_MODE_ETHERNET && mode != PHY_MODE_PCIE)
+			return -EINVAL;
+
+		if (mode == PHY_MODE_ETHERNET) {
+			switch (submode) {
+			case PHY_INTERFACE_MODE_USXGMII:
+			case PHY_INTERFACE_MODE_10GBASER:
+				sel = REG_PCIE_XSI0_SEL_XFI;
+				break;
+			case PHY_INTERFACE_MODE_SGMII:
+			case PHY_INTERFACE_MODE_1000BASEX:
+			case PHY_INTERFACE_MODE_2500BASEX:
+				sel = REG_PCIE_XSI0_SEL_HSGMII;
+				break;
+			default:
+				return -EINVAL;
+			}
+		} else {
+			sel = REG_PCIE_XSI0_SEL_PCIE;
+		}
+
+		break;
+	case AIROHA_SCU_SERDES_PCIE2:
+		if (mode != PHY_MODE_ETHERNET && mode != PHY_MODE_PCIE)
+			return -EINVAL;
+
+		if (mode == PHY_MODE_ETHERNET) {
+			switch (submode) {
+			case PHY_INTERFACE_MODE_USXGMII:
+			case PHY_INTERFACE_MODE_10GBASER:
+				sel = REG_PCIE_XSI1_SEL_XFI;
+				break;
+			case PHY_INTERFACE_MODE_SGMII:
+			case PHY_INTERFACE_MODE_1000BASEX:
+			case PHY_INTERFACE_MODE_2500BASEX:
+				sel = REG_PCIE_XSI1_SEL_HSGMII;
+				break;
+			default:
+				return -EINVAL;
+			}
+		} else {
+			sel = REG_PCIE_XSI1_SEL_PCIE;
+		}
+
+		break;
+	case AIROHA_SCU_SERDES_USB1:
+		reg = REG_NP_SCU_SSR3;
+		mask = REG_SSUSB_HSGMII_SEL_MASK;
+
+		if (mode != PHY_MODE_ETHERNET && mode != PHY_MODE_USB_DEVICE &&
+		    mode != PHY_MODE_USB_DEVICE_SS)
+			return -EINVAL;
+
+		if (mode == PHY_MODE_ETHERNET)
+			sel = REG_SSUSB_HSGMII_SEL_HSGMII;
+		else
+			sel = REG_SSUSB_HSGMII_SEL_USB;
+
+		break;
+	case AIROHA_SCU_SERDES_USB2:
+		reg = REG_NP_SCU_SSTR;
+		mask = REG_USB_PCIE_SEL_MASK;
+
+		if (mode != PHY_MODE_PCIE && mode != PHY_MODE_USB_DEVICE &&
+		    mode != PHY_MODE_USB_DEVICE_SS)
+			return -EINVAL;
+
+		if (mode == PHY_MODE_PCIE)
+			sel = REG_USB_PCIE_SEL_PCIE;
+		else
+			sel = REG_USB_PCIE_SEL_USB;
+
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	val = readl(priv->base + reg);
+	val &= ~mask;
+	val |= sel;
+	writel(val, priv->base + reg);
+
+	return 0;
+}
+
+static const struct phy_ops en7581_serdes_phy_ops = {
+	.set_mode	= en7581_serdes_phy_set_mode,
+	.owner		= THIS_MODULE,
+};
+
+static struct phy *en7581_serdes_phy_xlate(struct device *dev,
+					   const struct of_phandle_args *args)
+{
+	struct en_clk_priv *priv = dev_get_drvdata(dev);
+	struct en_serdes_phy_instance *instance;
+	unsigned int serdes_port;
+
+	if (args->args_count != 1) {
+		dev_err(dev, "invalid number of cells in 'phy' property\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	serdes_port = args->args[0];
+	if (serdes_port >= EN_SERDES_PHY_NUM) {
+		dev_err(dev, "invalid serdes port: %d\n", serdes_port);
+		return ERR_PTR(-EINVAL);
+	}
+
+	instance = priv->serdes_phys[serdes_port];
+	if (!instance) {
+		dev_err(dev, "failed to find appropriate serdes phy\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	return instance->phy;
+}
+
+static int en7581_serdes_phy_register(struct device *dev)
+{
+	struct en_clk_priv *priv = dev_get_drvdata(dev);
+	struct phy_provider *phy_provider;
+	int i;
+
+	for (i = 0; i < EN_SERDES_PHY_NUM; i++) {
+		struct en_serdes_phy_instance *instance;
+
+		instance = devm_kzalloc(dev, sizeof(*instance),
+					GFP_KERNEL);
+		if (!instance)
+			return -ENOMEM;
+
+		instance->phy = devm_phy_create(dev, NULL,
+						&en7581_serdes_phy_ops);
+		if (IS_ERR(instance->phy))
+			return dev_err_probe(dev, PTR_ERR(instance->phy), "failed to create phy\n");
+
+		instance->serdes_port = i;
+		priv->serdes_phys[i] = instance;
+
+		phy_set_drvdata(instance->phy, instance);
+	}
+
+	phy_provider = devm_of_phy_provider_register(dev, en7581_serdes_phy_xlate);
+
+	return PTR_ERR_OR_ZERO(phy_provider);
+}
+
 static int en7581_clk_hw_init(struct platform_device *pdev,
 			      struct clk_hw_onecell_data *clk_data)
 {
+	struct en_clk_priv *priv = platform_get_drvdata(pdev);
 	struct regmap *map;
 	void __iomem *base;
 	u32 val;
+	int ret;
 
 	map = syscon_regmap_lookup_by_compatible("airoha,en7581-chip-scu");
 	if (IS_ERR(map))
@@ -845,6 +1034,8 @@ static int en7581_clk_hw_init(struct platform_device *pdev,
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
+	priv->base = base;
+
 	en7581_register_clocks(&pdev->dev, clk_data, map, base);
 
 	val = readl(base + REG_NP_SCU_SSTR);
@@ -853,9 +1044,12 @@ static int en7581_clk_hw_init(struct platform_device *pdev,
 	val = readl(base + REG_NP_SCU_PCIC);
 	writel(val | 3, base + REG_NP_SCU_PCIC);
 
-	return en7581_reset_register(&pdev->dev, base, en7581_rst_map,
-				     ARRAY_SIZE(en7581_rst_map),
-				     en7581_rst_ofs);
+	ret = en7581_reset_register(&pdev->dev, base, en7581_rst_map,
+				    ARRAY_SIZE(en7581_rst_map), en7581_rst_ofs);
+	if (ret)
+		return ret;
+
+	return en7581_serdes_phy_register(&pdev->dev);
 }
 
 static enum en_hir get_hw_id(void __iomem *np_base)
@@ -962,16 +1156,23 @@ static int en7523_clk_probe(struct platform_device *pdev)
 	struct device_node *node = pdev->dev.of_node;
 	const struct en_clk_soc_data *soc_data;
 	struct clk_hw_onecell_data *clk_data;
+	struct en_clk_priv *priv;
 	int r;
 
 	soc_data = device_get_match_data(&pdev->dev);
 
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
 	clk_data = devm_kzalloc(&pdev->dev,
 				struct_size(clk_data, hws, soc_data->num_clocks),
 				GFP_KERNEL);
 	if (!clk_data)
 		return -ENOMEM;
 
+	platform_set_drvdata(pdev, priv);
+
 	clk_data->num = soc_data->num_clocks;
 	r = soc_data->hw_init(pdev, clk_data);
 	if (r)
-- 
2.53.0



^ permalink raw reply related

* [PATCH v7 3/6] dt-bindings: phy: Add documentation for Airoha AN7581 USB PHY
From: Christian Marangi @ 2026-05-19 22:08 UTC (permalink / raw)
  To: Michael Turquette, Stephen Boyd, Brian Masney, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Christian Marangi, Vinod Koul,
	Neil Armstrong, Lorenzo Bianconi, Felix Fietkau, linux-clk,
	devicetree, linux-kernel, linux-arm-kernel, linux-phy
In-Reply-To: <20260519220813.28468-1-ansuelsmth@gmail.com>

Add documentation for Airoha AN7581 USB PHY that describe the USB PHY
for the USB controller.

Airoha AN7581 SoC support a maximum of 2 USB port. The USB 2.0 mode is
always supported. The USB 3.0 mode is optional and depends on the Serdes
mode currently configured on the system for the relevant USB port.

To correctly calibrate, the USB 2.0 port require correct value in
"airoha,usb2-monitor-clk-sel" property. Both the 2 USB 2.0 port permit
selecting one of the 4 monitor clock for calibration (internal clock not
exposed to the system) but each port have only one of the 4 actually
connected in HW hence the correct value needs to be specified in DT
based on board and the physical port. Normally it's monitor clock 1 for
USB1 and monitor clock 2 for USB2.

To correctly setup the Serdes mode attached to the USB 3.0 mode, a phys
property is required with the phandle pointing to the correct Serdes port
provided by the SCU node.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
---
 .../bindings/phy/airoha,an7581-usb-phy.yaml   | 62 +++++++++++++++++++
 MAINTAINERS                                   |  6 ++
 2 files changed, 68 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/phy/airoha,an7581-usb-phy.yaml

diff --git a/Documentation/devicetree/bindings/phy/airoha,an7581-usb-phy.yaml b/Documentation/devicetree/bindings/phy/airoha,an7581-usb-phy.yaml
new file mode 100644
index 000000000000..f561cf2a8103
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/airoha,an7581-usb-phy.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/airoha,an7581-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Airoha AN7581 SoC USB PHY
+
+maintainers:
+  - Christian Marangi <ansuelsmth@gmail.com>
+
+description: >
+  The Airoha AN7581 SoC USB PHY describes the USB PHY for the USB controller.
+
+  Airoha AN7581 SoC support a maximum of 2 USB port. The USB 2.0 mode is
+  always supported. The USB 3.0 mode is optional and depends on the Serdes
+  mode currently configured on the system for the relevant USB port.
+
+properties:
+  compatible:
+    const: airoha,an7581-usb-phy
+
+  reg:
+    maxItems: 1
+
+  airoha,usb2-monitor-clk-sel:
+    description: Describe what oscillator across the available 4
+      should be selected for USB 2.0 Slew Rate calibration.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [0, 1, 2, 3]
+
+  phys:
+    items:
+      - description: phandle to Serdes PHY
+
+  '#phy-cells':
+    description: The cell contains the mode, PHY_TYPE_USB2 or PHY_TYPE_USB3,
+      as defined in dt-bindings/phy/phy.h.
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - airoha,usb2-monitor-clk-sel
+  - '#phy-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/soc/airoha,scu-ssr.h>
+
+    phy@1fac0000 {
+        compatible = "airoha,an7581-usb-phy";
+        reg = <0x1fac0000 0x10000>;
+
+        airoha,usb2-monitor-clk-sel = <1>;
+        phys = <&scu AIROHA_SCU_SERDES_USB1>;
+
+        #phy-cells = <1>;
+    };
+
diff --git a/MAINTAINERS b/MAINTAINERS
index 21c0ef0b9ce5..932044785a39 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -771,6 +771,12 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/spi/airoha,en7581-snand.yaml
 F:	drivers/spi/spi-airoha-snfi.c
 
+AIROHA USB PHY DRIVER
+M:	Christian Marangi <ansuelsmth@gmail.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	Documentation/devicetree/bindings/phy/airoha,an7581-usb-phy.yaml
+
 AIRSPY MEDIA DRIVER
 L:	linux-media@vger.kernel.org
 S:	Orphan
-- 
2.53.0



^ permalink raw reply related

* [PATCH v7 2/6] dt-bindings: clock: airoha: Add PHY binding for Serdes port
From: Christian Marangi @ 2026-05-19 22:08 UTC (permalink / raw)
  To: Michael Turquette, Stephen Boyd, Brian Masney, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Christian Marangi, Vinod Koul,
	Neil Armstrong, Lorenzo Bianconi, Felix Fietkau, linux-clk,
	devicetree, linux-kernel, linux-arm-kernel, linux-phy
In-Reply-To: <20260519220813.28468-1-ansuelsmth@gmail.com>

Add PHY cell property for Serdes port selection. Currently supported only
for Airoha AN7581 SoC, that support up to 4 Serdes port.

The Serdes port can support both PCIe, USB3 or Ethernet mode.

The available Serdes port can be selected following the dt-binding header
in [2].

[2] <include/dt-bindings/soc/airoha,scu-ssr.h>

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
---
 .../devicetree/bindings/clock/airoha,en7523-scu.yaml     | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/devicetree/bindings/clock/airoha,en7523-scu.yaml b/Documentation/devicetree/bindings/clock/airoha,en7523-scu.yaml
index eb24a5687639..913ddc16182b 100644
--- a/Documentation/devicetree/bindings/clock/airoha,en7523-scu.yaml
+++ b/Documentation/devicetree/bindings/clock/airoha,en7523-scu.yaml
@@ -23,6 +23,7 @@ description: |
 
   All these identifiers can be found in:
   [1]: <include/dt-bindings/clock/en7523-clk.h>.
+  [2]: <include/dt-bindings/soc/airoha,scu-ssr.h>.
 
   The clocks are provided inside a system controller node.
 
@@ -50,6 +51,12 @@ properties:
     description: ID of the controller reset line
     const: 1
 
+  '#phy-cells':
+    description:
+      The first cell indicates the serdes phy number, see [2] for the
+      available serdes port.
+    const: 1
+
 required:
   - compatible
   - reg
@@ -65,6 +72,8 @@ allOf:
         reg:
           minItems: 2
 
+        '#phy-cells': false
+
   - if:
       properties:
         compatible:
-- 
2.53.0



^ permalink raw reply related

* [PATCH v7 1/6] dt-bindings: soc: Add bindings for Airoha SCU Serdes lines
From: Christian Marangi @ 2026-05-19 22:08 UTC (permalink / raw)
  To: Michael Turquette, Stephen Boyd, Brian Masney, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Christian Marangi, Vinod Koul,
	Neil Armstrong, Lorenzo Bianconi, Felix Fietkau, linux-clk,
	devicetree, linux-kernel, linux-arm-kernel, linux-phy
In-Reply-To: <20260519220813.28468-1-ansuelsmth@gmail.com>

The Airoha AN7581 SoC can configure the SCU serdes lines for multiple
purpose. For example the Serdes for the USB1 port can be both
used for USB 3.0 operation or for Ethernet. Or the USB2 serdes can both
used for USB 3.0 operation or for PCIe.

The PCIe Serdes can be both used for PCIe operation or Ethernet.

Add bindings to permit correct reference of the different ports in DT,
mostly to differentiate the different supported modes internally to the
drivers.

Values are just symbolic and enumerates the Serdes port with a specific
number for precise reference.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
---
 include/dt-bindings/soc/airoha,scu-ssr.h | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 include/dt-bindings/soc/airoha,scu-ssr.h

diff --git a/include/dt-bindings/soc/airoha,scu-ssr.h b/include/dt-bindings/soc/airoha,scu-ssr.h
new file mode 100644
index 000000000000..33c64844ada3
--- /dev/null
+++ b/include/dt-bindings/soc/airoha,scu-ssr.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef __DT_BINDINGS_AIROHA_SCU_SSR_H
+#define __DT_BINDINGS_AIROHA_SCU_SSR_H
+
+#define AIROHA_SCU_SERDES_PCIE1		0
+#define AIROHA_SCU_SERDES_PCIE2		1
+#define AIROHA_SCU_SERDES_USB1		2
+#define AIROHA_SCU_SERDES_USB2		3
+
+#endif /* __DT_BINDINGS_AIROHA_SCU_SSR_H */
-- 
2.53.0

^ permalink raw reply related

* [PATCH v7 0/6] airoha: an7581: USB support
From: Christian Marangi @ 2026-05-19 22:08 UTC (permalink / raw)
  To: Michael Turquette, Stephen Boyd, Brian Masney, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Christian Marangi, Vinod Koul,
	Neil Armstrong, Lorenzo Bianconi, Felix Fietkau, linux-clk,
	devicetree, linux-kernel, linux-arm-kernel, linux-phy

This is a major rework of the old v2 series.

The SoC always support USB 2.0 but for USB 3.0 it needs additional
configuration for the Serdes port. Such port can be either configured
for USB usage or for PCIe lines or HSGMII and these are configured
in the SCU space.

The previous implementation of a dedicated SSR driver was too
complex and fragile for the simple task of configuring a register
hence it was dropped and the handling is entirely in the PHY driver.

Everything was reducted to the dt-bindings to describe the Serdes line.

Also the property for the PHY are renamed to a more suitable name and
everything is now mandatory to simplify the implementation.
(the PHY are always present and active on the SoC)

Also other unrelated patch are dropped from this series.

Changes v7:
- Rework to double PHY implementation
  (suggested by Rob)
  Now the clk driver expose a PHY for Serdes port
  USB PHY driver selects it
- Rebase on top of linux-next

Changes v6:
- Fix kernel test robot (sparse warning)
  Link: https://lore.kernel.org/all/20260306190156.22297-1-ansuelsmth@gmail.com/

Changes v5:
- Add Ack and Review tag from Connor
- Implement Ethernet support in the USB driver
  (testing support for this Serdes on a special reference board)
- Use an7581 prefix for USB PHY driver
  Link: https://lore.kernel.org/all/20251107160251.2307088-1-ansuelsmth@gmail.com/

Changes v4:
- Rename PCIe and USB PHY to AN7581
- Drop airoha,scu (handled directly in driver)
- Drop dt-bindings for monitor clock in favor of raw values
- Better describe the usage of airoha,usb3-serdes
- Simplify values of dt-bindings SSR SERDES
  Link: https://lore.kernel.org/all/20251107160251.2307088-1-ansuelsmth@gmail.com/

Changes v3:
- Drop clk changes
- Drop SSR driver
- Rename property in Documentation
- Simplify PHY handling
- Move SSR handling inside the PHY driver
  Link: https://lore.kernel.org/all/20251029173713.7670-1-ansuelsmth@gmail.com/

Changes v2:
- Drop changes for simple-mfd
- Rework PHY node structure to single node
- Drop port-id property in favor of serdes-port and
  usb2-monitor-clock-sel
- Make the SSR driver probe from the clock driver

Christian Marangi (6):
  dt-bindings: soc: Add bindings for Airoha SCU Serdes lines
  dt-bindings: clock: airoha: Add PHY binding for Serdes port
  dt-bindings: phy: Add documentation for Airoha AN7581 USB PHY
  clk: en7523: Add support for selecting the Serdes port in SCU
  phy: move and rename Airoha PCIe PHY driver to dedicated directory
  phy: airoha: Add support for Airoha AN7581 USB PHY

 .../bindings/clock/airoha,en7523-scu.yaml     |    9 +
 .../bindings/phy/airoha,an7581-usb-phy.yaml   |   62 +
 MAINTAINERS                                   |   11 +-
 drivers/clk/Kconfig                           |    1 +
 drivers/clk/clk-en7523.c                      |  207 ++-
 drivers/phy/Kconfig                           |   11 +-
 drivers/phy/Makefile                          |    4 +-
 drivers/phy/airoha/Kconfig                    |   23 +
 drivers/phy/airoha/Makefile                   |    4 +
 drivers/phy/airoha/phy-an7581-pcie-regs.h     |  494 +++++++
 drivers/phy/airoha/phy-an7581-pcie.c          | 1290 +++++++++++++++++
 drivers/phy/airoha/phy-an7581-usb.c           |  562 +++++++
 include/dt-bindings/soc/airoha,scu-ssr.h      |   11 +
 13 files changed, 2672 insertions(+), 17 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/phy/airoha,an7581-usb-phy.yaml
 create mode 100644 drivers/phy/airoha/Kconfig
 create mode 100644 drivers/phy/airoha/Makefile
 create mode 100644 drivers/phy/airoha/phy-an7581-pcie-regs.h
 create mode 100644 drivers/phy/airoha/phy-an7581-pcie.c
 create mode 100644 drivers/phy/airoha/phy-an7581-usb.c
 create mode 100644 include/dt-bindings/soc/airoha,scu-ssr.h

-- 
2.53.0



^ permalink raw reply

* Re: [PATCH v2 0/5] mm: reduce mmap_lock contention and improve page fault performance
From: Barry Song @ 2026-05-19 22:01 UTC (permalink / raw)
  To: Liam R. Howlett
  Cc: Suren Baghdasaryan, Lorenzo Stoakes, Matthew Wilcox, akpm,
	linux-mm, david, vbabka, rppt, mhocko, jack, pfalcato, wanglian,
	chentao, lianux.mm, kunwu.chan, liyangouwen1, chrisl, kasong,
	shikemeng, nphamcs, bhe, youngjun.park, linux-arm-kernel,
	linux-kernel, loongarch, linuxppc-dev, linux-riscv, linux-s390,
	Nanzhe Zhao
In-Reply-To: <5zdaa5uv5oj27q3taopl3amops57ouxgyfsdveudz4ovmbdw7z@6lwrlyvmhcp2>

On Tue, May 19, 2026 at 10:17 PM Liam R. Howlett <liam@infradead.org> wrote:
>
> On 26/05/19 05:14AM, Barry Song wrote:
> > On Tue, May 19, 2026 at 3:57 AM Suren Baghdasaryan <surenb@google.com> wrote:
> > >
> > > On Mon, May 18, 2026 at 4:26 AM Barry Song <baohua@kernel.org> wrote:
> > > >
> > > > On Mon, May 18, 2026 at 5:47 PM Lorenzo Stoakes <ljs@kernel.org> wrote:
> > > > >
> > > > > On Sun, May 17, 2026 at 04:45:15PM +0800, Barry Song wrote:
> > [...]
> > > >
> > > > I think we either need to fix `fork()`, or keep the current
> > > > behavior of dropping the VMA lock before performing I/O.
> > >
> > > I see. So, this problem arises from the fact that we are changing the
> > > pagefaults requiring I/O operation to hold VMA lock...
> > > And you want to lock VMA on fork only if vma_is_anonymous(vma) ||
> > > is_cow_mapping(vma->vm_flags). So, we will be blocking page faults for
> > > anonymous and COW VMAs only while holding mmap_write_lock, preventing
> > > any VMA modification. On the surface, that looks ok to me but I might
> > > be missing some corner cases. If nobody sees any obvious issues, I
> > > think it's worth a try.
>
> From Barry's description, I think what he is saying is that the vma
> locking has caused the mmap_lock to become unfair?  I think what is

For now, we do not have this problem. Before per-VMA
locks, we dropped mmap_lock before doing I/O in the
page-fault path and then retried the page fault. After
per-VMA locks, we dropped the VMA lock before doing I/O in
the page-fault path and then retried the page fault.

The problem only starts to exist if we decide to perform
I/O without releasing the VMA lock — which is what Matthew
is suggesting, because it would allow us to rip out a large
amount of page-fault retry code.

> implied is that the per-vma locking may stall mmap_lock writes for
> longer than if the mmap_lock was taken in read mode?  Barry, is that
> correct?

Not the case — the actual situation is (if we modify the
current kernel to perform I/O without releasing VMA read locks):

thread 1 PF: lock vma1 read ----  IO ----- ;
thread 2 PF: lock vma2 read ----- IO ----- ;
thread 3 PF:  lock vma3 read ---- IO ----- ;
thread 4 fork:  mmap_lock_write ---- lock vma1, vma2, vma3 write ;
thread 5 :  take mmap_lock for any read/write reason

Now you can see that thread 4 has to wait for the I/O of
VMA1, VMA2, and VMA3 to complete, and thread 5 then has to
wait for thread 4 to release mmap_lock. Both thread 4 and
thread 5 can become extremely slow, because I/O may be stuck
anywhere in the bio/request queue or filesystem GC.

So now we have two choices:

1. Change fork() to avoid taking the vma write lock for vma1/2/3 where possible;
2. Keep the current kernel behavior and drop the VMA lock before I/O:

thread 1 PF: lock vma1 read; drop vma1 read_lock ----  IO ----- retry PF
thread 2 PF: lock vma2 read; drop vma2 read_lock ----- IO ----- retry PF
thread 3 PF:  lock vma3 read; drop vma3 read_lock ---- IO ----- retry PF

Option 2 is what mainline is currently doing, and what this
patchset also follows. The only difference in this patchset is
that page faults are retried under the VMA read lock, rather
than under mmap_lock as in the current kernel, which is causing
mmap_lock contention.

>
> Since Android is doing something (according to Barry) that should not be
> done (according to Willy), both of these together are causing slow down?

The only thing that would cause slowdown is holding the VMA
lock while performing I/O in the page-fault path, which is not
happening today. It would only happen if we insist on doing I/O
under the VMA lock without changing fork().

>
> >
> > Thanks. Besides the creation of processes via fork(), I
> > am also beginning to worry about the death of processes.
> >
> > One thing that came to my mind this morning
> > is that when lowmemorykiller decides to kill an app, we
> > want the memory to be released as quickly as possible so
> > the new app or user scenario can get memory sooner.
> >
> > In that case, if the app being killed is performing I/O
> > while holding the VMA lock, the unmapping procedure
> > could end up being blocked as well.
> >
> > If we release the VMA lock as we currently do, we allow
> > process exit to proceed.
> >
> > I haven't thought it through very clearly yet, and I
> > may be wrong. I'd like to do more investigation. I hope
> > the apps being killed stay very still, but who knows—we
> > have so many applications in the market.
> >
> > Meanwhile, if you have any comments regarding the death
> > of processes, they would be very welcome.
>
> The oom killer only cleans out anon/not shared vmas [1].  So, what this
> would hold up would be the actual process exit path.  Although that
> would have resources associated with it, the amount of resources should
> be relatively low compared to the amount freed by the oom reaper, right?
>
> The other entry point that's mostly to do with android,
> process_mrelease() [2] will end up in the same  __oom_reap_task_mm()
> function.
>
> So, for the most part, the memory will be freed while the file backed
> vma completes IO and that sounds like the right thing to do anyways.

Thanks very much for your valuable input!
I’m going to run more experiments to dig deeper into this.

>
> Thanks,
> Liam
>
> [1]. https://elixir.bootlin.com/linux/v7.1-rc4/source/mm/oom_kill.c#L547
> [2]. https://elixir.bootlin.com/linux/v6.18.6/source/mm/oom_kill.c#L1210
>

Best Regards
Barry


^ permalink raw reply

* Re: [PATCH] Documentation: KVM: Document guest-visible compatibility expectations
From: David Woodhouse @ 2026-05-19 21:58 UTC (permalink / raw)
  To: Oliver Upton
  Cc: Paolo Bonzini, Marc Zyngier, Will Deacon, Jonathan Corbet,
	Shuah Khan, kvm, Linux Doc Mailing List,
	Kernel Mailing List, Linux, Sean Christopherson, Jim Mattson,
	Joey Gouly, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Raghavendra Rao Ananta, Eric Auger, Kees Cook, Arnd Bergmann,
	Nathan Chancellor, linux-arm-kernel, kvmarm, linux-kselftest
In-Reply-To: <agzR2kaJsNa8X9lF@kernel.org>

[-- Attachment #1: Type: text/plain, Size: 6861 bytes --]

On Tue, 2026-05-19 at 14:10 -0700, Oliver Upton wrote:
> On Tue, May 19, 2026 at 03:13:30PM +0100, David Woodhouse wrote:
> > On Tue, 2026-05-19 at 15:53 +0200, Paolo Bonzini wrote:
> > > On Tue, May 19, 2026 at 3:00 PM David Woodhouse <dwmw2@infradead.org> wrote:
> > > > Or some guest configurations which have only ever been tested under KVM
> > > > could have a bug where they *rely* on the registers not being writable,
> > > > and write values which are inconsistent with the rest of their
> > > > configuration. Which breaks the moment those registers become writable.
> > > 
> > > Yeah, just having guests that worked by utter chance - but you still
> > > don't want to break them - is the case that is most likely. Crappy
> > > code that runs only under emulation/virtualization appears with
> > > probability 1 over time.
> > > 
> > > Is this likely in this specific case---probably not, honestly.
> > > Christoffer's patch dates back to 2018 (commit d53c2c29ae0d); *back
> > > then* KVM/Arm was a lot less mature, and people developing for Arm on
> > > vanilla upstream kernels have moved on from Linux 4.19.
> > 
> > It's not really 2018 though. EC2 is still running kernels with that
> > older commit reverted because of the breaking change that it
> > introduced.
> > 
> > So the behaviour corresponding to GICD_IIDR.implementation_rev=1 is
> > still current for *millions* of guests.
> > 
> > I'm now finding that revert in our tree during a *later* kernel upgrade
> > and trying to eliminate it. 
> 
> Still, as far as upstream is concerned this is damn near a decade old.
> Decisions that you or your peers made in the downstream doesn't change
> that.
>
> > And sure, I have given the engineers responsible for that a very hard
> > stare and suggested that they should have fixed it 'properly' in the
> > first place with a patch like the one we're discussing right now.
> > 
> > And they're looking at this thread and saying "haha no, if fixing
> > things properly for Arm is this hard then we'll stick with the crappy
> > approach".
> 
> The appropriate time to ask for accomodation was a *very* long time ago.
> 
> And in the absence of clear evidence of a guest depending on the broken
> IGROUPR behavior, I don't see how the guest-side changes of Christoffer's
> series are any different from the multitude of bug fixes that we take
> every single release cycle. It is an unfortunate bug and I concur with
> Marc that it doesn't seem like the sort of thing a guest could rely
> upon.

I find this concerning, because I've already explained this.

There is a very real possibility of guests simply not *noticing* that
they had bugs in this area, as it didn't *matter* what they wrote to
these registers since it never worked.

There is an even larger possibility of guests having worked around the
original issue by *detecting* whether the registers were actually
writable before choosing to use the alternative groups. And if such a
guest launches on a new kernel and then needs to be rolled back to an
older kernel, that will also break.

> Because it is very much a bug fix, it should've happened without a
> change to the revision number.

No. Changing the revision number in conjunction with the guest-visible
behaviour change is *absolutely* the right thing to do.

> Now, the handling of GICD_IIDR itself is a separate issue. By my count,
> the series broke UAPI on three separate occasions. Before b489edc36169
> IIDR was RAZ/WI from userspace. And of course dd6251e463d3 and d53c2c29ae0d
> changed the revision with no way of restoring the old value.
> 
> And really, IIDR should've *never* been used as a buy in for new UAPI
> because it unnecessarily becomes guest visible. 49a1a2c70a7f ("KVM: arm64:
> vgic-v3: Advertise GICR_CTLR.{IR, CES} as a new GICD_IIDR revision") is
> a much better example for IIDR going forward as it gates *guest-side*
> behavior.

Yes, 49a1a2c70a7f is the exemplar. The guest-visible behaviour changes,
so we get a new IIDR revision and the ability to preserve the previous
behaviour by setting IIDR to the old value. That is exactly how it
should always be done.

> > I do not want them to be right. I don't think any of us want that.
> > 
> > > I would still lean towards accepting the code considering the limited
> > > complexity of the addition (in fact I like it more now that it uses
> > > IIDR instead of v2_groups_user_writable, but that's taste). 
> > 
> > I'm absolutely prepared to have a separate technical discussion about
> > the v2_groups_user_writable thing for GICv2, which is the second part
> > of that series.
> > 
> > It seems like the right thing to do, and as far as I can tell, this
> > code *never* worked with QEMU. But I'm not sure who even cares about
> > GICv2 any more. I couldn't find hardware and I had to test the whole
> > thing inside qemu-tcg.
> > 
> > But the 'IIDR defaults to 3 but the *behaviour* doesn't match until you
> > explicitly *set* it to 3' thing seemed so *egregiously* wrong to me,
> > that I fixed it anyway.
> 
> Wrong or not, this behavior is documented unambiguously. From the VGICv2
> UAPI documentation:
> 
> """
> Userspace should set GICD_IIDR before setting any other registers (both
> KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
> the expected behavior. Unless GICD_IIDR has been set from userspace, writes
> to the interrupt group registers (GICD_IGROUPR) are ignored.
> """
> 
> I'm not inclined to change that.

That'll all very well... but as far as I can tell, QEMU *doesn't* set
GICD_IIDR, so it still gets the bizarre behaviour where the *guest* can
write the registers, but userspace can't. So it looks like it'll work
except migration will fail. Am I missing something?

But honestly, I don't care one iota about GICv2; I was only trying to
do the cleanup while I was there. Feel free to drop that part entirely.

>  As a way out of this whole mess, can we
> instead:
> 
>  - Allow userspace to set IIDR.Revision to 1
> 
>  - Drop any bug emulation from the handling of IGROUPR registers

It doesn't make sense to allow setting IIDR.Revision to 1 *without* the
one-liner that actually implements the corresponding behaviour change
in the IGROUPR registers. And as explained at least twice now, it's the
behaviour change that's *important* here.

The fact that it's a long-standing bug in KVM which downstream has been
working around for a long time doesn't matter. The unconditional
behavioural change *is* a bug and we should fix it.

>  - Special-case the stupid GICv2 UAPI where IGROUPR are only writable if
>    the VMM has written to IIDR and the revision >= 2

That already *is* a special case, right? And you'd rather leave it as it is?

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5069 bytes --]

^ permalink raw reply

* Re: [PATCH] ASoC: mediatek: mt2701: fix snprintf bounds
From: Rosen Penev @ 2026-05-19 21:57 UTC (permalink / raw)
  To: Mark Brown
  Cc: linux-sound, Liam Girdwood, Jaroslav Kysela, Takashi Iwai,
	Matthias Brugger, AngeloGioacchino Del Regno,
	open list:ARM/Mediatek SoC support,
	moderated list:ARM/Mediatek SoC support,
	moderated list:ARM/Mediatek SoC support
In-Reply-To: <61fa661f-0281-4b58-8042-f3f60cdc3407@sirena.org.uk>

On Tue, May 19, 2026 at 1:33 AM Mark Brown <broonie@kernel.org> wrote:
>
> On Mon, May 18, 2026 at 06:04:40PM -0700, Rosen Penev wrote:
> > For whatever reason, GCC is unable to figure out that i2s_num is a
> > single digit number, with MT2701_BASE_CLK_NUM being the maximum value it
> > represents. Add a min() call to help it out and fix W=1 errors regarding
> > snprintf bounds.
>
> > +     i2s_num = min(MT2701_BASE_CLK_NUM, afe_priv->soc->i2s_num);
> >       /* Get I2S related clocks */
> > -     for (i = 0; i < afe_priv->soc->i2s_num; i++) {
> > +     for (i = 0; i < i2s_num; i++) {
>
> This makes the code rather obscure and will start silently failing if we
> ever get more than 9 clocks, we should at least put a build time assert
> or a warn on in there.
how so? MT2701_BASE_CLK_NUM would increase.


^ permalink raw reply

* Re: [PATCH v3 0/9] KVM: arm64: selftests: Basic nested guest support
From: Oliver Upton @ 2026-05-19 21:31 UTC (permalink / raw)
  To: Wei-Lin Chang
  Cc: linux-kernel, kvm, linux-kselftest, linux-arm-kernel, kvmarm,
	Paolo Bonzini, Shuah Khan, Marc Zyngier, Joey Gouly,
	Suzuki K Poulose, Zenghui Yu, Catalin Marinas, Will Deacon,
	Itaru Kitayama
In-Reply-To: <20260516183003.799058-1-weilin.chang@arm.com>

Hi Wei-Lin,

Thank you very much for the series.

I haven't had time to read through much of this yet, but I noticed
Itaru's comment about leaving stage-1 translation disabled for the L2.

Since selftests expects things like atomics to work we will definitely
need the stage-1 MMU to be enabled w/ Normal mappings (this was broken
in the past [*]). I wonder if we can (ab)use the pre-existing stage-1
tables that the selftests library creates on behalf of the L1. After all,
L1 and L2 are both effectively in the same virtual address space.

[*] https://lore.kernel.org/kvmarm/20250405001042.1470552-1-rananta@google.com/

Thanks,
Oliver

On Sat, May 16, 2026 at 07:29:54PM +0100, Wei-Lin Chang wrote:
> Hi,
> 
> This is v3 of adding basic support for running nested guests (L2) in
> kselftest. This time a framework for enabling stage-2 in the guest is
> added, including the s2_mmu struct, s2 translation control setup, and
> a stage-2 page table generator. Similar to L2 vCPU management, all
> stage-2 work is done in the guest instead of userspace.
> 
> An additional shadow_stage2 test is added which leverages the framework
> to run L2 with stage-2 enabled.
> 
> * Changes from v2 [1]:
> 
>   - Update vm_paddr_t to gpa_t, vm_vaddr_t to gva_t.
> 
>   - Added framework for enabling stage-2 in the guest.
> 
>   - Added shadow_stage2 test.
> 
> Thanks!
> 
> [1]: https://lore.kernel.org/kvmarm/20260412142216.3806482-1-weilin.chang@arm.com/
> 
> Wei-Lin Chang (9):
>   KVM: arm64: selftests: Add GPR save/restore functions for NV
>   KVM: arm64: selftests: Add helpers for guest hypervisors
>   KVM: arm64: selftests: Add hello_nested basic NV selftest
>   KVM: arm64: selftests: Enhance hello_nested test
>   KVM: arm64: selftests: Add shadow_stage2 test
>   KVM: arm64: selftests: shadow_stage2: Allocate L2 stack from dedicated
>     pool
>   KVM: arm64: selftests: shadow_stage2: Check supported stage-2 granule
>     size
>   KVM: arm64: selftests: Add infrastructure for using stage-2 in guest
>   KVM: arm64: selftests: shadow_stage2: Turn on stage-2 translation for
>     the nested guest
> 
>  tools/testing/selftests/kvm/Makefile.kvm      |   5 +
>  .../selftests/kvm/arm64/hello_nested.c        | 134 +++++++++
>  .../selftests/kvm/arm64/shadow_stage2.c       | 165 +++++++++++
>  .../selftests/kvm/include/arm64/nested.h      |  85 ++++++
>  tools/testing/selftests/kvm/lib/arm64/entry.S | 137 +++++++++
>  .../selftests/kvm/lib/arm64/hyp-entry.S       |  77 +++++
>  .../testing/selftests/kvm/lib/arm64/nested.c  | 264 ++++++++++++++++++
>  7 files changed, 867 insertions(+)
>  create mode 100644 tools/testing/selftests/kvm/arm64/hello_nested.c
>  create mode 100644 tools/testing/selftests/kvm/arm64/shadow_stage2.c
>  create mode 100644 tools/testing/selftests/kvm/include/arm64/nested.h
>  create mode 100644 tools/testing/selftests/kvm/lib/arm64/entry.S
>  create mode 100644 tools/testing/selftests/kvm/lib/arm64/hyp-entry.S
>  create mode 100644 tools/testing/selftests/kvm/lib/arm64/nested.c
> 
> -- 
> 2.43.0
> 
> 


^ permalink raw reply

* Re: [PATCH 0/3] arm64: dts: imx: Fix PCIe EP vpcie-supply properties
From: Frank.Li @ 2026-05-19 21:24 UTC (permalink / raw)
  To: s.hauer, kernel, festevam, robh, krzk+dt, conor+dt, hongxing.zhu,
	shawnguo, Sherry Sun
  Cc: Frank Li, imx, linux-arm-kernel, devicetree, linux-kernel
In-Reply-To: <20260509015411.3218700-1-sherry.sun@nxp.com>

From: Frank Li <Frank.Li@nxp.com>


On Sat, 09 May 2026 09:54:08 +0800, Sherry Sun wrote:
> This series fixes PCIe endpoint mode vpcie-supply properties across
> multiple i.MX platforms.
> 
> For PCIe endpoint mode, the vpcie-supply should either control the
> actual M.2 power supply or be omitted if the power is always on and
> uncontrollable.
> 
> [...]

Applied, thanks!

[1/3] arm64: dts: imx8dxl-evk: Remove unnecessary PCIe EP properties
      commit: 538525f34158deb21b782164bde23ec07a353e4a
[2/3] arm64: dts: imx8qxp-mek: Remove unnecessary PCIe EP vpcie-supply
      commit: b5ba136d535f1060322af31c0cf74d85ea19892d
[3/3] arm64: dts: imx95-19x19-evk: Fix PCIe EP vpcie-supply
      commit: 596d0f9f4fefffbf783ab26cfa90cf50f5dd6bb0

Best regards,
-- 
Frank Li <Frank.Li@nxp.com>


^ permalink raw reply

* Re: [RFC PATCH 1/2] KVM: arm64: Introduce S2 walker SKIP return options
From: Oliver Upton @ 2026-05-19 21:21 UTC (permalink / raw)
  To: Leonardo Bras
  Cc: Will Deacon, Marc Zyngier, Joey Gouly, Suzuki K Poulose,
	Zenghui Yu, Catalin Marinas, Fuad Tabba, Raghavendra Rao Ananta,
	linux-arm-kernel, kvmarm, linux-kernel
In-Reply-To: <agx1J1tQFn0cdtBf@devkitleo>

On Tue, May 19, 2026 at 03:35:19PM +0100, Leonardo Bras wrote:
> On Tue, May 19, 2026 at 02:15:41PM +0100, Will Deacon wrote:
> > On Tue, May 19, 2026 at 01:56:48PM +0100, Leonardo Bras wrote:
> > > On Tue, May 19, 2026 at 01:43:37PM +0100, Will Deacon wrote:
> > > > > > I was wondering along similar lines, but maybe it would be useful just
> > > > > > to pass a maximum level to the walker logic? That feels like the most
> > > > > > general case without complicating the existing logic.

FWIW, I had considered this too but decided that it requires a bit more
churn since we cannot rely on zero initialization in the existing
callsites (level 0 is a valid level).

But that's extremely minor.

> > > > > This proposal seems simpler for me to understand, and indeed looks like a 
> > > > > better solution than what I have proposed, taking care of  the 
> > > > > 'already split' case with better performance, as it don't even walk a 
> > > > > single level-3 entry. 
> > > > > 
> > > > > On the 'splitting' case, it also works flawlessly if the memory is given in 
> > > > > level-2 blocks. There is only one case that I would like to address here:
> > > > > 
> > > > > - Memory given in level-1 blocks (say 1GB)
> > > > > - Walker flag says 'walk down to level-2 only'
> > > > > - Split Walker on level-1 will break page down to (up to) level-3 entries.
> > > > > - Walker will continue to be called on level-2 entries, even though it's 
> > > > >   not necessary.
> > > > 
> > > > If you're only visiting leaves, why would it be called on the level-2
> > > > table entries?
> > > > 
> > > 
> > > Because once the leaf is turned into a table by the splitting walker, it 
> > > gets reloaded and walked. This is an excerpt of __kvm_pgtable_visit():
> > 
> > Sorry, I was musing about the semantics after adding something to limit
> > the maximum level. I don't dispute what the current code would do.
> > 
> > > Example:
> > > - Split this level-1 leave:
> > >   - Walker creates the whole structure up to given level (currently 3)
> > >   - Walker returns, gets reloaded, table detected, go down on that one
> > >   - Level 2 entries walked (which is unnecessary)
> > > 
> > > Please let me know if I am misunderstanding something.
> > 
> > I just don't grok why this would happen if we limited the maximum level
> > to '2' _and_ said we only wanted to visit the leaf entries. In that
> > case, I wouldn't expect to descend into any of the L2 table entries
> > (because that would imply going beyond level 2) and I wouldn't expect to
> > be called for the table entries either (because we're only interested in
> > leaves).
> 
> Agree, if we specify to skip level-3 entries, it would only walk up to 
> level-2 entries, but take above example in detail:
> - Split these level-1 leaves, up to level-3 leaves (regular)
>   - INFO: kvm_pgtable_walk will call walker:
>     - only up to level-2 entries (skip level-3)
>     - only on leaf entries
>   - Walk first level-1 leaf, calls walker
>     - walker will split the level-1 leaf in level-3 leaves
>     - walker return from that first level-1 leaf
>   - level-1 leaf is reloaded as a table
>     - level-2 entries of that table are also walked (unnecessary)
>     - on each of the level-2 table entries, level-3 entries are skipped
> 
> To avoid the unecessary walk of the level-2 entries above, we would need to 
> specify 'skip level-2' that could be an issue if we have a mix of level-1 
> and level-2 leaves, as the level-2 leaves in that case would not be split.
> 
> That's why I suggest something like "skip recently created table" as a flag 
> as well, so we can guarantee no newly created table gets walked 
> unecessarily.
> 
> Please help me if I am missing something important.

I'm not sure the added complexity of handling this case perfectly results
in a measurable performance improvement. Just avoiding the level 3
tables would be an exponential reduction (~ 512-8192x) in the number of
walk steps.

Thanks,
Oliver


^ permalink raw reply

* Re: [PATCH v2 0/5] mm: reduce mmap_lock contention and improve page fault performance
From: Barry Song @ 2026-05-19 21:18 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: Suren Baghdasaryan, Matthew Wilcox, akpm, linux-mm, david, liam,
	vbabka, rppt, mhocko, jack, pfalcato, wanglian, chentao,
	lianux.mm, kunwu.chan, liyangouwen1, chrisl, kasong, shikemeng,
	nphamcs, bhe, youngjun.park, linux-arm-kernel, linux-kernel,
	loongarch, linuxppc-dev, linux-riscv, linux-s390, Nanzhe Zhao
In-Reply-To: <agxbq1TxJdniMQT3@lucifer>

On Tue, May 19, 2026 at 8:53 PM Lorenzo Stoakes <ljs@kernel.org> wrote:
>
> On Mon, May 18, 2026 at 12:56:59PM -0700, Suren Baghdasaryan wrote:
>
> > >
> > > I think we either need to fix `fork()`, or keep the current
> > > behavior of dropping the VMA lock before performing I/O.
> >
> > I see. So, this problem arises from the fact that we are changing the
> > pagefaults requiring I/O operation to hold VMA lock...
> > And you want to lock VMA on fork only if vma_is_anonymous(vma) ||
> > is_cow_mapping(vma->vm_flags). So, we will be blocking page faults for
> > anonymous and COW VMAs only while holding mmap_write_lock, preventing
> > any VMA modification. On the surface, that looks ok to me but I might
> > be missing some corner cases. If nobody sees any obvious issues, I
> > think it's worth a try.
>
> Not sure if you noticed but I did raise concerns ;)
>
> I wonder if you've confused the fault path and fork here, as I think Barry has
> been a little unclear on that.

I think I’ve been absolutely clear :-)
We should either stick to the current behavior - drop
the VMA lock before doing I/O, or change fork() so that it
does not wait on vma_start_write().

Before per-VMA locks, page faults dropped mmap_lock before
doing I/O. After per-VMA locks, page faults dropped the
VMA lock before doing I/O. In both cases, fork() would not
wait for I/O in the page-fault path.

Now you guys are suggesting performing I/O while holding
the VMA lock, which means fork() must wait for that I/O to
complete. Since an application can have more than 1000
VMAs, and I/O can be stalled for an unpredictable amount
of time in the bio/request queue or filesystem GC, fork()
could end up blocked on multiple VMAs while taking
vma_start_write() for each of them.

As a result, fork() could hold mmap_lock for a very, very,
very long time. fork() itself would become extremely slow,
and any other task needing mmap_lock would also be blocked
behind it.

>
> What's being suggested in this thread is to fundamentally change fork behaviour
> so it's different from the entire history of the kernel (or - presumably - at
> least recent history :) and permit concurrent page faults to occur on a forking
> process.
>
> I absolutely object to this for being pretty crazy. I mean I'm not sure we
> really want to be simultaneously modifying page tables while invoking
> copy_page_range()? No?

If you object to touching fork(), can you at least accept
keeping the existing behavior of dropping the VMA lock
before doing I/O? If you object to both approaches, then I
really do not know how we can continue :-)

Thanks
Barry

^ permalink raw reply

* Re: [PATCH] arm64: dts: freescale: imx95-toradex-smarc: replace deprecated gpio property
From: Frank.Li @ 2026-05-19 21:18 UTC (permalink / raw)
  To: Rob Herring, Krzysztof Kozlowski, Conor Dooley, Sascha Hauer,
	Pengutronix Kernel Team, Fabio Estevam, Antoine Gouby
  Cc: Frank Li, devicetree, imx, linux-arm-kernel, linux-kernel,
	Francesco Dolcini, Antoine Gouby
In-Reply-To: <20260508-replace-gpio-property-v1-1-ec67cc64e576@toradex.com>

From: Frank Li <Frank.Li@nxp.com>


On Fri, 08 May 2026 13:26:36 +0200, Antoine Gouby wrote:
> Replace deprecated "gpio" property with "gpios" in
> regulator-vmmc-usdhc2 fixed regulator node.

Applied, thanks!

[1/1] arm64: dts: freescale: imx95-toradex-smarc: replace deprecated gpio property
      commit: f68aa04e6eda5c1c673c6bd1cd76447bee1d03f2

Best regards,
-- 
Frank Li <Frank.Li@nxp.com>


^ permalink raw reply

* Re: [PATCH] Documentation: KVM: Document guest-visible compatibility expectations
From: Oliver Upton @ 2026-05-19 21:10 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Paolo Bonzini, Marc Zyngier, Will Deacon, Jonathan Corbet,
	Shuah Khan, kvm, Linux Doc Mailing List,
	Kernel Mailing List, Linux, Sean Christopherson, Jim Mattson,
	Joey Gouly, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
	Raghavendra Rao Ananta, Eric Auger, Kees Cook, Arnd Bergmann,
	Nathan Chancellor, linux-arm-kernel, kvmarm, linux-kselftest
In-Reply-To: <9d0429ddbe4d8c6993e74237c4395697f80092d6.camel@infradead.org>

On Tue, May 19, 2026 at 03:13:30PM +0100, David Woodhouse wrote:
> On Tue, 2026-05-19 at 15:53 +0200, Paolo Bonzini wrote:
> > On Tue, May 19, 2026 at 3:00 PM David Woodhouse <dwmw2@infradead.org> wrote:
> > > Or some guest configurations which have only ever been tested under KVM
> > > could have a bug where they *rely* on the registers not being writable,
> > > and write values which are inconsistent with the rest of their
> > > configuration. Which breaks the moment those registers become writable.
> > 
> > Yeah, just having guests that worked by utter chance - but you still
> > don't want to break them - is the case that is most likely. Crappy
> > code that runs only under emulation/virtualization appears with
> > probability 1 over time.
> > 
> > Is this likely in this specific case---probably not, honestly.
> > Christoffer's patch dates back to 2018 (commit d53c2c29ae0d); *back
> > then* KVM/Arm was a lot less mature, and people developing for Arm on
> > vanilla upstream kernels have moved on from Linux 4.19.
> 
> It's not really 2018 though. EC2 is still running kernels with that
> older commit reverted because of the breaking change that it
> introduced.
> 
> So the behaviour corresponding to GICD_IIDR.implementation_rev=1 is
> still current for *millions* of guests.
> 
> I'm now finding that revert in our tree during a *later* kernel upgrade
> and trying to eliminate it. 

Still, as far as upstream is concerned this is damn near a decade old.
Decisions that you or your peers made in the downstream doesn't change
that.

> And sure, I have given the engineers responsible for that a very hard
> stare and suggested that they should have fixed it 'properly' in the
> first place with a patch like the one we're discussing right now.
> 
> And they're looking at this thread and saying "haha no, if fixing
> things properly for Arm is this hard then we'll stick with the crappy
> approach".

The appropriate time to ask for accomodation was a *very* long time ago.

And in the absence of clear evidence of a guest depending on the broken
IGROUPR behavior, I don't see how the guest-side changes of Christoffer's
series are any different from the multitude of bug fixes that we take
every single release cycle. It is an unfortunate bug and I concur with
Marc that it doesn't seem like the sort of thing a guest could rely
upon.

Because it is very much a bug fix, it should've happened without a
change to the revision number.

Now, the handling of GICD_IIDR itself is a separate issue. By my count,
the series broke UAPI on three separate occasions. Before b489edc36169
IIDR was RAZ/WI from userspace. And of course dd6251e463d3 and d53c2c29ae0d
changed the revision with no way of restoring the old value.

And really, IIDR should've *never* been used as a buy in for new UAPI
because it unnecessarily becomes guest visible. 49a1a2c70a7f ("KVM: arm64:
vgic-v3: Advertise GICR_CTLR.{IR, CES} as a new GICD_IIDR revision") is
a much better example for IIDR going forward as it gates *guest-side*
behavior.

> I do not want them to be right. I don't think any of us want that.
> 
> > I would still lean towards accepting the code considering the limited
> > complexity of the addition (in fact I like it more now that it uses
> > IIDR instead of v2_groups_user_writable, but that's taste). 
> 
> I'm absolutely prepared to have a separate technical discussion about
> the v2_groups_user_writable thing for GICv2, which is the second part
> of that series.
> 
> It seems like the right thing to do, and as far as I can tell, this
> code *never* worked with QEMU. But I'm not sure who even cares about
> GICv2 any more. I couldn't find hardware and I had to test the whole
> thing inside qemu-tcg.
> 
> But the 'IIDR defaults to 3 but the *behaviour* doesn't match until you
> explicitly *set* it to 3' thing seemed so *egregiously* wrong to me,
> that I fixed it anyway.

Wrong or not, this behavior is documented unambiguously. From the VGICv2
UAPI documentation:

"""
Userspace should set GICD_IIDR before setting any other registers (both
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
the expected behavior. Unless GICD_IIDR has been set from userspace, writes
to the interrupt group registers (GICD_IGROUPR) are ignored.
"""

I'm not inclined to change that. As a way out of this whole mess, can we
instead:

 - Allow userspace to set IIDR.Revision to 1

 - Drop any bug emulation from the handling of IGROUPR registers

 - Special-case the stupid GICv2 UAPI where IGROUPR are only writable if
   the VMM has written to IIDR and the revision >= 2

Thanks,
Oliver

^ permalink raw reply

* Re: [PATCH v2 0/5] mm: reduce mmap_lock contention and improve page fault performance
From: Yang Shi @ 2026-05-19 21:02 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: Barry Song, Matthew Wilcox, surenb, akpm, linux-mm, david, liam,
	vbabka, rppt, mhocko, jack, pfalcato, wanglian, chentao,
	lianux.mm, kunwu.chan, liyangouwen1, chrisl, kasong, shikemeng,
	nphamcs, bhe, youngjun.park, linux-arm-kernel, linux-kernel,
	loongarch, linuxppc-dev, linux-riscv, linux-s390, Nanzhe Zhao
In-Reply-To: <CAHbLzkpjOLrdRSUF3-G4d9uz6tn6QYwgB66UU9bud5WHr5FWEw@mail.gmail.com>

On Tue, May 19, 2026 at 11:41 AM Yang Shi <shy828301@gmail.com> wrote:
>
> On Tue, May 19, 2026 at 6:39 AM Lorenzo Stoakes <ljs@kernel.org> wrote:
> >
> > On Tue, May 19, 2026 at 02:12:10PM +0100, Lorenzo Stoakes wrote:
> > > On Mon, May 18, 2026 at 02:21:14PM -0700, Yang Shi wrote:
> > > > Maybe a little bit off topic. This is an interesting idea. It seems
> > > > possible we don't have to take vma write lock unconditionally. IIUC
> > > > the write lock is mainly used to serialize against page fault and
> > > > madvise, right? I got a crazy idea off the top of my head. We may be
> > >
> > > Err no, it serialises against literally any modification or read of any
> > > characteristic of VMAs.
>
> If I remember correctly, you are not supposed to change VMA
> flags/size/mm pointer/vm_file/pgoff/prot, etc, under read vma lock or
> read mmap_lock.
>
> > >
> > > > able to just take vma write lock iff vma->anon_vma is not NULL.
> > >
> > > Except if we don't take it and vma->anon_vma is NULL, then somebody can
> > > anon_vma_prepare() and change vma->anon_vma midway through a fork and completely
> > > screw up the anon_vma fork hierarchy.
> >
> > correction: this won't happen as per Barry (see - I managed to confuse myself
> > here :), since for vma->anon_vma install we take the mmap read lock.
> >
> > BUT we also have to consider other cases.
> >
> > >
> > > So no.
> > >
> > > >
> > > > First of all, write mmap_lock is held, so the vma can't go or be
> > > > changed under us.
> > >
> > > vma->anon_vma can be changed.
> >
> > Correction: no it can't :)
>
> Yes, vma->anon_vma change should require taking read mmap_lock.
>
> >
> > >
> > > >
> > > > Secondly, if vma->anon_vma is NULL, it basically means either no page
> > > > fault happened or no cow happened, so there is no page table to copy,
> > > > this is also what copy_page_range() does currently. So we can shrink
> > > > the critical section to:
> > >
> > > Firstly, with no VMA write lock, !vma->anon_vma means a fault can race and
> > > secondly copy_page_range() checks vma_needs_copy(), there are other cases - PFN
> > > maps, mixed maps, UFFD W/P (ugh), guard regions.
> > >
> > > So yeah this isn't sufficient.
> >
> > However this is true...
>
> Yes, fault can race with fork. Basically this is actually the purpose
> of this idea. We can have improved page fault scalability. In my
> proposal (take write vma lock if vma->anon_vma is not NULL), the race
> just happens on the VMAs which page fault has not happened on before.

Sorry, this is incorrect. Page fault can't happen on those VMAs
because page fault needs to create anon_vma, but it requires taking
mmap_lock.
If anon_vma is not NULL, vma write lock will serialize against page
fault. So there should be no race with page fault. Removing vma write
lock suggested by Barry may increase race.

Thanks,
Yang

> vma_needs_copy() also skips the VMAs which don't have vma->anon_vma.
> So there is basically no difference in semantics other than more page
> fault races IIUC. It should be safe as long as we can guarantee there
> is no writable PTE point to a shared page after fork.
>
> For guard regions, it can be serialized by vma write lock if
> vma->anon_vma exists. If vma->anon_vma is NULL, it will prepare
> anon_vma, which will take read mmap_lock if I read the code correctly.
>
> I have not investigated UFFD yet.
>
> >
> > >
> > > >
> > > > if (vma->anon_vma) {
> > > >     vma_start_write_killable(src_vma);
> > > >     anon_vma_fork(dst_vma, src_vma);
> > > >     copy_page_range(dst_vma, src_vma);
> > > > }
> > >
> > > Yeah that's totally broken fo reasons above as I said :)
> > >
> > > >
> > > > But page fault can happen before write mmap_lock is taken, when we
> > > > check vma->anon_vma, it is possible it has not been set up yet. But it
> > > > seems to be equivalent to page fault after fork and won't break the
> > > > semantic.
> > >
> > > It will totally break how the anon_vma hierarchy works :) See the links at the
> > > top of https://ljs.io/talks for a link to various slides on anon_vma behaviour
> > > (it's really a pain to think about because it's a super broken abstraction).
> > >
> > > You could end up with a CoW mapping that's unreachable from rmap and you could
> > > get some nasty issues with page table entries pointing at freed folios :)
> >
> > Correction: actually we should be safe given mmap read lock on anon_vma install.
> >
> > >
> > > >
> > > > Anyway, just a crazy idea, I may miss some corner cases.
> > >
> > > Yeah sorry to push back here but this is just not a viable approach.
>
> No worries. Thanks for all the feedback. Just tried to explore whether
> such an idea is feasible or not.
>
> > >
> > > And this is forgetting that we have relied on page faults being blocked by fork
> > > _forever_, who knows what else has baked in assumptions about that
> > > serialisation.
> > >
> > > Forking is one of the nastiest parts of mm and has had multiple, subtle, corner
> > > case breakages that have been a nightmare to deal with.
>
> Yes, this might be the biggest concern. The page fault can race with
> fork. If some applications rely on such subtle behavior, it may break,
> but such applications are fragile too.
>
> > >
> > > So I'm very much against changing this behaviour to try to fix something in the
> > > fault path.
> > >
> > > We should address the fault path issues in the fault path :)
>
> Yeah, this idea was inspired by Barry's "not take vma read lock
> unconditionally" idea. Maybe irrelevant to Barry's priority inversion
> problem, just an idea for further optimization on page fault
> scalability. This probably should be a separate topic.
>
> Thanks,
> Yang
>
> >
> > Above still all true though.
> >
> > >
> > > >
> > > > Thanks,
> > > > Yang
> > > >
> > > > }
> > > >
> > > > >
> > > > > Based on the above, we may want to re-check whether fork()
> > > > > can be blocked by page faults. At the same time, if Suren,
> > > > > you, or anyone else has any comments, please feel free to
> > > > > share them.
> > > > >
> > > > > Best Regards
> > > > > Barry
> > > > >
> > >
> > > Cheers, Lorenzo
> >
> > So still a nope :)
> >
> > Cheers, Lorenzo


^ permalink raw reply

* Re: [PATCH v2 0/5] mm: reduce mmap_lock contention and improve page fault performance
From: Yang Shi @ 2026-05-19 20:53 UTC (permalink / raw)
  To: Barry Song
  Cc: Matthew Wilcox, surenb, akpm, linux-mm, david, ljs, liam, vbabka,
	rppt, mhocko, jack, pfalcato, wanglian, chentao, lianux.mm,
	kunwu.chan, liyangouwen1, chrisl, kasong, shikemeng, nphamcs, bhe,
	youngjun.park, linux-arm-kernel, linux-kernel, loongarch,
	linuxppc-dev, linux-riscv, linux-s390, Nanzhe Zhao
In-Reply-To: <CAHbLzkpbQ4Ca0xC74BJ6iJUVKdpH90qE+E=g7HkDXvcNH1+8+w@mail.gmail.com>

On Tue, May 19, 2026 at 11:50 AM Yang Shi <shy828301@gmail.com> wrote:
>
> On Tue, May 19, 2026 at 4:07 AM Barry Song <baohua@kernel.org> wrote:
> >
> > On Tue, May 19, 2026 at 5:21 AM Yang Shi <shy828301@gmail.com> wrote:
> > >
> > > On Sun, May 17, 2026 at 1:45 AM Barry Song <baohua@kernel.org> wrote:
> > > >
> > > > On Sat, May 2, 2026 at 1:58 AM Matthew Wilcox <willy@infradead.org> wrote:
> > > > >
> > > > > On Sat, May 02, 2026 at 01:44:34AM +0800, Barry Song wrote:
> > > > > > On Fri, May 1, 2026 at 10:57 PM Matthew Wilcox <willy@infradead.org> wrote:
> > > > > > >
> > > > > > > On Fri, May 01, 2026 at 06:49:58AM +0800, Barry Song wrote:
> > > > > > > > 1. There is no deterministic latency for I/O completion. It depends on
> > > > > > > > both the hardware and the software stack (bio/request queues and the
> > > > > > > > block scheduler). Sometimes the latency is short; at other times it can
> > > > > > > > be quite long. In such cases, a high-priority thread performing operations
> > > > > > > > such as mprotect, unmap, prctl_set_vma, or madvise may be forced to wait
> > > > > > > > for an unpredictable amount of time.
> > > > > > >
> > > > > > > But does that actually happen?  I find it hard to believe that thread A
> > > > > > > unmaps a VMA while thread B is in the middle of taking a page fault in
> > > > > > > that same VMA.  mprotect() and madvise() are more likely to happen, but
> > > > > > > it still seems really unlikely to me.
> > > > > >
> > > > > > It doesn’t have to involve unmapping or applying mprotect to
> > > > > > the entire VMA—just a portion of it is sufficient.
> > > > >
> > > > > Yes, but that still fails to answer "does this actually happen".  How much
> > > > > performance is all this complexity in the page fault handler buying us?
> > > > > If you don't answer this question, I'm just going to go in and rip it
> > > > > all out.
> > > > >
> > > >
> > > > Hi Matthew (and Lorenzo, Jan, and anyone else who may be
> > > > waiting for answers),
> > > >
> > > > As promised during LSF/MM/BPF, we conducted thorough
> > > > testing on Android phones to determine whether performing
> > > > I/O in `filemap_fault()` can block `vma_start_write()`.
> > > > I wanted to give a quick update on this question.
> > > >
> > > > Nanzhe at Xiaomi created tracing scripts and ran various
> > > > applications on Android devices with I/O performed under
> > > > the VMA lock in `filemap_fault()`. We found that:
> > > >
> > > > 1. There are very few cases where unmap() is blocked by
> > > >    page faults. I assume this is due to buggy user code
> > > >    or poor synchronization between reads and unmap().
> > > > So I assume it is not a problem.
> > > >
> > > > 2. We observed many cases where `vma_start_write()`
> > > >    is blocked by page-fault I/O in some applications.
> > > >    The blocking occurs in the `dup_mmap()` path during
> > > >    fork().
> > > >
> > > > With Suren's commit fb49c455323ff ("fork: lock VMAs of
> > > > the parent process when forking"), we now always hold
> > > > `vma_write_lock()` for each VMA. Note that the
> > > > `mmap_lock` write lock is also held, which could lead to
> > > > chained waiting if page-fault I/O is performed without
> > > > releasing the VMA lock.
> > > >
> > > > My gut feeling is that Suren's commit may be overshooting,
> > > > so my rough idea is that we might want to do something like
> > > > the following (we haven't tested it yet and it might be
> > > > wrong):
> > > >
> > > > diff --git a/mm/mmap.c b/mm/mmap.c
> > > > index 2311ae7c2ff4..5ddaf297f31a 100644
> > > > --- a/mm/mmap.c
> > > > +++ b/mm/mmap.c
> > > > @@ -1762,7 +1762,13 @@ __latent_entropy int dup_mmap(struct mm_struct
> > > > *mm, struct mm_struct *oldmm)
> > > >         for_each_vma(vmi, mpnt) {
> > > >                 struct file *file;
> > > >
> > > > -               retval = vma_start_write_killable(mpnt);
> > > > +               /*
> > > > +                * For anonymous or writable private VMAs, prevent
> > > > +                * concurrent CoW faults.
> > > > +                */
> > > > +               if (!mpnt->vm_file || (!(mpnt->vm_flags & VM_SHARED) &&
> > > > +                                       (mpnt->vm_flags & VM_WRITE)))
> > > > +                       retval = vma_start_write_killable(mpnt);
> > > >                 if (retval < 0)
> > > >                         goto loop_out;
> > > >                 if (mpnt->vm_flags & VM_DONTCOPY) {
> > >
> > > Maybe a little bit off topic. This is an interesting idea. It seems
> > > possible we don't have to take vma write lock unconditionally. IIUC
> > > the write lock is mainly used to serialize against page fault and
> > > madvise, right? I got a crazy idea off the top of my head. We may be
> > > able to just take vma write lock iff vma->anon_vma is not NULL.
> > >
> > > First of all, write mmap_lock is held, so the vma can't go or be
> > > changed under us.
> > >
> > > Secondly, if vma->anon_vma is NULL, it basically means either no page
> > > fault happened or no cow happened, so there is no page table to copy,
> > > this is also what copy_page_range() does currently. So we can shrink
> > > the critical section to:
> > >
> > > if (vma->anon_vma) {
> > >     vma_start_write_killable(src_vma);
> > >     anon_vma_fork(dst_vma, src_vma);
> > >     copy_page_range(dst_vma, src_vma);
> > > }
> > >
> > > But page fault can happen before write mmap_lock is taken, when we
> > > check vma->anon_vma, it is possible it has not been set up yet. But it
> > > seems to be equivalent to page fault after fork and won't break the
> > > semantic.
> >
> > Re-reading Suren's commit log for fb49c455323ff8
> > ("fork: lock VMAs of the parent process when forking"),
> > it seems that vm_start_write() is used to protect
> > against a race where anon_vma changes from NULL to
> > non-NULL during fork. In that scenario, we hold the
> > mmap_lock write lock, but not vma_start_write(), so a
> > concurrent anon_vma_prepare() could still install an
> > anon_vma.
> >
> > "    A concurrent page fault on a page newly marked read-only by the page
> >     copy might trigger wp_page_copy() and a anon_vma_prepare(vma) on the
> >     source vma, defeating the anon_vma_clone() that wasn't done because the
> >     parent vma originally didn't have an anon_vma, but we now might end up
> >     copying a pte entry for a page that has one.
> > "
> >
> > If that is the case, then your change does not work.
> >
> > Nowadays, nobody calls anon_vma_prepare(vma) directly.
> > Instead, vmf_anon_prepare() is used, and we always
> > require the mmap_lock read lock before calling
> > __anon_vma_prepare(). As a result, anon_vma cannot
> > transition from NULL to non-NULL during fork.
> >
> > So the original race condition has effectively
> > disappeared.
>
> anon_vma_prepare() has some usecases too, but it seems like it
> requires taking read mmap_lock too if I read the code correctly.
>
> >
> > You also mentioned the madvise() case. If I understand
> > correctly, madvise() should take mmap_lock before
> > modifying anon_vma. Only some parts of madvise() can
> > support per-VMA locking. Therefore, we probably do not
> > need:
> >
> > if (vma->anon_vma) {
> > vma_start_write_killable(src_vma);
> > ...
> > }
>
> I think we still need write vma lock to serialize anon_vma fork
> otherwise we may see:
>
>         CPU 0                                                 CPU 1
> fork                                                       page fault
>    src vma has no anon_vma
>        skip vma fork
>
> allocate anon_vma for src vma
> vma_needs_copy() sees anon_vma
> copy page
>
> Then we may end up being no anon_vma for dst vma, but with pages mapped in it.

Sorry, this should not happen because creating anon_vma in page fault
needs to take mmap_lock.

Thanks,
Yang

>
> Thanks,
> Yang
>
> >
> > >
> > > Anyway, just a crazy idea, I may miss some corner cases.
> >
> > To me, it seems that we could remove vma_start_write()
> > entirely now. Or is that an even crazier idea?
>
>
> >
> > Thanks
> > Barry


^ permalink raw reply

* Re: [PATCH 00/12] crypto: atmel - refactor common i2c support and add SHA256 ahash support
From: Lothar Rubusch @ 2026-05-19 20:47 UTC (permalink / raw)
  To: Thorsten Blum
  Cc: herbert, davem, nicolas.ferre, alexandre.belloni, claudiu.beznea,
	linux-crypto, linux-arm-kernel, linux-kernel
In-Reply-To: <agYnpsiG8bNeVw57@linux.dev>

Hi Thorsten,

(...)
>
> Sashiko [1] also reviewed the series and found potential regressions
> that might be helpful to consider.
>
> Thanks,
> Thorsten
>
> [1] https://sashiko.dev/#/patchset/20260512224349.64621-1-l.rubusch%40gmail.com

FYI

As you asked to consider the Sashiko output, I will paste here the sashiko
review feedback and inlined my answers down below. I may continue with this
still some more iterations against AI/sashiko bot, no problem. How eventually
can I signal you (guys), when I need someone more "human" / maintainer?
What exciting times, we're living in... - let me know, if I use this tool wrong.

[PATCH 01/12] crypto: atmel-ecc - rename driver_data before moving it
into atmel-i2c
> commit 0ed25c0f5044cd5112cafff9ff8ab8248d0e2676
> Author: Lothar Rubusch <l.rubusch@gmail.com>
> crypto: atmel-ecc - rename driver_data before moving it into atmel-i2c
> This commit renames the local driver_data instance to atmel_i2c_mgmt
> to prepare for moving shared I2C client management infrastructure into
> the atmel-i2c core driver. No functional changes are intended.
> > diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
> > index 9660f6426a849..c9f798ebf44fe 100644
> > --- a/drivers/crypto/atmel-ecc.c
> > +++ b/drivers/crypto/atmel-ecc.c
> [ ... ]
> > @@ -323,16 +323,16 @@ static int atmel_ecc_probe(struct i2c_client *client)
> >
> >      i2c_priv = i2c_get_clientdata(client);
> >
> > -    spin_lock(&driver_data.i2c_list_lock);
> > +    spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
> >      list_add_tail(&i2c_priv->i2c_client_list_node,
> > -              &driver_data.i2c_client_list);
> > -    spin_unlock(&driver_data.i2c_list_lock);
> > +              &atmel_i2c_mgmt.i2c_client_list);
> > +    spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
> >
> >      ret = crypto_register_kpp(&atmel_ecdh_nist_p256);
> This isn't a bug introduced by this patch, but does adding i2c_priv to the

It is IMHO not very helpful to flag a(ll) particular patch(es) for regressions
in the existing code aside, but ok. Perhaps I can fix something.

> global list before crypto_register_kpp() create a use-after-free risk?
> If algorithm registration fails, the error path removes the client from the
> list and returns an error, leading the driver core to free the i2c_priv
> memory.
> However, during the brief window it was on the list, a concurrent
> atmel_ecc_i2c_client_alloc() could discover the client and assign it to an
> active crypto request.
> When the probe fails and memory is freed, would the concurrent crypto
> request operate on freed memory?
> Also, does calling crypto_register_kpp() with a single static global
> algorithm structure cause memory corruption if multiple devices are probed?
> The crypto core modifies the algorithm structure before taking locks, which
> would corrupt the state of the active algorithm registered by the first
> device.
> The second probe would also fail, breaking the multiple-device load-balancing
> design. Unregistering a single device also unconditionally unregisters the
> shared algorithm.

As soon as being registered, the struct `atmel_ecdh_nist_p256` will require to
access the `atmel_i2c_mgmt.i2c_client_list`. Being it for the selftests, or a
consumer. Thus I cannot simply change the order. I added a ready/state flag to
address the use-after-free risk.

> >      if (ret) {
> > -        spin_lock(&driver_data.i2c_list_lock);
> > +        spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
> >          list_del(&i2c_priv->i2c_client_list_node);
> > -        spin_unlock(&driver_data.i2c_list_lock);
> > +        spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
> [ ... ]
> > @@ -363,9 +363,9 @@ static void atmel_ecc_remove(struct i2c_client *client)
> >
> >      crypto_unregister_kpp(&atmel_ecdh_nist_p256);
> >
> > -    spin_lock(&driver_data.i2c_list_lock);
> > +    spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
> >      list_del(&i2c_priv->i2c_client_list_node);
> > -    spin_unlock(&driver_data.i2c_list_lock);
> > +    spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
> >  }
> This isn't a bug introduced by this patch, but does the lockless check of

Sure, let's continue drive-by-reviewing...

> tfm_count earlier in atmel_ecc_remove() introduce a race condition?
> A concurrent atmel_ecc_i2c_client_alloc() could acquire the list lock and
> increment tfm_count immediately after the check, causing the device to be
> freed while a new session is starting.
> Additionally, if tfm_count is detected as greater than zero, the driver
> initially returns early. This leaves the freed i2c_priv on the global list
> which leads to list corruption and use-after-free on subsequent allocations.
> Returning from remove does not prevent the driver core from freeing the
> devres-managed memory. Should this synchronously block until tfm_count
> reaches zero to safely unbind?
> The dangling list pointer issue is fixed later in the series by commit
> "crypto: atmel-ecc - simplify remove path and relax busy handling", but the
> underlying active-user use-after-free remains unfixed.

I added a global lock with refcnting to address this.

[PATCH 02/12] crypto: atmel - rename atmel_ecc_driver_data to
atmel_i2c_client_mgmt
> Result:
> No issues found.

[PATCH 03/12] crypto: atmel - move i2c client management instance into
core driver
> commit 81ed08d3b64ea6591f1cadc2ccdca81d40c20205
> Author: Lothar Rubusch <l.rubusch@gmail.com>
> crypto: atmel - move i2c client management instance into core driver
> This commit moves the atmel_i2c client management instance from the ecc
> driver into the atmel-i2c core driver. This prepares for consolidating
> shared i2c client tracking infrastructure in the core.
> [ ... ]
> > diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
> > index 0e275dbdc8c55..db24f65ae90e8 100644
> > --- a/drivers/crypto/atmel-i2c.c
> > +++ b/drivers/crypto/atmel-i2c.c
> > @@ -21,6 +21,12 @@
> >  #include <linux/workqueue.h>
> >  #include "atmel-i2c.h"
> >
> > +struct atmel_i2c_client_mgmt atmel_i2c_mgmt = {
> > +    .i2c_list_lock = __SPIN_LOCK_UNLOCKED(atmel_i2c_mgmt.i2c_list_lock),
> > +    .i2c_client_list = LIST_HEAD_INIT(atmel_i2c_mgmt.i2c_client_list),
> > +};
> > +EXPORT_SYMBOL_GPL(atmel_i2c_mgmt);
> While this regression wasn't introduced by this patch, does exposing this

Sure..

> globally shared list allow for a time-of-check to time-of-use race in
> atmel_ecc_remove() that can result in a use-after-free?
> In atmel_ecc_remove(), the check if (atomic_read(&i2c_priv->tfm_count)) is
> performed without holding the i2c_list_lock.
> If the count is 0, the function proceeds to call crypto_unregister_kpp().
> During this algorithm unregistration window, the client remains accessible on
> the globally shared atmel_i2c_mgmt.i2c_client_list.
> If a concurrent thread calls crypto_alloc_kpp(), it triggers the .init callback
> atmel_ecdh_init_tfm(), which calls atmel_ecc_i2c_client_alloc().
> The allocator acquires the list lock, discovers the dying client still on the
> list, increments tfm_count, and returns it.
> Then, atmel_ecc_remove() acquires the list lock, removes the client from the
> list, and returns.
> Returning from the remove function causes the devres framework to automatically
> free i2c_priv, leaving the concurrent thread with a tfm bound to freed memory.
> Can this sequence lead to memory corruption?

The two additional patches will cover the remove function.

[PATCH 04/12] crypto: atmel-ecc - simplify probe error handling
> commit 3bca954bd7e39050b3fe5381aad35c915da883f3
> Author: Lothar Rubusch <l.rubusch@gmail.com>
> crypto: atmel-ecc - simplify probe error handling
> This commit refactors the error path in atmel_ecc_probe to use a goto
> statement instead of duplicating the list removal code. It also claims to
> add comments clarifying client list insertion and algorithm registration.
> > crypto: atmel-ecc - simplify probe error handling
> >
> > Replace early return in atmel_ecc_probe() with explicit error handling
> > using a goto-based cleanup path.
> >
> > Add comments to clarify client list insertion and algorithm registration
> > steps.
> This isn't a bug, but the commit message states it replaces an early
> return and adds comments, yet the diff doesn't show any comments being
> added or an early return being removed. Should the commit message be
> updated to reflect the actual changes?
> > diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
> > index 76fb1d0cf0753..696ab1d51fc67 100644
> > --- a/drivers/crypto/atmel-ecc.c
> > +++ b/drivers/crypto/atmel-ecc.c
> > @@ -328,17 +328,20 @@ static int atmel_ecc_probe(struct i2c_client *client)
> >
> >      ret = crypto_register_kpp(&atmel_ecdh_nist_p256);
> Is it safe to register the static global atmel_ecdh_nist_p256 algorithm
> structure here directly from the probe function? If multiple devices are
> probed, a second call to crypto_register_kpp with the same static struct
> could cause crypto_check_alg to reset the active algorithm's reference
> count, and __crypto_register_alg to re-initialize the cra_users list,
> potentially causing a regression that corrupts the state for active
> crypto users.
> >      if (ret) {
> > -        spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
> > -        list_del(&i2c_priv->i2c_client_list_node);
> > -        spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
> > -
> >          dev_err(&client->dev, "%s alg registration failed\n",
> >              atmel_ecdh_nist_p256.base.cra_driver_name);
> > +        goto err_list_del;
> By moving the list removal to a goto block after the dev_err call, does this
> code introduce a regression by widening the window for a race condition?
> Since the client is added to the global atmel_i2c_mgmt.i2c_client_list earlier
> in the probe, if registration fails, a concurrent task might find and use the
> failed client. Executing a potentially slow console log before removing the
> client from the list seems to worsen the window for a use-after-free when
> devres cleans up i2c_priv.
> [ ... ]

Patch replaced and merged

[PATCH 05/12] crypto: atmel - factor out i2c client unregistration helper
> commit 67148be9a0cfbd189723055fbc1cdfa19bd244ad
> Author: Lothar Rubusch <l.rubusch@gmail.com>
> crypto: atmel - factor out i2c client unregistration helper
> This commit moves the i2c client removal logic into a dedicated helper
> function in the atmel-i2c core. It also converts the ECC driver remove
> path to use this new helper.
> >     Convert ECC driver remove path to use the new helper and
> >     ensure queue flushing is performed after removing the device
> >     from the shared client list.
> Is this description accurate? Looking at the implementation below, the queue
> flushing is performed before removing the device from the list.
> > diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
> > index 696ab1d51fc67..e5dd77008b97f 100644
> > --- a/drivers/crypto/atmel-ecc.c
> > +++ b/drivers/crypto/atmel-ecc.c
> [ ... ]
> > @@ -364,9 +364,8 @@ static void atmel_ecc_remove(struct i2c_client *client)
> >
> >      crypto_unregister_kpp(&atmel_ecdh_nist_p256);
> >
> > -    spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
> > -    list_del(&i2c_priv->i2c_client_list_node);
> > -    spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
> > +    atmel_i2c_flush_queue();
> > +    atmel_i2c_unregister_client(i2c_priv);
> >  }
> While crypto_unregister_kpp() prevents new lookups, concurrent threads that
> already resolved the algorithm can still execute atmel_ecdh_init_tfm().
> Because atmel_i2c_flush_queue() is called first, it will sleep waiting for
> the workqueue to drain while the client is still present in the global
> atmel_i2c_mgmt.i2c_client_list.
> Could a concurrent atmel_ecdh_init_tfm() call atmel_i2c_client_alloc(),
> find the still-linked client, increment its tfm_count, and assign it to
> ctx->client?
> Once the flush completes, atmel_i2c_unregister_client() removes the client,
> the driver returns, and the device core frees the i2c_priv memory.
> Would this result in a use-after-free on subsequent crypto operations since
> ctx->client now points to freed memory?

Replaced by a more comprehensive dealing with the teardown, patch replaced.

[PATCH 06/12] crypto: atmel-sha204a - add i2c hw client list and
improve probe error handling
> commit 14fd691f797c32576c19b57c937cd6cf75be5ef8
> Author: Lothar Rubusch <l.rubusch@gmail.com>
> crypto: atmel-sha204a - add i2c hw client list and improve probe error handling
> This patch adds registration of the sha204a i2c client into the shared
> atmel_i2c client management list during probe. This allows the driver to
> participate in the common hardware selection infrastructure.
> > diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
> > index 6e6ac4770416e..cdfdcf2e43a7d 100644
> > --- a/drivers/crypto/atmel-sha204a.c
> > +++ b/drivers/crypto/atmel-sha204a.c
> > @@ -173,6 +173,13 @@ static int atmel_sha204a_probe(struct i2c_client *client)
> >
> >      i2c_priv = i2c_get_clientdata(client);
> >
> > +    /* add to client list */
> > +    spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
> > +    list_add_tail(&i2c_priv->i2c_client_list_node,
> > +              &atmel_i2c_mgmt.i2c_client_list);
> > +    spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
> Is it possible for another thread to discover this client before the rest of
> the probe completes?
> If another thread like atmel_i2c_client_alloc() grabs a reference to this
> client from the shared list, and then a subsequent initialization step fails,
> the error path will fail the probe. The devres cleanup will free i2c_priv,
> but the concurrent user will be left with a dangling pointer.
> Also, since the shared atmel_i2c_mgmt.i2c_client_list is used by the
> atmel-ecc driver for hardware selection, could adding the sha204a device
> here cause the atmel-ecc driver to incorrectly dispatch ecdh/ecdsa
> operations to this device?
> Since the selection logic picks the client with the lowest active workload,
> it might select this device and the hardware would reject unsupported
> commands. This was addressed later in the series by commit "crypto: atmel -
> add capability-based i2c client selection".
> Additionally, if the atmel-sha204a module is loaded first, could loading the
> atmel-ecc module later completely wipe out this shared list?
> If the atmel-ecc initialization blindly uses INIT_LIST_HEAD() on this list,
> it would orphan the sha204a clients. This was fixed later in the series by
> commit "crypto: atmel-ecc - switch to module_i2c_driver".
> > +
> > +    /* register rng */
> >      memset(&i2c_priv->hwrng, 0, sizeof(i2c_priv->hwrng));
> >
> >      i2c_priv->hwrng.name = dev_name(&client->dev);
> > @@ -183,15 +190,24 @@ static int atmel_sha204a_probe(struct i2c_client *client)
> >          i2c_priv->hwrng.quality = *quality;
> >
> >      ret = devm_hwrng_register(&client->dev, &i2c_priv->hwrng);
> > -    if (ret)
> > +    if (ret) {
> >          dev_warn(&client->dev, "failed to register rng (%d)\n", ret);
> > +        goto err_list_del;
> > +    }
> >
> >      ret = sysfs_create_group(&client->dev.kobj, &atmel_sha204a_groups);
> >      if (ret) {
> >          dev_err(&client->dev, "failed to register sysfs entry\n");
> > -        return ret;
> > +        goto err_list_del;
> >      }
> Is it possible for userspace to queue an asynchronous non-blocking read
> between devm_hwrng_register() and this sysfs_create_group() call?
> If an async rng read is queued to atmel_wq and work_data is allocated, and
> then sysfs_create_group() fails, devres will unwind allocations and free
> i2c_priv.
> Without calling atmel_i2c_flush_queue() in the error path, the pending work
> would execute after i2c_priv is freed. This would also leak the dynamically
> allocated work_data.
> >
> > +    return ret;
> > +
> > +err_list_del:
> > +    spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
> > +    list_del(&i2c_priv->i2c_client_list_node);
> > +    spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
> > +
> >      return ret;
> >  }
> Does the atmel_sha204a_remove() function need an update to remove the i2c
> client from the global list?
> Since the client is added during probe, it appears it would be left dangling
> in the list when the device is unbound and devres frees i2c_priv,
> potentially causing a crash if the list is traversed later.
> This was fixed later in the series by commit "crypto: atmel-sha204a -
> switch to module_i2c_driver".

Chronologically this is the wrong order. I moved this patch down to the end.
The issue should be fully covered. In this way, Atmel SHA204a devices are
added to the i2c list, when they can configure not being available for ECDH
operations. This is a preparation for future capabilities
(as in the initial roughly splitted series).

[PATCH 07/12] crypto: atmel-sha204a - switch to module_i2c_driver
> commit e46024ee80d35e3759c2e5365d027bec6fb2ea51
> Author: Lothar Rubusch <l.rubusch@gmail.com>
> crypto: atmel-sha204a - switch to module_i2c_driver
> Replace custom module init/exit functions with module_i2c_driver() for
> driver registration. Update remove path to unregister the client from the
> shared I2C management list before flushing pending work and cleaning up
> sysfs and hwrng resources.
> > Update remove path to unregister the client from the shared I2C management
> > list before flushing pending work and cleaning up sysfs and hwrng
> > resources.
> >
> > No functional change intended.
> This isn't a bug, but this patch appears to fix a list corruption regression.
> Before this patch, unloading the module would leave a freed i2c_priv structure
> linked into the shared atmel_i2c_mgmt.i2c_client_list. Does adding the
> atmel_i2c_unregister_client() call fix this dangling pointer issue, making
> the "No functional change intended" claim inaccurate?
> > diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
> > index cdfdcf2e43a7d..613ed5e7b3f61 100644
> > --- a/drivers/crypto/atmel-sha204a.c
> > +++ b/drivers/crypto/atmel-sha204a.c
> > @@ -216,6 +216,8 @@ static void atmel_sha204a_remove(struct i2c_client *client)
> >      struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
> >
> >      devm_hwrng_unregister(&client->dev, &i2c_priv->hwrng);
> > +
> > +    atmel_i2c_unregister_client(i2c_priv);
> The commit message states the intent to unregister the client from the shared
> list before cleaning up hwrng resources. Since devm_hwrng_unregister()
> blocks until active synchronous readers complete, does placing
> atmel_i2c_unregister_client() after it leave the client exposed in the shared
> list while the device is actively being torn down?
> Could another subsystem discover and bind to this partially removed client
> during the blocking window, resulting in memory corruption once the unbind
> completes?
> Should atmel_i2c_unregister_client() be moved to the beginning of the remove
> function to match the design described in the commit message?
> >      atmel_i2c_flush_queue();
> >
> >      sysfs_remove_group(&client->dev.kobj, &atmel_sha204a_groups);
> Additionally, I noticed a potential use-after-free regression in the probe
> error path.
> In atmel_sha204a_probe(), if sysfs_create_group() fails, the function
> returns an error:
> drivers/crypto/atmel-sha204a.c:atmel_sha204a_probe() {
> [ ... ]
>     ret = devm_hwrng_register(&client->dev, &i2c_priv->hwrng);
>     if (ret) {
>         dev_warn(&client->dev, "failed to register RNG (%d)\n", ret);
>         goto err_list_del;
>     }
>     ret = sysfs_create_group(&client->dev.kobj, &atmel_sha204a_groups);
>     if (ret) {
>         dev_err(&client->dev, "failed to register sysfs entry\n");
>         goto err_list_del;
>     }
>     return ret;
> err_list_del:
>     spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
>     list_del(&i2c_priv->i2c_client_list_node);
>     spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
>     return ret;
> }
> At this point, the RNG has already been registered and exposed to user-space
> via devm_hwrng_register(). A concurrent user-space read could have initiated
> an asynchronous read, which queues a work item onto atmel_wq.
> When probe() returns an error, devres will immediately unregister the RNG and
> free the i2c_priv structure. Since the pending work on atmel_wq is never
> flushed or canceled, will it dereference the freed i2c_priv when it
> eventually executes?
> Should atmel_i2c_flush_queue() be explicitly called in the probe error path,
> just as it is in remove()?
> I also noticed a potential kernel heap memory leak to user-space in the
> asynchronous read path.
> In atmel_sha204a_rng_done(), if an asynchronous I2C transaction fails,
> the buffer is unconditionally cached:
> drivers/crypto/atmel-sha204a.c:atmel_sha204a_rng_done() {
> [ ... ]
>     if (status)
>         dev_warn_ratelimited(&i2c_priv->client->dev,
>                      "i2c transaction failed (%d)\n",
>                      status);
>     rng->priv = (unsigned long)work_data;
> [ ... ]
> }
> If status != 0, the work_data->cmd.data buffer is left uninitialized or
> partially written.
> On the next call to atmel_sha204a_rng_read_nonblocking(), the code blindly
> copies up to 32 bytes of this buffer:
> drivers/crypto/atmel-sha204a.c:atmel_sha204a_rng_read_nonblocking() {
> [ ... ]
>     if (rng->priv) {
>         work_data = (struct atmel_i2c_work_data *)rng->priv;
>         max = min(RANDOM_RSP_SIZE - CMD_OVERHEAD_SIZE, max);
>         memcpy(data, &work_data->cmd.data[RSP_DATA_IDX], max);
>         rng->priv = 0;
>     } else {
>         work_data = kmalloc_obj(*work_data, GFP_ATOMIC);
> [ ... ]
> }
> Because work_data is allocated without zeroing memory, does this copy
> uninitialized kernel heap memory into the hwrng output data buffer?
> Could this allow an attacker to leak kernel heap memory to user-space via
> /dev/hwrng by inducing I2C read failures?

Chronologically this is the wrong order. I moved this patch down to the end.
Add different error labels in probe to address failing sysfs_create_group().
Fix kernel heap information leak in a separate patch.

[PATCH 08/12] crypto: atmel-ecc - switch to module_i2c_driver
> commit 1156053e97f2cc049a62750ad595b29dc8a4e921
> Author: Lothar Rubusch <l.rubusch@gmail.com>
> Date:   Sun May 17 18:06:35 2026 +0000
>     crypto: atmel-ecc - switch to module_i2c_driver
>
>     Replace custom module init/exit functions with module_i2c_driver() for
>     simplified driver registration.
>
>     Initialization of the shared I2C client management structure is handled by
>     the core driver and no longer performed in the ECC module init path.
> > diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
> > index e5dd77008b97f..dcfc09d244973 100644
> > --- a/drivers/crypto/atmel-ecc.c
> > +++ b/drivers/crypto/atmel-ecc.c
> > @@ -392,21 +392,7 @@ static struct i2c_driver atmel_ecc_driver = {
> >      .id_table    = atmel_ecc_id,
> >  };
> >
> > -static int __init atmel_ecc_init(void)
> > -{
> > -    spin_lock_init(&atmel_i2c_mgmt.i2c_list_lock);
> > -    INIT_LIST_HEAD(&atmel_i2c_mgmt.i2c_client_list);
> > -    return i2c_add_driver(&atmel_ecc_driver);
> > -}
> > -
> > -static void __exit atmel_ecc_exit(void)
> > -{
> > -    atmel_i2c_flush_queue();
> > -    i2c_del_driver(&atmel_ecc_driver);
> > -}
> > -
> > -module_init(atmel_ecc_init);
> > -module_exit(atmel_ecc_exit);
> > +module_i2c_driver(atmel_ecc_driver);
> While this change simplifies driver registration, does this driver have a
> pre-existing regression involving a use-after-free in its remove path?

again, "pre-existing regression". I already addressed this topic above.

> Looking at the driver's remove path, it can return early if the client is
> busy:
> drivers/crypto/atmel-ecc.c:atmel_ecc_remove() {
>     struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
>     if (atomic_read(&i2c_priv->tfm_count)) {
>         dev_emerg(&client->dev, "Device is busy, expect memory corruption.\n");
>         return;
>     }
>     ...
> }
> When atmel_ecc_remove() returns, the driver core unconditionally frees all
> devres-managed memory, including i2c_priv, which is allocated via
> devm_kmalloc() during probe.
> If there are open crypto transforms holding pointers to this freed memory,
> will subsequently closing a busy transform cause atmel_ecdh_exit_tfm() to
> access the freed i2c_priv->tfm_count and trigger a use-after-free of
> i2c_priv?
> Furthermore, does the early return leave the atmel_ecdh_nist_p256 algorithm
> permanently registered with dangling pointers to module text?
> Additionally, could a time-of-check to time-of-use race occur here?
> If atmel_ecc_remove() sees tfm_count == 0, a concurrent transform allocation
> might acquire the client just before it is removed from the global list,
> leading to the same devres use-after-free.
> Note that later commits like "crypto: atmel-ecc - simplify remove path and
> relax busy handling" remove the early return, but the fundamental
> use-after-free persists because devres still frees i2c_priv while active
> transforms hold references to it.
> Is there also a pre-existing regression in the probe error path that can lead
> to a use-after-free?
> The client is added to the global atmel_i2c_mgmt.i2c_client_list before
> crypto_register_kpp() is called:
> drivers/crypto/atmel-ecc.c:atmel_ecc_probe() {
>     ...
>     spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
>     list_add_tail(&i2c_priv->i2c_client_list_node,
>               &atmel_i2c_mgmt.i2c_client_list);
>     spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
>     ret = crypto_register_kpp(&atmel_ecdh_nist_p256);
>     if (ret) {
>     ...
>         goto err_list_del;
>     }
>     ...
> }
> If registration fails (for example, because another device already registered
> the algorithm), the error path removes the client from the list and returns
> an error. This causes the driver core to free the devm-allocated i2c_priv.
> While the client was briefly in the global list, could a concurrent transform
> allocation acquire it via atmel_ecc_i2c_client_alloc()?
> This would leave the concurrent transform holding a dangling pointer to the
> freed i2c_priv memory, resulting in a use-after-free when cryptographic
> operations are submitted.
> >
> >  MODULE_AUTHOR("Tudor Ambarus");
> >  MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver");

All of the issues are supposed to be already addressed here. Let's have a look
into next Sahiko iteration. Patch updated.

[PATCH 09/12] crypto: atmel-ecc - simplify remove path and relax busy handling
> commit e814c428ad784badd861d6adbe0e2dbf99beae12
> Author: Lothar Rubusch <l.rubusch@gmail.com>
> crypto: atmel-ecc - simplify remove path and relax busy handling
> This commit simplifies the removal path of the atmel-ecc driver by removing
> the early exit logic when active transform users are detected. Instead, it
> issues a warning and continues with the full teardown of the crypto
> registration and device cleanup.
> > diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
> > index dcfc09d244973..ce7a2e750ba88 100644
> > --- a/drivers/crypto/atmel-ecc.c
> > +++ b/drivers/crypto/atmel-ecc.c
> > @@ -346,21 +346,14 @@ static int atmel_ecc_probe(struct i2c_client *client)
> >
> >  static void atmel_ecc_remove(struct i2c_client *client)
> >  {
> > -    struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
> > +    struct atmel_i2c_client_priv *i2c_priv;
> >
> > -    /* Return EBUSY if i2c client already allocated. */
> > -    if (atomic_read(&i2c_priv->tfm_count)) {
> [ ... ]
> > -        dev_emerg(&client->dev, "Device is busy, expect memory corruption.\n");
> > +    i2c_priv = i2c_get_clientdata(client);
> > +    if (WARN_ON(!i2c_priv))
> >          return;
> > -    }
> > +
> > +    if (atomic_read(&i2c_priv->tfm_count))
> > +        dev_warn(&client->dev, "Device is busy, remove it anyhow\n");
> Could this still lead to a use-after-free on i2c_priv?
> The i2c_priv structure is allocated using devres during probe. When
> atmel_ecc_remove() completes, the Linux device model automatically triggers
> devres cleanup, which unconditionally frees i2c_priv.
> If active crypto transforms (indicated by tfm_count > 0) retain pointers to
> the device context, won't they dereference the freed i2c_priv when enqueuing
> new crypto operations or when they are eventually closed?
> >      crypto_unregister_kpp(&atmel_ecdh_nist_p256);
> Does unregistering the statically allocated crypto algorithm while busy
> trigger a warning and corrupt its reference count?
> Because the atmel_ecdh_nist_p256 structure is statically allocated, it has
> no cra_destroy callback. Calling crypto_unregister_alg() while active
> transforms exist (cra_refcnt > 1) deterministically hits the safety check
> WARN_ON(!alg->cra_destroy && refcount_read(&alg->cra_refcnt) != 1) in
> crypto/algapi.c.
> Additionally, if the device is subsequently re-probed, crypto_register_kpp()
> will overwrite the structure's cra_refcnt to 1. When the original, orphaned
> transforms finally close and call crypto_alg_put(), they will decrement the
> newly reset refcount to 0. Future allocations of the algorithm would then
> trigger refcount_inc() on a zeroed refcount, permanently corrupting the
> algorithm's state.
> >
> > -    atmel_i2c_flush_queue();

Patch dropped.

[PATCH 10/12] crypto: atmel-sha204a - guard remove path against
missing client data
> commit 940da01227a9a17cd454a0d965228cb8cdc7880a
> Author: Lothar Rubusch <l.rubusch@gmail.com>
> crypto: atmel-sha204a - guard remove path against missing client data
> This commit attempts to prevent dereferencing invalid client state during
> driver removal by adding a NULL check for the I2C client private data.
> It also mentions retrieving the data only after sysfs cleanup.
> > Retrieve the I2C client private data in atmel_sha204a_remove() only
> > after sysfs cleanup and add a NULL check before continuing device
> > teardown.
> Is this description accurate? The patch places the i2c_get_clientdata()
> call at the top of the function and leaves sysfs_remove_group() at the
> bottom, which does not appear to reorder these operations.
> > diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
> > --- a/drivers/crypto/atmel-sha204a.c
> > +++ b/drivers/crypto/atmel-sha204a.c
> > @@ -213,7 +213,11 @@ static int atmel_sha204a_probe(struct i2c_client *client)
> >
> >  static void atmel_sha204a_remove(struct i2c_client *client)
> >  {
> > -    struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
> > +    struct atmel_i2c_client_priv *i2c_priv;
> > +
> > +    i2c_priv = i2c_get_clientdata(client);
> > +    if (WARN_ON(!i2c_priv))
> > +        return;
> Since the driver core guarantees atmel_sha204a_remove() is only called if
> atmel_sha204a_probe() completed successfully, can i2c_priv ever actually be
> NULL here?
> If this early return were to trigger, would it skip the rest of the teardown
> sequence?
> Bypassing mandatory cleanup like sysfs_remove_group() and
> atmel_i2c_flush_queue() would leave dangling sysfs files and uncancelled
> work queues.
> Could the crash this patch attempts to mitigate actually be caused by a
> pre-existing issue in the probe() error path?
> If sysfs_create_group() fails after devm_hwrng_register() succeeds in
> atmel_sha204a_probe(), the function returns an error and devres cleans up
> i2c_priv.
> However, if a concurrent non-blocking read queued asynchronous work to
> atmel_wq, this work is not flushed because atmel_i2c_flush_queue() is
> omitted from the probe error path.
> When atmel_i2c_work_handler() executes later, would it dereference the
> already freed i2c_priv structure?

Patch dropped.

[PATCH 11/12] crypto: atmel - move i2c client selection to core driver
> commit ec87e048a52b48d6d9799216ee15e9f3c0e86cbc
> Author: Lothar Rubusch <l.rubusch@gmail.com>
> crypto: atmel - move i2c client selection to core driver
> Moves the I2C client selection logic from the ECC-specific driver into the
> shared Atmel I2C core. This allows the load-balancing logic to be reused
> across other Atmel crypto drivers.
> > diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
> > index ce7a2e750ba88..f877f236552f7 100644
> > --- a/drivers/crypto/atmel-ecc.c
> > +++ b/drivers/crypto/atmel-ecc.c
> [ ... ]
> > -static struct i2c_client *atmel_ecc_i2c_client_alloc(void)
> > -{
> [ ... ]
> > -    return client;
> > -}
> > -
> >  static void atmel_ecc_i2c_client_free(struct i2c_client *client)
> >  {
> >      struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
> > @@ -249,7 +214,7 @@ static int atmel_ecdh_init_tfm(struct crypto_kpp *tfm)
> This isn't a bug, but the patch exports atmel_i2c_client_alloc() to share
> allocation logic while leaving the corresponding free function static here.
> Since the goal is for other Atmel crypto drivers to use the new shared
> allocation API, won't they be forced to manually access i2c_priv->tfm_count
> to decrement the counter? Should we also move and export a shared free
> function to create a symmetric API lifecycle?
> > diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
> > index 861af52d7a88b..4beab68997c4f 100644
> > --- a/drivers/crypto/atmel-i2c.c
> > +++ b/drivers/crypto/atmel-i2c.c
> [ ... ]
> > +struct i2c_client *atmel_i2c_client_alloc(void)
> > +{
> > +    struct atmel_i2c_client_priv *i2c_priv, *min_i2c_priv = NULL;
> > +    struct i2c_client *client = ERR_PTR(-ENODEV);
> > +    int min_tfm_cnt = INT_MAX;
> > +    int tfm_cnt;
> > +
> > +    spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
> > +
> > +    if (list_empty(&atmel_i2c_mgmt.i2c_client_list)) {
> > +        spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
> > +        return ERR_PTR(-ENODEV);
> > +    }
> > +
> > +    list_for_each_entry(i2c_priv, &atmel_i2c_mgmt.i2c_client_list,
> > +                i2c_client_list_node) {
> > +        tfm_cnt = atomic_read(&i2c_priv->tfm_count);
> > +        if (tfm_cnt < min_tfm_cnt) {
> > +            min_tfm_cnt = tfm_cnt;
> > +            min_i2c_priv = i2c_priv;
> > +        }
> > +        if (!min_tfm_cnt)
> > +            break;
> > +    }
> > +
> > +    if (min_i2c_priv) {
> > +        atomic_inc(&min_i2c_priv->tfm_count);
> > +        client = min_i2c_priv->client;
> > +    }
> > +
> > +    spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
> > +
> > +    return client;
> > +}
> Could this result in a use-after-free if the device is unbound?
> If the underlying I2C device is asynchronously unbound while a crypto
> transformation is active, the driver's remove callback executes. The remove
> callback unregisters the algorithm to prevent new transformations, but it
> does not block waiting for active ones to be freed.
> When remove returns, the I2C subsystem frees the i2c_client structure, and
> devres automatically frees the allocated i2c_priv structure. The active
> transformation would retain the dangling ctx->client pointer. Any subsequent
> crypto operation would enqueue a work item that dereferences the freed client
> and accesses freed memory.
> Should we take a reference on the device via get_device(&client->dev) before
> returning it here to ensure it stays alive?

No. atmel_ecc_remove() was reworked, this issue should be addressed now.

> > +EXPORT_SYMBOL(atmel_i2c_client_alloc);
> This isn't a bug, but this function internally acquires locks and iterates
> over atmel_i2c_mgmt, which is exported via EXPORT_SYMBOL_GPL.
> Does exporting this as a non-GPL symbol allow non-GPL modules to bypass the
> GPL restriction of the underlying shared data structure? Should this be
> exported using EXPORT_SYMBOL_GPL to maintain consistency?

Renamed patch title. I changed to EXPORT_SYMBOL_GPL. The alloc and free
functions are now migrated.

[PATCH 12/12] crypto: atmel - add capability-based I2C client selection
> commit 67dd11917ea1c6943f1088bb31e4aa5da0cb31f4
> Author: Lothar Rubusch <l.rubusch@gmail.com>
> crypto: atmel - add capability-based I2C client selection
> This commit introduces capability-based filtering for the I2C client
> allocator. Devices now advertise supported features during probe, and the
> allocator selects clients that match the requested capabilities. This
> prepares the driver for using the hardware beyond a single algorithm type.
> > diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
> > --- a/drivers/crypto/atmel-ecc.c
> > +++ b/drivers/crypto/atmel-ecc.c
> [ ... ]
> > @@ -286,6 +286,8 @@ static int atmel_ecc_probe(struct i2c_client *client)
> >
> >      i2c_priv = i2c_get_clientdata(client);
> >
> > +    i2c_priv->caps = BIT(ATMEL_CAP_ECDH);
> > +
> >      spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
> >      list_add_tail(&i2c_priv->i2c_client_list_node,
> >                &atmel_i2c_mgmt.i2c_client_list);
> This problem wasn't introduced by this patch, but there appears to be a race

Sure

> condition if a device probe fails.
> In atmel_ecc_probe(), the i2c_priv object is added to the global list before
> all initialization steps are complete:
> atmel_ecc_probe()
>     list_add_tail(&i2c_priv->i2c_client_list_node, ...)
>     ...
>     ret = crypto_register_kpp(&atmel_ecdh_nist_p256);
>     if (ret) {
>         goto err_list_del;
> If multiple Atmel ECC devices are present, the second device will fail to
> register the same static algorithm and return an error.
> During this window, could a concurrent call to atmel_i2c_client_alloc()
> select this incompletely probed device for a new crypto transformation?
> If so, when the probe function hits the error path, removes the device from
> the list, and devres frees i2c_priv, the crypto transformation would be left
> with a dangling pointer, potentially causing a use-after-free on subsequent
> operations. This same pattern also appears to be present in
> atmel_sha204a_probe().
> > diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
> > --- a/drivers/crypto/atmel-i2c.c
> > +++ b/drivers/crypto/atmel-i2c.c
> [ ... ]
> > @@ -73,6 +73,9 @@ struct i2c_client *atmel_i2c_client_alloc(void)
> >
> >      list_for_each_entry(i2c_priv, &atmel_i2c_mgmt.i2c_client_list,
> >                  i2c_client_list_node) {
> > +        if (!(i2c_priv->caps & BIT(cap)))
> > +            continue;
> > +
> >          tfm_cnt = atomic_read(&i2c_priv->tfm_count);
> >          if (tfm_cnt < min_tfm_cnt) {
> This problem wasn't introduced by this patch, but it seems active

Already mentioned...

> cryptographic transformations might not properly secure the lifetime of the
> underlying I2C device.
> The atmel_i2c_client_alloc() function increments a local tfm_count to track
> active references, but does not pin the device structure itself or delay its
> destruction.
> If a device is hot-unplugged or manually unbound via sysfs, the driver's
> remove callback executes. Since i2c_priv is tied to the device's lifecycle
> via devres, it will be automatically freed upon returning from the remove
> function. The unbind process does not block waiting for tfm_count to drop to
> zero.
> Does this mean any concurrently active crypto TFM previously allocated via
> atmel_i2c_client_alloc() will maintain a dangling pointer to these freed
> structures?
> When the TFM is later destroyed or performs an operation, it might
> dereference the freed client and execute atomic_dec() on freed memory.

The time-of-check to time-of-use situation should be addressed, as also the
multiple hardware devices competing for the algo structure.

Best,
L


^ permalink raw reply

* [PATCH v2 07/12] crypto: atmel-ecc - switch to module_i2c_driver
From: Lothar Rubusch @ 2026-05-19 20:47 UTC (permalink / raw)
  To: thorsten.blum, herbert, davem, nicolas.ferre, alexandre.belloni,
	claudiu.beznea
  Cc: linux-crypto, linux-arm-kernel, linux-kernel, l.rubusch
In-Reply-To: <20260519204803.17034-1-l.rubusch@gmail.com>

Remove custom boilerplate module configuration code and convert the module
init/exit paths to use the modern module_i2c_driver() helper macro.

This shortens and simplifies driver initialization. Custom structure setup
is no longer required here since management tracking context initialization
was already safely moved into the atmel-i2c core library module.

Signed-off-by: Lothar Rubusch <l.rubusch@gmail.com>
---
 drivers/crypto/atmel-ecc.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index 29706e4bfa04..4f27e1caf106 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -401,18 +401,7 @@ static struct i2c_driver atmel_ecc_driver = {
 	.id_table	= atmel_ecc_id,
 };
 
-static int __init atmel_ecc_init(void)
-{
-	return i2c_add_driver(&atmel_ecc_driver);
-}
-
-static void __exit atmel_ecc_exit(void)
-{
-	i2c_del_driver(&atmel_ecc_driver);
-}
-
-module_init(atmel_ecc_init);
-module_exit(atmel_ecc_exit);
+module_i2c_driver(atmel_ecc_driver);
 
 MODULE_AUTHOR("Tudor Ambarus");
 MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver");
-- 
2.39.5



^ permalink raw reply related

* [PATCH v2 04/12] crypto: atmel - rename atmel_ecc_driver_data to atmel_i2c_client_mgmt
From: Lothar Rubusch @ 2026-05-19 20:47 UTC (permalink / raw)
  To: thorsten.blum, herbert, davem, nicolas.ferre, alexandre.belloni,
	claudiu.beznea
  Cc: linux-crypto, linux-arm-kernel, linux-kernel, l.rubusch
In-Reply-To: <20260519204803.17034-1-l.rubusch@gmail.com>

Rename struct atmel_ecc_driver_data to atmel_i2c_client_mgmt to reflect its
generic role in shared I2C client tracking and locking. A subsequent change
will move the client management infrastructure into the atmel-i2c core
driver.

No functional changes intended.

Signed-off-by: Lothar Rubusch <l.rubusch@gmail.com>
---
 drivers/crypto/atmel-ecc.c | 2 +-
 drivers/crypto/atmel-i2c.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index e5dd166fd785..aa2dde99b2b1 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -26,7 +26,7 @@
 static DEFINE_MUTEX(atmel_ecc_kpp_lock);
 static int atmel_ecc_kpp_refcnt;
 
-static struct atmel_ecc_driver_data atmel_i2c_mgmt;
+static struct atmel_i2c_client_mgmt atmel_i2c_mgmt;
 
 /**
  * struct atmel_ecdh_ctx - transformation context
diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h
index e3b12030f9c4..30ed816814af 100644
--- a/drivers/crypto/atmel-i2c.h
+++ b/drivers/crypto/atmel-i2c.h
@@ -115,7 +115,7 @@ struct atmel_i2c_cmd {
 #define ECDH_PREFIX_MODE		0x00
 
 /* Used for binding tfm objects to i2c clients. */
-struct atmel_ecc_driver_data {
+struct atmel_i2c_client_mgmt {
 	struct list_head i2c_client_list;
 	spinlock_t i2c_list_lock;
 } ____cacheline_aligned;
-- 
2.39.5



^ permalink raw reply related

* [PATCH v2 09/12] crypto: atmel-i2c - implement capability-based client selection
From: Lothar Rubusch @ 2026-05-19 20:48 UTC (permalink / raw)
  To: thorsten.blum, herbert, davem, nicolas.ferre, alexandre.belloni,
	claudiu.beznea
  Cc: linux-crypto, linux-arm-kernel, linux-kernel, l.rubusch
In-Reply-To: <20260519204803.17034-1-l.rubusch@gmail.com>

Extend the shared I2C client allocation interface to support feature-aware
hardware selection by introducing capability filtering.

Add a 'caps' mask to 'struct atmel_i2c_client_priv' alongside an
'atmel_i2c_capability' enum. The allocator now explicitly filters hardware
nodes by a requested capability bit while retaining the least-loaded device
load-balancing scheme.

Update the ECC driver to advertise ATMEL_CAP_ECDH configuration capability
during probe, and adapt the tfm context setup execution path to request
this specific capability variant. Initialize the bitmask field to zero
inside the SHA204A driver context for now.

Signed-off-by: Lothar Rubusch <l.rubusch@gmail.com>
---
 drivers/crypto/atmel-ecc.c     | 4 +++-
 drivers/crypto/atmel-i2c.c     | 6 +++++-
 drivers/crypto/atmel-i2c.h     | 8 +++++++-
 drivers/crypto/atmel-sha204a.c | 2 ++
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index 7d090c557330..11ee8ef71b94 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -210,7 +210,7 @@ static int atmel_ecdh_init_tfm(struct crypto_kpp *tfm)
 	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
 
 	ctx->curve_id = ECC_CURVE_NIST_P256;
-	ctx->client = atmel_i2c_client_alloc();
+	ctx->client = atmel_i2c_client_alloc(ATMEL_CAP_ECDH);
 	if (IS_ERR(ctx->client)) {
 		pr_err("tfm - i2c_client binding failed\n");
 		return PTR_ERR(ctx->client);
@@ -283,6 +283,8 @@ static int atmel_ecc_probe(struct i2c_client *client)
 	i2c_priv = i2c_get_clientdata(client);
 	i2c_priv->ready = false;
 
+	i2c_priv->caps = BIT(ATMEL_CAP_ECDH);
+
 	spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
 	list_add_tail(&i2c_priv->i2c_client_list_node,
 		      &atmel_i2c_mgmt.i2c_client_list);
diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
index 4621aa57833f..e6eeba1f6554 100644
--- a/drivers/crypto/atmel-i2c.c
+++ b/drivers/crypto/atmel-i2c.c
@@ -57,7 +57,7 @@ static void atmel_i2c_checksum(struct atmel_i2c_cmd *cmd)
 	*__crc16 = cpu_to_le16(bitrev16(crc16(0, data, len)));
 }
 
-struct i2c_client *atmel_i2c_client_alloc(void)
+struct i2c_client *atmel_i2c_client_alloc(enum atmel_i2c_capability cap)
 {
 	struct atmel_i2c_client_priv *i2c_priv, *min_i2c_priv = NULL;
 	struct i2c_client *client = ERR_PTR(-ENODEV);
@@ -75,6 +75,10 @@ struct i2c_client *atmel_i2c_client_alloc(void)
 			    i2c_client_list_node) {
 		if (!i2c_priv->ready)
 			continue;
+
+		if (!(i2c_priv->caps & BIT(cap)))
+			continue;
+
 		tfm_cnt = atomic_read(&i2c_priv->tfm_count);
 		if (tfm_cnt < min_tfm_cnt) {
 			min_tfm_cnt = tfm_cnt;
diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h
index 6c2d86fd9068..636d21bd1348 100644
--- a/drivers/crypto/atmel-i2c.h
+++ b/drivers/crypto/atmel-i2c.h
@@ -115,6 +115,10 @@ struct atmel_i2c_cmd {
 #define ECDH_PREFIX_MODE		0x00
 
 /* Used for binding tfm objects to i2c clients. */
+enum atmel_i2c_capability {
+	ATMEL_CAP_ECDH = 0,
+};
+
 struct atmel_i2c_client_mgmt {
 	struct list_head i2c_client_list;
 	spinlock_t i2c_list_lock;
@@ -131,6 +135,7 @@ extern struct atmel_i2c_client_mgmt atmel_i2c_mgmt;
  * @tfm_count           : number of active crypto transformations on i2c client
  * @hwrng               : hold the hardware generated rng
  * @ready               : hw client is ready to use
+ * @caps                : feature capability of the particular driver
  *
  * Reads and writes from/to the i2c client are sequential. The first byte
  * transmitted to the device is treated as the byte size. Any attempt to send
@@ -148,6 +153,7 @@ struct atmel_i2c_client_priv {
 	atomic_t tfm_count ____cacheline_aligned;
 	struct hwrng hwrng;
 	bool ready;
+	u32 caps;
 };
 
 /**
@@ -192,7 +198,7 @@ void atmel_i2c_init_genkey_cmd(struct atmel_i2c_cmd *cmd, u16 keyid);
 int atmel_i2c_init_ecdh_cmd(struct atmel_i2c_cmd *cmd,
 			    struct scatterlist *pubkey);
 
-struct i2c_client *atmel_i2c_client_alloc(void);
+struct i2c_client *atmel_i2c_client_alloc(enum atmel_i2c_capability cap);
 void atmel_i2c_client_free(struct i2c_client *client);
 
 void atmel_i2c_deactivate_client(struct atmel_i2c_client_priv *i2c_priv);
diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
index 6e6ac4770416..3853d2b95449 100644
--- a/drivers/crypto/atmel-sha204a.c
+++ b/drivers/crypto/atmel-sha204a.c
@@ -173,6 +173,8 @@ static int atmel_sha204a_probe(struct i2c_client *client)
 
 	i2c_priv = i2c_get_clientdata(client);
 
+	i2c_priv->caps = 0;
+
 	memset(&i2c_priv->hwrng, 0, sizeof(i2c_priv->hwrng));
 
 	i2c_priv->hwrng.name = dev_name(&client->dev);
-- 
2.39.5



^ permalink raw reply related

* [PATCH v2 08/12] crypto: atmel-i2c - move shared client allocation logic to core
From: Lothar Rubusch @ 2026-05-19 20:47 UTC (permalink / raw)
  To: thorsten.blum, herbert, davem, nicolas.ferre, alexandre.belloni,
	claudiu.beznea
  Cc: linux-crypto, linux-arm-kernel, linux-kernel, l.rubusch
In-Reply-To: <20260519204803.17034-1-l.rubusch@gmail.com>

Migrate the I2C client allocation and runtime load-balancing routines out
of the ECC driver code and into the central atmel-i2c core library module.

Export the symmetric lifecycle helper interfaces atmel_i2c_client_alloc()
and atmel_i2c_client_free() using EXPORT_SYMBOL_GPL() to expose a unified
client management API. This consolidation enables the dynamic selection
subsystem (which chooses the least-loaded client device based on the active
transformation count) to be shared by both the ECC driver and upcoming
Atmel crypto modules.

Refactor the ECC driver's transformation context initialization (init_tfm)
and teardown (exit_tfm) paths to use this centralized core API.

No functional change is intended.

Signed-off-by: Lothar Rubusch <l.rubusch@gmail.com>
---
 drivers/crypto/atmel-ecc.c | 50 +++-----------------------------------
 drivers/crypto/atmel-i2c.c | 46 +++++++++++++++++++++++++++++++++++
 drivers/crypto/atmel-i2c.h |  3 +++
 3 files changed, 52 insertions(+), 47 deletions(-)

diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index 4f27e1caf106..7d090c557330 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -203,50 +203,6 @@ static int atmel_ecdh_compute_shared_secret(struct kpp_request *req)
 	return ret;
 }
 
-static struct i2c_client *atmel_ecc_i2c_client_alloc(void)
-{
-	struct atmel_i2c_client_priv *i2c_priv, *min_i2c_priv = NULL;
-	struct i2c_client *client = ERR_PTR(-ENODEV);
-	int min_tfm_cnt = INT_MAX;
-	int tfm_cnt;
-
-	spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
-
-	if (list_empty(&atmel_i2c_mgmt.i2c_client_list)) {
-		spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
-		return ERR_PTR(-ENODEV);
-	}
-
-	list_for_each_entry(i2c_priv, &atmel_i2c_mgmt.i2c_client_list,
-			    i2c_client_list_node) {
-		if (!i2c_priv->ready)
-			continue;
-		tfm_cnt = atomic_read(&i2c_priv->tfm_count);
-		if (tfm_cnt < min_tfm_cnt) {
-			min_tfm_cnt = tfm_cnt;
-			min_i2c_priv = i2c_priv;
-		}
-		if (!min_tfm_cnt)
-			break;
-	}
-
-	if (min_i2c_priv) {
-		atomic_inc(&min_i2c_priv->tfm_count);
-		client = min_i2c_priv->client;
-	}
-
-	spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
-
-	return client;
-}
-
-static void atmel_ecc_i2c_client_free(struct i2c_client *client)
-{
-	struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
-
-	atomic_dec(&i2c_priv->tfm_count);
-}
-
 static int atmel_ecdh_init_tfm(struct crypto_kpp *tfm)
 {
 	const char *alg = kpp_alg_name(tfm);
@@ -254,7 +210,7 @@ static int atmel_ecdh_init_tfm(struct crypto_kpp *tfm)
 	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
 
 	ctx->curve_id = ECC_CURVE_NIST_P256;
-	ctx->client = atmel_ecc_i2c_client_alloc();
+	ctx->client = atmel_i2c_client_alloc();
 	if (IS_ERR(ctx->client)) {
 		pr_err("tfm - i2c_client binding failed\n");
 		return PTR_ERR(ctx->client);
@@ -264,7 +220,7 @@ static int atmel_ecdh_init_tfm(struct crypto_kpp *tfm)
 	if (IS_ERR(fallback)) {
 		dev_err(&ctx->client->dev, "Failed to allocate transformation for '%s': %ld\n",
 			alg, PTR_ERR(fallback));
-		atmel_ecc_i2c_client_free(ctx->client);
+		atmel_i2c_client_free(ctx->client);
 		return PTR_ERR(fallback);
 	}
 
@@ -280,7 +236,7 @@ static void atmel_ecdh_exit_tfm(struct crypto_kpp *tfm)
 
 	kfree(ctx->public_key);
 	crypto_free_kpp(ctx->fallback);
-	atmel_ecc_i2c_client_free(ctx->client);
+	atmel_i2c_client_free(ctx->client);
 }
 
 static unsigned int atmel_ecdh_max_size(struct crypto_kpp *tfm)
diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
index c73ef3cadf0e..4621aa57833f 100644
--- a/drivers/crypto/atmel-i2c.c
+++ b/drivers/crypto/atmel-i2c.c
@@ -57,6 +57,52 @@ static void atmel_i2c_checksum(struct atmel_i2c_cmd *cmd)
 	*__crc16 = cpu_to_le16(bitrev16(crc16(0, data, len)));
 }
 
+struct i2c_client *atmel_i2c_client_alloc(void)
+{
+	struct atmel_i2c_client_priv *i2c_priv, *min_i2c_priv = NULL;
+	struct i2c_client *client = ERR_PTR(-ENODEV);
+	int min_tfm_cnt = INT_MAX;
+	int tfm_cnt;
+
+	spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
+
+	if (list_empty(&atmel_i2c_mgmt.i2c_client_list)) {
+		spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
+		return ERR_PTR(-ENODEV);
+	}
+
+	list_for_each_entry(i2c_priv, &atmel_i2c_mgmt.i2c_client_list,
+			    i2c_client_list_node) {
+		if (!i2c_priv->ready)
+			continue;
+		tfm_cnt = atomic_read(&i2c_priv->tfm_count);
+		if (tfm_cnt < min_tfm_cnt) {
+			min_tfm_cnt = tfm_cnt;
+			min_i2c_priv = i2c_priv;
+		}
+		if (!min_tfm_cnt)
+			break;
+	}
+
+	if (min_i2c_priv) {
+		atomic_inc(&min_i2c_priv->tfm_count);
+		client = min_i2c_priv->client;
+	}
+
+	spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
+
+	return client;
+}
+EXPORT_SYMBOL_GPL(atmel_i2c_client_alloc);
+
+void atmel_i2c_client_free(struct i2c_client *client)
+{
+	struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+
+	atomic_dec(&i2c_priv->tfm_count);
+}
+EXPORT_SYMBOL_GPL(atmel_i2c_client_free);
+
 void atmel_i2c_init_read_config_cmd(struct atmel_i2c_cmd *cmd)
 {
 	cmd->word_addr = COMMAND;
diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h
index 351306c426aa..6c2d86fd9068 100644
--- a/drivers/crypto/atmel-i2c.h
+++ b/drivers/crypto/atmel-i2c.h
@@ -192,6 +192,9 @@ void atmel_i2c_init_genkey_cmd(struct atmel_i2c_cmd *cmd, u16 keyid);
 int atmel_i2c_init_ecdh_cmd(struct atmel_i2c_cmd *cmd,
 			    struct scatterlist *pubkey);
 
+struct i2c_client *atmel_i2c_client_alloc(void);
+void atmel_i2c_client_free(struct i2c_client *client);
+
 void atmel_i2c_deactivate_client(struct atmel_i2c_client_priv *i2c_priv);
 void atmel_i2c_unregister_client(struct atmel_i2c_client_priv *i2c_priv);
 
-- 
2.39.5



^ permalink raw reply related

* [PATCH v2 10/12] crypto: atmel-sha204a - integrate into core management tracking
From: Lothar Rubusch @ 2026-05-19 20:48 UTC (permalink / raw)
  To: thorsten.blum, herbert, davem, nicolas.ferre, alexandre.belloni,
	claudiu.beznea
  Cc: linux-crypto, linux-arm-kernel, linux-kernel, l.rubusch
In-Reply-To: <20260519204803.17034-1-l.rubusch@gmail.com>

Register the SHA204A I2C device instance into the shared atmel_i2c client
management tracking list during the probe phase. This allows the driver to
participate in the central hardware selection infrastructure.

Rework the error-unwind paths inside atmel_sha204a_probe() to prevent stale
entries from remaining in the global tracking structures if a partial
initialization failure occurs. If sysfs group creation fails, explicitly
trigger devm_hwrng_unregister() to preserve the strict lifecycle ordering
introduced in previous stability fixes.

Convert the removal path to use the core teardown helpers. Ensure the
device readiness state is deactivated using atmel_i2c_deactivate_client()
before any local hardware cleanup runs, and call
atmel_i2c_unregister_client() at the end of the sequence to safely drop the
node from global tracking.

No functional change intended beyond improved lifecycle handling.

Signed-off-by: Lothar Rubusch <l.rubusch@gmail.com>
---
 drivers/crypto/atmel-sha204a.c | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
index 3853d2b95449..38a269186e2a 100644
--- a/drivers/crypto/atmel-sha204a.c
+++ b/drivers/crypto/atmel-sha204a.c
@@ -172,9 +172,15 @@ static int atmel_sha204a_probe(struct i2c_client *client)
 		return ret;
 
 	i2c_priv = i2c_get_clientdata(client);
+	i2c_priv->ready = false;
 
 	i2c_priv->caps = 0;
 
+	spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
+	list_add_tail(&i2c_priv->i2c_client_list_node,
+		      &atmel_i2c_mgmt.i2c_client_list);
+	spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
+
 	memset(&i2c_priv->hwrng, 0, sizeof(i2c_priv->hwrng));
 
 	i2c_priv->hwrng.name = dev_name(&client->dev);
@@ -185,15 +191,28 @@ static int atmel_sha204a_probe(struct i2c_client *client)
 		i2c_priv->hwrng.quality = *quality;
 
 	ret = devm_hwrng_register(&client->dev, &i2c_priv->hwrng);
-	if (ret)
+	if (ret) {
 		dev_warn(&client->dev, "failed to register RNG (%d)\n", ret);
+		goto err_list_del;
+	}
 
 	ret = sysfs_create_group(&client->dev.kobj, &atmel_sha204a_groups);
 	if (ret) {
 		dev_err(&client->dev, "failed to register sysfs entry\n");
-		return ret;
+		goto err_hwrng_unregister;
 	}
 
+	spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
+	i2c_priv->ready = true;
+	spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
+
+	return 0;
+
+err_hwrng_unregister:
+	devm_hwrng_unregister(&client->dev, &i2c_priv->hwrng);
+err_list_del:
+	atmel_i2c_unregister_client(i2c_priv);
+
 	return ret;
 }
 
@@ -201,12 +220,13 @@ static void atmel_sha204a_remove(struct i2c_client *client)
 {
 	struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
 
-	devm_hwrng_unregister(&client->dev, &i2c_priv->hwrng);
-	atmel_i2c_flush_queue();
+	atmel_i2c_deactivate_client(i2c_priv);
 
+	devm_hwrng_unregister(&client->dev, &i2c_priv->hwrng);
 	sysfs_remove_group(&client->dev.kobj, &atmel_sha204a_groups);
-
 	kfree((void *)i2c_priv->hwrng.priv);
+
+	atmel_i2c_unregister_client(i2c_priv);
 }
 
 static const struct of_device_id atmel_sha204a_dt_ids[] = {
@@ -239,7 +259,6 @@ static int __init atmel_sha204a_init(void)
 
 static void __exit atmel_sha204a_exit(void)
 {
-	atmel_i2c_flush_queue();
 	i2c_del_driver(&atmel_sha204a_driver);
 }
 
-- 
2.39.5



^ permalink raw reply related

* [PATCH v2 11/12] crypto: atmel-sha204a - fix heap info leak on I2C transfer failure
From: Lothar Rubusch @ 2026-05-19 20:48 UTC (permalink / raw)
  To: thorsten.blum, herbert, davem, nicolas.ferre, alexandre.belloni,
	claudiu.beznea
  Cc: linux-crypto, linux-arm-kernel, linux-kernel, l.rubusch
In-Reply-To: <20260519204803.17034-1-l.rubusch@gmail.com>

When a non-blocking read operation is requested, the driver dynamically
allocates memory to track asynchronous transfer status. If the underlying
I2C transmission fails, atmel_sha204a_rng_done() logs a rate-limited
warning but incorrectly proceeds to cache the pointer to this uninitialized
buffer inside the rng->priv data field anyway.

On subsequent execution passes, atmel_sha204a_rng_read_nonblocking()
detects the stale rng->priv value, skips executing a hardware data read,
and copies up to 32 bytes of uninitialized kernel heap data from this
garbage memory pool straight back into the system's hwrng data stream.

Fix this information disclosure vector by immediately releasing the
allocated asynchronous work data buffer and explicitly clearing the
tracking pointer context whenever an I2C transaction returns a non-zero
error status.

Additionally, ensure that tfm counter is decremented within the error path
to prevent reference counter stagnation, which would otherwise leave the
driver in a permanently busy state. Finding by a sashiko side-review.

Fixes: da001fb651b0 ("crypto: atmel-i2c - add support for SHA204A random number generator")
Signed-off-by: Lothar Rubusch <l.rubusch@gmail.com>
---
 drivers/crypto/atmel-sha204a.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
index 38a269186e2a..3d29543032cc 100644
--- a/drivers/crypto/atmel-sha204a.c
+++ b/drivers/crypto/atmel-sha204a.c
@@ -31,10 +31,15 @@ static void atmel_sha204a_rng_done(struct atmel_i2c_work_data *work_data,
 	struct atmel_i2c_client_priv *i2c_priv = work_data->ctx;
 	struct hwrng *rng = areq;

-	if (status)
+	if (status) {
 		dev_warn_ratelimited(&i2c_priv->client->dev,
 				     "i2c transaction failed (%d)\n",
 				     status);
+		kfree(work_data);
+		rng->priv = 0;
+		atomic_dec(&i2c_priv->tfm_count);
+		return;
+	}

 	rng->priv = (unsigned long)work_data;
 	atomic_dec(&i2c_priv->tfm_count);
-- 
2.39.5

^ permalink raw reply related

* [PATCH v2 12/12] crypto: atmel-sha204a - switch to module_i2c_driver
From: Lothar Rubusch @ 2026-05-19 20:48 UTC (permalink / raw)
  To: thorsten.blum, herbert, davem, nicolas.ferre, alexandre.belloni,
	claudiu.beznea
  Cc: linux-crypto, linux-arm-kernel, linux-kernel, l.rubusch
In-Reply-To: <20260519204803.17034-1-l.rubusch@gmail.com>

Replace custom module init/exit functions with module_i2c_driver() for
driver registration.

Update remove path to unregister the client from the shared I2C management
list before flushing pending work and cleaning up sysfs and hwrng
resources.

No functional change intended.

Signed-off-by: Lothar Rubusch <l.rubusch@gmail.com>
---
 drivers/crypto/atmel-sha204a.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
index 3d29543032cc..c65630a989a5 100644
--- a/drivers/crypto/atmel-sha204a.c
+++ b/drivers/crypto/atmel-sha204a.c
@@ -257,18 +257,7 @@ static struct i2c_driver atmel_sha204a_driver = {
 	.driver.of_match_table	= atmel_sha204a_dt_ids,
 };
 
-static int __init atmel_sha204a_init(void)
-{
-	return i2c_add_driver(&atmel_sha204a_driver);
-}
-
-static void __exit atmel_sha204a_exit(void)
-{
-	i2c_del_driver(&atmel_sha204a_driver);
-}
-
-module_init(atmel_sha204a_init);
-module_exit(atmel_sha204a_exit);
+module_i2c_driver(atmel_sha204a_driver);
 
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_DESCRIPTION("Microchip / Atmel SHA204A (I2C) driver");
-- 
2.39.5



^ permalink raw reply related

* [PATCH v2 06/12] crypto: atmel-i2c - introduce shared teardown helpers and fix queue flush
From: Lothar Rubusch @ 2026-05-19 20:47 UTC (permalink / raw)
  To: thorsten.blum, herbert, davem, nicolas.ferre, alexandre.belloni,
	claudiu.beznea
  Cc: linux-crypto, linux-arm-kernel, linux-kernel, l.rubusch
In-Reply-To: <20260519204803.17034-1-l.rubusch@gmail.com>

Introduce atmel_i2c_deactivate_client() and atmel_i2c_unregister_client()
helpers in the atmel-i2c core library to modularize client teardown. This
encapsulates common client state tracking and list manipulation operations.

Convert the ECC driver's error recovery and device removal paths to utilize
these new helpers, ensuring consistent execution ordering when modifying
device-readiness states and deleting linked-list nodes.

Inside the unregistration helper, use list_empty_careful() to safely
validate the membership state of the individual node before triggering
list_del_init().

Additionally, migrate the atmel_i2c_flush_queue() call out of the module
exit path. It now runs inside the core unregistration helper under the
protection of the management spinlock. This configuration ensures the
shared workqueue is only flushed when the global client list becomes
completely empty, enabling proper scaling for multi-driver setups.

Export both new tracking symbols via EXPORT_SYMBOL_GPL() to match the
existing core driver licensing standard.

Signed-off-by: Lothar Rubusch <l.rubusch@gmail.com>
---
 drivers/crypto/atmel-ecc.c | 13 +++----------
 drivers/crypto/atmel-i2c.c | 22 ++++++++++++++++++++++
 drivers/crypto/atmel-i2c.h |  3 +++
 3 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index 33b90667c872..29706e4bfa04 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -336,9 +336,7 @@ static int atmel_ecc_probe(struct i2c_client *client)
 	if (atmel_ecc_kpp_refcnt == 0) {
 		ret = crypto_register_kpp(&atmel_ecdh_nist_p256);
 		if (ret) {
-			spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
-			list_del(&i2c_priv->i2c_client_list_node);
-			spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
+			atmel_i2c_unregister_client(i2c_priv);
 
 			dev_err(&client->dev, "%s alg registration failed\n",
 				atmel_ecdh_nist_p256.base.cra_driver_name);
@@ -363,9 +361,7 @@ static void atmel_ecc_remove(struct i2c_client *client)
 {
 	struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
 
-	spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
-	i2c_priv->ready = false;
-	spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
+	atmel_i2c_deactivate_client(i2c_priv);
 
 	/*
 	 * Note, the Linux Crypto Core automatically blocks until all active
@@ -378,9 +374,7 @@ static void atmel_ecc_remove(struct i2c_client *client)
 		crypto_unregister_kpp(&atmel_ecdh_nist_p256);
 	mutex_unlock(&atmel_ecc_kpp_lock);
 
-	spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
-	list_del(&i2c_priv->i2c_client_list_node);
-	spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
+	atmel_i2c_unregister_client(i2c_priv);
 }
 
 static const struct of_device_id atmel_ecc_dt_ids[] = {
@@ -414,7 +408,6 @@ static int __init atmel_ecc_init(void)
 
 static void __exit atmel_ecc_exit(void)
 {
-	atmel_i2c_flush_queue();
 	i2c_del_driver(&atmel_ecc_driver);
 }
 
diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
index db24f65ae90e..c73ef3cadf0e 100644
--- a/drivers/crypto/atmel-i2c.c
+++ b/drivers/crypto/atmel-i2c.c
@@ -354,6 +354,28 @@ static int device_sanity_check(struct i2c_client *client)
 	return ret;
 }
 
+void atmel_i2c_deactivate_client(struct atmel_i2c_client_priv *i2c_priv)
+{
+	spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
+	i2c_priv->ready = false;
+	spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
+}
+EXPORT_SYMBOL_GPL(atmel_i2c_deactivate_client);
+
+void atmel_i2c_unregister_client(struct atmel_i2c_client_priv *i2c_priv)
+{
+	spin_lock(&atmel_i2c_mgmt.i2c_list_lock);
+
+	if (!list_empty_careful(&i2c_priv->i2c_client_list_node))
+		list_del_init(&i2c_priv->i2c_client_list_node);
+
+	if (list_empty(&atmel_i2c_mgmt.i2c_client_list))
+		atmel_i2c_flush_queue();
+
+	spin_unlock(&atmel_i2c_mgmt.i2c_list_lock);
+}
+EXPORT_SYMBOL_GPL(atmel_i2c_unregister_client);
+
 int atmel_i2c_probe(struct i2c_client *client)
 {
 	struct atmel_i2c_client_priv *i2c_priv;
diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h
index d54bd836e0f5..351306c426aa 100644
--- a/drivers/crypto/atmel-i2c.h
+++ b/drivers/crypto/atmel-i2c.h
@@ -192,4 +192,7 @@ void atmel_i2c_init_genkey_cmd(struct atmel_i2c_cmd *cmd, u16 keyid);
 int atmel_i2c_init_ecdh_cmd(struct atmel_i2c_cmd *cmd,
 			    struct scatterlist *pubkey);
 
+void atmel_i2c_deactivate_client(struct atmel_i2c_client_priv *i2c_priv);
+void atmel_i2c_unregister_client(struct atmel_i2c_client_priv *i2c_priv);
+
 #endif /* __ATMEL_I2C_H__ */
-- 
2.39.5



^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox