Linux Documentation
 help / color / mirror / Atom feed
* [PATCH v5 2/7] bus: mhi: Move Sahara protocol driver under MHI host client drivers
From: Kishore Batta @ 2026-04-16 14:09 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Jeff Hugo, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi,
	Kishore Batta
In-Reply-To: <20260416-sahara_protocol_new_v2-v5-0-6aebf005e4ba@oss.qualcomm.com>

The Sahara protocol driver currently lives under the QAIC accelerator
subsystem even though the protocol is transported over MHI and is used by
multiple Qualcomm flashless devices. This makes Sahara appear QAIC specific
and complicates reuse by other MHI based devices.

Move the Sahara protocol driver under drivers/bus/mhi as a host client
driver and build it as an independent MHI protocol driver. This keeps the
QAIC driver focused on the accelerator device while allowing other MHI
users to enable Sahara without depending on QAIC.

As part of the move, add a dedicated Kconfig/Makefile hierarchy under the
MHI host client drivers and convert the driver to use module_mhi_driver()
instead of register/unregister hooks.

Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
---
 drivers/accel/qaic/Kconfig                           |  1 +
 drivers/accel/qaic/Makefile                          |  3 +--
 drivers/accel/qaic/qaic_drv.c                        |  9 ---------
 drivers/accel/qaic/sahara.h                          | 10 ----------
 drivers/bus/mhi/Kconfig                              |  1 +
 drivers/bus/mhi/host/Makefile                        |  1 +
 drivers/bus/mhi/host/clients/Kconfig                 |  5 +++++
 drivers/bus/mhi/host/clients/Makefile                |  1 +
 drivers/bus/mhi/host/clients/sahara/Kconfig          | 15 +++++++++++++++
 drivers/bus/mhi/host/clients/sahara/Makefile         |  2 ++
 .../qaic => bus/mhi/host/clients/sahara}/sahara.c    | 20 +++++++-------------
 11 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/drivers/accel/qaic/Kconfig b/drivers/accel/qaic/Kconfig
index 116e42d152ca885b8c59e33c7a87519a0abc6bb3..af90fdfcf77eeb6dd5ad309b33d793d4fdc91b1e 100644
--- a/drivers/accel/qaic/Kconfig
+++ b/drivers/accel/qaic/Kconfig
@@ -8,6 +8,7 @@ config DRM_ACCEL_QAIC
 	depends on DRM_ACCEL
 	depends on PCI && HAS_IOMEM
 	depends on MHI_BUS
+	depends on MHI_SAHARA
 	select CRC32
 	select WANT_DEV_COREDUMP
 	help
diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile
index 71f727b74da3bb4478324689f02a7cea24a05c2d..e7b8458800072aa627f7f36c3257883aa56f4ce4 100644
--- a/drivers/accel/qaic/Makefile
+++ b/drivers/accel/qaic/Makefile
@@ -13,7 +13,6 @@ qaic-y := \
 	qaic_ras.o \
 	qaic_ssr.o \
 	qaic_sysfs.o \
-	qaic_timesync.o \
-	sahara.o
+	qaic_timesync.o
 
 qaic-$(CONFIG_DEBUG_FS) += qaic_debugfs.o
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 63fb8c7b4abcbe4f1b76c32106f4e8b9ea5e2c8e..3907b13e426064f4fa069e803cc44462feea4063 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -32,7 +32,6 @@
 #include "qaic_ras.h"
 #include "qaic_ssr.h"
 #include "qaic_timesync.h"
-#include "sahara.h"
 
 MODULE_IMPORT_NS("DMA_BUF");
 
@@ -791,12 +790,6 @@ static int __init qaic_init(void)
 		goto free_pci;
 	}
 
-	ret = sahara_register();
-	if (ret) {
-		pr_debug("qaic: sahara_register failed %d\n", ret);
-		goto free_mhi;
-	}
-
 	ret = qaic_timesync_init();
 	if (ret)
 		pr_debug("qaic: qaic_timesync_init failed %d\n", ret);
@@ -818,7 +811,6 @@ static int __init qaic_init(void)
 
 free_bootlog:
 	qaic_bootlog_unregister();
-free_mhi:
 	mhi_driver_unregister(&qaic_mhi_driver);
 free_pci:
 	pci_unregister_driver(&qaic_pci_driver);
@@ -847,7 +839,6 @@ static void __exit qaic_exit(void)
 	qaic_ras_unregister();
 	qaic_bootlog_unregister();
 	qaic_timesync_deinit();
-	sahara_unregister();
 	mhi_driver_unregister(&qaic_mhi_driver);
 	pci_unregister_driver(&qaic_pci_driver);
 }
diff --git a/drivers/accel/qaic/sahara.h b/drivers/accel/qaic/sahara.h
deleted file mode 100644
index 640208acc0d13d423bd9220e6861b7c141af74ff..0000000000000000000000000000000000000000
--- a/drivers/accel/qaic/sahara.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-
-/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */
-
-#ifndef __SAHARA_H__
-#define __SAHARA_H__
-
-int sahara_register(void);
-void sahara_unregister(void);
-#endif /* __SAHARA_H__ */
diff --git a/drivers/bus/mhi/Kconfig b/drivers/bus/mhi/Kconfig
index b39a11e6c624ba00349cca22d74bd876020590ab..720115218c2401c99b29f79bbd4113cd877503ac 100644
--- a/drivers/bus/mhi/Kconfig
+++ b/drivers/bus/mhi/Kconfig
@@ -7,3 +7,4 @@
 
 source "drivers/bus/mhi/host/Kconfig"
 source "drivers/bus/mhi/ep/Kconfig"
+source "drivers/bus/mhi/host/clients/Kconfig"
diff --git a/drivers/bus/mhi/host/Makefile b/drivers/bus/mhi/host/Makefile
index 859c2f38451c669b3d3014c374b2b957c99a1cfe..2e8949f1a2fe6f3f3b2e1dc541f97d2c393d6a0f 100644
--- a/drivers/bus/mhi/host/Makefile
+++ b/drivers/bus/mhi/host/Makefile
@@ -4,3 +4,4 @@ mhi-$(CONFIG_MHI_BUS_DEBUG) += debugfs.o
 
 obj-$(CONFIG_MHI_BUS_PCI_GENERIC) += mhi_pci_generic.o
 mhi_pci_generic-y += pci_generic.o
+obj-$(CONFIG_MHI_BUS) += clients/
diff --git a/drivers/bus/mhi/host/clients/Kconfig b/drivers/bus/mhi/host/clients/Kconfig
new file mode 100644
index 0000000000000000000000000000000000000000..a4f2a3c1d20c887cc474646ea91532d775a13f57
--- /dev/null
+++ b/drivers/bus/mhi/host/clients/Kconfig
@@ -0,0 +1,5 @@
+menu "MHI host client drivers"
+
+source "drivers/bus/mhi/host/clients/sahara/Kconfig"
+
+endmenu
diff --git a/drivers/bus/mhi/host/clients/Makefile b/drivers/bus/mhi/host/clients/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..62e2fa161565225be7f6a23d3cdf4f2f169cb7ce
--- /dev/null
+++ b/drivers/bus/mhi/host/clients/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MHI_SAHARA) += sahara/
diff --git a/drivers/bus/mhi/host/clients/sahara/Kconfig b/drivers/bus/mhi/host/clients/sahara/Kconfig
new file mode 100644
index 0000000000000000000000000000000000000000..f1fc7013a2dee0be645c50f32305659e591de7e7
--- /dev/null
+++ b/drivers/bus/mhi/host/clients/sahara/Kconfig
@@ -0,0 +1,15 @@
+config MHI_SAHARA
+	tristate "Sahara protocol driver"
+	depends on MHI_BUS
+	help
+	  Enable support for the Sahara protocol transported over the MHI bus.
+
+	  The Sahara protocol is used to transfer firmware images, retrieve
+	  memory dumps and exchange command mode DDR calibration data between
+	  host and device. This driver is not tied to a specific SoC and may be
+	  used by multiple MHI based devices.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called mhi_sahara.
diff --git a/drivers/bus/mhi/host/clients/sahara/Makefile b/drivers/bus/mhi/host/clients/sahara/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..fc02a25935011cbd7138ea8f24b88cf5b032a4ce
--- /dev/null
+++ b/drivers/bus/mhi/host/clients/sahara/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_MHI_SAHARA) += mhi_sahara.o
+mhi_sahara-y := sahara.o
diff --git a/drivers/accel/qaic/sahara.c b/drivers/bus/mhi/host/clients/sahara/sahara.c
similarity index 99%
rename from drivers/accel/qaic/sahara.c
rename to drivers/bus/mhi/host/clients/sahara/sahara.c
index fd3c3b2d1fd3bb698809e6ca669128e2dce06613..858dc5bc39c1ad42922cabef3b1abcd43bc4f0f4 100644
--- a/drivers/accel/qaic/sahara.c
+++ b/drivers/bus/mhi/host/clients/sahara/sahara.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
-
-/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */
+/*
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ *
+ */
 
 #include <linux/devcoredump.h>
 #include <linux/firmware.h>
@@ -13,8 +15,6 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 
-#include "sahara.h"
-
 #define SAHARA_HELLO_CMD		0x1  /* Min protocol version 1.0 */
 #define SAHARA_HELLO_RESP_CMD		0x2  /* Min protocol version 1.0 */
 #define SAHARA_READ_DATA_CMD		0x3  /* Min protocol version 1.0 */
@@ -923,13 +923,7 @@ static struct mhi_driver sahara_mhi_driver = {
 		.name = "sahara",
 	},
 };
+module_mhi_driver(sahara_mhi_driver);
 
-int sahara_register(void)
-{
-	return mhi_driver_register(&sahara_mhi_driver);
-}
-
-void sahara_unregister(void)
-{
-	mhi_driver_unregister(&sahara_mhi_driver);
-}
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Qualcomm Sahara MHI protocol driver");

-- 
2.34.1


^ permalink raw reply related

* [PATCH v5 1/7] Add documentation for Sahara protocol
From: Kishore Batta @ 2026-04-16 14:09 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Jeff Hugo, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi,
	Kishore Batta
In-Reply-To: <20260416-sahara_protocol_new_v2-v5-0-6aebf005e4ba@oss.qualcomm.com>

Introduce documentation for the Sahara protocol, describing its
operational modes and their respective functions. The image transfer mode
enables firmware transfer from host to device. The memory debug mode
allows extraction of device memory contents to host. The command mode
facilitates retrieval of DDR training data from the device and restoring
that training data to the device on subsequent boots, which reduces
boot time.

Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
---
 Documentation/mhi/index.rst           |    1 +
 Documentation/mhi/sahara_protocol.rst | 1241 +++++++++++++++++++++++++++++++++
 2 files changed, 1242 insertions(+)

diff --git a/Documentation/mhi/index.rst b/Documentation/mhi/index.rst
index 0aa00482aa2e2d7ec4941154a8c6947dc0a0ac40..39a38978398b81727514ec95dee4e060a1063b34 100644
--- a/Documentation/mhi/index.rst
+++ b/Documentation/mhi/index.rst
@@ -9,3 +9,4 @@ MHI
 
    mhi
    topology
+   sahara_protocol
diff --git a/Documentation/mhi/sahara_protocol.rst b/Documentation/mhi/sahara_protocol.rst
new file mode 100644
index 0000000000000000000000000000000000000000..bea72a98b9529ee7d5ce875b00dda5665237830a
--- /dev/null
+++ b/Documentation/mhi/sahara_protocol.rst
@@ -0,0 +1,1241 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+
+=============================
+Sahara protocol Specification
+=============================
+
+The Qualcomm Sahara protocol driver is primarily designed for transferring
+software images from a host device to a target device using a simplified data
+transfer mechanism over a link. However, the Sahara protocol does not support
+any authentication/validation of the data sent between devices. Such a mechanism
+is beyond the scope of the protocol.
+
+The Sahara protocol defines two types of packets - Command packet and Data
+packet.
+
+Command packet
+--------------
+  These packets are sent between the host and the target to setup transfers of
+  data packets. The command packets contain a command ID and packet length.
+  Depending on the command, the packet may contain additional command-specific
+  fields.
+
++-------------+---------------+----------------+----------------+
+| Command ID  | Packet length | Optional field | Optional field |
++-------------+---------------+----------------+----------------+
+
+Data packet
+-----------
+  The data packets contain RAW data as shown below.
+
++---------------------------------------------------------+
+|           RAW Data (arbitrary number of bytes)          |
++---------------------------------------------------------+
+
+Command packet optional fields
+------------------------------
+
++---------+---------------+---------+-----------------------------------------+
+| ID val  |     Field     | Sent by |             Description                 |
++---------+---------------+---------+-----------------------------------------+
+|   0x0   |       -       |    -    |             Invalid                     |
++---------+---------------+---------+-----------------------------------------+
+|   0x1   | Hello packet  |  Target | Initializes connection and protocol     |
++---------+---------------+---------+-----------------------------------------+
+|   0x2   | Hello response|  Host   | Acknowledges connection and protocol    |
+|         |               |         | sent by target. Also used to set mode of|
+|         |               |         | operation for target to execute.        |
++---------+---------------+---------+-----------------------------------------+
+|   0x3   | Read data     |  Target | Reads specified number of bytes from    |
+|         |               |         | host for a given image.                 |
++---------+---------------+---------+-----------------------------------------+
+|   0x4   | End of image  |  Target | Indicates host that the single image tx |
+|         | transfer      |         | is complete. Also used to indicate a    |
+|         |               |         | target failure during an image transfer |
++---------+---------------+---------+-----------------------------------------+
+|   0x5   | Done packet   |  Host   | Sends acknowledgment from host that a   |
+|         |               |         | single image transfer is complete.      |
++---------+---------------+---------+-----------------------------------------+
+|   0x6   | Done response |  Target | Provides the following information to   |
+|         |               |         | host.                                   |
+|         |               |         | 1. Target is exiting protocol           |
+|         |               |         | 2. Whether the target expects to        |
+|         |               |         | re-enter protocol to transfer another   |
+|         |               |         | image.                                  |
++---------+---------------+---------+-----------------------------------------+
+|   0x7   | Reset packet  |  Host   | Instructs target to perform a reset.    |
++---------+---------------+---------+-----------------------------------------+
+|   0x8   | Reset response|  Target | Indicates host that target is about to  |
+|         |               |         | reset.                                  |
++---------+---------------+---------+-----------------------------------------+
+|   0x9   | Memory debug  |  Target | Indicates host that target has entered  |
+|         | packet        |         | a debug mode where it is ready to       |
+|         |               |         | transfer its system memory contents     |
++---------+---------------+---------+-----------------------------------------+
+|   0xA   | Memory read   |  Host   | Reads specified number of bytes from    |
+|         | packet        |         | target's system memory, starting from a |
+|         |               |         | specified address.                      |
++---------+---------------+---------+-----------------------------------------+
+|   0xB   | Command ready |  Target | Indicates host that target is ready to  |
+|         | packet        |         | receive client commands.                |
++---------+---------------+---------+-----------------------------------------+
+|   0xC   | Command switch|  Host   | Indicates target to switch modes.       |
+|         | mode packet   |         | 1. Image transfer pending mode.         |
+|         |               |         | 2. Image transfer complete mode.        |
+|         |               |         | 3. Memory debug mode.                   |
+|         |               |         | 4. Command mode.                        |
++---------+---------------+---------+-----------------------------------------+
+|   0xD   | Command       |  Host   | Indicates target to execute a given     |
+|         | execute packet|         | client command.                         |
++---------+---------------+---------+-----------------------------------------+
+|   0xE   | Command       |  Target | Indicates host that target has executed |
+|         | execute       |         | client command. Also used to indicate   |
+|         | response      |         | status of executed command.             |
+|         | packet        |         |                                         |
++---------+---------------+---------+-----------------------------------------+
+|   0xF   | Command       |  Host   | Indicates target that host is ready to  |
+|         | execute       |         | receive data resulting from executing   |
+|         | data          |         | previous client command.                |
+|         | packet        |         |                                         |
++---------+---------------+---------+-----------------------------------------+
+|   0x10  | 64 bit Memory |  Target | Indicates host that target has entered  |
+|         | debug packet  |         | a debug mode where it is ready to       |
+|         |               |         | transfer its 64 bit system memory       |
+|         |               |         | contents.                               |
++---------+---------------+---------+-----------------------------------------+
+|   0x11  | 64 bit Memory |  Host   | Reads specified number of bytes from    |
+|         | read packet   |         | target's system memory, starting from a |
+|         |               |         | 64 bit specified address.               |
++---------+---------------+---------+-----------------------------------------+
+|   0x12  | 64 bit Read   |  Target | Reads specified number of bytes from    |
+|         | data          |         | host for a given 64 bit image.          |
++---------+---------------+---------+-----------------------------------------+
+|   0x13  | Reset Sahara  |  Host   | Resets Sahara state machine and enters  |
+|         | state machine |         | Sahara entry without target reset       |
+|         | packet        |         |                                         |
++---------+---------------+---------+-----------------------------------------+
+|   0x14  | Write data    |  Target | Writes specified number of bytes to host|
+|         | packet        |         | for a given image                       |
++---------+---------------+---------+-----------------------------------------+
+|  Others |       -       |    -    |             Invalid                     |
++---------+---------------+---------+-----------------------------------------+
+
+
+Hello Packet
+------------
+
+The hello packet is the first packet that the target sends to the host. If the
+host receives any other packet, it sends a reset command to the target. When the
+host receives a valid hello packet, it first verifies that the protocol running
+on the target is compatible with the protocol running on the host. If the
+versions are incompatible, the host sends a reset command to the target. The
+target uses the following format while sending a hello packet.
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Version   |     4       | Version number of this protocol      |
++-----------+-------------+--------------------------------------+
+| Version   |     4       | Lowest Compatible version            |
+| Compatible|             |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       | Maximum command packet length        |
+| packet    |             | (in bytes) the protocol supports.    |
+| length    |             |                                      |
++-----------+-------------+--------------------------------------+
+| Mode      |     4       | Expected mode of target operation    |
++-----------+-------------+--------------------------------------+
+| Reserved  |     4       | Reserved for future use.             |
++-----------+-------------+--------------------------------------+
+| Reserved  |     4       | Reserved for future use.             |
++-----------+-------------+--------------------------------------+
+| Reserved  |     4       | Reserved for future use.             |
++-----------+-------------+--------------------------------------+
+| Reserved  |     4       | Reserved for future use.             |
++-----------+-------------+--------------------------------------+
+| Reserved  |     4       | Reserved for future use.             |
++-----------+-------------+--------------------------------------+
+| Reserved  |     4       | Reserved for future use.             |
++-----------+-------------+--------------------------------------+
+
+The target also sends the following information:
+  1. Maximum length of the command packet that it supports. The host uses this
+     information to avoid sending more bytes than the target can support in the
+     receiving command buffer.
+  2. Mode of operation it expects to enter, based on the boot up sequence. The
+     supported modes of operation for the target are as follows:
+
++-----------------------------+---------+------------------------------------+
+|          Mode               | Mode ID |         Description                |
++-----------------------------+---------+------------------------------------+
+| SAHARA_MODE_IMAGE_TX_PENDING|  0x0    | Image transfer is in the pending   |
+|                             |         | mode. Transfer image from the host.|
+|                             |         | After completion, the host should  |
+|                             |         | expect another image transfer      |
+|                             |         | request.                           |
++-----------------------------+---------+------------------------------------+
+|SAHARA_MODE_IMAGE_TX_COMPLETE|  0x1    | Image transfer is in the complete  |
+|                             |         | mode. Transfer image from the host.|
+|                             |         | After completion, the host should  |
+|                             |         | not expect another image transfer  |
+|                             |         | request.                           |
++-----------------------------+---------+------------------------------------+
+|  SAHARA_MODE_MEMORY_DEBUG   |  0x2    | Memory debug mode. The host should |
+|                             |         | prepare to receive a memory dump   |
+|                             |         | from the target.                   |
++-----------------------------+---------+------------------------------------+
+|    SAHARA_MODE_COMMAND      |  0x3    | Command mode. The host executes    |
+|                             |         | operations on the target by sending|
+|                             |         | the appropriate client command to  |
+|                             |         | the Sahara client running on the   |
+|                             |         | target. The Sahara client          |
+|                             |         | interprets the client command and  |
+|                             |         | the response is sent after         |
+|                             |         | execution of the given command.    |
++-----------------------------+---------+------------------------------------+
+
+Hello response packet
+---------------------
+
+After the host validates the protocol running on the target, it sends a response
+to the target. The response contains the following information.
+1. The protocol version that is running.
+2. The minimum protocol version that it supports.
+3. The mode of operation.
+
+The host sets the packet status field to success if no errors occur on the host
+side. After the target receives this packet, it can proceed with data transfer
+requests or memory debug. The host uses the following format while sending a
+hello response packet.
+
++-----------+-------------+--------------------------------------+
+| Field     | Length      | Description                          |
+|           | (bytes)     |                                      |
++===========+=============+======================================+
+| Command   | 4           | Command identifier code              |
++-----------+-------------+--------------------------------------+
+| Length    | 4           | Length of the packet (in bytes)      |
++-----------+-------------+--------------------------------------+
+| Version   | 4           | Version number of this protocol      |
++-----------+-------------+--------------------------------------+
+| Compatible| 4           | Lowest Compatible version            |
++-----------+-------------+--------------------------------------+
+| Status    | 4           | Success or error code                |
++-----------+-------------+--------------------------------------+
+| Mode      | 4           | Mode of operation for target to      |
+|           |             | execute                              |
++-----------+-------------+--------------------------------------+
+| Reserved  | 4           | Reserved for future use              |
++-----------+-------------+--------------------------------------+
+| Reserved  | 4           | Reserved for future use              |
++-----------+-------------+--------------------------------------+
+| Reserved  | 4           | Reserved for future use              |
++-----------+-------------+--------------------------------------+
+| Reserved  | 4           | Reserved for future use              |
++-----------+-------------+--------------------------------------+
+| Reserved  | 4           | Reserved for future use              |
++-----------+-------------+--------------------------------------+
+| Reserved  | 4           | Reserved for future use              |
++-----------+-------------+--------------------------------------+
+
+
+Read data packet / 64 bit read data packet
+------------------------------------------
+
+The read data packet serves as a generic data transfer packet when any image
+data is to be transferred from the host to the target. This packet allows
+flexibility in the way that the image is transferred from the host to the
+target. As the target controls which data gets transferred, the target can
+determine what parts of the image get transferred and in what order. The host
+need not be familiar with the structure of the image. It must open the file and
+start transferring the data to the target based on the parameters specified in
+the packet.
+
+This gives the target complete control over how the images are transferred and
+processed. To initiate an image transfer, the target fills the read data packet
+with the image ID corresponding to the image that it wants to receive. The
+target also sends the offset into the image file and the length of the data(in
+bytes) it wants to read from the image. After the host receives this packet, the
+host responds with a data packet, which contains image data with the length
+specified in the read data packet. The target uses the following format while
+transferring the read data packet and 64-bit read data packet.
+
+
+Read data packet format
+=======================
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Image ID  |     4       | ID of the image to be transferred.   |
++-----------+-------------+--------------------------------------+
+| Data      |     4       | Offset into the image file to start  |
+| offset    |             | transferring data.                   |
++-----------+-------------+--------------------------------------+
+| Data      |     4       | Number of bytes target wants to      |
+| Length    |             | transfer from the image.             |
++-----------+-------------+--------------------------------------+
+
+
+64-bit read data packet format
+==============================
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Image ID  |     8       | ID of the image to be transferred.   |
++-----------+-------------+--------------------------------------+
+| Data      |     8       | Offset into the image file to start  |
+| offset    |             | transferring data.                   |
++-----------+-------------+--------------------------------------+
+| Data      |     8       | Number of bytes target wants to      |
+| Length    |             | transfer from the image.             |
++-----------+-------------+--------------------------------------+
+
+If any of the preceding fields are invalid, or if any other error occurs on the
+host, the host sends a data packet with length that does not match with what the
+target was expecting. The resulting error forces the target to send an end of
+image transfer packet with an error code in the status field and enables both
+the target and the host to enter an error handling state.
+
+End of Image transfer packet
+----------------------------
+
+If an image transfer is successfully completed, the target sends the host an end
+of image transfer packet with a success status. The target then waits for the
+host to send a done packet. If any error occurs during the transfer or
+processing of the image data, the status is set to the corresponding error code,
+and the target waits for a different command packet.
+
+The target uses the following format while sending the end of image transfer
+packet:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Image ID  |     4       | ID of the image that was being       |
+|           |             | transferred.                         |
++-----------+-------------+--------------------------------------+
+| Status    |     4       | Success or error code                |
++-----------+-------------+--------------------------------------+
+
+Done packet
+-----------
+
+If the host receives an end of image transfer packet with a success status, the
+host sends a done packet to inform the target that it can exit the protocol
+and continue execution of code. The host uses the following format while sending
+the done packet:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+
+To transfer another image from the host, the target must re-initiate the
+protocol by starting with another hello packet.
+
+Done Response packet
+--------------------
+
+If the target receives a done packet, it responds with a done response packet
+containing the image transfer status. The target uses the following format while
+sending the done response packet:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Image Tx  |     4       | Indicates whether target is          |
+| Status    |             | expecting to receive another image   |
+|           |             | or not.                              |
++-----------+-------------+--------------------------------------+
+
+If all the images are transferred, the target sends a complete status to enable
+the host to exit the protocol. If all the images are not transferred, the target
+sends a pending status and waits for another hello packet to arrive.
+
+Reset Packet
+------------
+
+The host sends a reset packet to reset the target. The target services a reset
+request only if it is in a state where reset requests are valid. If the target
+receives an invalid reset request, the target sends an error in an end of image
+transfer packet. The format of reset packet is as follows:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+
+
+Reset response packet
+---------------------
+
+If the target receives a valid reset request, it sends a reset response packet
+just before it resets. The purpose of this response is to acknowledge to the
+host that the target received the reset request. The format of reset response
+packet is as follows:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+
+
+Memory debug packet
+-------------------
+
+The target initiates a memory dump by sending the host a memory debug packet.
+This packet contains the address and length of the memory debug table. The
+memory debug table is a listing of memory locations that can be accessed and
+dumped to the host. The target uses the following format while sending the
+memory debug packet:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Memory    |     4       | Target sets this field to the address|
+| table     |             | in memory that stores the memory     |
+| Address   |             | debug table.                         |
++-----------+-------------+--------------------------------------+
+| Memory    |     4       | Length in bytes of memory debug      |
+| table     |             | table.                               |
+| Length    |             |                                      |
++-----------+-------------+--------------------------------------+
+
+Given the memory table address and length, the host issues a memory read to
+retrieve the table. After the host receives the memory table information, it can
+decode each entry and issue memory read requests to dump each memory location.
+
+Memory read packet / 64-bit memory read packet
+----------------------------------------------
+
+The host issues memory read commands for each section of memory that it dumps.
+The host uses the following format while sending the memory read packet and 64
+bit memory read packet:
+
+Memory read packet format
+=========================
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Memory    |     4       | Memory location to read.             |
+| Address   |             |                                      |
++-----------+-------------+--------------------------------------+
+| Memory    |     4       | Length in bytes of memory to read    |
+| Length    |             |                                      |
++-----------+-------------+--------------------------------------+
+
+64 bit memory read packet format
+================================
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Memory    |     8       | Memory location to read.             |
+| Address   |             |                                      |
++-----------+-------------+--------------------------------------+
+| Memory    |     8       | Length in bytes of memory to read    |
+| Length    |             |                                      |
++-----------+-------------+--------------------------------------+
+
+The accessible regions are defined in the memory debug table. For each memory
+read command received, the target verifies that the specified memory (address
+and length) is accessible and responds with a raw data packet. The raw data
+packet contains the memory contents starting at the address, and of the length,
+specified in the memory read packet. The memory debug table can also be read
+using a memory read command by setting the address and length to the values
+specified in the memory debug packet.
+
+If any error occurs on the target, an end of image transfer packet is sent with
+the corresponding error code and the host recognizes whether it is actual memory
+data or an end of image transfer packet. The host issues a reset command on
+completion of a successful memory dump. However, the protocol does not force
+this implementation.
+
+Command ready packet
+--------------------
+
+The target sends this packet to the host to indicate that the target is ready to
+execute client commands. The target uses the following format while sending the
+command ready packet:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+
+
+Command switch mode packet
+--------------------------
+
+The host sends the command switch mode packet to the target so that the target
+can switch to another mode. The host uses the following format while sending the
+command switch mode packet:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Mode      |     4       | Mode of operation for target         |
+|           |             | to execute.                          |
++-----------+-------------+--------------------------------------+
+
+Command execute packet
+----------------------
+
+The host sends this packet to execute the given client command on the target. If
+the client command successfully executes, the target sends a command execute
+response packet. If an error occurs, the target sends an end of image transfer
+packet with the corresponding error code. The host uses the following format
+while sending command execute packet:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Client    |     4       | Client Command to be executed.       |
+| Command   |             |                                      |
++-----------+-------------+--------------------------------------+
+
+
+Client commands
+===============
+
++------------+-------------+--------------------------------------+
+|  Client ID |  Length     |          Description                 |
++------------+-------------+--------------------------------------+
+|   0x8      |    4        |      Get Command ID list.            |
++------------+-------------+--------------------------------------+
+|   0x9      |    4        |      Get DDR training data.          |
++------------+-------------+--------------------------------------+
+
+Command execute Response packet
+-------------------------------
+
+The target sends this packet if it successfully executes the client command. The
+target uses the following format while sending the command execute response
+packet.
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Client    |     4       | Client command that was executed.    |
+| Command   |             |                                      |
++-----------+-------------+--------------------------------------+
+| Response  |     4       | Number of bytes for response data.   |
+| Length    |             |                                      |
++-----------+-------------+--------------------------------------+
+
+Command execute data packet
+---------------------------
+
+The host sends this packet if the response length received in the command
+execute response packet is greater than 0. The host uses the following format
+while sending command execute data packet:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Client    |     4       | Client Command executed.             |
+| Command   |             |                                      |
++-----------+-------------+--------------------------------------+
+
+The packet instructs the target to send the response data in a raw data packet.
+The target sends the response data upon receiving this packet.
+
+64-bit memory debug packet
+--------------------------
+
+The target sends this packet to the host to initiate a memory dump. The packet
+contains 64-bit address and length of the memory table. The target uses the
+following format while sending 64-bit memory debug packet:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Memory    |     8       | Target sets this field to the 64-bit |
+| table     |             | address in memory that stores the    |
+| Address   |             | memory debug table.                  |
++-----------+-------------+--------------------------------------+
+| Memory    |     8       | Length in bytes of memory debug      |
+| table     |             | table.                               |
+| Length    |             |                                      |
++-----------+-------------+--------------------------------------+
+
+Reset Sahara state machine packet
+---------------------------------
+
+The host sends a reset Sahara state machine packet whenever it wants to reset
+the Sahara state machine. When the target receives a reset Sahara state machine
+request, it reinitializes the Sahara protocol and sends the hello packet to the
+host. The Sahara protocol is restarted without a target reset. The host uses the
+following format while sending the reset Sahara state machine packet:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+
+Write data packet
+-----------------
+
+Write data packet serves as a generic data transfer packet when any data is
+transferred from the target to the host. This packet allows flexible data
+transfer from the target to the host.
+
+As the target controls what data gets transferred, the target can determine
+what parts of the data get transferred and in what order. The host does not
+need to know anything about the structure of the data. It only needs to open
+the file and start accepting the data based on the parameters specified in the
+packet.
+
+To initiate a write data transfer, the target fills the write data packet with
+the image ID corresponding to the image data that it wants to send. The target
+also sends the offset into the output file and the length of the data (in
+bytes) it wants to write from the target. As soon as the host receives the
+packet, the host opens an output file and waits to receive the data packets.
+After the packet is received, the content from the data packet is written to
+the output file. The format of the write data packet is as follows:
+
++-----------+-------------+--------------------------------------+
+|  Field    |  Length     |          Description                 |
+|           |  (bytes)    |                                      |
++-----------+-------------+--------------------------------------+
+| Command   |     4       |      Command identifier code         |
++-----------+-------------+--------------------------------------+
+| Length    |     4       | Length of the packet(in bytes)       |
++-----------+-------------+--------------------------------------+
+| Data      |     8       | Offset into the image file to start  |
+| offset    |             | writing the data to host.            |
++-----------+-------------+--------------------------------------+
+| Image ID  |     4       | ID of the image to be transferred.   |
++-----------+-------------+--------------------------------------+
+| Data      |     4       | Number of bytes the target wants to  |
+| Length    |             | transfer to the host.                |
++-----------+-------------+--------------------------------------+
+
+
+Command packet flow between host and target
+-------------------------------------------
+
+Packet flow is a process of exchange of information as packets between the host
+and the target in a specific way using command packets. The Sahara protocol
+allows packet processing for the following scenarios:
+
+1. Transferring an image from the host to the target.
+2. Dumping memory from the target to the host.
+3. Loading DDR calibration data on flashless target.
+
+Packet flow for Image transfer
+------------------------------
+
+The packet flow is performed between the host and target for a successful image
+transfer.
+
+.. code-block:: text
+
+                        Host                       Target
+                          |          HELLO            |
+                          |   (mode = image transfer) |
+                          |<--------------------------|
+                          |                           |
+                          |         HELLO RESP        |
+                          |   (mode = image transfer) |
+                          |-------------------------->|
+                          |                           |
+                          |         READ_DATA         |
+                          |   (img ID, 0, offset,     |
+                          |   size of image header)   |
+                          |<--------------------------|
+                          |                           |
+                          |         RAW_DATA          |
+                          | (size of image header)    |
+                          |-------------------------->|
+                          |                           |
+                          |         READ_DATA         |
+                          | (img ID, segment 0 offset,|
+                          |  size of segment 0)       |
+                          |<--------------------------|
+                          |         RAW_DATA          |
+                          |     (size of segment 0)   |
+                          |-------------------------->|
+                          |                           |
+                          |         READ_DATA         |
+                          | (img ID, segment 1 offset,|
+                          |  size of segment 1)       |
+                          |<--------------------------|
+                          |                           |
+                          |                           |
+                          |         RAW_DATA          |
+                          |    (size of segment 1)    |
+                          |-------------------------->|
+                          |          ...              |
+                          |          ...              |
+                          |          ...              |
+                          |          ...              |
+                          |                           |
+                          |                           |
+                          |         READ_DATA         |
+                          | (img ID, segment N offset,|
+                          |  size of segment N)       |
+                          |<--------------------------|
+                          |                           |
+                          |                           |
+                          |                           |
+                          |         RAW_DATA          |
+                          |    (size of segment N)    |
+                          |-------------------------->|
+                          |                           |
+                          |                           |
+                          |       END_IMAGE_TX        |
+                          |<--------------------------|
+                          |                           |
+                          |                           |
+                          |          DONE             |
+                          |-------------------------->|
+                          |                           |
+                          |                           |
+                          |         DONE_RESP         |
+                          |<--------------------------|
+                          |                           |
+
+The packet flow sequence for image transfer is as follows:
+
+1. A hello packet is sent from the target to the host to initiate the protocol
+   with the mode set to either image transfer pending or image transfer
+   complete (depending on the target's boot sequence).
+
+2. After it receives the hello packet and validates the protocol version
+   running on the target, the host sends a hello response packet with a
+   success status and sets the mode to the mode received in the hello packet.
+
+3. After the target receives the hello response, the target initiates the
+   image transfer request by sending read data packets. Each read data packet
+   specifies the image that the target wishes to receive and what part of the
+   image is to be transferred.
+
+4. During normal operation, the target first requests image header information.
+
+   a. The image header information specifies image size and location of the
+      image data that is to be transferred.
+
+   b. The image header information (which is sent as a read data request)
+      allows the target to know the format of the image to be transferred.
+      The protocol does not require the host to know anything about the
+      image formats and allows the host to read and transfer data from the
+      image as requested by the target.
+
+   c. If the image is a standalone binary image without any data segmentation
+      (which means the data is entirely contiguous when stored as well as
+      transferred to the target system memory), then the target requests the
+      entire image data to be sent in one transfer.
+
+   d. If the image data is segmented and requires scattering of the data
+      segments to noncontiguous system memory locations, the target issues
+      multiple read data requests to enable each data segment to be
+      transferred directly to the respective destination address. This
+      scattered information resides in the image header and is parsed by the
+      target before issuing the read data requests.
+
+5. After receiving a read data request, the host parses the image ID, data
+   offset, and data length to transfer data from the corresponding image file.
+   The host sends the requested data without any packet header.
+
+6. The target directly transfers the data to the destination address without
+   any software processing or temporary buffering of the data in system
+   memory by transferring the image header to the target and setting the
+   receive buffer for the data as the destination address in system memory.
+
+7. After the target successfully receives all segments for an image, the
+   target sends an end of image transfer packet with the image ID of the
+   corresponding image, and a success status. The host stops reading and
+   closes the image file after receiving the success status.
+
+8. The host sends a done packet to allow the target to exit the protocol after
+   it receives a successful end of image transfer packet.
+
+9. After the target receives the done packet, the target sends a done response
+   packet to the host. This packet indicates if the target expects another
+   image to be transferred and if the host can continue to run the protocol.
+
+Packet flow for memory debug
+----------------------------
+
+The packet flow is performed between the host and the target for a successful
+memory debug.
+
+.. code-block:: text
+
+                        Host                       Target
+                          |          HELLO            |
+                          |   (mode = memory debug)   |
+                          |<--------------------------|
+                          |                           |
+                          |         HELLO RESP        |
+                          |   (mode = memory debug)   |
+                          |-------------------------->|
+                          |                           |
+                          |         MEMORY_DEBUG      |
+                          |   (location of mem table, |
+                          |   size of memory table)   |
+                          |<--------------------------|
+                          |                           |
+                          |         MEMORY_READ       |
+                          |   (Address from region 0 ,|
+                          |    size of region 0)      |
+                          |-------------------------->|
+                          |         RAW_DATA          |
+                          |     (size of region 0)    |
+                          |<--------------------------|
+                          |                           |
+                          |         MEMORY_READ       |
+                          |   (Address from region 1 ,|
+                          |    size of region 1)      |
+                          |-------------------------->|
+                          |         RAW_DATA          |
+                          |     (size of region 1)    |
+                          |<--------------------------|
+                          |         MEMORY_READ       |
+                          |   (Address from region 2 ,|
+                          |    size of region 2)      |
+                          |-------------------------->|
+                          |         RAW_DATA          |
+                          |     (size of region 2)    |
+                          |<--------------------------|
+                          |          ...              |
+                          |          ...              |
+                          |          ...              |
+                          |          ...              |
+                          |                           |
+                          |         MEMORY_READ       |
+                          |   (Address from region N ,|
+                          |    size of region N)      |
+                          |-------------------------->|
+                          |         RAW_DATA          |
+                          |     (size of region N)    |
+                          |<--------------------------|
+                          |                           |
+                          |          RESET            |
+                          |-------------------------->|
+                          |                           |
+                          |                           |
+                          |        RESET_RESP         |
+                          |<--------------------------|
+                          |                           |
+
+The packet flow sequence for memory debug is as follows:
+
+1. A hello packet is sent from the target to the host to initiate the protocol
+   with mode set to memory debug.
+
+2. The host sends a hello response packet with a success status and sets the
+   mode to memory debug after it receives the hello packet and validates the
+   protocol version running on the target.
+
+3. After the target receives the hello response, the target initiates the
+   memory dump by sending a memory debug packet with the location and size of
+   the memory debug table. The memory debug table specifies accessible memory
+   regions.
+
+4. After it receives the memory debug packet, the host sends a memory read
+   packet to read the memory debug table and receives the table in a raw data
+   packet.
+
+5. The host then decodes the table and issues memory reads for each accessible
+   region. The data for each region is sent in a raw data packet.
+
+6. Upon completion, the host issues a reset to the target. The target sends a
+   reset response and resets the target.
+
+7. The host can alternatively send a command switch mode packet to allow the
+   target to switch modes and avoid a reset.
+
+
+Packet flow to load DDR calibration data on target
+--------------------------------------------------
+
+The packet flow is performed between the host and the target to load DDR
+calibration data on a flashless target. This packet flow is initiated when the
+device boots up for the first time and needs DDR calibration. This packet flow
+is also initiated in other scenarios, such as a build update or any event that
+corrupts the DDR calibration data.
+
+First boot scenario or invalid calibration data in filesystem.
+--------------------------------------------------------------
+
+.. code-block:: text
+
+                        Host                       Target
+                          |          HELLO            |
+                          |   (mode = image transfer) |
+                          |<--------------------------|
+                          |                           |
+                          |         HELLO RESP        |
+                          |   (mode = image transfer) |
+                          |-------------------------->|
+                          |                           |
+                          |         READ_DATA         |
+                          |   (img ID:34, 0, offset,  |
+                          | size of DDR training data)|
+                          |<--------------------------|
+                          |                           |
+                          |         RAW_DATA          |
+                          |(size of DDR training data)|
+                          |-------------------------->|
+                          |                           |
+                          |                           |
+                          |       END_IMAGE_TX        |
+                          |<--------------------------|
+                          |                           |
+                          |                           |
+                          |          DONE             |
+                          |-------------------------->|
+                          |                           |
+                          |                           |
+                          |         DONE_RESP         |
+                          | (mode = IMAGE_TX_PENDING) |
+                          |<--------------------------|
+                          |1. First boot scenario.    |
+                          |   DDR driver performs     |
+                          |   calibration and returns |
+                          |   to SBL.                 |
+                          |2. Next: Push DDR          |
+                          |  Calibration data to host |
+                          |                           |
+                          |                           |
+                          |          HELLO            |
+                          |   (mode = COMMAND mode)   |
+                          |<--------------------------|
+                          |                           |
+                          |         HELLO RESP        |
+                          |   (mode = COMMAND mode)   |
+                          |-------------------------->|
+                          |                           |
+                          |         CMD_READY         |
+                          |<--------------------------|
+                          |                           |
+                          |         CMD_EXEC          |
+                          |(cmd id = 8, Get command   |
+                          | ID to be executed)        |
+                          |-------------------------->|
+                          |                           |
+                          |       CMD_EXEC_RESP       |
+                          |(cmd id: 8, resp len = 4)  |
+                          |<--------------------------|
+                          |                           |
+                          |    CMD_EXEC_GET_DATA      |
+                          |        (ID = 0x8)         |
+                          |-------------------------->|
+                          |                           |
+                          |          RAW_DATA         |
+                          |       (0x00000009)        |
+                          |<--------------------------|
+                          |                           |
+                          |         CMD_EXEC          |
+                          | (cmd id: 9, resp len > 0) |
+                          |-------------------------->|
+                          |                           |
+                          |                           |
+                          |       CMD_EXEC_RESP       |
+                          |(cmd id: 9, resp len > 0)  |
+                          |<--------------------------|
+                          |                           |
+                          |    CMD_EXEC_GET_DATA      |
+                          |        (ID = 0x9)         |
+                          |-------------------------->|
+                          |                           |
+                          |          RAW_DATA         |
+                          |   (valid training data)   |
+                          |<--------------------------|
+                          |                           |
+                          |3. Host sends switch to    |
+                          |image tx mode to continue  |
+                          |booting.                   |
+                          |                           |
+                          |                           |
+                          |      CMD_SWITCH_MODE      |
+                          | (mode = IMAGE_TX_PENDING) |
+                          |-------------------------->|
+                          |                           |
+                          |                           |
+                          |          HELLO            |
+                          | (mode = IMAGE_TX_PENDING) |
+                          |<--------------------------|
+                          |                           |
+                          |         HELLO RESP        |
+                          | (mode = IMAGE_TX_PENDING) |
+                          |-------------------------->|
+                          |                           |
+                          |4. Boot/Load rest of the   |
+                          |    images....             |
+                          |                           |
+                          |       END_IMAGE_TX        |
+                          |<--------------------------|
+                          |                           |
+                          |                           |
+                          |          DONE             |
+                          |-------------------------->|
+                          |                           |
+                          |                           |
+                          |         DONE_RESP         |
+                          |(mode = IMAGE_TX_COMPLETE) |
+                          |<--------------------------|
+                          |                           |
+
+The packet flow sequence is as follows:
+
+1. The target sends the hello packet to the host to initiate the protocol
+   with the mode set to image transfer pending.
+
+2. The host sends a hello response packet with a success status and sets the
+   mode to image transfer pending after it receives the hello packet and
+   validates the protocol version running on the target.
+
+3. After the target receives the hello response, it initiates the data
+   transfer by requesting the size of DDR training/calibration data.
+
+4. The host sends back the DDR training/calibration data to the target.
+
+5. The target decodes the training data and does not find valid DDR
+   calibration data, so it sends END_IMAGE_TX to interrupt the transfer.
+
+6. The host sends DONE after it receives END_IMAGE_TX.
+
+7. The target sends DONE_RESP with mode = IMAGE_TX_PENDING because it has
+   not received all images.
+
+8. The target executes the DDR training process to generate valid DDR
+   calibration data and prepares to push it back to the host.
+
+9. The target initiates the protocol by sending a hello packet with
+   COMMAND_MODE to the host.
+
+10. The host sends a hello response packet with a success status and sets the
+    mode to COMMAND_MODE.
+
+11. The target sends CMD_READY to the host.
+
+12. The host receives CMD_READY and starts to get command IDs to be executed.
+
+13. The target sends CMD_ID = 9 to push DDR calibration data to host.
+
+14. The host executes CMD_ID = 9 to get DDR calibration data from the target.
+
+15. The target sends RAW_DATA with the payload which contains DDR calibration
+    data to host.
+
+16. The host saves the training data in a kernel buffer and exposes it to
+    userspace via a sysfs entry. The host sends CMD_SWITCH_MODE with the mode
+    set to IMAGE_TX_PENDING to continue booting.
+
+17. After the target receives the CMD_SWITCH_MODE command, it sends HELLO to
+    the host with the mode set to IMAGE_TX_PENDING. The target and the host
+    repeat the packet flow for image transfer to get all booting-required
+    images.
+
+18. Upon successful transfer of all images, the target sends an END_IMAGE_TX
+    packet with a success status to the host.
+
+19. The host sends DONE after it receives END_IMAGE_TX.
+
+20. The target sends DONE_RESP with the mode set to IMAGE_TX_COMPLETE because
+    it has received all images. The process has been completed after the host
+    receives DONE_RESP with the mode set to IMAGE_TX_COMPLETE.
+
+Subsequent boot scenario with valid DDR calibration data
+--------------------------------------------------------
+
+The figure below shows the subsequent boot scenario, in which valid DDR
+calibration data is loaded from the host to the target.
+
+.. code-block:: text
+
+                        Host                       Target
+                          |          HELLO            |
+                          |   (mode = image transfer) |
+                          |<--------------------------|
+                          |                           |
+                          |         HELLO RESP        |
+                          |   (mode = image transfer) |
+                          |-------------------------->|
+                          |                           |
+                          |         READ_DATA         |
+                          |   (img ID:34, 0, offset,  |
+                          | size of DDR training data)|
+                          |<--------------------------|
+                          |                           |
+                          |         RAW_DATA          |
+                          |(size of DDR training data)|
+                          |-------------------------->|
+                          |                           |
+                          |                           |
+                          |       END_IMAGE_TX        |
+                          |<--------------------------|
+                          |                           |
+                          |                           |
+                          |          DONE             |
+                          |-------------------------->|
+                          |                           |
+                          |                           |
+                          |         DONE_RESP         |
+                          | (mode = IMAGE_TX_PENDING) |
+                          |<--------------------------|
+                          |                           |
+                          | Subsequent boot scenario  |
+                          | (valid calibration data)  |
+                          | DDR driver configures DDR |
+                          | using valid calibration   |
+                          | data                      |
+                          |                           |
+                          |                           |
+                          |          HELLO            |
+                          | (mode = IMAGE_TX_PENDING) |
+                          |<--------------------------|
+                          |                           |
+                          |         HELLO RESP        |
+                          | (mode = IMAGE_TX_PENDING) |
+                          |-------------------------->|
+                          |                           |
+                          | Boot/Load rest of the     |
+                          |    images....             |
+                          |                           |
+                          |       END_IMAGE_TX        |
+                          |<--------------------------|
+                          |                           |
+                          |                           |
+                          |          DONE             |
+                          |-------------------------->|
+                          |                           |
+                          |                           |
+                          |         DONE_RESP         |
+                          |(mode = IMAGE_TX_COMPLETE) |
+                          |<--------------------------|
+                          |                           |
+
+The packet flow is as follows:
+
+1. The target sends the hello packet to the host to initiate the protocol
+   with the mode set to image transfer pending.
+
+2. The host sends a hello response packet with a success status and sets the
+   mode to image transfer pending after it receives the hello packet and
+   validates the protocol version running on the target.
+
+3. After the target receives the hello response, it initiates the image
+   transfer by requesting the training/calibration data from the host.
+
+4. The host sends back the DDR training/calibration data to the target.
+
+5. The target decodes the DDR training/calibration data and finds valid DDR
+   calibration data.
+
+6. The host sends RAW_DATA with the size of the DDR calibration data to the
+   target.
+
+7. Upon successful transfer of DDR calibration data, the target sends an
+   END_IMAGE_TX packet with a success status.
+
+8. The host sends DONE after it receives END_IMAGE_TX.
+
+9. The target sends DONE_RESP with mode = IMAGE_TX_PENDING because it has not
+   received all images.
+
+10. The target continues booting with valid DDR calibration data.
+
+11. The target and the host repeat the packet flow for image transfer to get
+    all booting-required images.
+
+12. After successful transfer of all images, the target sends an END_IMAGE_TX
+    packet with a success status to the host.
+
+13. The host sends DONE after it receives END_IMAGE_TX.
+
+14. The target sends DONE_RESP with the mode set to IMAGE_TX_COMPLETE because
+    it has received all images. The process has been completed after the host
+    receives DONE_RESP with the mode set to IMAGE_TX_COMPLETE.

-- 
2.34.1


^ permalink raw reply related

* [PATCH v5 3/7] bus: mhi: Centralize Sahara firmware image table selection at probe time
From: Kishore Batta @ 2026-04-16 14:09 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Jeff Hugo, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi,
	Kishore Batta
In-Reply-To: <20260416-sahara_protocol_new_v2-v5-0-6aebf005e4ba@oss.qualcomm.com>

The Sahara driver currently selects firmware image tables using scattered,
device specific conditionals in the probe path. This makes the logic harder
to follow, harder to extend for new devices, and spreads device knowledge
across multiple code paths.

Refactor firmware image table selection into a single, explicit probe time
mechanism by introducing a controller provided firmware mapping table that
captures device matching, Sahara image tables, firmware folder names, and
streaming behaviour in one place.

This centralizes device specific decisions in the controller driver,
simplifies the Sahara probe logic, and removes ad-hoc conditionals while
preserving existing behavior for all supported AIC devices. This is in
preparation for adding QDU100 support.

Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
---
 drivers/accel/qaic/mhi_controller.c          | 61 ++++++++++++++++++++++++++++
 drivers/bus/mhi/host/clients/sahara/sahara.c | 60 +++++----------------------
 include/linux/mhi.h                          | 17 ++++++++
 3 files changed, 88 insertions(+), 50 deletions(-)

diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
index 4d787f77ce419fcd2b250f9cabaec9c26f2da8dc..1f9ef871421b976c35cfad59aed715da96c1813b 100644
--- a/drivers/accel/qaic/mhi_controller.c
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -20,6 +20,62 @@ static unsigned int mhi_timeout_ms = 2000; /* 2 sec default */
 module_param(mhi_timeout_ms, uint, 0600);
 MODULE_PARM_DESC(mhi_timeout_ms, "MHI controller timeout value");
 
+static const char * const aic100_image_table[] = {
+	[1]  = "qcom/aic100/fw1.bin",
+	[2]  = "qcom/aic100/fw2.bin",
+	[4]  = "qcom/aic100/fw4.bin",
+	[5]  = "qcom/aic100/fw5.bin",
+	[6]  = "qcom/aic100/fw6.bin",
+	[8]  = "qcom/aic100/fw8.bin",
+	[9]  = "qcom/aic100/fw9.bin",
+	[10] = "qcom/aic100/fw10.bin",
+};
+
+static const char * const aic200_image_table[] = {
+	[5]  = "qcom/aic200/uefi.elf",
+	[12] = "qcom/aic200/aic200-nsp.bin",
+	[23] = "qcom/aic200/aop.mbn",
+	[32] = "qcom/aic200/tz.mbn",
+	[33] = "qcom/aic200/hypvm.mbn",
+	[38] = "qcom/aic200/xbl_config.elf",
+	[39] = "qcom/aic200/aic200_abl.elf",
+	[40] = "qcom/aic200/apdp.mbn",
+	[41] = "qcom/aic200/devcfg.mbn",
+	[42] = "qcom/aic200/sec.elf",
+	[43] = "qcom/aic200/aic200-hlos.elf",
+	[49] = "qcom/aic200/shrm.elf",
+	[50] = "qcom/aic200/cpucp.elf",
+	[51] = "qcom/aic200/aop_devcfg.mbn",
+	[54] = "qcom/aic200/qupv3fw.elf",
+	[57] = "qcom/aic200/cpucp_dtbs.elf",
+	[62] = "qcom/aic200/uefi_dtbs.elf",
+	[63] = "qcom/aic200/xbl_ac_config.mbn",
+	[64] = "qcom/aic200/tz_ac_config.mbn",
+	[65] = "qcom/aic200/hyp_ac_config.mbn",
+	[66] = "qcom/aic200/pdp.elf",
+	[67] = "qcom/aic200/pdp_cdb.elf",
+	[68] = "qcom/aic200/sdi.mbn",
+	[69] = "qcom/aic200/dcd.mbn",
+	[73] = "qcom/aic200/gearvm.mbn",
+	[74] = "qcom/aic200/sti.bin",
+	[76] = "qcom/aic200/tz_qti_config.mbn",
+	[78] = "qcom/aic200/pvs.bin",
+};
+
+static const struct mhi_sahara_fw_table aic100_sahara_fw = {
+	.image_table = aic100_image_table,
+	.table_size = ARRAY_SIZE(aic100_image_table),
+	.fw_folder = "aic100",
+	.non_streaming = true,
+};
+
+static const struct mhi_sahara_fw_table aic200_sahara_fw = {
+	.image_table = aic200_image_table,
+	.table_size = ARRAY_SIZE(aic200_image_table),
+	.fw_folder = "aic200",
+	.non_streaming = false,
+};
+
 static const char *fw_image_paths[FAMILY_MAX] = {
 	[FAMILY_AIC100] = "qcom/aic100/sbl.bin",
 	[FAMILY_AIC200] = "qcom/aic200/sbl.bin",
@@ -871,6 +927,11 @@ struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, voi
 		mhi_cntrl->name = "AIC100";
 	}
 
+	if (mhi_cntrl->name && !strcmp(mhi_cntrl->name, "AIC100"))
+		mhi_cntrl->sahara_fw = &aic100_sahara_fw;
+	else if (mhi_cntrl->name && !strcmp(mhi_cntrl->name, "AIC200"))
+		mhi_cntrl->sahara_fw = &aic200_sahara_fw;
+
 	/* use latest configured timeout */
 	mhi_config.timeout_ms = mhi_timeout_ms;
 	ret = mhi_register_controller(mhi_cntrl, &mhi_config);
diff --git a/drivers/bus/mhi/host/clients/sahara/sahara.c b/drivers/bus/mhi/host/clients/sahara/sahara.c
index 858dc5bc39c1ad42922cabef3b1abcd43bc4f0f4..e339c67e236af271645ca81cc517efd9eead87e4 100644
--- a/drivers/bus/mhi/host/clients/sahara/sahara.c
+++ b/drivers/bus/mhi/host/clients/sahara/sahara.c
@@ -179,48 +179,7 @@ struct sahara_context {
 	u32				read_data_length;
 	bool				is_mem_dump_mode;
 	bool				non_streaming;
-};
-
-static const char * const aic100_image_table[] = {
-	[1]  = "qcom/aic100/fw1.bin",
-	[2]  = "qcom/aic100/fw2.bin",
-	[4]  = "qcom/aic100/fw4.bin",
-	[5]  = "qcom/aic100/fw5.bin",
-	[6]  = "qcom/aic100/fw6.bin",
-	[8]  = "qcom/aic100/fw8.bin",
-	[9]  = "qcom/aic100/fw9.bin",
-	[10] = "qcom/aic100/fw10.bin",
-};
-
-static const char * const aic200_image_table[] = {
-	[5]  = "qcom/aic200/uefi.elf",
-	[12] = "qcom/aic200/aic200-nsp.bin",
-	[23] = "qcom/aic200/aop.mbn",
-	[32] = "qcom/aic200/tz.mbn",
-	[33] = "qcom/aic200/hypvm.mbn",
-	[38] = "qcom/aic200/xbl_config.elf",
-	[39] = "qcom/aic200/aic200_abl.elf",
-	[40] = "qcom/aic200/apdp.mbn",
-	[41] = "qcom/aic200/devcfg.mbn",
-	[42] = "qcom/aic200/sec.elf",
-	[43] = "qcom/aic200/aic200-hlos.elf",
-	[49] = "qcom/aic200/shrm.elf",
-	[50] = "qcom/aic200/cpucp.elf",
-	[51] = "qcom/aic200/aop_devcfg.mbn",
-	[54] = "qcom/aic200/qupv3fw.elf",
-	[57] = "qcom/aic200/cpucp_dtbs.elf",
-	[62] = "qcom/aic200/uefi_dtbs.elf",
-	[63] = "qcom/aic200/xbl_ac_config.mbn",
-	[64] = "qcom/aic200/tz_ac_config.mbn",
-	[65] = "qcom/aic200/hyp_ac_config.mbn",
-	[66] = "qcom/aic200/pdp.elf",
-	[67] = "qcom/aic200/pdp_cdb.elf",
-	[68] = "qcom/aic200/sdi.mbn",
-	[69] = "qcom/aic200/dcd.mbn",
-	[73] = "qcom/aic200/gearvm.mbn",
-	[74] = "qcom/aic200/sti.bin",
-	[76] = "qcom/aic200/tz_qti_config.mbn",
-	[78] = "qcom/aic200/pvs.bin",
+	const char			*fw_folder;
 };
 
 static bool is_streaming(struct sahara_context *context)
@@ -796,6 +755,7 @@ static void sahara_read_data_processing(struct work_struct *work)
 
 static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
 {
+	const struct mhi_sahara_fw_table *sahara_fw;
 	struct sahara_context *context;
 	int ret;
 	int i;
@@ -808,14 +768,14 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_
 	if (!context->rx)
 		return -ENOMEM;
 
-	if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) {
-		context->image_table = aic200_image_table;
-		context->table_size = ARRAY_SIZE(aic200_image_table);
-	} else {
-		context->image_table = aic100_image_table;
-		context->table_size = ARRAY_SIZE(aic100_image_table);
-		context->non_streaming = true;
-	}
+	sahara_fw = mhi_dev->mhi_cntrl->sahara_fw;
+	if (!sahara_fw || !sahara_fw->image_table || !sahara_fw->table_size)
+		return -ENODEV;
+
+	context->image_table = sahara_fw->image_table;
+	context->table_size = sahara_fw->table_size;
+	context->non_streaming = sahara_fw->non_streaming;
+	context->fw_folder = sahara_fw->fw_folder;
 
 	/*
 	 * There are two firmware implementations for READ_DATA handling.
diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index 88ccb3e14f481d6b85c2a314eb74ba960c2d4c81..060dafffac67c5c920adc1562a61a7233e8d583f 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -234,6 +234,21 @@ struct mhi_channel_config {
 	bool wake_capable;
 };
 
+/**
+ * struct mhi_sahara_fw_table - Controller provided sahara firmware mapping
+ * @image_table: Sparse array indexed by Sahara image ID
+ * @table_size: Size of @image_table
+ * @fw_folder: Firmware folder name.
+ * @non_streaming: Streaming feature support (optional)
+ *
+ */
+struct mhi_sahara_fw_table {
+	const char *const *image_table;
+	u32 table_size;
+	const char *fw_folder;
+	bool non_streaming;
+};
+
 /**
  * struct mhi_event_config - Event ring configuration structure for controller
  * @num_elements: The number of elements that can be queued to this ring
@@ -360,6 +375,7 @@ struct mhi_controller_config {
  * @wake_set: Device wakeup set flag
  * @irq_flags: irq flags passed to request_irq (optional)
  * @mru: the default MRU for the MHI device
+ * @sahara_fw: Sahara firmware mapping
  *
  * Fields marked as (required) need to be populated by the controller driver
  * before calling mhi_register_controller(). For the fields marked as (optional)
@@ -445,6 +461,7 @@ struct mhi_controller {
 	bool wake_set;
 	unsigned long irq_flags;
 	u32 mru;
+	const struct mhi_sahara_fw_table *sahara_fw;
 };
 
 /**

-- 
2.34.1


^ permalink raw reply related

* [PATCH v5 4/7] bus: mhi: Add QDU100 Sahara variant and firmware fallback
From: Kishore Batta @ 2026-04-16 14:09 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Jeff Hugo, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi,
	Kishore Batta
In-Reply-To: <20260416-sahara_protocol_new_v2-v5-0-6aebf005e4ba@oss.qualcomm.com>

The Sahara driver currently selects a firmware image table based on the
attached device, but it does not recognize QDU100 devices that expose the
protocol on the SAHARA MHI channel. As a result, the host cannot associate
QDU100 devices with the correct firmware namespace during image transfer.

Extend the probe time variant selection to match the SAHARA MHI channel and
associate it with the QDU100 firmware folder. Add a firmware lookup
fallback for cases where an image does not have an explicit entry in the
device's firmware table. This allows required images to be provisioned by
the platform.

This change only affects devices matched on the SAHARA MHI channel and
does not change behavior for existing AIC100 and AIC200 devices.

Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
---
 drivers/bus/mhi/host/clients/sahara/sahara.c | 27 +++++++++++++++--
 drivers/bus/mhi/host/pci_generic.c           | 45 ++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/bus/mhi/host/clients/sahara/sahara.c b/drivers/bus/mhi/host/clients/sahara/sahara.c
index e339c67e236af271645ca81cc517efd9eead87e4..9adbd84859073d8024ba2a5fcfa33897439d6759 100644
--- a/drivers/bus/mhi/host/clients/sahara/sahara.c
+++ b/drivers/bus/mhi/host/clients/sahara/sahara.c
@@ -189,6 +189,7 @@ static bool is_streaming(struct sahara_context *context)
 
 static int sahara_find_image(struct sahara_context *context, u32 image_id)
 {
+	char *fw_path;
 	int ret;
 
 	if (image_id == context->active_image_id)
@@ -201,8 +202,28 @@ static int sahara_find_image(struct sahara_context *context, u32 image_id)
 	}
 
 	if (image_id >= context->table_size || !context->image_table[image_id]) {
-		dev_err(&context->mhi_dev->dev, "request for unknown image: %d\n", image_id);
-		return -EINVAL;
+		if (!context->fw_folder) {
+			dev_err(&context->mhi_dev->dev,
+				"Request for unknown image: %u (no fw folder)\n", image_id);
+			return -EINVAL;
+		}
+
+		fw_path = kasprintf(GFP_KERNEL, "qcom/%s/%u",
+				    context->fw_folder, image_id);
+		if (!fw_path)
+			return -ENOMEM;
+
+		ret = firmware_request_nowarn(&context->firmware,
+					      fw_path,
+					      &context->mhi_dev->dev);
+		kfree(fw_path);
+		if (ret) {
+			dev_err(&context->mhi_dev->dev,
+				"request for unknown image: %d\n", image_id);
+			return -EINVAL;
+		}
+		context->active_image_id = image_id;
+		return 0;
 	}
 
 	/*
@@ -870,8 +891,10 @@ static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result
 
 static const struct mhi_device_id sahara_mhi_match_table[] = {
 	{ .chan = "QAIC_SAHARA", },
+	{ .chan = "SAHARA"},
 	{},
 };
+MODULE_DEVICE_TABLE(mhi, sahara_mhi_match_table);
 
 static struct mhi_driver sahara_mhi_driver = {
 	.id_table = sahara_mhi_match_table,
diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c
index 391ab146f501c6ce1c81f6138f7c491a49c2f264..82e41632afc555a53dec3d8395558ae039b33bbd 100644
--- a/drivers/bus/mhi/host/pci_generic.c
+++ b/drivers/bus/mhi/host/pci_generic.c
@@ -300,6 +300,43 @@ static const struct mhi_pci_dev_info mhi_qcom_qdu100_info = {
 	.reset_on_remove = true,
 };
 
+static const char * const qdu100_image_table[] = {
+	[5] = "qcom/qdu100/uefi.elf",
+	[8] = "qcom/qdu100/qdsp6sw.mbn",
+	[16] = "qcom/qdu100/efs1.bin",
+	[17] = "qcom/qdu100/efs2.bin",
+	[20] = "qcom/qdu100/efs3.bin",
+	[23] = "qcom/qdu100/aop.mbn",
+	[25] = "qcom/qdu100/tz.mbn",
+	[29] = "qcom/qdu100/zeros_1sector.bin",
+	[33] = "qcom/qdu100/hypvm.mbn",
+	[34] = "qcom/qdu100/mdmddr.mbn",
+	[36] = "qcom/qdu100/multi_image_qti.mbn",
+	[37] = "qcom/qdu100/multi_image.mbn",
+	[38] = "qcom/qdu100/xbl_config.elf",
+	[39] = "qcom/qdu100/abl_userdebug.elf",
+	[40] = "qcom/qdu100/zeros_1sector.bin",
+	[41] = "qcom/qdu100/devcfg.mbn",
+	[42] = "qcom/qdu100/zeros_1sector.bin",
+	[45] = "qcom/qdu100/tools_l.elf",
+	[46] = "qcom/qdu100/Quantum.elf",
+	[47] = "qcom/qdu100/quest.elf",
+	[48] = "qcom/qdu100/xbl_ramdump.elf",
+	[49] = "qcom/qdu100/shrm.elf",
+	[50] = "qcom/qdu100/cpucp.elf",
+	[51] = "qcom/qdu100/aop_devcfg.mbn",
+	[52] = "qcom/qdu100/fw_csm_gsi_3.0.elf",
+	[53] = "qcom/qdu100/qdsp6sw_dtbs.elf",
+	[54] = "qcom/qdu100/qupv3fw.elf",
+};
+
+static const struct mhi_sahara_fw_table qdu100_sahara_fw = {
+	.image_table = qdu100_image_table,
+	.table_size = ARRAY_SIZE(qdu100_image_table),
+	.fw_folder = "qdu100",
+	.non_streaming = false,
+};
+
 static const struct mhi_channel_config mhi_qcom_sa8775p_channels[] = {
 	MHI_CHANNEL_CONFIG_UL(46, "IP_SW0", 2048, 1),
 	MHI_CHANNEL_CONFIG_DL(47, "IP_SW0", 2048, 2),
@@ -1399,6 +1436,14 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	pci_set_drvdata(pdev, mhi_pdev);
 
+	/*
+	 * Provide Sahara firmware mapping. Sahara consumes it via
+	 * mhi_dev->mhi_cntrl->sahara_fw at probe time.
+	 */
+	if (info == &mhi_qcom_qdu100_info ||
+	    (info->name && !strcmp(info->name, "qcom-qdu100")))
+		mhi_cntrl->sahara_fw = &qdu100_sahara_fw;
+
 	/* Have stored pci confspace at hand for restore in sudden PCI error.
 	 * cache the state locally and discard the PCI core one.
 	 */

-- 
2.34.1


^ permalink raw reply related

* [PATCH v5 5/7] bus: mhi: Load DDR training data using device serial number
From: Kishore Batta @ 2026-04-16 14:09 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Jeff Hugo, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi,
	Kishore Batta
In-Reply-To: <20260416-sahara_protocol_new_v2-v5-0-6aebf005e4ba@oss.qualcomm.com>

Devices may provide device specific DDR training data that can be reused
across boots to avoid retraining and reduce boot time. The Sahara driver
currently always falls back to the default DDR training image, even when
serial specific training data is available.

Extend the firmware loading logic for the DDR training image to first
attempt loading a per-device image derived from the device serial number.
If the serial-specific image is not present, fall back to the existing
default image, preserving current behavior.

This allows reuse of previously generated DDR training data when available,
while keeping the existing training flow unchanged for devices without
saved data or for all other firmware images.

Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
---
 drivers/bus/mhi/host/clients/sahara/sahara.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/bus/mhi/host/clients/sahara/sahara.c b/drivers/bus/mhi/host/clients/sahara/sahara.c
index 9adbd84859073d8024ba2a5fcfa33897439d6759..b5ca6353540dc3815db6539e7424afdb749fd3f6 100644
--- a/drivers/bus/mhi/host/clients/sahara/sahara.c
+++ b/drivers/bus/mhi/host/clients/sahara/sahara.c
@@ -59,6 +59,7 @@
 #define SAHARA_RESET_LENGTH		0x8
 #define SAHARA_MEM_DEBUG64_LENGTH	0x18
 #define SAHARA_MEM_READ64_LENGTH	0x18
+#define SAHARA_DDR_TRAINING_IMG_ID	34
 
 struct sahara_packet {
 	__le32 cmd;
@@ -226,6 +227,27 @@ static int sahara_find_image(struct sahara_context *context, u32 image_id)
 		return 0;
 	}
 
+	/* DDR training special case: Try per-serial number file first */
+	if (image_id == SAHARA_DDR_TRAINING_IMG_ID && context->fw_folder) {
+		u32 serial_num = context->mhi_dev->mhi_cntrl->serial_number;
+
+		fw_path = kasprintf(GFP_KERNEL,
+				    "qcom/%s/mdmddr_0x%x.mbn",
+				    context->fw_folder, serial_num);
+		if (!fw_path)
+			return -ENOMEM;
+
+		ret = firmware_request_nowarn(&context->firmware,
+					      fw_path,
+					      &context->mhi_dev->dev);
+		kfree(fw_path);
+
+		if (!ret) {
+			context->active_image_id = image_id;
+			return 0;
+		}
+	}
+
 	/*
 	 * This image might be optional. The device may continue without it.
 	 * Only the device knows. Suppress error messages that could suggest an
@@ -235,7 +257,8 @@ static int sahara_find_image(struct sahara_context *context, u32 image_id)
 				      context->image_table[image_id],
 				      &context->mhi_dev->dev);
 	if (ret) {
-		dev_dbg(&context->mhi_dev->dev, "request for image id %d / file %s failed %d\n",
+		dev_dbg(&context->mhi_dev->dev,
+			"request for image id %d / file %s failed %d\n",
 			image_id, context->image_table[image_id], ret);
 		return ret;
 	}

-- 
2.34.1


^ permalink raw reply related

* [PATCH v5 6/7] bus: mhi: Capture DDR training data via command mode
From: Kishore Batta @ 2026-04-16 14:09 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Jeff Hugo, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi,
	Kishore Batta
In-Reply-To: <20260416-sahara_protocol_new_v2-v5-0-6aebf005e4ba@oss.qualcomm.com>

During early boot, devices may perform DDR training and produce training
data that can be reused on subsequent boots to reduce initialization
time. The Sahara protocol provides a command mode flow to transfer this
training data to the host, but the driver currently does not handle
command mode and drops the training payload.

Add Sahara command mode support to retrieve DDR training data from the
device. When the device enters command mode and sends CMD_READY, query
the supported command list (ID 8) and request DDR training data (ID 9) using
EXECUTE and EXECUTE_DATA as defined by the protocol. Allocate receive buffers
based on the reported response size and copy the raw payload directly from
the MHI DL completion callback.

Store the captured training data in controller-scoped memory using devres,
so it remains available after Sahara channel teardown. Also distinguish
raw payload completion from control packets in the DL callback, avoiding
misinterpretation of training data as protocol messages, and requeue
the RX buffer after switching back to IMAGE_TX_PENDING to allow the
boot flow to continue.

Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
---
 drivers/bus/mhi/host/clients/sahara/sahara.c | 326 ++++++++++++++++++++++++++-
 1 file changed, 319 insertions(+), 7 deletions(-)

diff --git a/drivers/bus/mhi/host/clients/sahara/sahara.c b/drivers/bus/mhi/host/clients/sahara/sahara.c
index b5ca6353540dc3815db6539e7424afdb749fd3f6..07bc743aa061dd2fa85638067d494562152474e3 100644
--- a/drivers/bus/mhi/host/clients/sahara/sahara.c
+++ b/drivers/bus/mhi/host/clients/sahara/sahara.c
@@ -5,11 +5,14 @@
  */
 
 #include <linux/devcoredump.h>
+#include <linux/device.h>
+#include <linux/device/devres.h>
 #include <linux/firmware.h>
 #include <linux/limits.h>
 #include <linux/mhi.h>
 #include <linux/minmax.h>
 #include <linux/mod_devicetable.h>
+#include <linux/mutex.h>
 #include <linux/overflow.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
@@ -59,7 +62,15 @@
 #define SAHARA_RESET_LENGTH		0x8
 #define SAHARA_MEM_DEBUG64_LENGTH	0x18
 #define SAHARA_MEM_READ64_LENGTH	0x18
+#define SAHARA_COMMAND_READY_LENGTH	0x8
+#define SAHARA_COMMAND_EXEC_RESP_LENGTH	0x10
+#define SAHARA_COMMAND_EXECUTE_LENGTH	0xc
+#define SAHARA_COMMAND_EXEC_DATA_LENGTH	0xc
+#define SAHARA_SWITCH_MODE_LENGTH	0xc
+#define SAHARA_EXEC_CMD_GET_COMMAND_ID_LIST	0x8
+#define SAHARA_EXEC_CMD_GET_TRAINING_DATA	0x9
 #define SAHARA_DDR_TRAINING_IMG_ID	34
+#define SAHARA_NUM_CMD_BUF		SAHARA_NUM_TX_BUF
 
 struct sahara_packet {
 	__le32 cmd;
@@ -95,6 +106,19 @@ struct sahara_packet {
 			__le64 memory_address;
 			__le64 memory_length;
 		} memory_read64;
+		struct {
+			__le32 client_command;
+		} command_execute;
+		struct {
+			__le32 client_command;
+			__le32 response_length;
+		} command_execute_resp;
+		struct {
+			__le32 client_command;
+		} command_exec_data;
+		struct {
+			__le32 mode;
+		} mode_switch;
 	};
 };
 
@@ -161,6 +185,7 @@ struct sahara_context {
 	struct work_struct		fw_work;
 	struct work_struct		dump_work;
 	struct work_struct		read_data_work;
+	struct work_struct		cmd_work;
 	struct mhi_device		*mhi_dev;
 	const char * const		*image_table;
 	u32				table_size;
@@ -181,6 +206,24 @@ struct sahara_context {
 	bool				is_mem_dump_mode;
 	bool				non_streaming;
 	const char			*fw_folder;
+	bool				is_cmd_mode;
+	bool				receiving_training_data;
+	size_t				training_size;
+	size_t				training_rcvd;
+	u32				training_nbuf;
+	char				*cmd_buff[SAHARA_NUM_CMD_BUF];
+};
+
+/*
+ * Controller-scoped training data store (per MHI controller device).
+ * Stored as devres resource on mhi_dev->mhi_cntrl->mhi_dev->dev.
+ */
+struct sahara_cntrl_training_data {
+	struct mutex lock;	/* Protects data, size, copied and receiving */
+	void *data;
+	size_t size;
+	size_t copied;
+	bool receiving;
 };
 
 static bool is_streaming(struct sahara_context *context)
@@ -188,6 +231,48 @@ static bool is_streaming(struct sahara_context *context)
 	return !context->non_streaming;
 }
 
+static void sahara_cntrl_training_release(struct device *dev, void *res)
+{
+	struct sahara_cntrl_training_data *ct = res;
+
+	mutex_lock(&ct->lock);
+	kfree(ct->data);
+	ct->data = NULL;
+	ct->size = 0;
+	ct->copied = 0;
+	ct->receiving = false;
+	mutex_unlock(&ct->lock);
+}
+
+static int sahara_cntrl_training_match(struct device *dev, void *res, void *match_data)
+{
+	/* Exactly one instance per controller */
+	return 1;
+}
+
+static struct sahara_cntrl_training_data *sahara_cntrl_training_get(struct device *dev)
+{
+	struct sahara_cntrl_training_data *ct;
+
+	ct = devres_find(dev, sahara_cntrl_training_release,
+			 sahara_cntrl_training_match, NULL);
+	if (ct)
+		return ct;
+
+	ct = devres_alloc(sahara_cntrl_training_release, sizeof(*ct), GFP_KERNEL);
+	if (!ct)
+		return NULL;
+
+	mutex_init(&ct->lock);
+	ct->data = NULL;
+	ct->size = 0;
+	ct->copied = 0;
+	ct->receiving = false;
+
+	devres_add(dev, ct);
+	return ct;
+}
+
 static int sahara_find_image(struct sahara_context *context, u32 image_id)
 {
 	char *fw_path;
@@ -282,6 +367,11 @@ static void sahara_send_reset(struct sahara_context *context)
 	context->is_mem_dump_mode = false;
 	context->read_data_offset = 0;
 	context->read_data_length = 0;
+	context->is_cmd_mode = false;
+	context->receiving_training_data = false;
+	context->training_size = 0;
+	context->training_rcvd = 0;
+	context->training_nbuf = 0;
 
 	context->tx[0]->cmd = cpu_to_le32(SAHARA_RESET_CMD);
 	context->tx[0]->length = cpu_to_le32(SAHARA_RESET_LENGTH);
@@ -317,7 +407,8 @@ static void sahara_hello(struct sahara_context *context)
 
 	if (le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_PENDING &&
 	    le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_COMPLETE &&
-	    le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_MEMORY_DEBUG) {
+	    le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_MEMORY_DEBUG &&
+	    le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_COMMAND) {
 		dev_err(&context->mhi_dev->dev, "Unsupported hello packet - mode %d\n",
 			le32_to_cpu(context->rx->hello.mode));
 		return;
@@ -336,6 +427,153 @@ static void sahara_hello(struct sahara_context *context)
 		dev_err(&context->mhi_dev->dev, "Unable to send hello response %d\n", ret);
 }
 
+static void sahara_switch_mode_to_img_tx(struct sahara_context *context)
+{
+	int ret;
+
+	context->tx[0]->cmd = cpu_to_le32(SAHARA_SWITCH_MODE_CMD);
+	context->tx[0]->length = cpu_to_le32(SAHARA_SWITCH_MODE_LENGTH);
+	context->tx[0]->mode_switch.mode = cpu_to_le32(SAHARA_MODE_IMAGE_TX_PENDING);
+
+	ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0],
+			    SAHARA_SWITCH_MODE_LENGTH, MHI_EOT);
+
+	if (ret)
+		dev_err(&context->mhi_dev->dev, "Unable to send mode switch %d\n", ret);
+}
+
+static void sahara_command_execute(struct sahara_context *context, u32 client_command)
+{
+	int ret;
+
+	context->tx[0]->cmd = cpu_to_le32(SAHARA_EXECUTE_CMD);
+	context->tx[0]->length = cpu_to_le32(SAHARA_COMMAND_EXECUTE_LENGTH);
+	context->tx[0]->command_execute.client_command = cpu_to_le32(client_command);
+
+	ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0],
+			    SAHARA_COMMAND_EXECUTE_LENGTH, MHI_EOT);
+	if (ret)
+		dev_err(&context->mhi_dev->dev, "Unable to send command execute %d\n", ret);
+}
+
+static void sahara_command_execute_data(struct sahara_context *context, u32 client_command)
+{
+	int ret;
+
+	context->tx[0]->cmd = cpu_to_le32(SAHARA_EXECUTE_DATA_CMD);
+	context->tx[0]->length = cpu_to_le32(SAHARA_COMMAND_EXEC_DATA_LENGTH);
+	context->tx[0]->command_exec_data.client_command = cpu_to_le32(client_command);
+
+	ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0],
+			    SAHARA_COMMAND_EXEC_DATA_LENGTH, MHI_EOT);
+	if (ret)
+		dev_err(&context->mhi_dev->dev, "Unable to send execute data %d\n", ret);
+}
+
+static void sahara_command_ready(struct sahara_context *context)
+{
+	if (le32_to_cpu(context->rx->length) != SAHARA_COMMAND_READY_LENGTH) {
+		dev_err(&context->mhi_dev->dev,
+			"Malformed command ready packet - length %u\n",
+			le32_to_cpu(context->rx->length));
+		return;
+	}
+
+	context->is_cmd_mode = true;
+	context->receiving_training_data = false;
+
+	sahara_command_execute(context, SAHARA_EXEC_CMD_GET_COMMAND_ID_LIST);
+}
+
+static void sahara_command_execute_resp(struct sahara_context *context)
+{
+	struct device *dev = &context->mhi_dev->mhi_cntrl->mhi_dev->dev;
+	struct sahara_cntrl_training_data *ct;
+	u32 client_cmd, resp_len;
+	int ret;
+	u64 remaining;
+	u32 i;
+
+	if (le32_to_cpu(context->rx->length) != SAHARA_COMMAND_EXEC_RESP_LENGTH ||
+	    le32_to_cpu(context->rx->command_execute_resp.response_length) < 0) {
+		dev_err(&context->mhi_dev->dev,
+			"Malformed command execute resp packet - length %d\n",
+			le32_to_cpu(context->rx->length));
+		return;
+	}
+
+	client_cmd = le32_to_cpu(context->rx->command_execute_resp.client_command);
+	resp_len = le32_to_cpu(context->rx->command_execute_resp.response_length);
+
+	sahara_command_execute_data(context, client_cmd);
+
+	if (client_cmd == SAHARA_EXEC_CMD_GET_COMMAND_ID_LIST) {
+		sahara_command_execute(context, SAHARA_EXEC_CMD_GET_TRAINING_DATA);
+		return;
+	}
+
+	if (client_cmd != SAHARA_EXEC_CMD_GET_TRAINING_DATA)
+		return;
+
+	ct = sahara_cntrl_training_get(dev);
+	if (!ct) {
+		context->is_cmd_mode = false;
+		sahara_switch_mode_to_img_tx(context);
+		return;
+	}
+
+	mutex_lock(&ct->lock);
+	kfree(ct->data);
+	ct->data = kzalloc(resp_len, GFP_KERNEL);
+	ct->size = resp_len;
+	ct->copied = 0;
+	ct->receiving = true;
+	mutex_unlock(&ct->lock);
+
+	if (!ct->data) {
+		context->is_cmd_mode = false;
+		sahara_switch_mode_to_img_tx(context);
+		return;
+	}
+
+	context->training_size = resp_len;
+	context->training_rcvd = 0;
+	context->receiving_training_data = true;
+
+	remaining = resp_len;
+	for (i = 0; i < SAHARA_NUM_CMD_BUF && remaining; i++) {
+		size_t pkt = min_t(size_t, remaining, SAHARA_PACKET_MAX_SIZE);
+
+		ret = mhi_queue_buf(context->mhi_dev, DMA_FROM_DEVICE,
+				    context->cmd_buff[i], pkt,
+				    (remaining <= pkt) ? MHI_EOT : MHI_CHAIN);
+		if (ret)
+			break;
+
+		remaining -= pkt;
+	}
+
+	context->training_nbuf = i;
+}
+
+static void sahara_command_processing(struct work_struct *work)
+{
+	struct sahara_context *context = container_of(work, struct sahara_context, cmd_work);
+	int ret;
+
+	if (le32_to_cpu(context->rx->cmd) == SAHARA_EXECUTE_RESP_CMD)
+		sahara_command_execute_resp(context);
+
+	if (!context->receiving_training_data) {
+		ret = mhi_queue_buf(context->mhi_dev, DMA_FROM_DEVICE,
+				    context->rx, SAHARA_PACKET_MAX_SIZE, MHI_EOT);
+
+		if (ret)
+			dev_err(&context->mhi_dev->dev,
+				"Unable to requeue rx buf %d\n", ret);
+	}
+}
+
 static int read_data_helper(struct sahara_context *context, int buf_index)
 {
 	enum mhi_flags mhi_flag;
@@ -562,6 +800,9 @@ static void sahara_processing(struct work_struct *work)
 	case SAHARA_MEM_DEBUG64_CMD:
 		sahara_memory_debug64(context);
 		break;
+	case SAHARA_CMD_READY_CMD:
+		sahara_command_ready(context);
+		break;
 	default:
 		dev_err(&context->mhi_dev->dev, "Unknown command %d\n",
 			le32_to_cpu(context->rx->cmd));
@@ -862,6 +1103,20 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_
 	INIT_WORK(&context->fw_work, sahara_processing);
 	INIT_WORK(&context->dump_work, sahara_dump_processing);
 	INIT_WORK(&context->read_data_work, sahara_read_data_processing);
+	INIT_WORK(&context->cmd_work, sahara_command_processing);
+
+	for (i = 0; i < SAHARA_NUM_CMD_BUF; i++) {
+		context->cmd_buff[i] = devm_kzalloc(&mhi_dev->dev,
+						    SAHARA_PACKET_MAX_SIZE, GFP_KERNEL);
+		if (!context->cmd_buff[i])
+			return -ENOMEM;
+	}
+
+	context->is_cmd_mode = false;
+	context->receiving_training_data = false;
+	context->training_size = 0;
+	context->training_rcvd = 0;
+	context->training_nbuf = 0;
 
 	context->active_image_id = SAHARA_IMAGE_ID_NONE;
 	dev_set_drvdata(&mhi_dev->dev, context);
@@ -885,6 +1140,7 @@ static void sahara_mhi_remove(struct mhi_device *mhi_dev)
 
 	cancel_work_sync(&context->fw_work);
 	cancel_work_sync(&context->dump_work);
+	cancel_work_sync(&context->cmd_work);
 	vfree(context->mem_dump);
 	sahara_release_image(context);
 	mhi_unprepare_from_transfer(mhi_dev);
@@ -901,15 +1157,71 @@ static void sahara_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result
 static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
 {
 	struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev);
+	struct sahara_cntrl_training_data *ct;
+	struct device *dev;
+	size_t copy;
+	int ret;
+	u32 i;
+
+	if (mhi_result->transaction_status)
+		return;
+
+	/*
+	 * Raw training payload completions arrive for cmd_buff[] buffers.
+	 * Do not schedule cmd_work for those.
+	 */
+	if (context->is_cmd_mode && context->receiving_training_data &&
+	    mhi_result->buf_addr != context->rx) {
+		dev = &context->mhi_dev->mhi_cntrl->mhi_dev->dev;
+		ct = sahara_cntrl_training_get(dev);
+		if (!ct)
+			return;
 
-	if (!mhi_result->transaction_status) {
-		context->rx_size = mhi_result->bytes_xferd;
-		if (context->is_mem_dump_mode)
-			schedule_work(&context->dump_work);
-		else
-			schedule_work(&context->fw_work);
+		for (i = 0; i < context->training_nbuf; i++) {
+			if (mhi_result->buf_addr == context->cmd_buff[i]) {
+				mutex_lock(&ct->lock);
+				copy = min_t(size_t, mhi_result->bytes_xferd,
+					     ct->size - ct->copied);
+				memcpy((u8 *)ct->data + ct->copied,
+				       mhi_result->buf_addr, copy);
+				ct->copied += copy;
+				mutex_unlock(&ct->lock);
+
+				context->training_rcvd += copy;
+
+				if (context->training_rcvd >= context->training_size) {
+					mutex_lock(&ct->lock);
+					ct->receiving = false;
+					mutex_unlock(&ct->lock);
+
+					context->receiving_training_data = false;
+					context->is_cmd_mode = false;
+
+					sahara_switch_mode_to_img_tx(context);
+					ret = mhi_queue_buf(context->mhi_dev,
+							    DMA_FROM_DEVICE,
+							    context->rx,
+							    SAHARA_PACKET_MAX_SIZE,
+							    MHI_EOT);
+					if (ret)
+						dev_err(&context->mhi_dev->dev,
+							"Unable to requeue rx buf %d\n", ret);
+				}
+				return;
+			}
+		}
+		return;
 	}
 
+	/* Normal Rx completion */
+	context->rx_size = mhi_result->bytes_xferd;
+	if (context->is_mem_dump_mode)
+		schedule_work(&context->dump_work);
+	else if (context->is_cmd_mode)
+		schedule_work(&context->cmd_work);
+	else
+		schedule_work(&context->fw_work);
+
 }
 
 static const struct mhi_device_id sahara_mhi_match_table[] = {

-- 
2.34.1


^ permalink raw reply related

* [PATCH v5 7/7] bus: mhi: Expose DDR training data via controller sysfs
From: Kishore Batta @ 2026-04-16 14:09 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, Jeff Hugo, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi,
	Kishore Batta
In-Reply-To: <20260416-sahara_protocol_new_v2-v5-0-6aebf005e4ba@oss.qualcomm.com>

DDR training data captured during Sahara command mode needs to be
accessible to userspace so it can be persisted and reused on subsequent
boots. Currently, the training data is stored internally in the driver
but has no external visibility once the Sahara channel is torn down.

Expose the captured DDR training data via a read-only binary sysfs
attribute on the MHI controller device:

/sys/bus/mhi/devices/<mhi_cntrl>/ddr_training_data

The sysfs read callback serves data directly from controller scoped storage
and protects access with the controller training data lock. The attribute
lifetime is tied to the controller device via devres, allowing the data to
remain readable after Sahara channel teardown and ensuring automatic
cleanup when the controller device is removed.

Userspace flow:
1. For each controller device, userspace reads the ddr_training_data sysfs
   attribute.
2. If the read returns non-zero data, userspace persists it using a
   serial specific filename (for example, mdmddr_0x<serial_no>.mbn).
3. On subsequent boots, the Sahara driver attempts to load this serial
   specific DDR training image before falling back to the default
   training image, restoring DDR calibration data and avoiding retraining.

Add ABI documentation for the DDR training data sysfs attribute exposed by
the Sahara MHI driver.

Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
---
 .../ABI/testing/sysfs-bus-mhi-ddr_training_data    | 19 ++++++
 drivers/bus/mhi/host/clients/sahara/sahara.c       | 69 ++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-mhi-ddr_training_data b/Documentation/ABI/testing/sysfs-bus-mhi-ddr_training_data
new file mode 100644
index 0000000000000000000000000000000000000000..810b487b5a5fdba133d81255f9879844e3938a10
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-mhi-ddr_training_data
@@ -0,0 +1,19 @@
+What:                   /sys/bus/mhi/devices/<mhi-cntrl>/ddr_training_data
+
+Date:                   March 2026
+
+Contact:                Kishore Batta <kishore.batta@oss.qualcomm.com>
+
+Description:            Contains the DDR training data for the Qualcomm device
+                        connected. MHI driver populates different controller
+                        nodes for each device. The DDR training data is exposed
+                        to userspace to read and save the training data file to
+                        the filesystem. In the subsequent boot up of the device,
+                        the training data is restored from host to device
+                        optimizing the boot up time of the device.
+
+Usage:                  Example for reading DDR training data:
+                        cat /sys/bus/mhi/devices/mhi0/ddr_training_data
+
+Permissions:            The file permissions are set to 0444 allowing read
+                        access.
diff --git a/drivers/bus/mhi/host/clients/sahara/sahara.c b/drivers/bus/mhi/host/clients/sahara/sahara.c
index 07bc743aa061dd2fa85638067d494562152474e3..fef5dc1d8884133397d204f23361584fd1d9b075 100644
--- a/drivers/bus/mhi/host/clients/sahara/sahara.c
+++ b/drivers/bus/mhi/host/clients/sahara/sahara.c
@@ -273,6 +273,73 @@ static struct sahara_cntrl_training_data *sahara_cntrl_training_get(struct devic
 	return ct;
 }
 
+static ssize_t ddr_training_data_read(struct file *filp, struct kobject *kobj,
+				      const struct bin_attribute *attr, char *buf,
+				      loff_t offset, size_t count)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct sahara_cntrl_training_data *ct;
+	size_t available;
+
+	ct = sahara_cntrl_training_get(dev);
+	if (!ct)
+		return -ENODEV;
+
+	mutex_lock(&ct->lock);
+
+	/* No data yet or offset past end */
+	if (!ct->data || offset >= ct->size) {
+		mutex_unlock(&ct->lock);
+		return 0;
+	}
+
+	available = ct->size - offset;
+	count = min(count, available);
+	memcpy(buf, (u8 *)ct->data + offset, count);
+
+	mutex_unlock(&ct->lock);
+
+	return count;
+}
+
+static const struct bin_attribute ddr_training_data_attr = {
+	.attr = {
+		.name = "ddr_training_data",
+		.mode = 0444,
+	},
+	.read = ddr_training_data_read,
+};
+
+static void sahara_sysfs_devres_release(struct device *dev, void *res)
+{
+	device_remove_bin_file(dev, &ddr_training_data_attr);
+}
+
+static void sahara_sysfs_create(struct mhi_device *mhi_dev)
+{
+	struct device *dev = &mhi_dev->mhi_cntrl->mhi_dev->dev;
+	void *cookie;
+	int ret;
+
+	if (devres_find(dev, sahara_sysfs_devres_release, NULL, NULL))
+		return;
+
+	ret = device_create_bin_file(dev, &ddr_training_data_attr);
+	if (ret) {
+		dev_warn(&mhi_dev->dev,
+			 "Failed to create DDR training sysfs node (%d)\n", ret);
+		return;
+	}
+
+	cookie = devres_alloc(sahara_sysfs_devres_release, 1, GFP_KERNEL);
+	if (!cookie) {
+		device_remove_bin_file(dev, &ddr_training_data_attr);
+		return;
+	}
+
+	devres_add(dev, cookie);
+}
+
 static int sahara_find_image(struct sahara_context *context, u32 image_id)
 {
 	char *fw_path;
@@ -1131,6 +1198,8 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_
 		return ret;
 	}
 
+	sahara_sysfs_create(mhi_dev);
+
 	return 0;
 }
 

-- 
2.34.1


^ permalink raw reply related

* Re: [PATCH v2 2/2] hwmon: (pmbus/max20830) add driver for max20830
From: Nuno Sá @ 2026-04-16 14:38 UTC (permalink / raw)
  To: Alexis Czezar Torreno
  Cc: Guenter Roeck, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Jonathan Corbet, Shuah Khan, linux-hwmon, devicetree,
	linux-kernel, linux-doc
In-Reply-To: <20260416-dev_max20830-v2-2-2c7d676dc0bd@analog.com>

On Thu, Apr 16, 2026 at 03:59:11PM +0800, Alexis Czezar Torreno wrote:
> Add support for MAX20830 step-down DC-DC switching regulator with
> PMBus interface. It allows monitoring of input/output voltage,
> output current and temperature through the PMBus serial interface.
> 
> Signed-off-by: Alexis Czezar Torreno <alexisczezar.torreno@analog.com>
> ---
>  Documentation/hwmon/index.rst    |  1 +
>  Documentation/hwmon/max20830.rst | 49 +++++++++++++++++++++++
>  MAINTAINERS                      |  2 +
>  drivers/hwmon/pmbus/Kconfig      |  9 +++++
>  drivers/hwmon/pmbus/Makefile     |  1 +
>  drivers/hwmon/pmbus/max20830.c   | 86 ++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 148 insertions(+)
> 
> diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
> index 8b655e5d6b68b90c697a52c7bf526e81d370caf7..56f7eb761be76dd627a2f34135abad05203b0582 100644
> --- a/Documentation/hwmon/index.rst
> +++ b/Documentation/hwmon/index.rst
> @@ -158,6 +158,7 @@ Hardware Monitoring Kernel Drivers
>     max197
>     max20730
>     max20751
> +   max20830
>     max31722
>     max31730
>     max31760
> diff --git a/Documentation/hwmon/max20830.rst b/Documentation/hwmon/max20830.rst
> new file mode 100644
> index 0000000000000000000000000000000000000000..936e409dcc5c0898dde27d782308d4a7e1357e73
> --- /dev/null
> +++ b/Documentation/hwmon/max20830.rst
> @@ -0,0 +1,49 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +Kernel driver max20830
> +======================
> +
> +Supported chips:
> +
> +  * Analog Devices MAX20830
> +
> +    Prefix: 'max20830'
> +
> +    Addresses scanned: -
> +
> +    Datasheet: https://www.analog.com/media/en/technical-documentation/data-sheets/max20830.pdf
> +
> +Author:
> +
> +  - Alexis Czezar Torreno <alexisczezar.torreno@analog.com>
> +
> +
> +Description
> +-----------
> +
> +This driver supports hardware monitoring for Analog Devices MAX20830
> +Step-Down Switching Regulator with PMBus Interface.
> +
> +The MAX20830 is a 2.7V to 16V, 30A fully integrated step-down DC-DC switching
> +regulator. Through the PMBus interface, the device can monitor input/output
> +voltages, output current and temperature.
> +
> +The driver is a client driver to the core PMBus driver. Please see
> +Documentation/hwmon/pmbus.rst for details on PMBus client drivers.
> +
> +Sysfs entries
> +-------------
> +
> +================= ========================================
> +in1_label         "vin"
> +in1_input         Measured input voltage
> +in1_alarm         Input voltage alarm
> +in2_label         "vout1"
> +in2_input         Measured output voltage
> +in2_alarm         Output voltage alarm
> +curr1_label       "iout1"
> +curr1_input       Measured output current
> +curr1_alarm       Output current alarm
> +temp1_input       Measured temperature
> +temp1_alarm       Chip temperature alarm
> +================= ========================================
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 031c743e979521a92ed9ac67915c178ce31727bd..d6a6745e2dae29c3b8f80bbe61c54a2f5ecd9f47 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -15585,6 +15585,8 @@ L:	linux-hwmon@vger.kernel.org
>  S:	Supported
>  W:	https://ez.analog.com/linux-software-drivers
>  F:	Documentation/devicetree/bindings/hwmon/pmbus/adi,max20830.yaml
> +F:	Documentation/hwmon/max20830.rst
> +F:	drivers/hwmon/pmbus/max20830.c
>  
>  MAX2175 SDR TUNER DRIVER
>  M:	Ramesh Shanmugasundaram <rashanmu@gmail.com>
> diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
> index 8f4bff375ecbc355f5ed3400855c2852ec2aa5ef..987705bf45b75b7b91ccc469247909f3c3f53d77 100644
> --- a/drivers/hwmon/pmbus/Kconfig
> +++ b/drivers/hwmon/pmbus/Kconfig
> @@ -365,6 +365,15 @@ config SENSORS_MAX20751
>  	  This driver can also be built as a module. If so, the module will
>  	  be called max20751.
>  
> +config SENSORS_MAX20830
> +	tristate "Analog Devices MAX20830"
> +	help
> +	  If you say yes here you get hardware monitoring support for Analog
> +	  Devices MAX20830.
> +
> +	  This driver can also be built as a module. If so, the module will
> +	  be called max20830.
> +
>  config SENSORS_MAX31785
>  	tristate "Maxim MAX31785 and compatibles"
>  	help
> diff --git a/drivers/hwmon/pmbus/Makefile b/drivers/hwmon/pmbus/Makefile
> index 7129b62bc00f8a2e98de14004997752a856dfda2..bc52f930e0825a902a0dd1c9e2b44f2e8d577c35 100644
> --- a/drivers/hwmon/pmbus/Makefile
> +++ b/drivers/hwmon/pmbus/Makefile
> @@ -36,6 +36,7 @@ obj-$(CONFIG_SENSORS_MAX16601)	+= max16601.o
>  obj-$(CONFIG_SENSORS_MAX17616)	+= max17616.o
>  obj-$(CONFIG_SENSORS_MAX20730)	+= max20730.o
>  obj-$(CONFIG_SENSORS_MAX20751)	+= max20751.o
> +obj-$(CONFIG_SENSORS_MAX20830)	+= max20830.o
>  obj-$(CONFIG_SENSORS_MAX31785)	+= max31785.o
>  obj-$(CONFIG_SENSORS_MAX34440)	+= max34440.o
>  obj-$(CONFIG_SENSORS_MAX8688)	+= max8688.o
> diff --git a/drivers/hwmon/pmbus/max20830.c b/drivers/hwmon/pmbus/max20830.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..21ea8b59150cb0564f1776ee08131bad7fdef003
> --- /dev/null
> +++ b/drivers/hwmon/pmbus/max20830.c
> @@ -0,0 +1,86 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Hardware monitoring driver for Analog Devices MAX20830
> + *
> + * Copyright (C) 2026 Analog Devices, Inc.
> + */
> +
> +#include <linux/i2c.h>
> +#include <linux/mod_devicetable.h>
> +#include <linux/module.h>
> +#include "pmbus.h"
> +
> +static struct pmbus_driver_info max20830_info = {
> +	.pages = 1,
> +	.format[PSC_VOLTAGE_IN] = linear,
> +	.format[PSC_VOLTAGE_OUT] = linear,
> +	.format[PSC_CURRENT_OUT] = linear,
> +	.format[PSC_TEMPERATURE] = linear,
> +	.func[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT | PMBUS_HAVE_IOUT |
> +		PMBUS_HAVE_TEMP |
> +		PMBUS_HAVE_STATUS_VOUT | PMBUS_HAVE_STATUS_IOUT |
> +		PMBUS_HAVE_STATUS_INPUT | PMBUS_HAVE_STATUS_TEMP,
> +};
> +
> +static int max20830_probe(struct i2c_client *client)
> +{
> +	u8 buf[I2C_SMBUS_BLOCK_MAX + 1];

AI seems a bit paranoid but maybe to be on the safe side just initialize
the above buffer.

> +	u8 len;
> +	int ret;
> +
> +	if (!i2c_check_functionality(client->adapter,
> +				     I2C_FUNC_SMBUS_READ_I2C_BLOCK))
> +		return -ENODEV;
> +
> +	/*
> +	 * Use i2c_smbus_read_i2c_block_data() instead of
> +	 * i2c_smbus_read_block_data() to support I2C controllers
> +	 * which do not support SMBus block reads.
> +	 */
> +	ret = i2c_smbus_read_i2c_block_data(client, PMBUS_IC_DEVICE_ID,
> +					    I2C_SMBUS_BLOCK_MAX, buf);
> +	if (ret < 0)
> +		return dev_err_probe(&client->dev, ret,
> +				     "Failed to read IC_DEVICE_ID\n");
> +
> +	/* First byte is the block length. */
> +	len = buf[0];
> +	if (len != 9)
> +		return dev_err_probe(&client->dev, -ENODEV,
> +				     "Unexpected IC_DEVICE_ID response\n");
> +
> +	buf[len] = '\0';

It looks like it has a point in the above though.

- Nuno Sá


^ permalink raw reply

* [PATCH] Documentation: update deferred_probe_timeout cmdline parameter documentation
From: Hans de Goede @ 2026-04-16 14:41 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan
  Cc: Hans de Goede, linux-doc, linux-kernel, Danilo Krummrich

Extend the deferred_probe_timeout cmdline parameter documentation,
documenting that negative values are treated as an infinite timeout value.

Suggested-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 03a550630644..0f1a1332480f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1249,8 +1249,9 @@ Kernel parameters
 			deferred probe to give up waiting on dependencies to
 			probe. Only specific dependencies (subsystems or
 			drivers) that have opted in will be ignored. A timeout
-			of 0 will timeout at the end of initcalls. If the time
-			out hasn't expired, it'll be restarted by each
+			of 0 will timeout at the end of initcalls; a negative
+			value is treated as an infinite timeout value. If the
+			timeout hasn't expired, it'll be restarted by each
 			successful driver registration. This option will also
 			dump out devices still on the deferred probe list after
 			retrying.
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH v4 0/3] mm/memory-failure: add panic option for unrecoverable pages
From: Breno Leitao @ 2026-04-16 15:32 UTC (permalink / raw)
  To: Jiaqi Yan
  Cc: Miaohe Lin, Naoya Horiguchi, Andrew Morton, Jonathan Corbet,
	Shuah Khan, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	linux-mm, linux-kernel, linux-doc, kernel-team
In-Reply-To: <CACw3F51PC0iB6mfbiceQ_Kh242FN8zdXOfTyE5Pa_5+gjTPPGg@mail.gmail.com>

Hi Jiaqi,

On Wed, Apr 15, 2026 at 01:56:35PM -0700, Jiaqi Yan wrote:
> On Wed, Apr 15, 2026 at 5:55 AM Breno Leitao <leitao@debian.org> wrote:
> >
> > When the memory failure handler encounters an in-use kernel page that it
> > cannot recover (slab, page tables, kernel stacks, vmalloc, etc.), it
> > currently logs the error as "Ignored" and continues operation.
> >
> > This leaves corrupted data accessible to the kernel, which will inevitably
> > cause either silent data corruption or a delayed crash when the poisoned memory
> > is next accessed.
> >
> > This is a common problem on large fleets. We frequently observe multi-bit ECC
> > errors hitting kernel slab pages, where memory_failure() fails to recover them
> > and the system crashes later at an unrelated code path, making root cause
> > analysis unnecessarily difficult.
> >
> > Here is one specific example from production on an arm64 server: a multi-bit
> > ECC error hit a dentry cache slab page, memory_failure() failed to recover it
> > (slab pages are not supported by the hwpoison recovery mechanism), and 67
> > seconds later d_lookup() accessed the poisoned cache line causing
> > a synchronous external abort:
> >
> >     [88690.479680] [Hardware Error]: error_type: 3, multi-bit ECC
> >     [88690.498473] Memory failure: 0x40272d: unhandlable page.
> >     [88690.498619] Memory failure: 0x40272d: recovery action for
> >                    get hwpoison page: Ignored
> >     ...
> >     [88757.847126] Internal error: synchronous external abort:
> >                    0000000096000410 [#1] SMP
> >     [88758.061075] pc : d_lookup+0x5c/0x220
> >
> > This series adds a new sysctl vm.panic_on_unrecoverable_memory_failure
> > (default 0) that, when enabled, panics immediately on unrecoverable
> > memory failures. This provides a clean crash dump at the time of the
>
> I get the fail-fast part, but wonder will kernel really be able to
> provide clean crash dump useful for diagnosis?

Yes, the kernel does provide a useful crash dump. With the sysctl enabled,
here's what I observe:

	Kernel panic - not syncing: Memory failure: 0x1: unrecoverable page
	CPU: 40 UID: 0 PID: 682 Comm: bash Tainted: G B  7.0.0-next-20260414-upstream-00004-gcbb3af7bfd3b #93
	Tainted: [B]=BAD_PAGE

	Call Trace:
	 <TASK>
	 vpanic+0x399/0x700
	 panic+0xb4/0xc0
	 action_result+0x278/0x340          ← your new panic call site
	 memory_failure+0x152b/0x1c80


Without the patch (or with the sysctl disabled), you only get:

	Memory failure: 0x1: unhandlable page.
	Memory failure: 0x1: recovery action for reserved kernel page: Ignored

Then the host continues running until it eventually accesses that poisoned
memory, triggering a generic error similar to the d_lookup() case mentioned
above.

> In your example at 88757.847126, kernel was handling SEA and because
> we are under kernel context, eventually has to die(). Apparently not
> only your patch, but also memory-failure has no role to play there.
> But at least SEA handling tried its best to show the kernel code that
> consumed the memory error.
>
> So your code should apply to the memory failure handling at
> 88690.498473, which is likely triggered from APEI GHES for poison
> detection (I guess the example is from ARM64). Anything except SEA is
> considered not synchronous (by APEI is_hest_sync_notify()). If kernel
> panics there, I guess it will be in a random process context or a
> kworker thread? How useful is it for diagnosis? Just the exact time an
> error detected (which is already logged by kernel)?

The kernel panics with a clear stack trace and explicit reason, making it
straightforward to correlate and analyze the failure.

My objective is to have a clean, immediate crash rather than allowing the
system to continue running and potentially crash later (if at all).

Working at a hyperscaler, I regularly see thousands of these "unhandlable
page" messages, followed by later kernel crashes when the corrupted memory
is eventually accessed.

> On X86, for UCNA or SRAO type machine check exceptions, I think with
> your patch the panic would also happen in random process context or
> kworker thread,
>
> Can you share some clean crash dumps from your testing that show they
> are more useful than the crash at SEA? Thanks!

Certainly, here is the complete crash dump from the example above. This
happened on real production hardware:

	[88690.478913] [ T593001] {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 784
	[88690.479097] [ T593001] {1}[Hardware Error]: event severity: recoverable
	[88690.479184] [ T593001] {1}[Hardware Error]:  imprecise tstamp: 2026-03-20 13:13:08
	[88690.479282] [ T593001] {1}[Hardware Error]:  Error 0, type: recoverable
	[88690.479359] [ T593001] {1}[Hardware Error]:   section_type: memory error
	[88690.479424] [ T593001] {1}[Hardware Error]:   physical_address: 0x00000040272d5080
	[88690.479503] [ T593001] {1}[Hardware Error]:   physical_address_mask: 0xfffffffffffff000
	[88690.479606] [ T593001] {1}[Hardware Error]:   node:0 card:0 module:1 rank:1 bank:13 device:6 row:64114 column:832 requestor_id:0x0000000000000027 
	[88690.479680] [ T593001] {1}[Hardware Error]:   error_type: 3, multi-bit ECC
	[88690.479754] [ T593001] {1}[Hardware Error]:   DIMM location: not present. DMI handle: 0x000e 
	[88690.479882] [ T593001] EDAC MC0: 1 UE multi-bit ECC on unknown memory (node:0 card:0 module:1 rank:1 bank:13 device:6 row:64114 column:832 requestor_id:0x0000000000000027 DIMM location: not present. DMI handle: 0x000e page:0x40272d offset:0x5080 grain:4096 - APEI location: node:0 card:0 module:1 rank:1 bank:13 device:6 row:64114 column:832 requestor_id:0x0000000000000027 DIMM location: not present. DMI handle: 0x000e)
	[88690.498473] [ T593001] Memory failure: 0x40272d: unhandlable page.
	[88690.498619] [ T593001] Memory failure: 0x40272d: recovery action for get hwpoison page: Ignored
	[88757.847126] [ T640437] Internal error: synchronous external abort: 0000000096000410 [#1]  SMP
	[88757.867131] [ T640437] Modules linked in: ghes_edac(E) act_gact(E) sch_fq(E) tcp_diag(E) inet_diag(E) cls_bpf(E) mlx5_ib(E) sm3_ce(E) sha3_ce(E) sha512_ce(E) ipmi_ssif(E) ipmi_devintf(E) nvidia_cspmu(E) ib_uverbs(E) cppc_cpufreq(E) coresight_etm4x(E) coresight_stm(E) ipmi_msghandler(E) coresight_trbe(E) arm_cspmu_module(E) arm_smmuv3_pmu(E) arm_spe_pmu(E) stm_core(E) coresight_tmc(E) coresight_funnel(E) coresight(E) bpf_preload(E) sch_fq_codel(E) ip_tables(E) ip6_tables(E) vhost_net(E) tun(E) vhost(E) vhost_iotlb(E) tap(E) tls(E) mpls_gso(E) mpls_iptunnel(E) mpls_router(E) fou(E) acpi_power_meter(E) loop(E) drm(E) backlight(E) drm_panel_orientation_quirks(E) autofs4(E) raid0(E) efivarfs(E) dm_crypt(E)
	[88757.991191] [ T640437] CPU: 70 UID: 34133 PID: 640437 Comm: Collection-20 Kdump: loaded Tainted: G   M        E       6.16.1-0_fbk2_0_gf40efc324cc8 #1 NONE 
	[88758.017569] [ T640437] Tainted: [M]=MACHINE_CHECK, [E]=UNSIGNED_MODULE
	[88758.028860] [ T640437] Hardware name: ....
	[88758.046969] [ T640437] pstate: 23401009 (nzCv daif +PAN -UAO +TCO +DIT +SSBS BTYPE=--)
	[88758.061075] [ T640437] pc : d_lookup+0x5c/0x220
	[88758.068392] [ T640437] lr : try_lookup_noperm+0x30/0x50
	[88758.077088] [ T640437] sp : ffff800138cafc30
	[88758.083827] [ T640437] x29: ffff800138cafc40 x28: ffff0001dcfe8bc0 x27: 00000000bc0a11f7
	[88758.098321] [ T640437] x26: 00000000000ee00c x25: ffffffffffffffff x24: 0000000000000001
	[88758.112807] [ T640437] x23: ffff003fa14d0000 x22: ffff8000828d3740 x21: ffff800138cafde8
	[88758.127281] [ T640437] x20: ffff0000d0316fc0 x19: ffff800138cafce0 x18: 0001000000000000
	[88758.141753] [ T640437] x17: 0000000000000001 x16: 0000000001ffffff x15: dfc038a300003936
	[88758.156226] [ T640437] x14: 00000000fffffffa x13: ffffffffffffffff x12: ffff0000d0316fc0
	[88758.170695] [ T640437] x11: 61c8864680b583eb x10: 0000000000000039 x9 : ffff800080fcfd68
	[88758.185170] [ T640437] x8 : ffff003fa72d5088 x7 : 0000000000000000 x6 : ffff800138cafd58
	[88758.199645] [ T640437] x5 : ffff0001dcfe8bc0 x4 : ffff80008104a330 x3 : 0000000000000002
	[88758.214111] [ T640437] x2 : ffff800138cafd4d x1 : ffff800138cafce0 x0 : ffff0000d0316fc0
	[88758.228579] [ T640437] Call trace:
	[88758.233565] [ T640437]  d_lookup+0x5c/0x220 (P)
	[88758.240864] [ T640437]  try_lookup_noperm+0x30/0x50
	[88758.248868] [ T640437]  proc_fill_cache+0x54/0x140
	[88758.256696] [ T640437]  proc_readfd_common+0x138/0x1e8
	[88758.265222] [ T640437]  proc_fd_iterate.llvm.7260857650841435759+0x1c/0x30
	[88758.277248] [ T640437]  iterate_dir+0x84/0x228
	[88758.284354] [ T640437]  __arm64_sys_getdents64+0x5c/0x110
	[88758.293383] [ T640437]  invoke_syscall+0x4c/0xd0
	[88758.300843] [ T640437]  do_el0_svc+0x80/0xb8
	[88758.307599] [ T640437]  el0_svc+0x30/0xf0
	[88758.313820] [ T640437]  el0t_64_sync_handler+0x70/0x100
	[88758.322497] [ T640437]  el0t_64_sync+0x17c/0x180
	...

And my clean crash would look like the following:

	[ 1096.480523] Memory failure: 0x2: recovery action for reserved kernel page: Ignored
	[ 1096.480751] Kernel panic - not syncing: Memory failure: 0x2: unrecoverable page
	[ 1096.480760] CPU: 5 UID: 0 PID: 683 Comm: bash Tainted: G    B               7.0.0-next-20260414-upstream-00004-gcbb3af7bfd3b #93 PREEMPTLAZY
	[ 1096.480768] Tainted: [B]=BAD_PAGE
	[ 1096.480774] Call Trace:
	[ 1096.480778]  <TASK>
	[ 1096.480782]  vpanic+0x399/0x700
	[ 1096.480821]  panic+0xb4/0xc0
	[ 1096.480849]  action_result+0x278/0x340
	[ 1096.480857]  memory_failure+0x152b/0x1c80
	[ 1096.480925]  hwpoison_inject+0x3a6/0x3f0 [hwpoison_inject]
	....


Isn't the clean approach way better than the random one?

For testing, I use this simple procedure, in case you want to play with
it:
	# modprobe hwpoison-inject
	# sysctl -w vm.panic_on_unrecoverable_memory_failure=1
	# echo 1 > /sys/kernel/debug/hwpoison/corrupt-pfn


Thanks for the review and good discussion,
--breno


^ permalink raw reply

* Re: [PATCH v2 1/2] dt-bindings: hwmon: pmbus: add max20830
From: Conor Dooley @ 2026-04-16 15:51 UTC (permalink / raw)
  To: Alexis Czezar Torreno
  Cc: Guenter Roeck, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Jonathan Corbet, Shuah Khan, linux-hwmon, devicetree,
	linux-kernel, linux-doc
In-Reply-To: <20260416-dev_max20830-v2-1-2c7d676dc0bd@analog.com>

[-- Attachment #1: Type: text/plain, Size: 3739 bytes --]

On Thu, Apr 16, 2026 at 03:59:10PM +0800, Alexis Czezar Torreno wrote:
> Add device tree documentation for MAX20830 step-down DC-DC switching
> regulator with PMBus interface.
> 
> Signed-off-by: Alexis Czezar Torreno <alexisczezar.torreno@analog.com>
> ---
>  .../bindings/hwmon/pmbus/adi,max20830.yaml         | 61 ++++++++++++++++++++++
>  MAINTAINERS                                        |  7 +++
>  2 files changed, 68 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/hwmon/pmbus/adi,max20830.yaml b/Documentation/devicetree/bindings/hwmon/pmbus/adi,max20830.yaml
> new file mode 100644
> index 0000000000000000000000000000000000000000..8b3ec1ffa0c9460de2122f6606ce3dcbcdfbbcc7
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/hwmon/pmbus/adi,max20830.yaml
> @@ -0,0 +1,61 @@
> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/hwmon/pmbus/adi,max20830.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: Analog Devices MAX20830 Step-Down Switching Regulator with PMBus
> +
> +maintainers:
> +  - Alexis Czezar Torreno <alexisczezar.torreno@analog.com>
> +
> +description: |
> +  The MAX20830 is a fully integrated step-down DC-DC switching regulator with
> +  PMBus interface. It provides 2.7V to 16V input, 0.4V to 5.8V adjustable
> +  output, and up to 30A output current. It allows monitoring of input/output
> +  voltage, output current and temperature through the PMBus serial interface.
> +  Datasheet:
> +    https://www.analog.com/en/products/max20830.html
> +
> +allOf:
> +  - $ref: /schemas/regulator/regulator.yaml#
> +
> +properties:
> +  compatible:
> +    const: adi,max20830
> +
> +  reg:
> +    maxItems: 1

On the previous version, you got an LLM comment about not having the
interrupts property amongst other things.
I think the other things got implemented, but I didn't see any reply to
the bot about that?
I think the answer is that it shouldn't because the pin it referenced
doesn't exist, but when looking at the schematic I have to wonder if
there should be an interrupts property for dealing with "pgood"?

Cheers,
Conor.

> +
> +  vddh-supply:
> +    description:
> +      Phandle to the regulator that provides the VDDH power supply.
> +
> +  avdd-supply:
> +    description:
> +      Phandle to the regulator that provides the AVDD power supply.
> +
> +  ldoin-supply:
> +    description:
> +      Optional 2.5V to 5.5V LDO input supply.
> +
> +required:
> +  - compatible
> +  - reg
> +  - vddh-supply
> +
> +unevaluatedProperties: false
> +
> +examples:
> +  - |
> +    i2c {
> +        #address-cells = <1>;
> +        #size-cells = <0>;
> +
> +        regulator@30 {
> +            compatible = "adi,max20830";
> +            reg = <0x30>;
> +            vddh-supply = <&vddh>;
> +        };
> +    };
> +...
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 0a3991c10ade20dd79cc7d1bf2a1d307ba6bd19d..031c743e979521a92ed9ac67915c178ce31727bd 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -15579,6 +15579,13 @@ F:	Documentation/devicetree/bindings/hwmon/pmbus/adi,max17616.yaml
>  F:	Documentation/hwmon/max17616.rst
>  F:	drivers/hwmon/pmbus/max17616.c
>  
> +MAX20830 HARDWARE MONITOR DRIVER
> +M:	Alexis Czezar Torreno <alexisczezar.torreno@analog.com>
> +L:	linux-hwmon@vger.kernel.org
> +S:	Supported
> +W:	https://ez.analog.com/linux-software-drivers
> +F:	Documentation/devicetree/bindings/hwmon/pmbus/adi,max20830.yaml
> +
>  MAX2175 SDR TUNER DRIVER
>  M:	Ramesh Shanmugasundaram <rashanmu@gmail.com>
>  L:	linux-media@vger.kernel.org
> 
> -- 
> 2.34.1
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply

* Re: [PATCH V10 00/10] famfs: port into fuse
From: Joanne Koong @ 2026-04-16 15:56 UTC (permalink / raw)
  To: John Groves
  Cc: Darrick J. Wong, Miklos Szeredi, Bernd Schubert, John Groves,
	Dan Williams, Bernd Schubert, Alison Schofield, John Groves,
	Jonathan Corbet, Shuah Khan, Vishal Verma, Dave Jiang,
	Matthew Wilcox, Jan Kara, Alexander Viro, David Hildenbrand,
	Christian Brauner, Randy Dunlap, Jeff Layton, Amir Goldstein,
	Jonathan Cameron, Stefan Hajnoczi, Josef Bacik, Bagas Sanjaya,
	Chen Linxuan, James Morse, Fuad Tabba, Sean Christopherson,
	Shivank Garg, Ackerley Tng, Gregory Price, Aravind Ramesh,
	Ajay Joshi, venkataravis@micron.com, linux-doc@vger.kernel.org,
	linux-kernel@vger.kernel.org, nvdimm@lists.linux.dev,
	linux-cxl@vger.kernel.org, linux-fsdevel@vger.kernel.org, djbw
In-Reply-To: <ad7Tps4tkNbndd9Z@groves.net>

On Tue, Apr 14, 2026 at 5:10 PM John Groves <John@groves.net> wrote:
>
> On 26/04/14 03:13PM, Joanne Koong wrote:
> > On Tue, Apr 14, 2026 at 11:57 AM Darrick J. Wong <djwong@kernel.org> wrote:
> > >
> > > On Tue, Apr 14, 2026 at 08:41:42AM -0500, John Groves wrote:
> > > > On 26/04/14 03:19PM, Miklos Szeredi wrote:
> > > > > On Fri, 10 Apr 2026 at 21:44, Joanne Koong <joannelkoong@gmail.com> wrote:
> > > > >
> > > > > > Overall, my intention with bringing this up is just to make sure we're
> > > > > > at least aware of this alternative before anything is merged and
> > > > > > permanent. If Miklos and you think we should land this series, then
> > > > > > I'm on board with that.
> > > > >
> > > > > TBH, I'd prefer not to add the famfs specific mapping interface if not
> > > > > absolutely necessary.  This was the main sticking point originally,
> > > > > but there seemed to be no better alternative.
> > > > >
> > > > > However with the bpf approach this would be gone, which is great.
> > >
> > > Well... you can't get away with having *no* mapping interface at all.
> >
> > Yes but the mapping interface should be *generic*, not one that is so
> > specifically tailored to one server. fuse will have to support this
> > forever.
>
> Mapping interfaces being generic is a nice idea, but I'm no sure it's
> realistic in a generalized sense. But other mitigating comments below.
>
> >
> > > You still have to define a UABI that BPF programs can use to convey
> > > mapping data into fsdax/iomap.  BTF is a nice piece of work that smooths
> > > over minor fluctuations in struct layout between a running kernel and
> > > a precompiled BPF program, but fundamentally we still need a fuse-native
> > > representation.
> > >
> > > That last sentence was an indirect way of saying: No, we're not going
> > > to export struct iomap to userspace.  The fuse-iomap patchset provides
> > > all the UABI pieces we need for regular filesystems (ext4) and hardware
> > > adjacent filesystems (famfs) to exchange file mapping data with the
> > > kernel.  This has been out for review since last October, but the lack
> > > of engagement with that patchset (or its February resubmission) doesn't
> > > leave me with confidence that any of it is going anywhere.
> > >
> > > Note: The reason for bolting BPF atop fuse-iomap is so that famfs can
> > > upload bpf programs to generate interleaved mappings.  It's not so hard
> > > to convert famfs' iomapping paths to use fuse-iomap, but I haven't
> > > helped him do that because:
> > >
> > > a) I have no idea what Miklos' thoughts are about merging any of the
> > > famfs stuff.
> > >
> > > b) I also have no idea what his thoughts are about fuse-iomap.  The
> > > sparse replies are not encouraging.
> > >
> > > c) It didn't seem fair to John to make him take on a whole new patchset
> > > dependency given (a) and (b).
> > >
> > > d) Nobody ever replied to my reply to the LSFMM thread about "can we do
> > > some code review of fuse iomap without waiting three months for LSFMM?"
> > > I've literally done nothing with fuse-iomap for two of the three months
> > > requested.
> > >
> > > > > So let us please at least have a try at this. I'm not into bpf yet,
> > > > > but willing to learn.
> > >
> > > I sent out the patches to enable exactly this sort of experimentation
> > > two months ago, and have not received any responses:
> > >
> > > https://lore.kernel.org/linux-fsdevel/177188736765.3938194.6770791688236041940.stgit@frogsfrogsfrogs/
> > >
> > > I would like to say this as gently as possible: I don't know what the
> > > problem here is, Miklos -- are you uninterested in the work?  Do you
> > > have too many other things to do inside RH that you can't talk about?
> > > Is it too difficult to figure out how the iomap stuff fits into the rest
> > > of the fuse codebase?  Do you need help from the rest of us to get
> > > reviews done?  Is there something else with which I could help?
> > >
> > > Because ... over the past few years, many of my team's filesystem
> > > projects have endured monthslong review cycles and often fail to get
> > > merged.  This has led to burnout and frustration among my teammates such
> > > that many of them chose to move on to other things.  For the remaining
> > > people, it was very difficult to justify continuing headcount when
> > > progress on projects is so slow that individuals cannot achieve even one
> > > milestone per quarter on any project.
> > >
> > > There's now nobody left here but me.
> > >
> > > I'm not blaming you (Miklos) for any of this, but that is the current
> > > deplorable state of things.
> > >
> > > > > Thanks,
> > > > > Miklos
> > > >
> > > > Thanks for responding...
> > > >
> > > > My short response: Noooooooooo!!!!!!
> > > >
> > > > I very strongly object to making this a prerequisite to merging. This
> > > > is an untested idea that will certainly delay us by at least a couple
> > > > of merge windows when products are shipping now, and the existing approach
> > > > has been in circulation for a long time. It is TOO LATE!!!!!!
> > >
> > > /me notes that has "we're shipping so you have to merge it over peoples'
> > > concerns" rarely carries the day in LKML land, and has never ended well
> > > in the few cases that it happens.  As Ted is fond of saying, this is a
> > > team sport, not an individual effort.  Unfortunately, to abuse your
> > > sports metaphor, we all play for the ******* A's.
> > >
> > > That said, you're clearly pissed at the goalposts changing yet again,
> > > and that's really not fair that we collectively keep moving them.
> > >
> > > It's a rotten situation that I could have even helped you to solve both
> > > our problems via fuse-iomap, but I just couldn't motivate myself to
> > > entwine our two projects until the technical direction questions got
> > > answered.
> > >
> > > > Famfs is not a science project, it's enablement for actual products and
> > > > early versions are available now!!!
> > > >
> > > > That doesn't mean we couldn't convert later IF THERE ARE NO HIDDEN PROBLEMS.
> > >
> > > Heck, the fuse command field is a u32.  There are plenty of numberspace
> > > left, and the kernel can just *stop issuing them*.
> >
> > I don't think the problem is the command field. As I understand it, if
> > this lands and is converted over later, none of the famfs code in this
> > series can be removed from fuse. If fuse has native non-bpf support
> > for famfs, then it will always need to have that. That's the part that
> > worries me.
>
> I believe this basic premise is completely wrong. Here is why:
>
> There is a FUSE_DAX_FMAP capability that the kernel may advertise or not
> at init time; this capability "is" the famfs GET_FMAP AND GET_DAXDEV
> commands. In the future, if we find a way to use BPF (or some other
> mechanism) to avoid needing those fuse messages, the kernel could be updated
> to NEVER advertise the FUSE_DAX_FMAP capability. All of the famfs-specific
> code could be taken out of kernels that never advertise that capability.

I’m not sure the capability bit can be used like that (though I am
hoping it can!). As I understand it, once the kernel advertises a
capability, it must continue supporting it in future kernels else
userspace programs that rely on it will break.

John, with what you have in this series, is there any way we can make
it generalizable so it can be used by any dax-based server? Would you
be open to that?

My main gripe is with the layout encoding for FUSE_GET_FMAP and how it
bakes in famfs-specific layout concepts (simple vs interleaved
extents, file type, strip definitions, etc) into the uapi and forces
the kernel to interpret/process this super-specific encoding in a
hardcoded way. Is it not possible to just move this all to the server
side? Could the famfs server just preprocess the mappings and take
care of the simple vs interleaved extents stuff and do all the logic
that's in [1] in userspace and simply just give the kernel a list of
generic extent mapping information? The kernel could then just cache
this generic mapping per inode and do the lookup on fault with no
famfs-specific logic needed in the kernel. Or is there a reason the
famfs processing logic needs to happen in the kernel? That's the part
I'm unclear about.

I think this is pretty much what Darrick is doing in his iomap
patchset [2] except in his case, the mappings are fetched lazily on
access, whereas I guess for your case that would be too slow so it
needs to be prepopulated at open time. imo that seems fine since
prepopulating mappings would be a useful feature in general. So could
we rename FUSE_GET_FMAP to something like FUSE_IOMAP_GETMAP and have
it just return super generic mapping info? Darrick already adds a
"struct fuse_iomap_io" [3] to the uapi, could we just use that and add
a uint32_t dev_id; to that?

Looking more at Darrick's patchset, his caching [4] uses the b+ tree
and is more performant but imo also more complex, but I don't think
that would be necessary for this series. imo we could just use a basic
interval tree for now for famfs's needs and optimize / converge it
later.

As well, are you open to renaming the FUSE_DAX_FMAP capability and
FUSE_FAMFS_DAX config to a generic FUSE_IOMAP_DAX naming scheme
instead?

Additionally, it would be really nice to replace FUSE_GET_DAXDEV and
add the daxdev config stuff to the generic FUSE_IOMAP_CONFIG Darrick
has for iomap [5], but this one might be more contentious in getting
the config uapi solidified. I think in general though for famfs, it
makes more sense for all the dax devices to be set up upfront all at
once at connection init time instead of discovered lazily on every
file open, which as I understand it, would make things less complex
server side for famfs too, so even if this doesn't go through
FUSE_IOMAP_CONFIG, maybe it makes sense to have this be on init where
the server sends all the daxdev device information all at once? That
seems less confusing (and better performance-wise) of a flow than
having it be lazily discovered on open, though if I'm misunderstanding
something here please let me know.

I'm not sure how Miklos feels about this, but I think this would get
rid of all the famfs-specific logic in fuse and would be adding
infrastructure that other dax-backed servers in the future would use.
On my end, this eases my concerns and would be just as good as what
the bpf approach is trying to accomplish.

There are some ties between your and Darrick's work as you guys are both
using iomap, and maybe/hopefully this aligns with Darrick's thoughts
on combining the two, but in my opinion (and maybe Darrick disagrees with
this), to get famfs out the door, we wouldn't need all the code in
Darrick's patchset to land before famfs. In my view, we would only
need to borrow/steal the 'struct fuse_iomap_io' definition in uapi and
hopefully converge on a FUSE_IOMAP_CONFIG uapi format, for famfs to
land, if you agree this direction makes sense. I think we just need to
get the uapi stuff figured out and everything else could be optimized
in the future post-merge. Darrick, do you agree with this assessment?

John, is this a middle ground you think is reasonable? I know this
whole thing has kind of been a mess with the moving goalposts - if the
above would work for famfs but you're slammed with other work and
don't have the bandwidth to make these changes, I'm happy to help out
with the restructuring if that would be useful.

>
> Simple, really. Can't re-use the message opcodes, but as Darrick pointed out
> those are not a scarce resource.
>
> >
> > >
> > > > What are the risks of converting to BPF?
> >
> > I think maybe there is a misinterpretation of what the alternative
> > approach entails. From my point of view, the alternative approach is
> > not that different from what is already in this series. The only piece
> > of the famfs logic that would need to use bpf is the logic for
> > finding/computing the extent mappings (which is the famfs-specific
> > logic that would not be applicable to any other server). That famfs
> > bpf code is minimal and already written [1], as it is just the logic
> > that is in patch 6 [2] in this series copied over. No other part of
> > famfs touches bpf. The rest is renaming the functions in
> > fs/fuse/famfs.c to generic fuse_iomap_dax_XXX names (the logic is the
> > same logic in this series, eg invoking the lower-level calls to
> > dax_iomap_rw/fault/etc) and moving the daxdev setup/initialization to
> > connection initialization time where the server passes that daxdev
> > setup info/configs upfront. I don't think this would delay things by
> > several merge windows, as the code is already mostly written. If it
> > would be helpful, I can clean up what's in the prototype and send that
> > out.
> >
> > I think the part that is not clear yet and needs to be verified is
> > whether this approach runs into any technical limitations on famfs's
> > production workloads. For example, does the overhead of using bpf maps
> > lead to a noticeable performance drop on real workloads? In the
> > future, will there be too many extent mappings on high-scale systems
> > to make this feasible? etc. If there are technical reasons why the
> > famfs logic has to be in fuse, then imo we should figure that out and
> > ideally that's the discussion we should be having. I am not a cxl
> > expert so perhaps there is something missing in the approach that
> > makes it not sufficient on production systems. If we don't end up
> > going with the alternative approach, I still think this series should
> > try to make the famfs uapi additions to fuse as generic as possible
> > since that will be irreversible.
> >
> > If we expedited the alternative approach in terms of reviewing and
> > merging, would that suffice? Is the main pushback the timing of it, eg
> > that it would take too long to get reviewed, merged, and shipped?
> >
> > > >
> > > > - I don't know how to do it - so it'll be slow (kinda like my fuse learning
> > > >   curve cost about a year because this is not that similar to anything
> > > >   else that was already in fuse.
> > >
> > > ...and per above, BPF isn't some magic savior that avoids the expansion
> > > of the UABI.
> >
> > It doesn't avoid the expansion of the UABI but it makes the UABI
> > generic (eg plenty of future servers can/will use the generic iomap
> > layer).
>
> Um, advertised capabilities allow contraction of the UABI-handling code with
> only some small cruft. Code that is only reachable in the presence of dead
> capability can totally be removed.
>
> >
> > >
> > > > - Those of us who are involved don't fully understand either the security
> > > >   or performance implications of this. It
> > >
> > > Correct.  I sure think it's swell that people can inject IR programs
> > > that jit/link into the kernel.  Don't ask which secondary connotation of
> > > "swell" I'm talking about.
> >
> > bpf is used elsewhere in the kernel (eg networking, scheduling). If it
> > is the case that it is unsafe (which maybe it is, I don't know), then
> > wouldn't those other areas have the same issues?
>
> See my long comment to Darrick's prior email.
>
> I suspect that this would be the only place BPF has been tried for a vma
> fault handler. That is a special, performance critical path - especially
> for famfs. In discussion with the right people we can probably reason
> through whether this is a non-starter or not.

Yes, I think the bpf overhead is the main uncertainty about whether
this suffices or not for famfs. I understand (from the all caps in
your previous messages :)) that performance is critical. The bpf
overhead could indeed be too much for the special famfs performance
critical path. I'll try to get some time next week to benchmark this.

Thanks,
Joanne

[1] https://lore.kernel.org/linux-fsdevel/0100019d43e79794-0eadcf5e-b659-43f7-8fdc-dec9f4ccce14-000000@email.amazonses.com/
[2] https://lore.kernel.org/linux-fsdevel/177188733084.3935219.10400570136529869673.stgit@frogsfrogsfrogs/T/#t
[3] https://lore.kernel.org/linux-fsdevel/176169810371.1424854.3010195280915622081.stgit@frogsfrogsfrogs/
[4] https://lore.kernel.org/linux-fsdevel/177188735954.3937557.841478048197856035.stgit@frogsfrogsfrogs/
[5] https://lore.kernel.org/linux-fsdevel/176169813786.1427432.414564085463311156.stgit@frogsfrogsfrogs/

>
> >
> > >
> > > > - Famfs is enabling access to memory and mapping fault handling must be
> > > >   at "memory speed". We know that BPF walks some data structures when a
> > > >   program executes. That exposes us to additional serialized L3 cache
> > > >   misses each time we service a mapping fault (any TLB & page table miss).
> > > >   This should be studied side-by-side with the existing approach under
> > > >   multiple loads before being adopted for production.
> > >
> > > Yes, it should.  AFAICT if one switched to a per-inode bpf program, then
> > > you could do per-inode bpf programs.  Then you don't even need the bpf
> > > map, and the ->iomap_begin becomes an indirect call into JITted x86_64
> > > math code.
> > >
> > > (The downside is that dyn code can't be meaningfully signed, requires
> > > clang on the system, and you have to deal with inode eviction issues.)
> > >
> > > > - This has never been done in production, and we're throwing it in the way
> > > >   of a project that has been soaking for years and needs to support early
> > > >   shipments of products.
> > >
> > > Correct.  I haven't even implemented BPF-iomap for fuse4fs.  This BPF
> > > integration stuff is *highly* experimental code.
> >
> > I think what fuse4fs needs for bpf is significantly more complicated
> > and intensive than what famfs needs. For famfs, the extent mapping
> > logic is straightforward computation.
> >
> > >
> > > > If this is the only path, I'd like to revive famfs as a standalone file
> > > > system. I'm still maintaining that and it's still in use.
> > >
> > > Honestly, you should probably just ship that to your users.  As long as
> > > the ondisk format doesn't change much, switching the implementation at a
> > > later date is at least still possible.
> >
> > I recognize this is an unfair situation John as you've already spent
> > years working on this and did what the community asked with rewriting
> > it. What I'm hoping to convey is that the approach where the extent
> > computing/finding logic gets moved to bpf is not radically different
> > from the famfs logic already in this patchset. In my view, moving this
> > logic to bpf is more advantageous for both fuse *and* famfs
> > (decoupling famfs releases from kernel releases) - it would be great
> > to consider this on technical merits if expediting the timeline of the
> > alternative approach would suffice.
> >
> > Thanks,
> > Joanne
> >
> > [1] https://github.com/joannekoong/libfuse/blob/444fa27fa9fd2118a0dc332933197faf9bbf25aa/example/famfs.bpf.c
> > [2] https://lore.kernel.org/linux-fsdevel/0100019d43e79794-0eadcf5e-b659-43f7-8fdc-dec9f4ccce14-000000@email.amazonses.com/
> >
> > >
> > > --D
>
> Regards,
> John
>

^ permalink raw reply

* Re: [PATCH v4 0/3] mm/memory-failure: add panic option for unrecoverable pages
From: Jiaqi Yan @ 2026-04-16 16:26 UTC (permalink / raw)
  To: Breno Leitao
  Cc: Miaohe Lin, Naoya Horiguchi, Andrew Morton, Jonathan Corbet,
	Shuah Khan, David Hildenbrand, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	linux-mm, linux-kernel, linux-doc, kernel-team
In-Reply-To: <aeD6hpM3t0RZm5mW@gmail.com>

On Thu, Apr 16, 2026 at 8:32 AM Breno Leitao <leitao@debian.org> wrote:
>
> Hi Jiaqi,
>
> On Wed, Apr 15, 2026 at 01:56:35PM -0700, Jiaqi Yan wrote:
> > On Wed, Apr 15, 2026 at 5:55 AM Breno Leitao <leitao@debian.org> wrote:
> > >
> > > When the memory failure handler encounters an in-use kernel page that it
> > > cannot recover (slab, page tables, kernel stacks, vmalloc, etc.), it
> > > currently logs the error as "Ignored" and continues operation.
> > >
> > > This leaves corrupted data accessible to the kernel, which will inevitably
> > > cause either silent data corruption or a delayed crash when the poisoned memory
> > > is next accessed.
> > >
> > > This is a common problem on large fleets. We frequently observe multi-bit ECC
> > > errors hitting kernel slab pages, where memory_failure() fails to recover them
> > > and the system crashes later at an unrelated code path, making root cause
> > > analysis unnecessarily difficult.
> > >
> > > Here is one specific example from production on an arm64 server: a multi-bit
> > > ECC error hit a dentry cache slab page, memory_failure() failed to recover it
> > > (slab pages are not supported by the hwpoison recovery mechanism), and 67
> > > seconds later d_lookup() accessed the poisoned cache line causing
> > > a synchronous external abort:
> > >
> > >     [88690.479680] [Hardware Error]: error_type: 3, multi-bit ECC
> > >     [88690.498473] Memory failure: 0x40272d: unhandlable page.
> > >     [88690.498619] Memory failure: 0x40272d: recovery action for
> > >                    get hwpoison page: Ignored
> > >     ...
> > >     [88757.847126] Internal error: synchronous external abort:
> > >                    0000000096000410 [#1] SMP
> > >     [88758.061075] pc : d_lookup+0x5c/0x220
> > >
> > > This series adds a new sysctl vm.panic_on_unrecoverable_memory_failure
> > > (default 0) that, when enabled, panics immediately on unrecoverable
> > > memory failures. This provides a clean crash dump at the time of the
> >
> > I get the fail-fast part, but wonder will kernel really be able to
> > provide clean crash dump useful for diagnosis?
>
> Yes, the kernel does provide a useful crash dump. With the sysctl enabled,
> here's what I observe:
>
>         Kernel panic - not syncing: Memory failure: 0x1: unrecoverable page
>         CPU: 40 UID: 0 PID: 682 Comm: bash Tainted: G B  7.0.0-next-20260414-upstream-00004-gcbb3af7bfd3b #93
>         Tainted: [B]=BAD_PAGE
>
>         Call Trace:
>          <TASK>
>          vpanic+0x399/0x700
>          panic+0xb4/0xc0
>          action_result+0x278/0x340          ← your new panic call site
>          memory_failure+0x152b/0x1c80
>
>
> Without the patch (or with the sysctl disabled), you only get:
>
>         Memory failure: 0x1: unhandlable page.
>         Memory failure: 0x1: recovery action for reserved kernel page: Ignored
>
> Then the host continues running until it eventually accesses that poisoned
> memory, triggering a generic error similar to the d_lookup() case mentioned
> above.
>
> > In your example at 88757.847126, kernel was handling SEA and because
> > we are under kernel context, eventually has to die(). Apparently not
> > only your patch, but also memory-failure has no role to play there.
> > But at least SEA handling tried its best to show the kernel code that
> > consumed the memory error.
> >
> > So your code should apply to the memory failure handling at
> > 88690.498473, which is likely triggered from APEI GHES for poison
> > detection (I guess the example is from ARM64). Anything except SEA is
> > considered not synchronous (by APEI is_hest_sync_notify()). If kernel
> > panics there, I guess it will be in a random process context or a
> > kworker thread? How useful is it for diagnosis? Just the exact time an
> > error detected (which is already logged by kernel)?
>
> The kernel panics with a clear stack trace and explicit reason, making it
> straightforward to correlate and analyze the failure.

So we will always get the same stack trace below, right?

          panic+0xb4/0xc0
          action_result+0x278/0x340
          memory_failure+0x152b/0x1c80

IIUC, this stack trace itself doesn't provide any useful information
about the memory error, right? What exactly can we use from the stack
trace? It is just a side-effect that we failed immediately.

You can still correlate failure with "Memory failure: 0x1: unhandlable
page" and keep running until the actual fatal poison consumption takes
down the system. Drawback is that these will be cascading events that
can be "noisy". What I see is the choice between failing fast versus
failing safe.

>
> My objective is to have a clean, immediate crash rather than allowing the
> system to continue running and potentially crash later (if at all).
>
> Working at a hyperscaler, I regularly see thousands of these "unhandlable
> page" messages, followed by later kernel crashes when the corrupted memory
> is eventually accessed.
>
> > On X86, for UCNA or SRAO type machine check exceptions, I think with
> > your patch the panic would also happen in random process context or
> > kworker thread,
> >
> > Can you share some clean crash dumps from your testing that show they
> > are more useful than the crash at SEA? Thanks!
>
> Certainly, here is the complete crash dump from the example above. This
> happened on a real production hardware:
>
>         [88690.478913] [ T593001] {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 784
>         [88690.479097] [ T593001] {1}[Hardware Error]: event severity: recoverable
>         [88690.479184] [ T593001] {1}[Hardware Error]:  imprecise tstamp: 2026-03-20 13:13:08
>         [88690.479282] [ T593001] {1}[Hardware Error]:  Error 0, type: recoverable
>         [88690.479359] [ T593001] {1}[Hardware Error]:   section_type: memory error
>         [88690.479424] [ T593001] {1}[Hardware Error]:   physical_address: 0x00000040272d5080
>         [88690.479503] [ T593001] {1}[Hardware Error]:   physical_address_mask: 0xfffffffffffff000
>         [88690.479606] [ T593001] {1}[Hardware Error]:   node:0 card:0 module:1 rank:1 bank:13 device:6 row:64114 column:832 requestor_id:0x0000000000000027
>         [88690.479680] [ T593001] {1}[Hardware Error]:   error_type: 3, multi-bit ECC
>         [88690.479754] [ T593001] {1}[Hardware Error]:   DIMM location: not present. DMI handle: 0x000e
>         [88690.479882] [ T593001] EDAC MC0: 1 UE multi-bit ECC on unknown memory (node:0 card:0 module:1 rank:1 bank:13 device:6 row:64114 column:832 requestor_id:0x0000000000000027 DIMM location: not present. DMI handle: 0x000e page:0x40272d offset:0x5080 grain:4096 - APEI location: node:0 card:0 module:1 rank:1 bank:13 device:6 row:64114 column:832 requestor_id:0x0000000000000027 DIMM location: not present. DMI handle: 0x000e)
>         [88690.498473] [ T593001] Memory failure: 0x40272d: unhandlable page.
>         [88690.498619] [ T593001] Memory failure: 0x40272d: recovery action for get hwpoison page: Ignored
>         [88757.847126] [ T640437] Internal error: synchronous external abort: 0000000096000410 [#1]  SMP
>         [88757.867131] [ T640437] Modules linked in: ghes_edac(E) act_gact(E) sch_fq(E) tcp_diag(E) inet_diag(E) cls_bpf(E) mlx5_ib(E) sm3_ce(E) sha3_ce(E) sha512_ce(E) ipmi_ssif(E) ipmi_devintf(E) nvidia_cspmu(E) ib_uverbs(E) cppc_cpufreq(E) coresight_etm4x(E) coresight_stm(E) ipmi_msghandler(E) coresight_trbe(E) arm_cspmu_module(E) arm_smmuv3_pmu(E) arm_spe_pmu(E) stm_core(E) coresight_tmc(E) coresight_funnel(E) coresight(E) bpf_preload(E) sch_fq_codel(E) ip_tables(E) ip6_tables(E) vhost_net(E) tun(E) vhost(E) vhost_iotlb(E) tap(E) tls(E) mpls_gso(E) mpls_iptunnel(E) mpls_router(E) fou(E) acpi_power_meter(E) loop(E) drm(E) backlight(E) drm_panel_orientation_quirks(E) autofs4(E) raid0(E) efivarfs(E) dm_crypt(E)
>         [88757.991191] [ T640437] CPU: 70 UID: 34133 PID: 640437 Comm: Collection-20 Kdump: loaded Tainted: G   M        E       6.16.1-0_fbk2_0_gf40efc324cc8 #1 NONE
>         [88758.017569] [ T640437] Tainted: [M]=MACHINE_CHECK, [E]=UNSIGNED_MODULE
>         [88758.028860] [ T640437] Hardware name: ....
>         [88758.046969] [ T640437] pstate: 23401009 (nzCv daif +PAN -UAO +TCO +DIT +SSBS BTYPE=--)
>         [88758.061075] [ T640437] pc : d_lookup+0x5c/0x220
>         [88758.068392] [ T640437] lr : try_lookup_noperm+0x30/0x50
>         [88758.077088] [ T640437] sp : ffff800138cafc30
>         [88758.083827] [ T640437] x29: ffff800138cafc40 x28: ffff0001dcfe8bc0 x27: 00000000bc0a11f7
>         [88758.098321] [ T640437] x26: 00000000000ee00c x25: ffffffffffffffff x24: 0000000000000001
>         [88758.112807] [ T640437] x23: ffff003fa14d0000 x22: ffff8000828d3740 x21: ffff800138cafde8
>         [88758.127281] [ T640437] x20: ffff0000d0316fc0 x19: ffff800138cafce0 x18: 0001000000000000
>         [88758.141753] [ T640437] x17: 0000000000000001 x16: 0000000001ffffff x15: dfc038a300003936
>         [88758.156226] [ T640437] x14: 00000000fffffffa x13: ffffffffffffffff x12: ffff0000d0316fc0
>         [88758.170695] [ T640437] x11: 61c8864680b583eb x10: 0000000000000039 x9 : ffff800080fcfd68
>         [88758.185170] [ T640437] x8 : ffff003fa72d5088 x7 : 0000000000000000 x6 : ffff800138cafd58
>         [88758.199645] [ T640437] x5 : ffff0001dcfe8bc0 x4 : ffff80008104a330 x3 : 0000000000000002
>         [88758.214111] [ T640437] x2 : ffff800138cafd4d x1 : ffff800138cafce0 x0 : ffff0000d0316fc0
>         [88758.228579] [ T640437] Call trace:
>         [88758.233565] [ T640437]  d_lookup+0x5c/0x220 (P)
>         [88758.240864] [ T640437]  try_lookup_noperm+0x30/0x50
>         [88758.248868] [ T640437]  proc_fill_cache+0x54/0x140
>         [88758.256696] [ T640437]  proc_readfd_common+0x138/0x1e8
>         [88758.265222] [ T640437]  proc_fd_iterate.llvm.7260857650841435759+0x1c/0x30
>         [88758.277248] [ T640437]  iterate_dir+0x84/0x228
>         [88758.284354] [ T640437]  __arm64_sys_getdents64+0x5c/0x110
>         [88758.293383] [ T640437]  invoke_syscall+0x4c/0xd0
>         [88758.300843] [ T640437]  do_el0_svc+0x80/0xb8
>         [88758.307599] [ T640437]  el0_svc+0x30/0xf0
>         [88758.313820] [ T640437]  el0t_64_sync_handler+0x70/0x100
>         [88758.322497] [ T640437]  el0t_64_sync+0x17c/0x180
>         ...
>
> And my clear crash would look like the following:
>
>         [ 1096.480523] Memory failure: 0x2: recovery action for reserved kernel page: Ignored
>         [ 1096.480751] Kernel panic - not syncing: Memory failure: 0x2: unrecoverable page
>         [ 1096.480760] CPU: 5 UID: 0 PID: 683 Comm: bash Tainted: G    B               7.0.0-next-20260414-upstream-00004-gcbb3af7bfd3b #93 PREEMPTLAZY
>         [ 1096.480768] Tainted: [B]=BAD_PAGE
>         [ 1096.480774] Call Trace:
>         [ 1096.480778]  <TASK>
>         [ 1096.480782]  vpanic+0x399/0x700
>         [ 1096.480821]  panic+0xb4/0xc0
>         [ 1096.480849]  action_result+0x278/0x340
>         [ 1096.480857]  memory_failure+0x152b/0x1c80
>         [ 1096.480925]  hwpoison_inject+0x3a6/0x3f0 [hwpoison_inject]
>         ....
>
>
> Isn't the clean approach way better than the random one?

I don't fully agree. In the past upstream has enhanced many kernel mm
services (e.g. khugepaged, page migration, dump_user_range()) to
recover from memory errors in order to improve system availability,
given these services or tools can fail safe. Seeing many crashes
pointing to a certain in-kernel service at consumption time helped us
decide what services we should enhance, and which service we should
prioritize. Of course not all kernel code can recover from memory
errors, but that doesn't mean knowing which kernel code often causes
crashes isn't useful.

>
> For testing, I use this simple procedure, in case you want to play with
> it:
>         # modprobe hwpoison-inject
>         # sysctl -w vm.panic_on_unrecoverable_memory_failure=0
>         # echo 1 > /sys/kernel/debug/hwpoison/corrupt-pfn
>
>
> Thanks for the review and good discussion,

Anyway, I only have a second opinion on the usefulness of a static
stack trace. This fail-fast option is good to have. Thanks!

> --breno
>

^ permalink raw reply

* [PATCH v4 00/10] Reintroduce Hornet LSM
From: Blaise Boscaccy @ 2026-04-16 17:33 UTC (permalink / raw)
  To: Blaise Boscaccy, Jonathan Corbet, Paul Moore, James Morris,
	Serge E. Hallyn, Mickaël Salaün, Günther Noack,
	Dr. David Alan Gilbert, Andrew Morton, James.Bottomley, dhowells,
	Fan Wu, Ryan Foster, Randy Dunlap, linux-security-module,
	linux-doc, linux-kernel, bpf, Song Liu

This patch series introduces the next iteration of the Hornet LSM.
Hornet’s goal is to provide a secure and extensible in-kernel
signature verification mechanism for eBPF programs.

Hornet addresses concerns from users who require strict audit trails and
verification guarantees for eBPF programs, especially in
security-sensitive environments. Many production systems need assurance
that only authorized, unmodified eBPF programs are loaded into the
kernel. Hornet provides this assurance through cryptographic signature
verification.

The currently accepted loader-plus-map signature verification scheme,
mandated by Alexei and KP, is simple to implement and generally
acceptable if users and administrators are satisfied with it. However,
verifying both the loader and the maps offers additional benefits
beyond verifying the loader alone:

1. Security and Audit Integrity

A key advantage is that the LSM hook for authorizing BPF program loads
can operate after signature verification. This ensures:

* Access control decisions are based on verified signature status.
* Accurate system state measurement and logging.
* Log entries claiming a verified signature are truthful, avoiding
  misleading records where only the loader was verified while the actual
  BPF program verification occurs later without logging.

2. TOCTOU Attack Prevention

The current map hash implementation may be vulnerable to a TOCTOU
attack because it allows unfrozen maps to cache a previously
calculated hash. The accepted “trusted loader” scheme cannot detect
this and may permit loading altered maps.

3. Supply Chain Integrity

Verifying that eBPF programs and their associated map data have not been
modified since they were built and signed, in the kernel proper, may
aid in protecting against supply chain attacks.

This approach addresses concerns from users who require strict audit
trails and verification guarantees, especially in security-sensitive
environments. Map hashes for extended verification are passed via the
existing PKCS#7 UAPI and verified by the crypto subsystem. Hornet then
calculates the program’s verification state.  Hornet itself does not
enforce a policy on whether unsigned or partially signed programs
should be rejected. It delegates that decision to downstream LSM
hooks, making it a composable building block in a larger security
architecture.

Changes in V4:
- IPE integration
- Arbitrary keyring support

Link to V3: https://lore.kernel.org/linux-security-module/20260326060655.2550595-1-bboscaccy@linux.microsoft.com/

Changes in V3:
- Updated for signed attribute patch series changes
- Added some new result enum values
- Minor documentation clarification
- Misc style fixes
- Added missing signed-off-by tags

Link to V2: https://lore.kernel.org/linux-security-module/20260227233930.2418522-1-bboscaccy@linux.microsoft.com/

Changes in V2:
- Addressed possible TOCTOU races in hash verification
- Improved documentation and tooling
- Added Alexei's nack

Link to RFC: https://lore.kernel.org/linux-security-module/20251211021257.1208712-1-bboscaccy@linux.microsoft.com/


Blaise Boscaccy (6):
  lsm: security: Add additional enum values for bpf integrity checks
  security: Hornet LSM
  hornet: Introduce gen_sig
  hornet: Add a light skeleton data extractor scripts
  selftests/hornet: Add a selftest for the Hornet LSM
  ipe: Add BPF program load policy enforcement via Hornet integration

James Bottomley (3):
  crypto: pkcs7: add flag for validated trust on a signed info block
  crypto: pkcs7: add ability to extract signed attributes by OID
  crypto: pkcs7: add tests for pkcs7_get_authattr

Paul Moore (1):
  lsm: framework for BPF integrity verification

 Documentation/admin-guide/LSM/Hornet.rst     | 321 +++++++++++++++
 Documentation/admin-guide/LSM/index.rst      |   1 +
 MAINTAINERS                                  |   9 +
 certs/system_keyring.c                       |   1 +
 crypto/asymmetric_keys/Makefile              |   4 +-
 crypto/asymmetric_keys/pkcs7_aa.asn1         |  18 +
 crypto/asymmetric_keys/pkcs7_key_type.c      |  44 ++-
 crypto/asymmetric_keys/pkcs7_parser.c        |  81 ++++
 crypto/asymmetric_keys/pkcs7_parser.h        |   1 +
 crypto/asymmetric_keys/pkcs7_trust.c         |   1 +
 include/crypto/pkcs7.h                       |   4 +
 include/linux/lsm_hook_defs.h                |   5 +
 include/linux/oid_registry.h                 |   3 +
 include/linux/security.h                     |  28 ++
 include/uapi/linux/lsm.h                     |   1 +
 scripts/Makefile                             |   1 +
 scripts/hornet/Makefile                      |   5 +
 scripts/hornet/extract-insn.sh               |  27 ++
 scripts/hornet/extract-map.sh                |  27 ++
 scripts/hornet/extract-skel.sh               |  27 ++
 scripts/hornet/gen_sig.c                     | 392 +++++++++++++++++++
 scripts/hornet/write-sig.sh                  |  27 ++
 security/Kconfig                             |   3 +-
 security/Makefile                            |   1 +
 security/hornet/Kconfig                      |  11 +
 security/hornet/Makefile                     |   7 +
 security/hornet/hornet.asn1                  |  13 +
 security/hornet/hornet_lsm.c                 | 346 ++++++++++++++++
 security/ipe/Kconfig                         |  14 +
 security/ipe/audit.c                         |  15 +
 security/ipe/eval.c                          |  73 +++-
 security/ipe/eval.h                          |   5 +
 security/ipe/hooks.c                         |  37 ++
 security/ipe/hooks.h                         |  11 +
 security/ipe/ipe.c                           |   3 +
 security/ipe/policy.h                        |  14 +
 security/ipe/policy_parser.c                 |  27 ++
 security/security.c                          |  75 +++-
 tools/testing/selftests/Makefile             |   1 +
 tools/testing/selftests/hornet/Makefile      |  63 +++
 tools/testing/selftests/hornet/loader.c      |  21 +
 tools/testing/selftests/hornet/trivial.bpf.c |  33 ++
 42 files changed, 1794 insertions(+), 7 deletions(-)
 create mode 100644 Documentation/admin-guide/LSM/Hornet.rst
 create mode 100644 crypto/asymmetric_keys/pkcs7_aa.asn1
 create mode 100644 scripts/hornet/Makefile
 create mode 100755 scripts/hornet/extract-insn.sh
 create mode 100755 scripts/hornet/extract-map.sh
 create mode 100755 scripts/hornet/extract-skel.sh
 create mode 100644 scripts/hornet/gen_sig.c
 create mode 100755 scripts/hornet/write-sig.sh
 create mode 100644 security/hornet/Kconfig
 create mode 100644 security/hornet/Makefile
 create mode 100644 security/hornet/hornet.asn1
 create mode 100644 security/hornet/hornet_lsm.c
 create mode 100644 tools/testing/selftests/hornet/Makefile
 create mode 100644 tools/testing/selftests/hornet/loader.c
 create mode 100644 tools/testing/selftests/hornet/trivial.bpf.c

-- 
2.53.0


^ permalink raw reply

* [PATCH v4 01/10] crypto: pkcs7: add flag for validated trust on a signed info block
From: Blaise Boscaccy @ 2026-04-16 17:33 UTC (permalink / raw)
  To: Blaise Boscaccy, Jonathan Corbet, Paul Moore, James Morris,
	Serge E. Hallyn, Mickaël Salaün, Günther Noack,
	Dr. David Alan Gilbert, Andrew Morton, James.Bottomley, dhowells,
	Fan Wu, Ryan Foster, Randy Dunlap, linux-security-module,
	linux-doc, linux-kernel, bpf, Song Liu
In-Reply-To: <20260416173500.176716-1-bboscaccy@linux.microsoft.com>

From: James Bottomley <James.Bottomley@HansenPartnership.com>

Allow consumers of struct pkcs7_message to tell if any of the sinfo
fields has passed a trust validation.  Note that this does not happen
in parsing, pkcs7_validate_trust() must be explicitly called or called
via validate_pkcs7_trust().  Since the way to get this trusted pkcs7
object is via verify_pkcs7_message_sig, export that so modules can use
it.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
 certs/system_keyring.c                | 1 +
 crypto/asymmetric_keys/pkcs7_parser.h | 1 +
 crypto/asymmetric_keys/pkcs7_trust.c  | 1 +
 3 files changed, 3 insertions(+)

diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index e0761436ec7f4..9bda49295bd02 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -380,6 +380,7 @@ int verify_pkcs7_message_sig(const void *data, size_t len,
 	pr_devel("<==%s() = %d\n", __func__, ret);
 	return ret;
 }
+EXPORT_SYMBOL(verify_pkcs7_message_sig);
 
 /**
  * verify_pkcs7_signature - Verify a PKCS#7-based signature on system data.
diff --git a/crypto/asymmetric_keys/pkcs7_parser.h b/crypto/asymmetric_keys/pkcs7_parser.h
index 6ef9f335bb17f..203062a33def6 100644
--- a/crypto/asymmetric_keys/pkcs7_parser.h
+++ b/crypto/asymmetric_keys/pkcs7_parser.h
@@ -20,6 +20,7 @@ struct pkcs7_signed_info {
 	unsigned	index;
 	bool		unsupported_crypto;	/* T if not usable due to missing crypto */
 	bool		blacklisted;
+	bool		verified; /* T if this signer has validated trust */
 
 	/* Message digest - the digest of the Content Data (or NULL) */
 	const void	*msgdigest;
diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c
index 9a87c34ed1733..78ebfb6373b61 100644
--- a/crypto/asymmetric_keys/pkcs7_trust.c
+++ b/crypto/asymmetric_keys/pkcs7_trust.c
@@ -127,6 +127,7 @@ static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7,
 		for (p = sinfo->signer; p != x509; p = p->signer)
 			p->verified = true;
 	}
+	sinfo->verified = true;
 	kleave(" = 0");
 	return 0;
 }
-- 
2.53.0


^ permalink raw reply related

* [PATCH v4 02/10] crypto: pkcs7: add ability to extract signed attributes by OID
From: Blaise Boscaccy @ 2026-04-16 17:33 UTC (permalink / raw)
  To: Blaise Boscaccy, Jonathan Corbet, Paul Moore, James Morris,
	Serge E. Hallyn, Mickaël Salaün, Günther Noack,
	Dr. David Alan Gilbert, Andrew Morton, James.Bottomley, dhowells,
	Fan Wu, Ryan Foster, Randy Dunlap, linux-security-module,
	linux-doc, linux-kernel, bpf, Song Liu
In-Reply-To: <20260416173500.176716-1-bboscaccy@linux.microsoft.com>

From: James Bottomley <James.Bottomley@HansenPartnership.com>

Signers may add any information they like in signed attributes and
sometimes this information turns out to be relevant to specific
signing cases, so add an api pkcs7_get_authattr() to extract the value
of an authenticated attribute by specific OID.  The current
implementation is designed for the single signer use case and simply
terminates the search when it finds the relevant OID.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
 crypto/asymmetric_keys/Makefile       |  4 +-
 crypto/asymmetric_keys/pkcs7_aa.asn1  | 18 ++++++
 crypto/asymmetric_keys/pkcs7_parser.c | 81 +++++++++++++++++++++++++++
 include/crypto/pkcs7.h                |  4 ++
 4 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 crypto/asymmetric_keys/pkcs7_aa.asn1

diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile
index bc65d3b98dcbf..f99b7169ae7cd 100644
--- a/crypto/asymmetric_keys/Makefile
+++ b/crypto/asymmetric_keys/Makefile
@@ -53,12 +53,14 @@ clean-files	+= pkcs8.asn1.c pkcs8.asn1.h
 obj-$(CONFIG_PKCS7_MESSAGE_PARSER) += pkcs7_message.o
 pkcs7_message-y := \
 	pkcs7.asn1.o \
+	pkcs7_aa.asn1.o \
 	pkcs7_parser.o \
 	pkcs7_trust.o \
 	pkcs7_verify.o
 
-$(obj)/pkcs7_parser.o: $(obj)/pkcs7.asn1.h
+$(obj)/pkcs7_parser.o: $(obj)/pkcs7.asn1.h $(obj)/pkcs7_aa.asn1.h
 $(obj)/pkcs7.asn1.o: $(obj)/pkcs7.asn1.c $(obj)/pkcs7.asn1.h
+$(obj)/pkcs7_aa.asn1.o: $(obj)/pkcs7_aa.asn1.c $(obj)/pkcs7_aa.asn1.h
 
 #
 # PKCS#7 parser testing key
diff --git a/crypto/asymmetric_keys/pkcs7_aa.asn1 b/crypto/asymmetric_keys/pkcs7_aa.asn1
new file mode 100644
index 0000000000000..7a8857bdf56e1
--- /dev/null
+++ b/crypto/asymmetric_keys/pkcs7_aa.asn1
@@ -0,0 +1,18 @@
+-- SPDX-License-Identifier: BSD-3-Clause
+--
+-- Copyright (C) 2009 IETF Trust and the persons identified as authors
+-- of the code
+--
+-- https://www.rfc-editor.org/rfc/rfc5652#section-3
+
+AA ::= 	CHOICE {
+	aaSet		[0] IMPLICIT AASet,
+	aaSequence	[2] EXPLICIT SEQUENCE OF AuthenticatedAttribute
+}
+
+AASet ::= SET OF AuthenticatedAttribute
+
+AuthenticatedAttribute ::= SEQUENCE {
+	type	OBJECT IDENTIFIER ({ pkcs7_aa_note_OID }),
+	values	SET OF ANY ({ pkcs7_aa_note_attr })
+}
diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c
index 6e3ffdac83ace..d467866f7d930 100644
--- a/crypto/asymmetric_keys/pkcs7_parser.c
+++ b/crypto/asymmetric_keys/pkcs7_parser.c
@@ -15,6 +15,7 @@
 #include <crypto/public_key.h>
 #include "pkcs7_parser.h"
 #include "pkcs7.asn1.h"
+#include "pkcs7_aa.asn1.h"
 
 MODULE_DESCRIPTION("PKCS#7 parser");
 MODULE_AUTHOR("Red Hat, Inc.");
@@ -211,6 +212,86 @@ int pkcs7_get_content_data(const struct pkcs7_message *pkcs7,
 }
 EXPORT_SYMBOL_GPL(pkcs7_get_content_data);
 
+struct pkcs7_aa_context {
+	bool found;
+	enum OID oid_to_find;
+	const void *data;
+	size_t len;
+};
+
+int pkcs7_aa_note_OID(void *context, size_t hdrlen,
+		      unsigned char tag,
+		      const void *value, size_t vlen)
+{
+	struct pkcs7_aa_context *ctx = context;
+	enum OID oid = look_up_OID(value, vlen);
+
+	ctx->found = (oid == ctx->oid_to_find);
+
+	return 0;
+}
+
+int pkcs7_aa_note_attr(void *context, size_t hdrlen,
+		       unsigned char tag,
+		       const void *value, size_t vlen)
+{
+	struct pkcs7_aa_context *ctx = context;
+
+	if (ctx->found) {
+		ctx->data = value;
+		ctx->len = vlen;
+	}
+
+	return 0;
+}
+
+/**
+ * pkcs7_get_authattr - get authenticated attribute by OID
+ * @pkcs7: The preparsed PKCS#7 message
+ * @oid: the enum value of the OID to find
+ * @_data: Place to return a pointer to the attribute value
+ * @_len: length of the attribute value
+ *
+ * Searches the authenticated attributes until one is found with a
+ * matching OID.  Note that because the attributes are per signer
+ * there could be multiple signers with different values, but this
+ * routine will simply return the first one in parse order.
+ *
+ * Returns -ENODATA if the attribute can't be found
+ */
+int pkcs7_get_authattr(const struct pkcs7_message *pkcs7,
+		       enum OID oid,
+		       const void **_data, size_t *_len)
+{
+	struct pkcs7_signed_info *sinfo = pkcs7->signed_infos;
+	struct pkcs7_aa_context ctx;
+
+	ctx.data = NULL;
+	ctx.oid_to_find = oid;
+
+	for (; sinfo; sinfo = sinfo->next) {
+		int ret;
+
+		/* only extract OIDs from validated signers */
+		if (!sinfo->verified)
+			continue;
+
+		ret = asn1_ber_decoder(&pkcs7_aa_decoder, &ctx,
+				       sinfo->authattrs, sinfo->authattrs_len);
+		if (ret < 0 || ctx.data != NULL)
+			break;
+	}
+
+	if (!ctx.data)
+		return -ENODATA;
+
+	*_data = ctx.data;
+	*_len = ctx.len;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pkcs7_get_authattr);
+
 /*
  * Note an OID when we find one for later processing when we know how
  * to interpret it.
diff --git a/include/crypto/pkcs7.h b/include/crypto/pkcs7.h
index 38ec7f5f90411..bd83202cd805c 100644
--- a/include/crypto/pkcs7.h
+++ b/include/crypto/pkcs7.h
@@ -25,6 +25,10 @@ extern void pkcs7_free_message(struct pkcs7_message *pkcs7);
 extern int pkcs7_get_content_data(const struct pkcs7_message *pkcs7,
 				  const void **_data, size_t *_datalen,
 				  size_t *_headerlen);
+extern int pkcs7_get_authattr(const struct pkcs7_message *pkcs7,
+			      enum OID oid,
+			      const void **_data, size_t *_len);
+
 
 /*
  * pkcs7_trust.c
-- 
2.53.0


^ permalink raw reply related

* [PATCH v4 03/10] crypto: pkcs7: add tests for pkcs7_get_authattr
From: Blaise Boscaccy @ 2026-04-16 17:33 UTC (permalink / raw)
  To: Blaise Boscaccy, Jonathan Corbet, Paul Moore, James Morris,
	Serge E. Hallyn, Mickaël Salaün, Günther Noack,
	Dr. David Alan Gilbert, Andrew Morton, James.Bottomley, dhowells,
	Fan Wu, Ryan Foster, Randy Dunlap, linux-security-module,
	linux-doc, linux-kernel, bpf, Song Liu
In-Reply-To: <20260416173500.176716-1-bboscaccy@linux.microsoft.com>

From: James Bottomley <James.Bottomley@HansenPartnership.com>

Add example code to the test module pkcs7_key_type.c that verifies a
message and then pulls out a known authenticated attribute.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
Acked-by: David Howells <dhowells@redhat.com>
---
 crypto/asymmetric_keys/pkcs7_key_type.c | 44 ++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/crypto/asymmetric_keys/pkcs7_key_type.c b/crypto/asymmetric_keys/pkcs7_key_type.c
index b930d3bbf1af5..e0b1ce0202f6d 100644
--- a/crypto/asymmetric_keys/pkcs7_key_type.c
+++ b/crypto/asymmetric_keys/pkcs7_key_type.c
@@ -12,6 +12,7 @@
 #include <linux/verification.h>
 #include <linux/key-type.h>
 #include <keys/user-type.h>
+#include <crypto/pkcs7.h>
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("PKCS#7 testing key type");
@@ -51,16 +52,57 @@ static int pkcs7_view_content(void *ctx, const void *data, size_t len,
 static int pkcs7_preparse(struct key_preparsed_payload *prep)
 {
 	enum key_being_used_for usage = pkcs7_usage;
+	int ret;
+	struct pkcs7_message *pkcs7;
+	const void *data;
+	size_t len;
 
 	if (usage >= NR__KEY_BEING_USED_FOR) {
 		pr_err("Invalid usage type %d\n", usage);
 		return -EINVAL;
 	}
 
-	return verify_pkcs7_signature(NULL, 0,
+	ret = verify_pkcs7_signature(NULL, 0,
 				      prep->data, prep->datalen,
 				      VERIFY_USE_SECONDARY_KEYRING, usage,
 				      pkcs7_view_content, prep);
+	if (ret)
+		return ret;
+
+	pkcs7 = pkcs7_parse_message(prep->data, prep->datalen);
+	if (IS_ERR(pkcs7)) {
+		pr_err("pkcs7 parse error\n");
+		return PTR_ERR(pkcs7);
+	}
+
+	/*
+	 * the parsed message has no trusted signer, so nothing should
+	 * be returned here
+	 */
+	ret = pkcs7_get_authattr(pkcs7, OID_messageDigest, &data, &len);
+	if (ret == 0) {
+		pr_err("OID returned when no trust in signer\n");
+		goto out;
+	}
+	/* add trust and check again */
+	ret = verify_pkcs7_message_sig(NULL, 0, pkcs7,
+				       VERIFY_USE_SECONDARY_KEYRING, usage,
+				       NULL, NULL);
+	if (ret) {
+		pr_err("verify_pkcs7_message_sig failed!!\n");
+		goto out;
+	}
+	/* now we should find the OID */
+	ret = pkcs7_get_authattr(pkcs7, OID_messageDigest, &data, &len);
+	if (ret) {
+		pr_err("Failed to get message digest\n");
+		goto out;
+	}
+	pr_info("Correctly Got message hash, size=%zu\n", len);
+
+ out:
+	pkcs7_free_message(pkcs7);
+	return 0;
 }
 
 /*
-- 
2.53.0


^ permalink raw reply related

* [PATCH v4 04/10] lsm: framework for BPF integrity verification
From: Blaise Boscaccy @ 2026-04-16 17:33 UTC (permalink / raw)
  To: Blaise Boscaccy, Jonathan Corbet, Paul Moore, James Morris,
	Serge E. Hallyn, Mickaël Salaün, Günther Noack,
	Dr. David Alan Gilbert, Andrew Morton, James.Bottomley, dhowells,
	Fan Wu, Ryan Foster, Randy Dunlap, linux-security-module,
	linux-doc, linux-kernel, bpf, Song Liu
In-Reply-To: <20260416173500.176716-1-bboscaccy@linux.microsoft.com>

From: Paul Moore <paul@paul-moore.com>

Add a new LSM hook and two new LSM hook callbacks to support LSMs that
perform integrity verification, e.g. digital signature verification,
of BPF programs.

While the BPF subsystem does implement a signature verification scheme,
it does not satisfy a number of existing requirements.  Adding support
for BPF program integrity verification to the LSM framework allows
administrators to select additional integrity verification mechanisms
to meet these needs while also providing a mechanism for future
expansion.  Additional information on why this is necessary can be
found at the lore archive link below:

https://lore.kernel.org/linux-security-module/CAHC9VhTQ_DR=ANzoDBjcCtrimV7XcCZVUsANPt=TjcvM4d-vjg@mail.gmail.com/

The LSM-based BPF integrity verification mechanism works within the
existing security_bpf_prog_load() hook called by the BPF subsystem.
It adds an additional dedicated integrity callback and a new LSM
hook/callback to be called from within LSMs implementing integrity
verification.

The first new callback, bpf_prog_load_integrity(), located within the
security_bpf_prog_load() hook, is necessary to ensure that the integrity
verification callbacks are executed before any of the existing LSMs
are executed via the bpf_prog_load() callback.  Reusing the existing
bpf_prog_load() callback for integrity verification could result in LSMs
not having access to the integrity verification results when asked to
authorize the BPF program load in the bpf_prog_load() callback.

The new LSM hook, security_bpf_prog_load_post_integrity(), is intended
to be called from within LSMs performing BPF program integrity
verification.  It is used to report the verdict of the integrity
verification to other LSMs enforcing access control policy on BPF
program loads.  LSMs enforcing such access controls should register a
bpf_prog_load_post_integrity() callback to receive integrity verdicts.

More information on these new callbacks and hook can be found in the
code comments in this patch.

Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
 include/linux/lsm_hook_defs.h |  5 +++
 include/linux/security.h      | 25 ++++++++++++
 security/security.c           | 75 +++++++++++++++++++++++++++++++++--
 3 files changed, 102 insertions(+), 3 deletions(-)

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 8c42b4bde09c0..4971d3c36d5b4 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -434,6 +434,11 @@ LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog)
 LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr,
 	 struct bpf_token *token, bool kernel)
 LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map)
+LSM_HOOK(int, 0, bpf_prog_load_post_integrity, struct bpf_prog *prog,
+	 union bpf_attr *attr, struct bpf_token *token, bool kernel,
+	 const struct lsm_id *lsmid, enum lsm_integrity_verdict verdict)
+LSM_HOOK(int, 0, bpf_prog_load_integrity, struct bpf_prog *prog,
+	 union bpf_attr *attr, struct bpf_token *token, bool kernel)
 LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr,
 	 struct bpf_token *token, bool kernel)
 LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog)
diff --git a/include/linux/security.h b/include/linux/security.h
index ee88dd2d2d1f7..b3fd04baa78d0 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -67,6 +67,7 @@ enum fs_value_type;
 struct watch;
 struct watch_notification;
 struct lsm_ctx;
+struct lsm_id;
 
 /* Default (no) options for the capable function */
 #define CAP_OPT_NONE 0x0
@@ -100,6 +101,14 @@ enum lsm_integrity_type {
 	LSM_INT_FSVERITY_BUILTINSIG_VALID,
 };
 
+enum lsm_integrity_verdict {
+	LSM_INT_VERDICT_NONE = 0,
+	LSM_INT_VERDICT_OK,
+	LSM_INT_VERDICT_UNSIGNED,
+	LSM_INT_VERDICT_PARTIALSIG,
+	LSM_INT_VERDICT_BADSIG,
+};
+
 /*
  * These are reasons that can be passed to the security_locked_down()
  * LSM hook. Lockdown reasons that protect kernel integrity (ie, the
@@ -2270,6 +2279,12 @@ extern int security_bpf_prog(struct bpf_prog *prog);
 extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
 				   struct bpf_token *token, bool kernel);
 extern void security_bpf_map_free(struct bpf_map *map);
+extern int security_bpf_prog_load_post_integrity(struct bpf_prog *prog,
+					union bpf_attr *attr,
+					struct bpf_token *token,
+					bool kernel,
+					const struct lsm_id *lsmid,
+					enum lsm_integrity_verdict verdict);
 extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
 				  struct bpf_token *token, bool kernel);
 extern void security_bpf_prog_free(struct bpf_prog *prog);
@@ -2304,6 +2319,16 @@ static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *a
 static inline void security_bpf_map_free(struct bpf_map *map)
 { }
 
+static inline int security_bpf_prog_load_post_integrity(struct bpf_prog *prog,
+					  union bpf_attr *attr,
+					  struct bpf_token *token,
+					  bool kernel,
+					  const struct lsm_id *lsmid,
+					  enum lsm_integrity_verdict verdict)
+{
+	return 0;
+}
+
 static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
 					 struct bpf_token *token, bool kernel)
 {
diff --git a/security/security.c b/security/security.c
index a26c1474e2e49..bb78f7e45a98f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -5233,6 +5233,50 @@ int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
 	return rc;
 }
 
+/**
+ * security_bpf_prog_load_post_integrity() - Check if the BPF prog is allowed
+ * @prog: BPF program object
+ * @attr: BPF syscall attributes used to create BPF program
+ * @token: BPF token used to grant user access to BPF subsystem
+ * @kernel: whether or not call originated from kernel
+ * @lsmid: LSM ID of the LSM providing @verdict
+ * @verdict: result of the integrity verification
+ *
+ * See the comment block for the security_bpf_prog_load() LSM hook.
+ *
+ * This LSM hook is intended to be called from within the
+ * bpf_prog_load_integrity() callback that is part of the
+ * security_bpf_prog_load() hook; kernel subsystems outside the scope of the
+ * LSM framework should not call this hook directly.
+ *
+ * If the LSM calling into this hook receives a non-zero error code, it should
+ * return the same error code back to its caller.  If this hook returns a zero,
+ * it does not necessarily mean that all of the enabled LSMs have authorized
+ * the BPF program load, as there may be other LSMs implementing BPF integrity
+ * checks which have yet to execute.  However, if a zero is returned, the LSM
+ * calling into this hook should continue and return zero back to its caller.
+ *
+ * LSMs which implement the bpf_prog_load_post_integrity() callback and
+ * determine that a particular BPF program load is not authorized may choose to
+ * either return an error code for immediate rejection, or store their decision
+ * in their own LSM state attached to @prog, later returning an error code in
+ * the bpf_prog_load() callback.  An immediate error code return is in keeping
+ * with the "fail fast" practice, but waiting until the bpf_prog_load()
+ * callback allows the LSM to consider multiple different integrity verdicts.
+ *
+ * Return: Returns 0 on success, error on failure.
+ */
+int security_bpf_prog_load_post_integrity(struct bpf_prog *prog,
+					  union bpf_attr *attr,
+					  struct bpf_token *token,
+					  bool kernel,
+					  const struct lsm_id *lsmid,
+					  enum lsm_integrity_verdict verdict)
+{
+	return call_int_hook(bpf_prog_load_post_integrity, prog, attr, token,
+			     kernel, lsmid, verdict);
+}
+
 /**
  * security_bpf_prog_load() - Check if loading of BPF program is allowed
  * @prog: BPF program object
@@ -5241,8 +5285,24 @@ int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
  * @kernel: whether or not call originated from kernel
  *
  * Perform an access control check when the kernel loads a BPF program and
- * allocates associated BPF program object. This hook is also responsible for
- * allocating any required LSM state for the BPF program.
+ * allocates the associated BPF program object. This hook is also responsible
+ * for allocating any required LSM state for the BPF program.
+ *
+ * This hook calls two LSM callbacks: bpf_prog_load_integrity() and
+ * bpf_prog_load().  The bpf_prog_load_integrity() callback is for those LSMs
+ * that wish to implement integrity verifications of BPF programs, e.g.
+ * signature verification, while the bpf_prog_load() callback is for general
+ * authorization of the BPF program load.  Performing both verification and
+ * authorization in a single callback, with arbitrary LSM ordering, would be
+ * a challenge.
+ *
+ * LSMs which implement the bpf_prog_load_integrity() callback should call into
+ * the security_bpf_prog_load_post_integrity() hook with their integrity
+ * verdict.  LSMs which implement BPF program integrity policy can register a
+ * callback for the security_bpf_prog_load_post_integrity() hook and
+ * either update their own internal state based on the verdict, or immediately
+ * reject the BPF program load with an error code.  See the comment block for
+ * security_bpf_prog_load_post_integrity() for more information.
  *
  * Return: Returns 0 on success, error on failure.
  */
@@ -5255,9 +5315,18 @@ int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
 	if (unlikely(rc))
 		return rc;
 
+	rc = call_int_hook(bpf_prog_load_integrity, prog, attr, token, kernel);
+	if (unlikely(rc))
+		goto err;
+
 	rc = call_int_hook(bpf_prog_load, prog, attr, token, kernel);
 	if (unlikely(rc))
-		security_bpf_prog_free(prog);
+		goto err;
+
+	return rc;
+
+err:
+	security_bpf_prog_free(prog);
 	return rc;
 }
 
-- 
2.53.0


^ permalink raw reply related

* [PATCH v4 05/10] lsm: security: Add additional enum values for bpf integrity checks
From: Blaise Boscaccy @ 2026-04-16 17:33 UTC (permalink / raw)
  To: Blaise Boscaccy, Jonathan Corbet, Paul Moore, James Morris,
	Serge E. Hallyn, Mickaël Salaün, Günther Noack,
	Dr. David Alan Gilbert, Andrew Morton, James.Bottomley, dhowells,
	Fan Wu, Ryan Foster, Randy Dunlap, linux-security-module,
	linux-doc, linux-kernel, bpf, Song Liu
In-Reply-To: <20260416173500.176716-1-bboscaccy@linux.microsoft.com>

First, add a generic LSM_INT_VERDICT_FAULT value to indicate a system
failure during checking. Second, add an LSM_INT_VERDICT_UNKNOWNKEY to
signal that the payload was signed with a key other than one that
exists in the secondary keyring. And finally, add an
LSM_INT_VERDICT_UNEXPECTED enum value to indicate that an unexpected
hash value was encountered at some stage of verification.

Signed-off-by: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
---
 include/linux/security.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/security.h b/include/linux/security.h
index b3fd04baa78d0..4b4b8808f67de 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -106,6 +106,9 @@ enum lsm_integrity_verdict {
 	LSM_INT_VERDICT_OK,
 	LSM_INT_VERDICT_UNSIGNED,
 	LSM_INT_VERDICT_PARTIALSIG,
+	LSM_INT_VERDICT_UNKNOWNKEY,
+	LSM_INT_VERDICT_UNEXPECTED,
+	LSM_INT_VERDICT_FAULT,
 	LSM_INT_VERDICT_BADSIG,
 };
 
-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 00/28] vfs/nfsd: add support for CB_NOTIFY callbacks in directory delegations
From: Jeff Layton @ 2026-04-16 17:35 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton

This version has a number of significant changes from the last. I
dropped some of the R-b's for this reason.

Of particular interest to the fsnotify maintainers will be the
FSNOTIFY_EVENT_RENAME data type. This combines the FSNOTIFY_EVENT_DENTRY
and FSNOTIFY_EVENT_INODE event types so that the fsnotify event can
additionally send information about a file that was unlinked as a result
of being replaced via rename().

There are also a host of other bugfixes, and a new tracepoint. Please
consider this for v7.2.

Original cover letter follows:

---------------------------------8<------------------------------------

This patchset builds on the directory delegation work we did a few
months ago, to add support for CB_NOTIFY callbacks for some events. In
particular, creates, unlinks and renames. The server also sends updated
directory attributes in the notifications. With this support, the client
can register interest in a directory and get notifications about changes
within it without losing its lease.

The series starts with patches to allow the vfs to ignore certain types
of events on directories. nfsd can then request these sorts of
delegations on directories, and then set up inotify watches on the
directory to trigger sending CB_NOTIFY events.

This has mainly been tested with pynfs, with some new testcases that
I'll be posting soon. They seem to work fine with those tests, but I
don't think we'll want to merge these until we have a complete
client-side implementation to test against.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
Changes in v2:
- Fix __break_lease handling with different lease types on flc_lease list
- Add FSNOTIFY_EVENT_RENAME data type to properly handle cross-directory rename events
- Display fsnotify mask symbolically in tracepoints
- New tracepoint in fsnotify()
- Recalc fsnotify mask after unlocking lease instead of before
- Don't notify client that is making the changes
- After sending CB_NOTIFY, requeue if new events came in while running
- Document removal of NFS4_VERIFIER_SIZE/NFS4_FHSIZE from UAPI headers
- Properly release nfsd_dir_fsnotify_group on server shutdown
- Link to v1: https://lore.kernel.org/r/20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org

---
Jeff Layton (28):
      filelock: pass current blocking lease to trace_break_lease_block() rather than "new_fl"
      filelock: add support for ignoring deleg breaks for dir change events
      filelock: add a tracepoint to start of break_lease()
      filelock: add an inode_lease_ignore_mask helper
      fsnotify: new tracepoint in fsnotify()
      fsnotify: add fsnotify_modify_mark_mask()
      fsnotify: add FSNOTIFY_EVENT_RENAME data type
      nfsd: check fl_lmops in nfsd_breaker_owns_lease()
      nfsd: add protocol support for CB_NOTIFY
      nfs_common: add new NOTIFY4_* flags proposed in RFC8881bis
      nfsd: allow nfsd to get a dir lease with an ignore mask
      nfsd: update the fsnotify mark when setting or removing a dir delegation
      nfsd: make nfsd4_callback_ops->prepare operation bool return
      nfsd: add callback encoding and decoding linkages for CB_NOTIFY
      nfsd: use RCU to protect fi_deleg_file
      nfsd: add data structures for handling CB_NOTIFY
      nfsd: add notification handlers for dir events
      nfsd: add tracepoint to dir_event handler
      nfsd: apply the notify mask to the delegation when requested
      nfsd: add helper to marshal a fattr4 from completed args
      nfsd: allow nfsd4_encode_fattr4_change() to work with no export
      nfsd: send basic file attributes in CB_NOTIFY
      nfsd: allow encoding a filehandle into fattr4 without a svc_fh
      nfsd: add a fi_connectable flag to struct nfs4_file
      nfsd: add the filehandle to returned attributes in CB_NOTIFY
      nfsd: properly track requested child attributes
      nfsd: track requested dir attributes
      nfsd: add support to CB_NOTIFY for dir attribute changes

 Documentation/sunrpc/xdr/nfs4_1.x    | 264 ++++++++++++++-
 fs/attr.c                            |   2 +-
 fs/locks.c                           | 118 +++++--
 fs/namei.c                           |  31 +-
 fs/nfsd/filecache.c                  |  70 +++-
 fs/nfsd/nfs4callback.c               |  60 +++-
 fs/nfsd/nfs4layouts.c                |   5 +-
 fs/nfsd/nfs4proc.c                   |  17 +
 fs/nfsd/nfs4state.c                  | 550 ++++++++++++++++++++++++++++----
 fs/nfsd/nfs4xdr.c                    | 323 +++++++++++++++++--
 fs/nfsd/nfs4xdr_gen.c                | 601 ++++++++++++++++++++++++++++++++++-
 fs/nfsd/nfs4xdr_gen.h                |  20 +-
 fs/nfsd/state.h                      |  72 ++++-
 fs/nfsd/trace.h                      |  23 ++
 fs/nfsd/xdr4.h                       |   5 +
 fs/nfsd/xdr4cb.h                     |  12 +
 fs/notify/fsnotify.c                 |   5 +
 fs/notify/mark.c                     |  29 ++
 fs/posix_acl.c                       |   4 +-
 fs/xattr.c                           |   4 +-
 include/linux/filelock.h             |  54 +++-
 include/linux/fsnotify.h             |   8 +-
 include/linux/fsnotify_backend.h     |  21 ++
 include/linux/nfs4.h                 | 127 --------
 include/linux/sunrpc/xdrgen/nfs4_1.h | 291 ++++++++++++++++-
 include/trace/events/filelock.h      |  38 ++-
 include/trace/events/fsnotify.h      |  51 +++
 include/trace/misc/fsnotify.h        |  35 ++
 include/uapi/linux/nfs4.h            |   2 -
 29 files changed, 2518 insertions(+), 324 deletions(-)
---
base-commit: f4d71dd7fd9cec357c32431fa55c107b96008312
change-id: 20260325-dir-deleg-339066dd1017

Best regards,
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply

* [PATCH v2 01/28] filelock: pass current blocking lease to trace_break_lease_block() rather than "new_fl"
From: Jeff Layton @ 2026-04-16 17:35 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260416-dir-deleg-v2-0-851426a550f6@kernel.org>

The break_lease_block tracepoint currently just shows the type of
"new_fl", which we can predict from the "flags" value. Switch it to
display info about "fl" instead, as that's the file_lease on which the
code is blocking.

For trace_break_lease_unblock(), pass it a NULL pointer. "fl" may have
been freed by that point, and passing it the info in new_fl is
deceptive.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 8e44b1f6c15a..d82c5be7aa5b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1691,7 +1691,7 @@ int __break_lease(struct inode *inode, unsigned int flags)
 	} else
 		break_time++;
 	locks_insert_block(&fl->c, &new_fl->c, leases_conflict);
-	trace_break_lease_block(inode, new_fl);
+	trace_break_lease_block(inode, fl);
 	spin_unlock(&ctx->flc_lock);
 	percpu_up_read(&file_rwsem);
 
@@ -1702,7 +1702,7 @@ int __break_lease(struct inode *inode, unsigned int flags)
 
 	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
-	trace_break_lease_unblock(inode, new_fl);
+	trace_break_lease_unblock(inode, NULL);
 	__locks_delete_block(&new_fl->c);
 	if (error >= 0) {
 		/*

-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 02/28] filelock: add support for ignoring deleg breaks for dir change events
From: Jeff Layton @ 2026-04-16 17:35 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260416-dir-deleg-v2-0-851426a550f6@kernel.org>

If an NFS client requests a directory delegation with a notification
bitmask covering directory change events, the server shouldn't recall
the delegation. Instead the client will be notified of the change after
the fact.

Add support for ignoring lease breaks on directory changes. Add a new
flags parameter to try_break_deleg() and teach __break_lease how to
ignore certain types of delegation break events.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/attr.c                       |  2 +-
 fs/locks.c                      | 82 ++++++++++++++++++++++++++++-------------
 fs/namei.c                      | 31 +++++++++-------
 fs/posix_acl.c                  |  4 +-
 fs/xattr.c                      |  4 +-
 include/linux/filelock.h        | 53 ++++++++++++++++++--------
 include/trace/events/filelock.h |  5 ++-
 7 files changed, 120 insertions(+), 61 deletions(-)

diff --git a/fs/attr.c b/fs/attr.c
index e7d7c6d19fe9..28744f0e9ff4 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -547,7 +547,7 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
 	 * breaking the delegation in this case.
 	 */
 	if (!(ia_valid & ATTR_DELEG)) {
-		error = try_break_deleg(inode, delegated_inode);
+		error = try_break_deleg(inode, 0, delegated_inode);
 		if (error)
 			return error;
 	}
diff --git a/fs/locks.c b/fs/locks.c
index d82c5be7aa5b..8b5958f34b61 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1583,29 +1583,63 @@ static bool leases_conflict(struct file_lock_core *lc, struct file_lock_core *bc
 }
 
 static bool
-any_leases_conflict(struct inode *inode, struct file_lease *breaker)
+ignore_dir_deleg_break(struct file_lease *fl, unsigned int flags)
 {
-	struct file_lock_context *ctx = inode->i_flctx;
-	struct file_lock_core *flc;
+	if ((flags & LEASE_BREAK_DIR_CREATE) && (fl->c.flc_flags & FL_IGN_DIR_CREATE))
+		return true;
+	if ((flags & LEASE_BREAK_DIR_DELETE) && (fl->c.flc_flags & FL_IGN_DIR_DELETE))
+		return true;
+	if ((flags & LEASE_BREAK_DIR_RENAME) && (fl->c.flc_flags & FL_IGN_DIR_RENAME))
+		return true;
+
+	return false;
+}
+
+static unsigned int
+break_lease_flags_to_type(unsigned int flags)
+{
+	if (flags & LEASE_BREAK_LEASE)
+		return FL_LEASE;
+	else if (flags & LEASE_BREAK_DELEG)
+		return FL_DELEG;
+	else if (flags & LEASE_BREAK_LAYOUT)
+		return FL_LAYOUT;
+	else
+		return 0;
+
+}
+
+static struct file_lease *
+first_visible_lease(struct inode *inode, struct file_lease *new_fl, unsigned int flags)
+{
+	struct file_lock_context *ctx = locks_inode_context(inode);
+	struct file_lease *fl;
 
 	lockdep_assert_held(&ctx->flc_lock);
 
-	list_for_each_entry(flc, &ctx->flc_lease, flc_list) {
-		if (leases_conflict(flc, &breaker->c))
-			return true;
+	list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
+		if (!leases_conflict(&fl->c, &new_fl->c))
+			continue;
+		if (S_ISDIR(inode->i_mode) && ignore_dir_deleg_break(fl, flags))
+			continue;
+		return fl;
 	}
-	return false;
+	return NULL;
 }
 
+
 /**
- *	__break_lease	-	revoke all outstanding leases on file
- *	@inode: the inode of the file to return
- *	@flags: LEASE_BREAK_* flags
+ * __break_lease	-	revoke all outstanding leases on file
+ * @inode: the inode of the file to return
+ * @flags: LEASE_BREAK_* flags
  *
- *	break_lease (inlined for speed) has checked there already is at least
- *	some kind of lock (maybe a lease) on this file.  Leases are broken on
- *	a call to open() or truncate().  This function can block waiting for the
- *	lease break unless you specify LEASE_BREAK_NONBLOCK.
+ * break_lease (inlined for speed) has checked there already is at least
+ * some kind of lock (maybe a lease) on this file. Leases and Delegations
+ * are broken on a call to open() or truncate(). Delegations are also
+ * broken on any event that would change the ctime. Directory delegations
+ * are broken whenever the directory changes (unless the delegation is set
+ * up to ignore the event). This function can block waiting for the lease
+ * break unless you specify LEASE_BREAK_NONBLOCK.
  */
 int __break_lease(struct inode *inode, unsigned int flags)
 {
@@ -1617,13 +1651,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
 	bool want_write = !(flags & LEASE_BREAK_OPEN_RDONLY);
 	int error = 0;
 
-	if (flags & LEASE_BREAK_LEASE)
-		type = FL_LEASE;
-	else if (flags & LEASE_BREAK_DELEG)
-		type = FL_DELEG;
-	else if (flags & LEASE_BREAK_LAYOUT)
-		type = FL_LAYOUT;
-	else
+	type = break_lease_flags_to_type(flags);
+	if (!type)
 		return -EINVAL;
 
 	new_fl = lease_alloc(NULL, type, want_write ? F_WRLCK : F_RDLCK);
@@ -1642,7 +1671,7 @@ int __break_lease(struct inode *inode, unsigned int flags)
 
 	time_out_leases(inode, &dispose);
 
-	if (!any_leases_conflict(inode, new_fl))
+	if (!first_visible_lease(inode, new_fl, flags))
 		goto out;
 
 	break_time = 0;
@@ -1655,6 +1684,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
 	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) {
 		if (!leases_conflict(&fl->c, &new_fl->c))
 			continue;
+		if (S_ISDIR(inode->i_mode) && ignore_dir_deleg_break(fl, flags))
+			continue;
 		if (want_write) {
 			if (fl->c.flc_flags & FL_UNLOCK_PENDING)
 				continue;
@@ -1670,7 +1701,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
 			locks_delete_lock_ctx(&fl->c, &dispose);
 	}
 
-	if (list_empty(&ctx->flc_lease))
+	fl = first_visible_lease(inode, new_fl, flags);
+	if (!fl)
 		goto out;
 
 	if (flags & LEASE_BREAK_NONBLOCK) {
@@ -1680,7 +1712,6 @@ int __break_lease(struct inode *inode, unsigned int flags)
 	}
 
 restart:
-	fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
 	break_time = fl->fl_break_time;
 	if (break_time != 0) {
 		if (time_after(jiffies, break_time)) {
@@ -1711,7 +1742,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
 		 */
 		if (error == 0)
 			time_out_leases(inode, &dispose);
-		if (any_leases_conflict(inode, new_fl))
+		fl = first_visible_lease(inode, new_fl, flags);
+		if (fl)
 			goto restart;
 		error = 0;
 	}
diff --git a/fs/namei.c b/fs/namei.c
index 9e5500dad14f..e3cbd9f877bd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4176,7 +4176,7 @@ int vfs_create(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode,
 	error = security_inode_create(dir, dentry, mode);
 	if (error)
 		return error;
-	error = try_break_deleg(dir, di);
+	error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, di);
 	if (error)
 		return error;
 	error = dir->i_op->create(idmap, dir, dentry, mode, true);
@@ -4475,7 +4475,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
 	/* Negative dentry, just create the file */
 	if (!dentry->d_inode && (open_flag & O_CREAT)) {
 		/* but break the directory lease first! */
-		error = try_break_deleg(dir_inode, delegated_inode);
+		error = try_break_deleg(dir_inode, LEASE_BREAK_DIR_CREATE, delegated_inode);
 		if (error)
 			goto out_dput;
 
@@ -5091,7 +5091,7 @@ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	if (error)
 		return error;
 
-	error = try_break_deleg(dir, delegated_inode);
+	error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode);
 	if (error)
 		return error;
 
@@ -5232,7 +5232,7 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 	if (max_links && dir->i_nlink >= max_links)
 		goto err;
 
-	error = try_break_deleg(dir, delegated_inode);
+	error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode);
 	if (error)
 		goto err;
 
@@ -5337,7 +5337,7 @@ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
 	if (error)
 		goto out;
 
-	error = try_break_deleg(dir, delegated_inode);
+	error = try_break_deleg(dir, LEASE_BREAK_DIR_DELETE, delegated_inode);
 	if (error)
 		goto out;
 
@@ -5467,10 +5467,10 @@ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
 	else {
 		error = security_inode_unlink(dir, dentry);
 		if (!error) {
-			error = try_break_deleg(dir, delegated_inode);
+			error = try_break_deleg(dir, LEASE_BREAK_DIR_DELETE, delegated_inode);
 			if (error)
 				goto out;
-			error = try_break_deleg(target, delegated_inode);
+			error = try_break_deleg(target, 0, delegated_inode);
 			if (error)
 				goto out;
 			error = dir->i_op->unlink(dir, dentry);
@@ -5614,7 +5614,7 @@ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	if (error)
 		return error;
 
-	error = try_break_deleg(dir, delegated_inode);
+	error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode);
 	if (error)
 		return error;
 
@@ -5745,9 +5745,9 @@ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
 	else if (max_links && inode->i_nlink >= max_links)
 		error = -EMLINK;
 	else {
-		error = try_break_deleg(dir, delegated_inode);
+		error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode);
 		if (!error)
-			error = try_break_deleg(inode, delegated_inode);
+			error = try_break_deleg(inode, 0, delegated_inode);
 		if (!error)
 			error = dir->i_op->link(old_dentry, dir, new_dentry);
 	}
@@ -6011,21 +6011,24 @@ int vfs_rename(struct renamedata *rd)
 		    old_dir->i_nlink >= max_links)
 			goto out;
 	}
-	error = try_break_deleg(old_dir, delegated_inode);
+	error = try_break_deleg(old_dir,
+				old_dir == new_dir ? LEASE_BREAK_DIR_RENAME :
+						     LEASE_BREAK_DIR_DELETE,
+				delegated_inode);
 	if (error)
 		goto out;
 	if (new_dir != old_dir) {
-		error = try_break_deleg(new_dir, delegated_inode);
+		error = try_break_deleg(new_dir, LEASE_BREAK_DIR_CREATE, delegated_inode);
 		if (error)
 			goto out;
 	}
 	if (!is_dir) {
-		error = try_break_deleg(source, delegated_inode);
+		error = try_break_deleg(source, 0, delegated_inode);
 		if (error)
 			goto out;
 	}
 	if (target && !new_is_dir) {
-		error = try_break_deleg(target, delegated_inode);
+		error = try_break_deleg(target, 0, delegated_inode);
 		if (error)
 			goto out;
 	}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 12591c95c925..b4bfe4ddf64e 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -1126,7 +1126,7 @@ int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 	if (error)
 		goto out_inode_unlock;
 
-	error = try_break_deleg(inode, &delegated_inode);
+	error = try_break_deleg(inode, 0, &delegated_inode);
 	if (error)
 		goto out_inode_unlock;
 
@@ -1234,7 +1234,7 @@ int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 	if (error)
 		goto out_inode_unlock;
 
-	error = try_break_deleg(inode, &delegated_inode);
+	error = try_break_deleg(inode, 0, &delegated_inode);
 	if (error)
 		goto out_inode_unlock;
 
diff --git a/fs/xattr.c b/fs/xattr.c
index 3e49e612e1ba..6b67a6e76eeb 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -288,7 +288,7 @@ __vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry,
 	if (error)
 		goto out;
 
-	error = try_break_deleg(inode, delegated_inode);
+	error = try_break_deleg(inode, 0, delegated_inode);
 	if (error)
 		goto out;
 
@@ -546,7 +546,7 @@ __vfs_removexattr_locked(struct mnt_idmap *idmap,
 	if (error)
 		goto out;
 
-	error = try_break_deleg(inode, delegated_inode);
+	error = try_break_deleg(inode, 0, delegated_inode);
 	if (error)
 		goto out;
 
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index 5f0a2fb31450..9dd4e67a6f30 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -4,19 +4,22 @@
 
 #include <linux/fs.h>
 
-#define FL_POSIX	1
-#define FL_FLOCK	2
-#define FL_DELEG	4	/* NFSv4 delegation */
-#define FL_ACCESS	8	/* not trying to lock, just looking */
-#define FL_EXISTS	16	/* when unlocking, test for existence */
-#define FL_LEASE	32	/* lease held on this file */
-#define FL_CLOSE	64	/* unlock on close */
-#define FL_SLEEP	128	/* A blocking lock */
-#define FL_DOWNGRADE_PENDING	256 /* Lease is being downgraded */
-#define FL_UNLOCK_PENDING	512 /* Lease is being broken */
-#define FL_OFDLCK	1024	/* lock is "owned" by struct file */
-#define FL_LAYOUT	2048	/* outstanding pNFS layout */
-#define FL_RECLAIM	4096	/* reclaiming from a reboot server */
+#define FL_POSIX		BIT(0)	/* POSIX lock */
+#define FL_FLOCK		BIT(1)	/* BSD lock */
+#define FL_DELEG		BIT(2)	/* NFSv4 delegation */
+#define FL_ACCESS		BIT(3)	/* not trying to lock, just looking */
+#define FL_EXISTS		BIT(4)	/* when unlocking, test for existence */
+#define FL_LEASE		BIT(5)	/* file lease */
+#define FL_CLOSE		BIT(6)	/* unlock on close */
+#define FL_SLEEP		BIT(7)	/* A blocking lock */
+#define FL_DOWNGRADE_PENDING	BIT(8)	/* Lease is being downgraded */
+#define FL_UNLOCK_PENDING	BIT(9)	/* Lease is being broken */
+#define FL_OFDLCK		BIT(10) /* POSIX lock "owned" by struct file */
+#define FL_LAYOUT		BIT(11) /* outstanding pNFS layout */
+#define FL_RECLAIM		BIT(12) /* reclaiming from a rebooted server */
+#define FL_IGN_DIR_CREATE	BIT(13) /* ignore DIR_CREATE events */
+#define FL_IGN_DIR_DELETE	BIT(14) /* ignore DIR_DELETE events */
+#define FL_IGN_DIR_RENAME	BIT(15) /* ignore DIR_RENAME events */
 
 #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
 
@@ -222,6 +225,10 @@ struct file_lease *locks_alloc_lease(void);
 #define LEASE_BREAK_LAYOUT		BIT(2)	// break layouts only
 #define LEASE_BREAK_NONBLOCK		BIT(3)	// non-blocking break
 #define LEASE_BREAK_OPEN_RDONLY		BIT(4)	// readonly open event
+#define LEASE_BREAK_DIR_CREATE		BIT(5)  // dir deleg create event
+#define LEASE_BREAK_DIR_DELETE		BIT(6)  // dir deleg delete event
+#define LEASE_BREAK_DIR_RENAME		BIT(7)  // dir deleg rename event
+
 
 int __break_lease(struct inode *inode, unsigned int flags);
 void lease_get_mtime(struct inode *, struct timespec64 *time);
@@ -516,12 +523,26 @@ static inline bool is_delegated(struct delegated_inode *di)
 	return di->di_inode;
 }
 
-static inline int try_break_deleg(struct inode *inode,
+/**
+ * try_break_deleg - do a non-blocking delegation break
+ * @inode: inode that should have its delegations broken
+ * @flags: extra LEASE_BREAK_* flags to pass to break_deleg()
+ * @di: returns pointer to delegated inode (may be NULL)
+ *
+ * Break delegations in a non-blocking fashion. If there are
+ * outstanding delegations and @di is set, then an extra reference
+ * will be taken on @inode and @di->di_inode will be populated so
+ * that it may be waited upon.
+ *
+ * Return: 0 if there is no need to wait, or an error code otherwise. If
+ * -EWOULDBLOCK is returned, then @di will be populated (if non-NULL).
+ */
+static inline int try_break_deleg(struct inode *inode, unsigned int flags,
 				  struct delegated_inode *di)
 {
 	int ret;
 
-	ret = break_deleg(inode, LEASE_BREAK_NONBLOCK);
+	ret = break_deleg(inode, flags | LEASE_BREAK_NONBLOCK);
 	if (ret == -EWOULDBLOCK && di) {
 		di->di_inode = inode;
 		ihold(inode);
@@ -574,7 +595,7 @@ static inline int break_deleg(struct inode *inode, unsigned int flags)
 	return 0;
 }
 
-static inline int try_break_deleg(struct inode *inode,
+static inline int try_break_deleg(struct inode *inode, unsigned int flags,
 				  struct delegated_inode *delegated_inode)
 {
 	return 0;
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index 370016c38a5b..ef4bb0afb86a 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -28,7 +28,10 @@
 		{ FL_DOWNGRADE_PENDING,	"FL_DOWNGRADE_PENDING" },	\
 		{ FL_UNLOCK_PENDING,	"FL_UNLOCK_PENDING" },		\
 		{ FL_OFDLCK,		"FL_OFDLCK" },			\
-		{ FL_RECLAIM,		"FL_RECLAIM"})
+		{ FL_RECLAIM,		"FL_RECLAIM" },			\
+		{ FL_IGN_DIR_CREATE,	"FL_IGN_DIR_CREATE" },		\
+		{ FL_IGN_DIR_DELETE,	"FL_IGN_DIR_DELETE" },		\
+		{ FL_IGN_DIR_RENAME,	"FL_IGN_DIR_RENAME" })
 
 #define show_fl_type(val)				\
 	__print_symbolic(val,				\

-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 03/28] filelock: add a tracepoint to start of break_lease()
From: Jeff Layton @ 2026-04-16 17:35 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260416-dir-deleg-v2-0-851426a550f6@kernel.org>

...mostly to show the LEASE_BREAK_* flags.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c                      |  2 ++
 include/trace/events/filelock.h | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/fs/locks.c b/fs/locks.c
index 8b5958f34b61..792c3920b33a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1651,6 +1651,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
 	bool want_write = !(flags & LEASE_BREAK_OPEN_RDONLY);
 	int error = 0;
 
+	trace_break_lease(inode, flags);
+
 	type = break_lease_flags_to_type(flags);
 	if (!type)
 		return -EINVAL;
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index ef4bb0afb86a..fff0ee2d452d 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -120,6 +120,39 @@ DEFINE_EVENT(filelock_lock, flock_lock_inode,
 		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
 		TP_ARGS(inode, fl, ret));
 
+#define show_lease_break_flags(val)					\
+	__print_flags(val, "|",						\
+		{ LEASE_BREAK_LEASE,		"LEASE" },		\
+		{ LEASE_BREAK_DELEG,		"DELEG" },		\
+		{ LEASE_BREAK_LAYOUT,		"LAYOUT" },		\
+		{ LEASE_BREAK_NONBLOCK,		"NONBLOCK" },		\
+		{ LEASE_BREAK_OPEN_RDONLY,	"OPEN_RDONLY" },	\
+		{ LEASE_BREAK_DIR_CREATE,	"DIR_CREATE" },		\
+		{ LEASE_BREAK_DIR_DELETE,	"DIR_DELETE" },		\
+		{ LEASE_BREAK_DIR_RENAME,	"DIR_RENAME" })
+
+TRACE_EVENT(break_lease,
+	TP_PROTO(struct inode *inode, unsigned int flags),
+
+	TP_ARGS(inode, flags),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, i_ino)
+		__field(dev_t, s_dev)
+		__field(unsigned int, flags) /* LEASE_BREAK_* bits as passed to __break_lease() */
+	),
+
+	TP_fast_assign(
+		__entry->s_dev = inode->i_sb->s_dev;
+		__entry->i_ino = inode->i_ino;
+		__entry->flags = flags;
+	),
+
+	TP_printk("dev=0x%x:0x%x ino=0x%lx flags=%s",
+		  MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		  __entry->i_ino, show_lease_break_flags(__entry->flags)) /* flags rendered via the '|'-separated name table */
+);
+
 DECLARE_EVENT_CLASS(filelock_lease,
 	TP_PROTO(struct inode *inode, struct file_lease *fl),
 

-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 04/28] filelock: add an inode_lease_ignore_mask helper
From: Jeff Layton @ 2026-04-16 17:35 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260416-dir-deleg-v2-0-851426a550f6@kernel.org>

Add a new routine that returns a mask of all dir change events that are
currently ignored by any leases. nfsd will use this to determine how to
configure the fsnotify_mark mask.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c               | 32 ++++++++++++++++++++++++++++++++
 include/linux/filelock.h |  1 +
 2 files changed, 33 insertions(+)

diff --git a/fs/locks.c b/fs/locks.c
index 792c3920b33a..61f64b261282 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1582,6 +1582,38 @@ static bool leases_conflict(struct file_lock_core *lc, struct file_lock_core *bc
 	return rc;
 }
 
+#define IGNORE_MASK	(FL_IGN_DIR_CREATE | FL_IGN_DIR_DELETE | FL_IGN_DIR_RENAME)
+
+/**
+ * inode_lease_ignore_mask - return union of all ignored dir events for this inode
+ * @inode: inode whose leases are examined
+ *
+ * Walk the lease list under flc_lock, OR-ing together the FL_IGN_DIR_* bits.
+ * Return: the combined mask; 0 if @inode has no lock context or no bits are set.
+ */
+u32
+inode_lease_ignore_mask(struct inode *inode)
+{
+	struct file_lock_context *ctx;
+	struct file_lock_core *flc;
+	u32 mask = 0;
+
+	ctx = locks_inode_context(inode);
+	if (!ctx)
+		return 0; /* no lock context, so there is no lease list to walk */
+
+	spin_lock(&ctx->flc_lock);
+	list_for_each_entry(flc, &ctx->flc_lease, flc_list) {
+		mask |= flc->flc_flags & IGNORE_MASK;
+		/* Every bit in IGNORE_MASK is already set; later leases add nothing */
+		if (mask == IGNORE_MASK)
+			break;
+	}
+	spin_unlock(&ctx->flc_lock);
+	return mask;
+}
+EXPORT_SYMBOL_GPL(inode_lease_ignore_mask);
+
 static bool
 ignore_dir_deleg_break(struct file_lease *fl, unsigned int flags)
 {
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index 9dd4e67a6f30..6e125902c58a 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -236,6 +236,7 @@ int generic_setlease(struct file *, int, struct file_lease **, void **priv);
 int kernel_setlease(struct file *, int, struct file_lease **, void **);
 int vfs_setlease(struct file *, int, struct file_lease **, void **);
 int lease_modify(struct file_lease *, int, struct list_head *);
+u32 inode_lease_ignore_mask(struct inode *inode);
 
 struct notifier_block;
 int lease_register_notifier(struct notifier_block *);

-- 
2.53.0


^ permalink raw reply related

* [PATCH v2 05/28] fsnotify: new tracepoint in fsnotify()
From: Jeff Layton @ 2026-04-16 17:35 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260416-dir-deleg-v2-0-851426a550f6@kernel.org>

Add a tracepoint so we can see exactly how this is being called.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/notify/fsnotify.c            |  5 ++++
 include/trace/events/fsnotify.h | 51 +++++++++++++++++++++++++++++++++++++++++
 include/trace/misc/fsnotify.h   | 35 ++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+)

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 9995de1710e5..5448738635f6 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -14,6 +14,9 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/fsnotify.h>
+
 /*
  * Clear all of the marks on an inode when it is being evicted from core
  */
@@ -504,6 +507,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 	int ret = 0;
 	__u32 test_mask, marks_mask = 0;
 
+	trace_fsnotify(mask, data, data_type, dir, file_name, inode, cookie);
+
 	if (path)
 		mnt = real_mount(path->mnt);
 
diff --git a/include/trace/events/fsnotify.h b/include/trace/events/fsnotify.h
new file mode 100644
index 000000000000..341bbd57a39b
--- /dev/null
+++ b/include/trace/events/fsnotify.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fsnotify
+
+#if !defined(_TRACE_FSNOTIFY_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FSNOTIFY_H
+
+#include <linux/tracepoint.h>
+
+#include <trace/misc/fsnotify.h>
+
+TRACE_EVENT(fsnotify,
+	TP_PROTO(__u32 mask, const void *data, int data_type,
+		 struct inode *dir, const struct qstr *file_name,
+		 struct inode *inode, u32 cookie),
+
+	TP_ARGS(mask, data, data_type, dir, file_name, inode, cookie),
+
+	TP_STRUCT__entry(
+		__field(__u32, mask)
+		__field(unsigned long, dir_ino)
+		__field(unsigned long, ino)
+		__field(dev_t, s_dev)
+		__field(int, data_type)
+		__field(u32, cookie)
+		__string(file_name, file_name ? (const char *)file_name->name : "") /* empty string when no name is passed */
+	),
+
+	TP_fast_assign(
+		__entry->mask = mask;
+		__entry->dir_ino = dir ? dir->i_ino : 0; /* 0 when dir is NULL */
+		__entry->ino = inode ? inode->i_ino : 0; /* 0 when inode is NULL */
+		__entry->s_dev = dir ? dir->i_sb->s_dev : /* device comes from dir if present, */
+				 inode ? inode->i_sb->s_dev : 0; /* else from inode, else 0 */
+		__entry->data_type = data_type;
+		__entry->cookie = cookie;
+		__assign_str(file_name);
+	),
+
+	TP_printk("dev=%d:%d dir=%lu ino=%lu data_type=%d cookie=0x%x mask=0x%x %s name=%s",
+		  MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+		  __entry->dir_ino, __entry->ino,
+		  __entry->data_type, __entry->cookie,
+		  __entry->mask, show_fsnotify_mask(__entry->mask), /* mask shown both raw and decoded */
+		  __get_str(file_name))
+);
+
+#endif /* _TRACE_FSNOTIFY_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/misc/fsnotify.h b/include/trace/misc/fsnotify.h
new file mode 100644
index 000000000000..a201e1bd6d8c
--- /dev/null
+++ b/include/trace/misc/fsnotify.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Display helpers for fsnotify events
+ */
+
+#include <linux/fsnotify_backend.h>
+
+#define show_fsnotify_mask(mask) \
+	__print_flags(mask, "|", \
+		{ FS_ACCESS,		"ACCESS" }, \
+		{ FS_MODIFY,		"MODIFY" }, \
+		{ FS_ATTRIB,		"ATTRIB" }, \
+		{ FS_CLOSE_WRITE,	"CLOSE_WRITE" }, \
+		{ FS_CLOSE_NOWRITE,	"CLOSE_NOWRITE" }, \
+		{ FS_OPEN,		"OPEN" }, \
+		{ FS_MOVED_FROM,	"MOVED_FROM" }, \
+		{ FS_MOVED_TO,		"MOVED_TO" }, \
+		{ FS_CREATE,		"CREATE" }, \
+		{ FS_DELETE,		"DELETE" }, \
+		{ FS_DELETE_SELF,	"DELETE_SELF" }, \
+		{ FS_MOVE_SELF,		"MOVE_SELF" }, \
+		{ FS_OPEN_EXEC,		"OPEN_EXEC" }, \
+		{ FS_UNMOUNT,		"UNMOUNT" }, \
+		{ FS_Q_OVERFLOW,	"Q_OVERFLOW" }, \
+		{ FS_ERROR,		"ERROR" }, \
+		{ FS_OPEN_PERM,		"OPEN_PERM" }, \
+		{ FS_ACCESS_PERM,	"ACCESS_PERM" }, \
+		{ FS_OPEN_EXEC_PERM,	"OPEN_EXEC_PERM" }, \
+		{ FS_PRE_ACCESS,	"PRE_ACCESS" }, \
+		{ FS_MNT_ATTACH,	"MNT_ATTACH" }, \
+		{ FS_MNT_DETACH,	"MNT_DETACH" }, \
+		{ FS_EVENT_ON_CHILD,	"EVENT_ON_CHILD" }, \
+		{ FS_RENAME,		"RENAME" }, \
+		{ FS_DN_MULTISHOT,	"DN_MULTISHOT" }, \
+		{ FS_ISDIR,		"ISDIR" })

-- 
2.53.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox