Linux Documentation

Linux Documentation
 help / color / mirror / Atom feed

* [PATCH v3 2/3] hwmon: pmbus: Add support for Silergy SQ24860
From: Ziming Zhu @ 2026-06-11  7:43 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Rob Herring, Krzysztof Kozlowski, Conor Dooley, Jonathan Corbet,
	Shuah Khan, linux-hwmon, devicetree, linux-kernel, linux-doc,
	Ziming Zhu
In-Reply-To: <20260611074335.4415-1-zmzhu0630@163.com>

From: Ziming Zhu <ziming.zhu@silergycorp.com>

Add PMBus hwmon support for the Silergy SQ24860 eFuse.

The driver reports input voltage, output voltage, auxiliary voltage,
input current, input power, and temperature. It also exposes peak,
average, and minimum history attributes, sample count configuration,
and maps the manufacturer-specific VIREF register to the generic input
over-current fault limit attribute.

The IMON resistor value is read from the silergy,rimon-micro-ohms device
property and used to configure the input current calibration gain.

Signed-off-by: Ziming Zhu <ziming.zhu@silergycorp.com>
---
 drivers/hwmon/pmbus/Kconfig   |  19 ++
 drivers/hwmon/pmbus/Makefile  |   1 +
 drivers/hwmon/pmbus/sq24860.c | 430 ++++++++++++++++++++++++++++++++++
 3 files changed, 450 insertions(+)
 create mode 100644 drivers/hwmon/pmbus/sq24860.c

diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 8f4bff375ecb..a905b5af137c 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -612,6 +612,25 @@ config SENSORS_STEF48H28
 	  This driver can also be built as a module. If so, the module will
 	  be called stef48h28.
 
+config SENSORS_SQ24860
+	tristate "Silergy SQ24860"
+	help
+	  If you say yes here you get hardware monitoring support for Silergy
+	  SQ24860 eFuse.
+
+	  This driver can also be built as a module. If so, the module will
+	  be called sq24860.
+
+config SENSORS_SQ24860_REGULATOR
+	bool "Regulator support for SQ24860"
+	depends on SENSORS_SQ24860 && REGULATOR
+	default SENSORS_SQ24860
+	help
+	  If you say yes here you get regulator support for Silergy SQ24860.
+	  The regulator is registered through the PMBus regulator framework and
+	  can be used to control the output exposed by the device.
+	  This option is only useful if regulator framework support is needed.
+
 config SENSORS_STPDDC60
 	tristate "ST STPDDC60"
 	help
diff --git a/drivers/hwmon/pmbus/Makefile b/drivers/hwmon/pmbus/Makefile
index 7129b62bc00f..86bc93c6c091 100644
--- a/drivers/hwmon/pmbus/Makefile
+++ b/drivers/hwmon/pmbus/Makefile
@@ -60,6 +60,7 @@ obj-$(CONFIG_SENSORS_PM6764TR)	+= pm6764tr.o
 obj-$(CONFIG_SENSORS_PXE1610)	+= pxe1610.o
 obj-$(CONFIG_SENSORS_Q54SJ108A2)	+= q54sj108a2.o
 obj-$(CONFIG_SENSORS_STEF48H28)	+= stef48h28.o
+obj-$(CONFIG_SENSORS_SQ24860)	+= sq24860.o
 obj-$(CONFIG_SENSORS_STPDDC60)	+= stpddc60.o
 obj-$(CONFIG_SENSORS_TDA38640)	+= tda38640.o
 obj-$(CONFIG_SENSORS_TPS25990)	+= tps25990.o
diff --git a/drivers/hwmon/pmbus/sq24860.c b/drivers/hwmon/pmbus/sq24860.c
new file mode 100644
index 000000000000..f16f650ff7ba
--- /dev/null
+++ b/drivers/hwmon/pmbus/sq24860.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: Ziming Zhu <ziming.zhu@silergycorp.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/math64.h>
+
+#include "pmbus.h"
+
+#define SQ24860_IIN_CAL_GAIN		0x38
+#define SQ24860_READ_VAUX		0xd0
+#define SQ24860_READ_VIN_MIN		0xd1
+#define SQ24860_READ_VIN_PEAK		0xd2
+#define SQ24860_READ_IIN_PEAK		0xd4
+#define SQ24860_READ_PIN_PEAK		0xd5
+#define SQ24860_READ_TEMP_AVG		0xd6
+#define SQ24860_READ_TEMP_PEAK		0xd7
+#define SQ24860_READ_VOUT_MIN		0xda
+#define SQ24860_READ_VIN_AVG		0xdc
+#define SQ24860_READ_VOUT_AVG		0xdd
+#define SQ24860_READ_IIN_AVG		0xde
+#define SQ24860_READ_PIN_AVG		0xdf
+#define SQ24860_VIREF			0xe0
+#define SQ24860_PK_MIN_AVG		0xea
+#define PK_MIN_AVG_RST_PEAK		BIT(7)
+#define PK_MIN_AVG_RST_AVG		BIT(6)
+#define PK_MIN_AVG_RST_MIN		BIT(5)
+#define PK_MIN_AVG_AVG_CNT		GENMASK(2, 0)
+#define SQ24860_MFR_WRITE_PROTECT	0xf8
+#define SQ24860_UNLOCKED		BIT(7)
+
+#define SQ24860_8B_SHIFT		2
+#define SQ24860_IIN_OCF_NUM		1000000
+#define SQ24860_IIN_OCF_DIV		129278
+#define SQ24860_IIN_OCF_OFF		165
+
+#define PK_MIN_AVG_RST_MASK		(PK_MIN_AVG_RST_PEAK | \
+					 PK_MIN_AVG_RST_AVG  | \
+					 PK_MIN_AVG_RST_MIN)
+#define SQ24860_MAX_SAMPLES		BIT(FIELD_MAX(PK_MIN_AVG_AVG_CNT))
+/*
+ * Arbitrary default Rimon value: 1.6kOhm
+ */
+#define SQ24860_DEFAULT_RIMON		1600000000
+#define SQ24860_GIMON			18180
+
+#define SQ24860_VAUX_DIV		20
+
+static int sq24860_write_iin_cal_gain(struct i2c_client *client, u32 rimon)
+{
+	u64 temp = 6400ULL * 1000000000ULL * 1000ULL;
+	u64 denom;
+	u64 word;
+
+	if (!rimon)
+		return -EINVAL;
+
+	denom = (u64)rimon * SQ24860_GIMON;
+	word = div64_u64(temp, denom);
+	if (!word || word > U16_MAX)
+		return -EINVAL;
+
+	return i2c_smbus_write_word_data(client, SQ24860_IIN_CAL_GAIN,
+					(u16)word);
+}
+
+static int sq24860_mfr_write_protect_set(struct i2c_client *client,
+					u8 protect)
+{
+	u8 val;
+
+	switch (protect) {
+	case 0:
+		val = 0xa2;
+		break;
+	case PB_WP_ALL:
+		val = 0x0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return pmbus_write_byte_data(client, -1, SQ24860_MFR_WRITE_PROTECT,
+				     val);
+}
+
+static int sq24860_mfr_write_protect_get(struct i2c_client *client)
+{
+	int ret = pmbus_read_byte_data(client, -1, SQ24860_MFR_WRITE_PROTECT);
+
+	if (ret < 0)
+		return ret;
+
+	return (ret & SQ24860_UNLOCKED) ? 0 : PB_WP_ALL;
+}
+
+static int sq24860_read_word_data(struct i2c_client *client,
+				   int page, int phase, int reg)
+{
+	int ret;
+
+	switch (reg) {
+	case PMBUS_VIRT_READ_VIN_MAX:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_VIN_PEAK);
+		break;
+
+	case PMBUS_VIRT_READ_VIN_MIN:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_VIN_MIN);
+		break;
+
+	case PMBUS_VIRT_READ_VIN_AVG:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_VIN_AVG);
+		break;
+
+	case PMBUS_VIRT_READ_VOUT_MIN:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_VOUT_MIN);
+		break;
+
+	case PMBUS_VIRT_READ_VOUT_AVG:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_VOUT_AVG);
+		break;
+
+	case PMBUS_VIRT_READ_IIN_AVG:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_IIN_AVG);
+		break;
+
+	case PMBUS_VIRT_READ_IIN_MAX:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_IIN_PEAK);
+		break;
+
+	case PMBUS_VIRT_READ_TEMP_AVG:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_TEMP_AVG);
+		break;
+
+	case PMBUS_VIRT_READ_TEMP_MAX:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_TEMP_PEAK);
+		break;
+
+	case PMBUS_VIRT_READ_PIN_AVG:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_PIN_AVG);
+		break;
+
+	case PMBUS_VIRT_READ_PIN_MAX:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_PIN_PEAK);
+		break;
+
+	case PMBUS_VIRT_READ_VMON:
+		ret = pmbus_read_word_data(client, page, phase,
+					   SQ24860_READ_VAUX);
+		if (ret < 0)
+			break;
+		ret = DIV_ROUND_CLOSEST(ret, SQ24860_VAUX_DIV);
+		break;
+
+	case PMBUS_VIN_UV_WARN_LIMIT:
+	case PMBUS_VIN_UV_FAULT_LIMIT:
+	case PMBUS_VIN_OV_WARN_LIMIT:
+	case PMBUS_VIN_OV_FAULT_LIMIT:
+	case PMBUS_VOUT_UV_WARN_LIMIT:
+	case PMBUS_IIN_OC_WARN_LIMIT:
+	case PMBUS_OT_WARN_LIMIT:
+	case PMBUS_OT_FAULT_LIMIT:
+	case PMBUS_PIN_OP_WARN_LIMIT:
+		/*
+		 * These registers provide an 8-bit value instead of a
+		 * 10-bit one. Shifting the register value left by two
+		 * bits is enough to make the sensor type conversion work,
+		 * even if the datasheet provides different m, b and R for
+		 * those.
+		 */
+		ret = pmbus_read_word_data(client, page, phase, reg);
+		if (ret < 0)
+			break;
+		ret <<= SQ24860_8B_SHIFT;
+		break;
+
+	case PMBUS_IIN_OC_FAULT_LIMIT:
+		/*
+		 * VIREF directly sets the over-current limit at which the eFuse
+		 * will turn the FET off and trigger a fault. Expose it through
+		 * this generic property instead of a manufacturer specific one.
+		 */
+		ret = pmbus_read_byte_data(client, page, SQ24860_VIREF);
+		if (ret < 0)
+			break;
+		ret = DIV_ROUND_CLOSEST(ret * SQ24860_IIN_OCF_NUM,
+					SQ24860_IIN_OCF_DIV);
+		ret += SQ24860_IIN_OCF_OFF;
+		break;
+
+	case PMBUS_VIRT_SAMPLES:
+		ret = pmbus_read_byte_data(client, page, SQ24860_PK_MIN_AVG);
+		if (ret < 0)
+			break;
+		ret = BIT(FIELD_GET(PK_MIN_AVG_AVG_CNT, ret));
+		break;
+
+	case PMBUS_VIRT_RESET_TEMP_HISTORY:
+	case PMBUS_VIRT_RESET_VIN_HISTORY:
+	case PMBUS_VIRT_RESET_IIN_HISTORY:
+	case PMBUS_VIRT_RESET_PIN_HISTORY:
+	case PMBUS_VIRT_RESET_VOUT_HISTORY:
+		ret = 0;
+		break;
+
+	default:
+		ret = -ENODATA;
+		break;
+	}
+
+	return ret;
+}
+
+static int sq24860_write_word_data(struct i2c_client *client,
+				    int page, int reg, u16 value)
+{
+	int ret;
+
+	switch (reg) {
+	case PMBUS_VIN_UV_WARN_LIMIT:
+	case PMBUS_VIN_UV_FAULT_LIMIT:
+	case PMBUS_VIN_OV_WARN_LIMIT:
+	case PMBUS_VIN_OV_FAULT_LIMIT:
+	case PMBUS_VOUT_UV_WARN_LIMIT:
+	case PMBUS_IIN_OC_WARN_LIMIT:
+	case PMBUS_OT_WARN_LIMIT:
+	case PMBUS_OT_FAULT_LIMIT:
+	case PMBUS_PIN_OP_WARN_LIMIT:
+		value >>= SQ24860_8B_SHIFT;
+		value = clamp_val(value, 0, 0xff);
+		ret = pmbus_write_word_data(client, page, reg, value);
+		break;
+
+	case PMBUS_IIN_OC_FAULT_LIMIT:
+		if (value < SQ24860_IIN_OCF_OFF)
+			return -EINVAL;
+		value -= SQ24860_IIN_OCF_OFF;
+		value = DIV_ROUND_CLOSEST(((unsigned int)value) * SQ24860_IIN_OCF_DIV,
+					  SQ24860_IIN_OCF_NUM);
+		value = clamp_val(value, 0, 0x3f);
+		ret = pmbus_write_byte_data(client, page, SQ24860_VIREF, value);
+		break;
+
+	case PMBUS_VIRT_SAMPLES:
+		value = clamp_val(value, 1, SQ24860_MAX_SAMPLES);
+		value = ilog2(value);
+		ret = pmbus_update_byte_data(client, page, SQ24860_PK_MIN_AVG,
+					     PK_MIN_AVG_AVG_CNT,
+					     FIELD_PREP(PK_MIN_AVG_AVG_CNT, value));
+		break;
+
+	case PMBUS_VIRT_RESET_TEMP_HISTORY:
+	case PMBUS_VIRT_RESET_VIN_HISTORY:
+	case PMBUS_VIRT_RESET_IIN_HISTORY:
+	case PMBUS_VIRT_RESET_PIN_HISTORY:
+	case PMBUS_VIRT_RESET_VOUT_HISTORY:
+		/*
+		 * SQ24860 has history resets based on MIN/AVG/PEAK instead of per
+		 * sensor type. Exposing this quirk in hwmon is not desirable so
+		 * reset MIN, AVG and PEAK together. Even if there is effectively
+		 * only one reset, which resets everything, expose the 5 entries so
+		 * userspace is not required to map one sensor type to another to
+		 * trigger a reset.
+		 */
+		ret = pmbus_update_byte_data(client, 0, SQ24860_PK_MIN_AVG,
+					     PK_MIN_AVG_RST_MASK,
+					     PK_MIN_AVG_RST_MASK);
+		break;
+
+	default:
+		ret = -ENODATA;
+		break;
+	}
+
+	return ret;
+}
+
+static int sq24860_read_byte_data(struct i2c_client *client,
+				   int page, int reg)
+{
+	int ret;
+
+	switch (reg) {
+	case PMBUS_WRITE_PROTECT:
+		ret = sq24860_mfr_write_protect_get(client);
+		break;
+
+	default:
+		ret = -ENODATA;
+		break;
+	}
+
+	return ret;
+}
+
+static int sq24860_write_byte_data(struct i2c_client *client,
+				    int page, int reg, u8 byte)
+{
+	int ret;
+
+	switch (reg) {
+	case PMBUS_WRITE_PROTECT:
+		ret = sq24860_mfr_write_protect_set(client, byte);
+		break;
+
+	default:
+		ret = -ENODATA;
+		break;
+	}
+
+	return ret;
+}
+
+#if IS_ENABLED(CONFIG_SENSORS_SQ24860_REGULATOR)
+static const struct regulator_desc sq24860_reg_desc[] = {
+	PMBUS_REGULATOR_ONE_NODE("vout"),
+};
+#endif
+
+static const struct pmbus_driver_info sq24860_base_info = {
+	.pages = 1,
+	.format[PSC_VOLTAGE_IN] = direct,
+	.m[PSC_VOLTAGE_IN] = 64,
+	.b[PSC_VOLTAGE_IN] = 0,
+	.R[PSC_VOLTAGE_IN] = 0,
+	.format[PSC_VOLTAGE_OUT] = direct,
+	.m[PSC_VOLTAGE_OUT] = 64,
+	.b[PSC_VOLTAGE_OUT] = 0,
+	.R[PSC_VOLTAGE_OUT] = 0,
+	.format[PSC_TEMPERATURE] = direct,
+	.m[PSC_TEMPERATURE] = 1,
+	.b[PSC_TEMPERATURE] = 0,
+	.R[PSC_TEMPERATURE] = 0,
+/*
+ * Current and power measurements depend on the calibration gain
+ * programmed from the board-specific IMON resistor value.
+ */
+	.format[PSC_CURRENT_IN] = direct,
+	.m[PSC_CURRENT_IN] = 16,
+	.b[PSC_CURRENT_IN] = 0,
+	.R[PSC_CURRENT_IN] = 0,
+	.format[PSC_POWER] = direct,
+	.m[PSC_POWER] = 2,
+	.b[PSC_POWER] = 0,
+	.R[PSC_POWER] = 0,
+	.func[0] = PMBUS_HAVE_VIN |
+		   PMBUS_HAVE_VOUT |
+		   PMBUS_HAVE_VMON |
+		   PMBUS_HAVE_IIN |
+		   PMBUS_HAVE_PIN |
+		   PMBUS_HAVE_TEMP |
+		   PMBUS_HAVE_STATUS_VOUT |
+		   PMBUS_HAVE_STATUS_IOUT |
+		   PMBUS_HAVE_STATUS_INPUT |
+		   PMBUS_HAVE_STATUS_TEMP |
+		   PMBUS_HAVE_SAMPLES,
+	.read_word_data = sq24860_read_word_data,
+	.write_word_data = sq24860_write_word_data,
+	.read_byte_data = sq24860_read_byte_data,
+	.write_byte_data = sq24860_write_byte_data,
+
+#if IS_ENABLED(CONFIG_SENSORS_SQ24860_REGULATOR)
+	.reg_desc = sq24860_reg_desc,
+	.num_regulators = ARRAY_SIZE(sq24860_reg_desc),
+#endif
+};
+
+static const struct i2c_device_id sq24860_i2c_id[] = {
+	{ "sq24860" },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, sq24860_i2c_id);
+
+static const struct of_device_id sq24860_of_match[] = {
+	{ .compatible = "silergy,sq24860" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, sq24860_of_match);
+
+static int sq24860_probe(struct i2c_client *client)
+{
+	struct device *dev = &client->dev;
+	struct pmbus_driver_info *info;
+	u32 rimon;
+	int ret;
+
+	if (device_property_read_u32(dev, "silergy,rimon-micro-ohms", &rimon))
+		rimon = SQ24860_DEFAULT_RIMON;
+	ret = sq24860_write_iin_cal_gain(client, rimon);
+	if (ret < 0)
+		return dev_err_probe(&client->dev, ret,
+					     "Failed to set gain\n");
+	info = devm_kmemdup(dev, &sq24860_base_info, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	return pmbus_do_probe(client, info);
+}
+
+static struct i2c_driver sq24860_driver = {
+	.driver = {
+		.name = "sq24860",
+		.of_match_table = sq24860_of_match,
+	},
+	.probe = sq24860_probe,
+	.id_table = sq24860_i2c_id,
+};
+module_i2c_driver(sq24860_driver);
+
+MODULE_AUTHOR("Ziming Zhu <ziming.zhu@silergycorp.com>");
+MODULE_DESCRIPTION("PMBUS driver for SQ24860 eFuse");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS("PMBUS");
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 0/3] Add Silergy SQ24860 support
From: Ziming Zhu @ 2026-06-11  7:43 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Rob Herring, Krzysztof Kozlowski, Conor Dooley, Jonathan Corbet,
	Shuah Khan, linux-hwmon, devicetree, linux-kernel, linux-doc,
	Ziming Zhu

From: Ziming Zhu <ziming.zhu@silergycorp.com>

Add devicetree bindings, PMBus hwmon driver support, and documentation
for the Silergy SQ24860 eFuse.

The device provides voltage, current, power, and temperature telemetry.
The driver also supports peak, average, and minimum history reporting,
sample count configuration, and maps the manufacturer-specific VIREF
register to the generic input over-current fault limit attribute.

Changes in v3:
- fix remaining checkpatch issues in the SQ24860 driver
- use C comments consistently in the driver
- drop unused header files
- make GIMON a constant in the gain calculation helper
- use proper 64-bit division for the calibration gain calculation
- return -EINVAL when the calculated gain does not fit
- reject PMBUS_IIN_OC_FAULT_LIMIT values outside the hardware range
- treat malformed silergy,rimon-micro-ohms as an error
- sort sq24860 correctly in Documentation/hwmon/index.rst

Ziming Zhu (3):
  dt-bindings: hwmon: pmbus: Add bindings for Silergy SQ24860
  hwmon: pmbus: Add support for Silergy SQ24860
  hwmon: Add documentation for SQ24860

 .../bindings/hwmon/pmbus/silergy,sq24860.yaml |  74 +++
 Documentation/hwmon/index.rst                 |   1 +
 Documentation/hwmon/sq24860.rst               |  96 ++++
 drivers/hwmon/pmbus/Kconfig                   |  19 +
 drivers/hwmon/pmbus/Makefile                  |   1 +
 drivers/hwmon/pmbus/sq24860.c                 | 430 ++++++++++++++++++
 6 files changed, 621 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/hwmon/pmbus/silergy,sq24860.yaml
 create mode 100644 Documentation/hwmon/sq24860.rst
 create mode 100644 drivers/hwmon/pmbus/sq24860.c

-- 
2.25.1


^ permalink raw reply

* [PATCH v3 3/3] hwmon: Add documentation for SQ24860
From: Ziming Zhu @ 2026-06-11  7:43 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Rob Herring, Krzysztof Kozlowski, Conor Dooley, Jonathan Corbet,
	Shuah Khan, linux-hwmon, devicetree, linux-kernel, linux-doc,
	Ziming Zhu
In-Reply-To: <20260611074335.4415-1-zmzhu0630@163.com>

From: Ziming Zhu <ziming.zhu@silergycorp.com>

Document the supported sysfs attributes for the Silergy SQ24860 PMBus
hwmon driver.

Signed-off-by: Ziming Zhu <ziming.zhu@silergycorp.com>
---
 Documentation/hwmon/index.rst   |  1 +
 Documentation/hwmon/sq24860.rst | 96 +++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 Documentation/hwmon/sq24860.rst

diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 8b655e5d6b68..6184b88e2095 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -243,6 +243,7 @@ Hardware Monitoring Kernel Drivers
    smsc47m1
    sparx5-temp
    spd5118
+   sq24860
    stpddc60
    surface_fan
    sy7636a-hwmon
diff --git a/Documentation/hwmon/sq24860.rst b/Documentation/hwmon/sq24860.rst
new file mode 100644
index 000000000000..f0182b955d8a
--- /dev/null
+++ b/Documentation/hwmon/sq24860.rst
@@ -0,0 +1,96 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver sq24860
+=====================
+
+Supported chips:
+
+  * Silergy SQ24860
+
+    Prefix: 'sq24860'
+
+Author:
+
+	Ziming Zhu <ziming.zhu@silergycorp.com>
+
+Description
+------------
+
+This driver implements support for the Silergy SQ24860 eFuse. The device is an
+integrated circuit protection and power management device with a PMBus
+interface.
+
+The device supports direct format for reading input voltage, output voltage,
+auxiliary voltage, input current, input power, and temperature.
+
+The current and power measurement scale depends on the resistor connected
+between the IMON pin and ground. The resistor value can be configured with the
+``silergy,rimon-micro-ohms`` device tree property. See
+``Documentation/devicetree/bindings/hwmon/pmbus/silergy,sq24860.yaml`` for details.
+
+Due to the specificities of the chip, all history reset attributes are tied
+together. Resetting the history of one sensor resets the history of all sensors.
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits, ``samples`` and the
+``*_reset_history`` attributes are writable; all others are read-only.
+
+======================= ======================================================
+in1_label               "vin"
+in1_input               Measured input voltage.
+in1_average             Average measured input voltage.
+in1_min                 Minimum input voltage limit.
+in1_lcrit               Critical low input voltage limit.
+in1_max                 Maximum input voltage limit.
+in1_crit                Critical high input voltage limit.
+in1_min_alarm           Input voltage low warning alarm.
+in1_lcrit_alarm         Input voltage low fault alarm.
+in1_max_alarm           Input voltage high warning alarm.
+in1_crit_alarm          Input voltage high fault alarm.
+in1_highest             Historical maximum input voltage.
+in1_lowest              Historical minimum input voltage.
+in1_reset_history       Write any value to reset history.
+
+in2_label               "vmon"
+in2_input               Measured auxiliary input voltage.
+
+in3_label               "vout1"
+in3_input               Measured output voltage.
+in3_average             Average measured output voltage.
+in3_min                 Minimum output voltage limit.
+in3_min_alarm           Output voltage low alarm.
+in3_lowest              Historical minimum output voltage.
+in3_reset_history       Write any value to reset history.
+
+curr1_label             "iin"
+curr1_input             Measured input current.
+curr1_average           Average measured input current.
+curr1_max               Maximum input current warning limit.
+curr1_crit              Critical input over-current fault limit.
+curr1_max_alarm         Input current warning alarm.
+curr1_crit_alarm        Input over-current fault alarm.
+curr1_highest           Historical maximum input current.
+curr1_reset_history     Write any value to reset history.
+
+power1_label            "pin"
+power1_input            Measured input power.
+power1_average          Average measured input power.
+power1_max              Maximum input power warning limit.
+power1_alarm            Input power warning alarm.
+power1_input_highest    Historical maximum input power.
+power1_reset_history    Write any value to reset history.
+
+temp1_input             Measured temperature.
+temp1_average           Average measured temperature.
+temp1_max               Maximum temperature warning limit.
+temp1_crit              Critical temperature fault limit.
+temp1_max_alarm         Temperature warning alarm.
+temp1_crit_alarm        Temperature fault alarm.
+temp1_highest           Historical maximum temperature.
+temp1_reset_history     Write any value to reset history.
+
+samples                 Number of samples used for average values.
+======================= ======================================================
+
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 1/3] dt-bindings: hwmon: pmbus: Add bindings for Silergy SQ24860
From: Ziming Zhu @ 2026-06-11  7:43 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Rob Herring, Krzysztof Kozlowski, Conor Dooley, Jonathan Corbet,
	Shuah Khan, linux-hwmon, devicetree, linux-kernel, linux-doc,
	Ziming Zhu
In-Reply-To: <20260611074335.4415-1-zmzhu0630@163.com>

From: Ziming Zhu <ziming.zhu@silergycorp.com>

Add devicetree binding documentation for the Silergy SQ24860 eFuse.

The device is a PMBus hardware monitoring device which reports voltage,
current, power, and temperature telemetry. The board-specific IMON
resistor value is described with silergy,rimon-micro-ohms.

Signed-off-by: Ziming Zhu <ziming.zhu@silergycorp.com>
---
 .../bindings/hwmon/pmbus/silergy,sq24860.yaml | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/hwmon/pmbus/silergy,sq24860.yaml

diff --git a/Documentation/devicetree/bindings/hwmon/pmbus/silergy,sq24860.yaml b/Documentation/devicetree/bindings/hwmon/pmbus/silergy,sq24860.yaml
new file mode 100644
index 000000000000..03ef82c11e1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/pmbus/silergy,sq24860.yaml
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+
+$id: http://devicetree.org/schemas/hwmon/pmbus/silergy,sq24860.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Silergy SQ24860 eFuse
+
+maintainers:
+  - Ziming Zhu <ziming.zhu@silergycorp.com>
+
+description:
+  The Silergy SQ24860 is an integrated, high-current circuit protection and
+  power management device with PMBus interface.
+
+properties:
+  compatible:
+    const: silergy,sq24860
+
+  reg:
+    maxItems: 1
+
+  silergy,rimon-micro-ohms:
+    description:
+      Micro-ohms value of the resistance installed between the IMON pin and
+      the ground reference.
+
+  interrupts:
+    description: PMBus SMBAlert interrupt.
+    maxItems: 1
+
+  regulators:
+    type: object
+    description:
+      List of regulators provided by this controller.
+
+    properties:
+      vout:
+        $ref: /schemas/regulator/regulator.yaml#
+        type: object
+        unevaluatedProperties: false
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - silergy,rimon-micro-ohms
+
+additionalProperties: false
+
+examples:
+  - |
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        hw-monitor@40 {
+            compatible = "silergy,sq24860";
+            reg = <0x40>;
+
+            interrupt-parent = <&gpio>;
+            interrupts = <42 8>;
+            silergy,rimon-micro-ohms = <1600000000>;
+
+            regulators {
+                cpu0_vout: vout {
+                    regulator-name = "main_cpu0";
+                };
+            };
+        };
+    };
-- 
2.25.1


^ permalink raw reply related

* Re: [RFC PATCH 0/5] mm/slub: preserve previous object lifetime
From: Harry Yoo @ 2026-06-11  7:19 UTC (permalink / raw)
  To: Pengpeng Hou, Vlastimil Babka, Andrew Morton, linux-mm
  Cc: Hao Li, Christoph Lameter, David Rientjes, Roman Gushchin,
	David Hildenbrand, Lorenzo Stoakes, liam, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jonathan Corbet, Shuah Khan,
	linux-doc, linux-kernel
In-Reply-To: <20260611063926.38111-1-pengpeng@iscas.ac.cn>

[-- Attachment #1.1: Type: text/plain, Size: 1476 bytes --]

Hi Pengpeng,

On 6/11/26 3:39 PM, Pengpeng Hou wrote:
> SLAB_STORE_USER currently stores one allocation track and one free track
> for an object. This is useful, but it loses part of the previous lifetime
> when the object is reused: the new allocation overwrites the allocation
> track, and a later stale free can overwrite the free track.

I'm not sure what you meant by "stale free"; UAF is accessing objects
that have been freed. What makes the free "stale"?

In general, I don't think slab_debug=UP is the right tool to debug
use-after-frees, because slab will never know _when_ the object was
overwritten. It can only tell that somebody has overwritten freed
objects by checking if the object content is POISON_FREE or POISON_END.

KASAN is a better tool to debug use-after-frees, because it can
tell you which kernel code is accessing memory it shouldn't. (It also
quarantines slab objects to avoid immediately reusing the object for
better coverage).

So I have to ask, "Why not use KASAN instead?" before enhancing
slab_debug (neither is intended for production anyway).

> For free-after-reuse bugs, the report can therefore contain the victim
> allocation and the stale free, while the earlier alloc/free pair that
> explains where the stale pointer came from is no longer available.

Again, I'm confused. I have no idea what "free-after-reuse" means.
Objects cannot be reused until they are freed, no?

-- 
Cheers,
Harry / Hyeonggon

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply

* Re: [RFC V2 1/3] lib/vsprintf: Add support for pgtable entries
From: Andy Shevchenko @ 2026-06-11  7:17 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: Usama Arif, linux-mm, Rasmus Villemoes, Sergey Senozhatsky,
	Petr Mladek, Steven Rostedt, Jonathan Corbet, Andrew Morton,
	David Hildenbrand, linux-kernel, linux-doc, David Hildenbrand,
	Lorenzo Stoakes, Andy Whitcroft
In-Reply-To: <919d334b-16a3-4412-82f4-b4cd6a35be0d@arm.com>

On Thu, Jun 11, 2026 at 10:45:01AM +0530, Anshuman Khandual wrote:
> On 10/06/26 4:43 PM, Usama Arif wrote:
> > On Wed, 10 Jun 2026 05:35:43 +0100 Anshuman Khandual <anshuman.khandual@arm.com> wrote:

...

> >> +		static_assert(sizeof(pte_t) == 4 ||
> >> +			      sizeof(pte_t) == 8,
> >> +			      "pte_t size must be 4 or 8 bytes");

Besides occupying too many lines, why are these static asserts hidden here and
not declared in the global space? The broader question is why they are needed at all.

-- 
With Best Regards,
Andy Shevchenko



^ permalink raw reply

* [PATCH v2 0/4] mm: split the file's i_mmap tree for NUMA
From: Huang Shijie @ 2026-06-11  6:18 UTC (permalink / raw)
  To: akpm, viro, brauner, jack, muchun.song, osalvador, david
  Cc: surenb, mjguzik, liam, ljs, vbabka, shakeel.butt, rppt, mhocko,
	corbet, skhan, linux, dinguyen, schuster.simon, James.Bottomley,
	deller, djbw, willy, peterz, mingo, acme, namhyung, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, james.clark,
	mhiramat, oleg, ziy, baolin.wang, npache, ryan.roberts, dev.jain,
	baohua, lance.yang, linmiaohe, nao.horiguchi, jannh, pfalcato,
	riel, harry, will, brian.ruley, rmk+kernel, dave.anglin, linux-mm,
	linux-doc, linux-kernel, linux-arm-kernel, linux-parisc,
	linux-fsdevel, nvdimm, linux-perf-users, linux-trace-kernel,
	zhongyuan, fangbaoshun, yingzhiwei, Huang Shijie

  In NUMA systems, there may be many NUMA nodes and many CPUs.
For example, a Hygon server has 12 NUMA nodes and 384 CPUs.
In the UnixBench tests, there is a test "execl" which tests
the execve system call.

  When we test our server with "./Run -c 384 execl",
the test result is not good enough. The i_mmap locks are heavily contended on
"libc.so" and "ld.so". For example, the i_mmap tree for "libc.so" can have
over 6000 VMAs, and the VMAs can all be on different NUMA nodes.
The insert/remove operations do not run quickly enough.

Patches 1 and 2 hide the direct accesses to i_mmap.
Patch 3 splits the i_mmap into sibling trees, each with a separate lock,
and we get better performance with this patch set on our NUMA server:
    over 400% performance improvement.

I did not test the non-NUMA case, since I do not have such a server.

v1 --> v2:
	Split not only the i_mmap tree, but also the lock.
	v1 : https://lkml.org/lkml/2026/4/13/199

Huang Shijie (4):
  mm: use mapping_mapped to simplify the code
  mm: use get_i_mmap_root to access the file's i_mmap
  mm/fs: split the file's i_mmap tree
  docs/mm: update document for split i_mmap tree

 Documentation/mm/process_addrs.rst |  63 +++++++---
 arch/arm/mm/fault-armv.c           |   3 +-
 arch/arm/mm/flush.c                |   3 +-
 arch/nios2/mm/cacheflush.c         |   3 +-
 arch/parisc/kernel/cache.c         |   4 +-
 fs/Kconfig                         |   8 ++
 fs/dax.c                           |   3 +-
 fs/hugetlbfs/inode.c               |  30 +++--
 fs/inode.c                         |  75 +++++++++++-
 include/linux/fs.h                 | 179 ++++++++++++++++++++++++++++-
 include/linux/mm.h                 |  81 +++++++++++++
 include/linux/mm_types.h           |   3 +
 kernel/events/uprobes.c            |   3 +-
 mm/hugetlb.c                       |   7 +-
 mm/internal.h                      |   3 +-
 mm/khugepaged.c                    |   6 +-
 mm/memory-failure.c                |   8 +-
 mm/memory.c                        |   8 +-
 mm/mmap.c                          |  11 +-
 mm/nommu.c                         |  28 +++--
 mm/pagewalk.c                      |   4 +-
 mm/rmap.c                          |   2 +-
 mm/vma.c                           |  74 +++++++++---
 mm/vma_init.c                      |   3 +
 24 files changed, 534 insertions(+), 78 deletions(-)

-- 
2.53.0



^ permalink raw reply

* [PATCH v2 2/4] mm: use get_i_mmap_root to access the file's i_mmap
From: Huang Shijie @ 2026-06-11  6:18 UTC (permalink / raw)
  To: akpm, viro, brauner, jack, muchun.song, osalvador, david
  Cc: surenb, mjguzik, liam, ljs, vbabka, shakeel.butt, rppt, mhocko,
	corbet, skhan, linux, dinguyen, schuster.simon, James.Bottomley,
	deller, djbw, willy, peterz, mingo, acme, namhyung, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, james.clark,
	mhiramat, oleg, ziy, baolin.wang, npache, ryan.roberts, dev.jain,
	baohua, lance.yang, linmiaohe, nao.horiguchi, jannh, pfalcato,
	riel, harry, will, brian.ruley, rmk+kernel, dave.anglin, linux-mm,
	linux-doc, linux-kernel, linux-arm-kernel, linux-parisc,
	linux-fsdevel, nvdimm, linux-perf-users, linux-trace-kernel,
	zhongyuan, fangbaoshun, yingzhiwei, Huang Shijie
In-Reply-To: <20260611061915.2354307-1-huangsj@hygon.cn>

Do not access the file's i_mmap directly; use get_i_mmap_root()
to access it. This prepares for a later patch.

Signed-off-by: Huang Shijie <huangsj@hygon.cn>
---
 arch/arm/mm/fault-armv.c   |  3 ++-
 arch/arm/mm/flush.c        |  3 ++-
 arch/nios2/mm/cacheflush.c |  3 ++-
 arch/parisc/kernel/cache.c |  4 +++-
 fs/dax.c                   |  3 ++-
 fs/hugetlbfs/inode.c       |  6 +++---
 include/linux/fs.h         |  5 +++++
 include/linux/mm.h         |  1 +
 kernel/events/uprobes.c    |  3 ++-
 mm/hugetlb.c               |  7 +++++--
 mm/khugepaged.c            |  6 ++++--
 mm/memory-failure.c        |  8 +++++---
 mm/memory.c                |  4 ++--
 mm/mmap.c                  |  2 +-
 mm/nommu.c                 |  9 +++++----
 mm/pagewalk.c              |  2 +-
 mm/rmap.c                  |  2 +-
 mm/vma.c                   | 14 ++++++++------
 18 files changed, 54 insertions(+), 31 deletions(-)

diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 91e488767783..1b5fe151e805 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -126,6 +126,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
 {
 	const unsigned long pmd_start_addr = ALIGN_DOWN(addr, PMD_SIZE);
 	const unsigned long pmd_end_addr = pmd_start_addr + PMD_SIZE;
+	struct rb_root_cached *root = get_i_mmap_root(mapping);
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *mpnt;
 	unsigned long offset;
@@ -140,7 +141,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
 	 * cache coherency.
 	 */
 	flush_dcache_mmap_lock(mapping);
-	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, root, pgoff, pgoff) {
 		/*
 		 * If we are using split PTE locks, then we need to take the pte
 		 * lock. Otherwise we are using shared mm->page_table_lock which
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 4d7ef5cc36b6..01588df81bfc 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -238,6 +238,7 @@ void __flush_dcache_folio(struct address_space *mapping, struct folio *folio)
 static void __flush_dcache_aliases(struct address_space *mapping, struct folio *folio)
 {
 	struct mm_struct *mm = current->active_mm;
+	struct rb_root_cached *root = get_i_mmap_root(mapping);
 	struct vm_area_struct *vma;
 	pgoff_t pgoff, pgoff_end;
 
@@ -251,7 +252,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct folio *
 	pgoff_end = pgoff + folio_nr_pages(folio) - 1;
 
 	flush_dcache_mmap_lock(mapping);
-	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff_end) {
+	vma_interval_tree_foreach(vma, root, pgoff, pgoff_end) {
 		unsigned long start, offset, pfn;
 		unsigned int nr;
 
diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c
index 8321182eb927..ab6e064fabe2 100644
--- a/arch/nios2/mm/cacheflush.c
+++ b/arch/nios2/mm/cacheflush.c
@@ -78,11 +78,12 @@ static void flush_aliases(struct address_space *mapping, struct folio *folio)
 	unsigned long flags;
 	pgoff_t pgoff;
 	unsigned long nr = folio_nr_pages(folio);
+	struct rb_root_cached *root = get_i_mmap_root(mapping);
 
 	pgoff = folio->index;
 
 	flush_dcache_mmap_lock_irqsave(mapping, flags);
-	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff + nr - 1) {
+	vma_interval_tree_foreach(vma, root, pgoff, pgoff + nr - 1) {
 		unsigned long start;
 
 		if (vma->vm_mm != mm)
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 0170b69a21d3..f99dffd6cc22 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -473,6 +473,7 @@ static inline unsigned long get_upa(struct mm_struct *mm, unsigned long addr)
 void flush_dcache_folio(struct folio *folio)
 {
 	struct address_space *mapping = folio_flush_mapping(folio);
+	struct rb_root_cached *root;
 	struct vm_area_struct *vma;
 	unsigned long addr, old_addr = 0;
 	void *kaddr;
@@ -494,6 +495,7 @@ void flush_dcache_folio(struct folio *folio)
 		return;
 
 	pgoff = folio->index;
+	root = get_i_mmap_root(mapping);
 
 	/*
 	 * We have carefully arranged in arch_get_unmapped_area() that
@@ -503,7 +505,7 @@ void flush_dcache_folio(struct folio *folio)
 	 * on machines that support equivalent aliasing
 	 */
 	flush_dcache_mmap_lock_irqsave(mapping, flags);
-	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff + nr - 1) {
+	vma_interval_tree_foreach(vma, root, pgoff, pgoff + nr - 1) {
 		unsigned long offset = pgoff - vma->vm_pgoff;
 		unsigned long pfn = folio_pfn(folio);
 
diff --git a/fs/dax.c b/fs/dax.c
index 6d175cd47a99..d402edc3c1b8 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1138,6 +1138,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 		struct address_space *mapping, void *entry)
 {
 	unsigned long pfn, index, count, end;
+	struct rb_root_cached *root = get_i_mmap_root(mapping);
 	long ret = 0;
 	struct vm_area_struct *vma;
 
@@ -1201,7 +1202,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 
 	/* Walk all mappings of a given index of a file and writeprotect them */
 	i_mmap_lock_read(mapping);
-	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, end) {
+	vma_interval_tree_foreach(vma, root, index, end) {
 		pfn_mkclean_range(pfn, count, index, vma);
 		cond_resched();
 	}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 216e1a0dd0b2..da5b41ea5bdd 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -380,7 +380,7 @@ static void hugetlb_unmap_file_folio(struct hstate *h,
 					struct address_space *mapping,
 					struct folio *folio, pgoff_t index)
 {
-	struct rb_root_cached *root = &mapping->i_mmap;
+	struct rb_root_cached *root = get_i_mmap_root(mapping);
 	struct hugetlb_vma_lock *vma_lock;
 	unsigned long pfn = folio_pfn(folio);
 	struct vm_area_struct *vma;
@@ -615,7 +615,7 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 	i_size_write(inode, offset);
 	i_mmap_lock_write(mapping);
 	if (mapping_mapped(mapping))
-		hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0,
+		hugetlb_vmdelete_list(get_i_mmap_root(mapping), pgoff, 0,
 				      ZAP_FLAG_DROP_MARKER);
 	i_mmap_unlock_write(mapping);
 	remove_inode_hugepages(inode, offset, LLONG_MAX);
@@ -676,7 +676,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	/* Unmap users of full pages in the hole. */
 	if (hole_end > hole_start) {
 		if (mapping_mapped(mapping))
-			hugetlb_vmdelete_list(&mapping->i_mmap,
+			hugetlb_vmdelete_list(get_i_mmap_root(mapping),
 					      hole_start >> PAGE_SHIFT,
 					      hole_end >> PAGE_SHIFT, 0);
 	}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 11559c513dfb..cd46615b8f53 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -556,6 +556,11 @@ static inline int mapping_mapped(const struct address_space *mapping)
 	return	!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root);
 }
 
+static inline struct rb_root_cached *get_i_mmap_root(struct address_space *mapping)
+{
+	return &mapping->i_mmap;
+}
+
 /*
  * Might pages of this file have been modified in userspace?
  * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 06bbe9eba636..0a45c6a8b9f2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4041,6 +4041,7 @@ struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root_cached *root,
 struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
 				unsigned long start, unsigned long last);
 
+/* Please use get_i_mmap_root() to get the @root */
 #define vma_interval_tree_foreach(vma, root, start, last)		\
 	for (vma = vma_interval_tree_iter_first(root, start, last);	\
 	     vma; vma = vma_interval_tree_iter_next(vma, start, last))
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 4084e926e284..d8561a42aec8 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1201,6 +1201,7 @@ static inline struct map_info *free_map_info(struct map_info *info)
 static struct map_info *
 build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 {
+	struct rb_root_cached *root = get_i_mmap_root(mapping);
 	unsigned long pgoff = offset >> PAGE_SHIFT;
 	struct vm_area_struct *vma;
 	struct map_info *curr = NULL;
@@ -1210,7 +1211,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 
  again:
 	i_mmap_lock_read(mapping);
-	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, root, pgoff, pgoff) {
 		if (!valid_vma(vma, is_register))
 			continue;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4b80b167cc9c..8bc49d57a116 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5360,6 +5360,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct hstate *h = hstate_vma(vma);
 	struct vm_area_struct *iter_vma;
 	struct address_space *mapping;
+	struct rb_root_cached *root;
 	pgoff_t pgoff;
 
 	/*
@@ -5370,6 +5371,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) +
 			vma->vm_pgoff;
 	mapping = vma->vm_file->f_mapping;
+	root = get_i_mmap_root(mapping);
 
 	/*
 	 * Take the mapping lock for the duration of the table walk. As
@@ -5377,7 +5379,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * __unmap_hugepage_range() is called as the lock is already held
 	 */
 	i_mmap_lock_write(mapping);
-	vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(iter_vma, root, pgoff, pgoff) {
 		/* Do not unmap the current VMA */
 		if (iter_vma == vma)
 			continue;
@@ -6850,6 +6852,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
 		      unsigned long addr, pud_t *pud)
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct rb_root_cached *root = get_i_mmap_root(mapping);
 	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
 			vma->vm_pgoff;
 	struct vm_area_struct *svma;
@@ -6858,7 +6861,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t *pte;
 
 	i_mmap_lock_read(mapping);
-	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
+	vma_interval_tree_foreach(svma, root, idx, idx) {
 		if (svma == vma)
 			continue;
 
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b8452dbdb043..0f577e4a2ccd 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1773,10 +1773,11 @@ static bool file_backed_vma_is_retractable(struct vm_area_struct *vma)
 
 static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 {
+	struct rb_root_cached *root = get_i_mmap_root(mapping);
 	struct vm_area_struct *vma;
 
 	i_mmap_lock_read(mapping);
-	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, root, pgoff, pgoff) {
 		struct mmu_notifier_range range;
 		struct mm_struct *mm;
 		unsigned long addr;
@@ -2194,7 +2195,8 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
 		 * not be able to observe any missing pages due to the
 		 * previously inserted retry entries.
 		 */
-		vma_interval_tree_foreach(vma, &mapping->i_mmap, start, end) {
+		vma_interval_tree_foreach(vma, get_i_mmap_root(mapping),
+					start, end) {
 			if (userfaultfd_missing(vma)) {
 				result = SCAN_EXCEED_NONE_PTE;
 				goto immap_locked;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ee42d4361309..85196d9bb26c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -598,7 +598,7 @@ static void collect_procs_file(const struct folio *folio,
 
 		if (!t)
 			continue;
-		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
+		vma_interval_tree_foreach(vma, get_i_mmap_root(mapping), pgoff,
 				      pgoff) {
 			/*
 			 * Send early kill signal to tasks where a vma covers
@@ -650,7 +650,8 @@ static void collect_procs_fsdax(const struct page *page,
 			t = task_early_kill(tsk, true);
 		if (!t)
 			continue;
-		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
+		vma_interval_tree_foreach(vma, get_i_mmap_root(mapping), pgoff,
+					pgoff) {
 			if (vma->vm_mm == t->mm)
 				add_to_kill_fsdax(t, page, vma, to_kill, pgoff);
 		}
@@ -2251,7 +2252,8 @@ static void collect_procs_pfn(struct pfn_address_space *pfn_space,
 		t = task_early_kill(tsk, true);
 		if (!t)
 			continue;
-		vma_interval_tree_foreach(vma, &mapping->i_mmap, 0, ULONG_MAX) {
+		vma_interval_tree_foreach(vma, get_i_mmap_root(mapping),
+					0, ULONG_MAX) {
 			pgoff_t pgoff;
 
 			if (vma->vm_mm == t->mm &&
diff --git a/mm/memory.c b/mm/memory.c
index 5335077765e2..9ea5d6c8ef4d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4387,7 +4387,7 @@ void unmap_mapping_folio(struct folio *folio)
 
 	i_mmap_lock_read(mapping);
 	if (unlikely(mapping_mapped(mapping)))
-		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+		unmap_mapping_range_tree(get_i_mmap_root(mapping), first_index,
 					 last_index, &details);
 	i_mmap_unlock_read(mapping);
 }
@@ -4417,7 +4417,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
 
 	i_mmap_lock_read(mapping);
 	if (unlikely(mapping_mapped(mapping)))
-		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+		unmap_mapping_range_tree(get_i_mmap_root(mapping), first_index,
 					 last_index, &details);
 	i_mmap_unlock_read(mapping);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 5754d1c36462..d714fdb357e5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1831,7 +1831,7 @@ __latent_entropy int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
 			vma_interval_tree_insert_after(tmp, mpnt,
-					&mapping->i_mmap);
+					get_i_mmap_root(mapping));
 			flush_dcache_mmap_unlock(mapping);
 			i_mmap_unlock_write(mapping);
 		}
diff --git a/mm/nommu.c b/mm/nommu.c
index ed3934bc2de4..0f18ffc658e9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -569,7 +569,7 @@ static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm)
 
 		i_mmap_lock_write(mapping);
 		flush_dcache_mmap_lock(mapping);
-		vma_interval_tree_insert(vma, &mapping->i_mmap);
+		vma_interval_tree_insert(vma, get_i_mmap_root(mapping));
 		flush_dcache_mmap_unlock(mapping);
 		i_mmap_unlock_write(mapping);
 	}
@@ -585,7 +585,7 @@ static void cleanup_vma_from_mm(struct vm_area_struct *vma)
 
 		i_mmap_lock_write(mapping);
 		flush_dcache_mmap_lock(mapping);
-		vma_interval_tree_remove(vma, &mapping->i_mmap);
+		vma_interval_tree_remove(vma, get_i_mmap_root(mapping));
 		flush_dcache_mmap_unlock(mapping);
 		i_mmap_unlock_write(mapping);
 	}
@@ -1804,6 +1804,7 @@ EXPORT_SYMBOL_GPL(copy_remote_vm_str);
 int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 				size_t newsize)
 {
+	struct rb_root_cached *root = get_i_mmap_root(inode->i_mapping);
 	struct vm_area_struct *vma;
 	struct vm_region *region;
 	pgoff_t low, high;
@@ -1816,7 +1817,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 	i_mmap_lock_read(inode->i_mapping);
 
 	/* search for VMAs that fall within the dead zone */
-	vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) {
+	vma_interval_tree_foreach(vma, root, low, high) {
 		/* found one - only interested if it's shared out of the page
 		 * cache */
 		if (vma->vm_flags & VM_SHARED) {
@@ -1832,7 +1833,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 	 * we don't check for any regions that start beyond the EOF as there
 	 * shouldn't be any
 	 */
-	vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, 0, ULONG_MAX) {
+	vma_interval_tree_foreach(vma, root, 0, ULONG_MAX) {
 		if (!(vma->vm_flags & VM_SHARED))
 			continue;
 
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 3ae2586ff45b..8df1b5077951 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -810,7 +810,7 @@ int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
 		return -EINVAL;
 
 	lockdep_assert_held(&mapping->i_mmap_rwsem);
-	vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
+	vma_interval_tree_foreach(vma, get_i_mmap_root(mapping), first_index,
 				  first_index + nr - 1) {
 		/* Clip to the vma */
 		vba = vma->vm_pgoff;
diff --git a/mm/rmap.c b/mm/rmap.c
index 99e1b3dc390b..6cfcdb96071f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -3051,7 +3051,7 @@ static void __rmap_walk_file(struct folio *folio, struct address_space *mapping,
 		i_mmap_lock_read(mapping);
 	}
 lookup:
-	vma_interval_tree_foreach(vma, &mapping->i_mmap,
+	vma_interval_tree_foreach(vma, get_i_mmap_root(mapping),
 			pgoff_start, pgoff_end) {
 		unsigned long address = vma_address(vma, pgoff_start, nr_pages);
 
diff --git a/mm/vma.c b/mm/vma.c
index d90791b00a7b..6159650c1b42 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -234,7 +234,7 @@ static void __vma_link_file(struct vm_area_struct *vma,
 		mapping_allow_writable(mapping);
 
 	flush_dcache_mmap_lock(mapping);
-	vma_interval_tree_insert(vma, &mapping->i_mmap);
+	vma_interval_tree_insert(vma, get_i_mmap_root(mapping));
 	flush_dcache_mmap_unlock(mapping);
 }
 
@@ -248,7 +248,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 		mapping_unmap_writable(mapping);
 
 	flush_dcache_mmap_lock(mapping);
-	vma_interval_tree_remove(vma, &mapping->i_mmap);
+	vma_interval_tree_remove(vma, get_i_mmap_root(mapping));
 	flush_dcache_mmap_unlock(mapping);
 }
 
@@ -319,10 +319,11 @@ static void vma_prepare(struct vma_prepare *vp)
 
 	if (vp->file) {
 		flush_dcache_mmap_lock(vp->mapping);
-		vma_interval_tree_remove(vp->vma, &vp->mapping->i_mmap);
+		vma_interval_tree_remove(vp->vma,
+					get_i_mmap_root(vp->mapping));
 		if (vp->adj_next)
 			vma_interval_tree_remove(vp->adj_next,
-						 &vp->mapping->i_mmap);
+					get_i_mmap_root(vp->mapping));
 	}
 
 }
@@ -341,8 +342,9 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
 	if (vp->file) {
 		if (vp->adj_next)
 			vma_interval_tree_insert(vp->adj_next,
-						 &vp->mapping->i_mmap);
-		vma_interval_tree_insert(vp->vma, &vp->mapping->i_mmap);
+					get_i_mmap_root(vp->mapping));
+		vma_interval_tree_insert(vp->vma,
+					get_i_mmap_root(vp->mapping));
 		flush_dcache_mmap_unlock(vp->mapping);
 	}
 
-- 
2.53.0



^ permalink raw reply related

* [PATCH v2 1/4] mm: use mapping_mapped to simplify the code
From: Huang Shijie @ 2026-06-11  6:18 UTC (permalink / raw)
  To: akpm, viro, brauner, jack, muchun.song, osalvador, david
  Cc: surenb, mjguzik, liam, ljs, vbabka, shakeel.butt, rppt, mhocko,
	corbet, skhan, linux, dinguyen, schuster.simon, James.Bottomley,
	deller, djbw, willy, peterz, mingo, acme, namhyung, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, james.clark,
	mhiramat, oleg, ziy, baolin.wang, npache, ryan.roberts, dev.jain,
	baohua, lance.yang, linmiaohe, nao.horiguchi, jannh, pfalcato,
	riel, harry, will, brian.ruley, rmk+kernel, dave.anglin, linux-mm,
	linux-doc, linux-kernel, linux-arm-kernel, linux-parisc,
	linux-fsdevel, nvdimm, linux-perf-users, linux-trace-kernel,
	zhongyuan, fangbaoshun, yingzhiwei, Huang Shijie
In-Reply-To: <20260611061915.2354307-1-huangsj@hygon.cn>

Use mapping_mapped() instead of open-coding the RB_EMPTY_ROOT()
check on i_mmap, which makes the code tidier.

Signed-off-by: Huang Shijie <huangsj@hygon.cn>
---
 fs/hugetlbfs/inode.c | 4 ++--
 mm/memory.c          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 78d61bf2bd9b..216e1a0dd0b2 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -614,7 +614,7 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 
 	i_size_write(inode, offset);
 	i_mmap_lock_write(mapping);
-	if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
+	if (mapping_mapped(mapping))
 		hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0,
 				      ZAP_FLAG_DROP_MARKER);
 	i_mmap_unlock_write(mapping);
@@ -675,7 +675,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 
 	/* Unmap users of full pages in the hole. */
 	if (hole_end > hole_start) {
-		if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
+		if (mapping_mapped(mapping))
 			hugetlb_vmdelete_list(&mapping->i_mmap,
 					      hole_start >> PAGE_SHIFT,
 					      hole_end >> PAGE_SHIFT, 0);
diff --git a/mm/memory.c b/mm/memory.c
index 86a973119bd4..5335077765e2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4386,7 +4386,7 @@ void unmap_mapping_folio(struct folio *folio)
 	details.zap_flags = ZAP_FLAG_DROP_MARKER;
 
 	i_mmap_lock_read(mapping);
-	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+	if (unlikely(mapping_mapped(mapping)))
 		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
 					 last_index, &details);
 	i_mmap_unlock_read(mapping);
@@ -4416,7 +4416,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
 		last_index = ULONG_MAX;
 
 	i_mmap_lock_read(mapping);
-	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+	if (unlikely(mapping_mapped(mapping)))
 		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
 					 last_index, &details);
 	i_mmap_unlock_read(mapping);
-- 
2.53.0



^ permalink raw reply related

* [PATCH] Documentation: process: fix brackets
From: Manuel Ebner @ 2026-06-11  6:43 UTC (permalink / raw)
  To: Jonathan Corbet, Shuah Khan, open list:DOCUMENTATION PROCESS,
	open list:DOCUMENTATION, open list
  Cc: Manuel Ebner

Add a missing ')' and remove a needless ')'.

Signed-off-by: Manuel Ebner <manuelebner@mailbox.org>
---
This is the first patch of a 'series', but I won't send them together
because I'm still producing the patches and it will take me a couple weeks.
 Documentation/process/deprecated.rst     | 2 +-
 Documentation/process/maintainer-soc.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst
index ac75b7ecac47..03de71f654c7 100644
--- a/Documentation/process/deprecated.rst
+++ b/Documentation/process/deprecated.rst
@@ -388,7 +388,7 @@ allocations. For example, these open coded assignments::
 	ptr = kmalloc_array(count, sizeof(*ptr), gfp);
 	ptr = kcalloc(count, sizeof(*ptr), gfp);
 	ptr = kmalloc(struct_size(ptr, flex_member, count), gfp);
-	ptr = kmalloc(sizeof(struct foo, gfp);
+	ptr = kmalloc(sizeof(struct foo), gfp);
 
 become, respectively::
 
diff --git a/Documentation/process/maintainer-soc.rst b/Documentation/process/maintainer-soc.rst
index a3a90a7d4c68..fa91dfc53783 100644
--- a/Documentation/process/maintainer-soc.rst
+++ b/Documentation/process/maintainer-soc.rst
@@ -60,7 +60,7 @@ All typical platform related patches should be sent via SoC submaintainers
 shared defconfigs. Note that scripts/get_maintainer.pl might not provide
 correct addresses for the shared defconfig, so ignore its output and manually
 create CC-list based on MAINTAINERS file or use something like
-``scripts/get_maintainer.pl -f drivers/soc/FOO/``).
+``scripts/get_maintainer.pl -f drivers/soc/FOO/``.
 
 Submitting Patches to the Main SoC Maintainers
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- 
2.54.0


^ permalink raw reply related

* [RFC PATCH 0/5] mm/slub: preserve previous object lifetime
From: Pengpeng Hou @ 2026-06-11  6:39 UTC (permalink / raw)
  To: Vlastimil Babka, Harry Yoo, Andrew Morton, linux-mm
  Cc: Hao Li, Christoph Lameter, David Rientjes, Roman Gushchin,
	David Hildenbrand, Lorenzo Stoakes, liam, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jonathan Corbet, Shuah Khan,
	linux-doc, linux-kernel, Pengpeng Hou

SLAB_STORE_USER currently stores one allocation track and one free track
for an object. This is useful, but it loses part of the previous lifetime
when the object is reused: the new allocation overwrites the allocation
track, and a later stale free can overwrite the free track.

For free-after-reuse bugs, the report can therefore contain the victim
allocation and the stale free, while the earlier alloc/free pair that
explains where the stale pointer came from is no longer available.

This RFC adds an opt-in SLUB debug option to keep one previous completed
object lifetime. The option is disabled by default, is not part of the
default debug flags, and only takes effect when user tracking is already
enabled:

  slab_debug=UH,kmalloc-128

The series intentionally does not attempt to infer semantic ownership or
identify the root cause of a use-after-free. It only preserves and prints
additional track records that SLUB already knows how to collect.

This is sent as RFC because the user-visible interface and the cost/benefit
tradeoff should be agreed on before this becomes a normal patch series.
In particular, feedback would be useful on:

- whether a separate H option is preferable to extending U directly
- whether H should require U, as implemented here, or imply U
- whether the extra per-object metadata is useful enough for this debug path

Not included yet:

- KUnit coverage or a standalone reproducer
- object-size/order comparison data for representative caches
- runtime benchmark data for slab_debug=U vs slab_debug=UH

Those should be added before a non-RFC submission if the direction looks
acceptable.

Pengpeng Hou (5):
  mm/slub: factor user tracking metadata size calculation
  mm/slub: add optional previous lifetime user tracking
  mm/slub: print previous object lifetime in debug reports
  Documentation/mm: document SLUB previous lifetime tracking
  mm/slub: sanitize previous lifetime tracking flags

 Documentation/admin-guide/mm/slab.rst |  22 ++++-
 include/linux/slab.h                  |   3 +
 mm/slab.h                             |   3 +-
 mm/slub.c                             | 118 ++++++++++++++++++++++----
 4 files changed, 128 insertions(+), 18 deletions(-)

-- 
2.50.1 (Apple Git-155)

^ permalink raw reply

* [RFC PATCH 2/5] mm/slub: add optional previous lifetime user tracking
From: Pengpeng Hou @ 2026-06-11  6:39 UTC (permalink / raw)
  To: Vlastimil Babka, Harry Yoo, Andrew Morton, linux-mm
  Cc: Hao Li, Christoph Lameter, David Rientjes, Roman Gushchin,
	David Hildenbrand, Lorenzo Stoakes, liam, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jonathan Corbet, Shuah Khan,
	linux-doc, linux-kernel, Pengpeng Hou
In-Reply-To: <20260611063926.38111-1-pengpeng@iscas.ac.cn>

SLAB_STORE_USER currently reserves two per-object tracks, one for the
allocation and one for the free. Add an opt-in SLAB_STORE_HISTORY flag
that extends the user tracking area to hold one previous completed
lifetime.

Expose the option as slab_debug=H, but require it to be used together
with U. This avoids silently enabling user tracking and its stack depot
cost when a user only requested H. The option is not part of the default
debug flags.

No history is recorded or printed yet; this only adds the flag, the
object metadata layout, and the sysfs state file.

Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
---
 include/linux/slab.h |  3 +++
 mm/slab.h            |  3 ++-
 mm/slub.c            | 36 +++++++++++++++++++++++++++++++-----
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2b5ab488e96b..78b9ec5bc17a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -32,6 +32,7 @@ enum _slab_flag_bits {
 	_SLAB_CACHE_DMA,
 	_SLAB_CACHE_DMA32,
 	_SLAB_STORE_USER,
+	_SLAB_STORE_HISTORY,
 	_SLAB_PANIC,
 	_SLAB_TYPESAFE_BY_RCU,
 	_SLAB_TRACE,
@@ -98,6 +99,8 @@ enum _slab_flag_bits {
 #define SLAB_CACHE_DMA32	__SLAB_FLAG_BIT(_SLAB_CACHE_DMA32)
 /* DEBUG: Store the last owner for bug hunting */
 #define SLAB_STORE_USER		__SLAB_FLAG_BIT(_SLAB_STORE_USER)
+/* DEBUG: Store the previous object lifetime for bug hunting */
+#define SLAB_STORE_HISTORY	__SLAB_FLAG_BIT(_SLAB_STORE_HISTORY)
 /* Panic if kmem_cache_create() fails */
 #define SLAB_PANIC		__SLAB_FLAG_BIT(_SLAB_PANIC)
 /**
diff --git a/mm/slab.h b/mm/slab.h
index bf2f87acf5e3..a6af35829f79 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -417,7 +417,8 @@ void flush_rcu_sheaves_on_cache(struct kmem_cache *s);
 			 SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)
 
 #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-			  SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)
+			  SLAB_STORE_HISTORY | SLAB_TRACE | \
+			  SLAB_CONSISTENCY_CHECKS)
 
 #define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS)
 
diff --git a/mm/slub.c b/mm/slub.c
index a9114dddc976..803c597351ce 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -277,7 +277,7 @@ void *fixup_red_left(struct kmem_cache *s, void *p)
  * issues when checking or reading debug information
  */
 #define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
-				SLAB_TRACE)
+				SLAB_STORE_HISTORY | SLAB_TRACE)
 
 
 /*
@@ -285,7 +285,8 @@ void *fixup_red_left(struct kmem_cache *s, void *p)
  * disabled when slab_debug=O is used and a cache's min order increases with
  * metadata.
  */
-#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
+#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | \
+			      SLAB_STORE_USER | SLAB_STORE_HISTORY)
 
 #define OO_SHIFT	16
 #define OO_MASK		((1 << OO_SHIFT) - 1)
@@ -316,14 +317,23 @@ struct track {
 	unsigned long when;	/* When did the operation occur */
 };
 
-enum track_item { TRACK_ALLOC, TRACK_FREE, TRACK_NR };
+enum track_item {
+	TRACK_ALLOC,
+	TRACK_FREE,
+	TRACK_PREV_ALLOC,
+	TRACK_PREV_FREE,
+	TRACK_NR,
+};
 
 static inline unsigned int nr_user_tracks(struct kmem_cache *s)
 {
 	if (!(s->flags & SLAB_STORE_USER))
 		return 0;
 
-	return TRACK_NR;
+	if (s->flags & SLAB_STORE_HISTORY)
+		return TRACK_NR;
+
+	return TRACK_PREV_ALLOC;
 }
 
 static inline unsigned int user_tracking_size(struct kmem_cache *s)
@@ -1837,6 +1847,9 @@ parse_slub_debug_flags(const char *str, slab_flags_t *flags, const char **slabs,
 		case 'u':
 			*flags |= SLAB_STORE_USER;
 			break;
+		case 'h':
+			*flags |= SLAB_STORE_HISTORY;
+			break;
 		case 't':
 			*flags |= SLAB_TRACE;
 			break;
@@ -1855,6 +1868,11 @@ parse_slub_debug_flags(const char *str, slab_flags_t *flags, const char **slabs,
 				pr_err("slab_debug option '%c' unknown. skipped\n", *str);
 		}
 	}
+	if ((*flags & SLAB_STORE_HISTORY) && !(*flags & SLAB_STORE_USER)) {
+		if (init)
+			pr_err("slab_debug option 'H' requires 'U'. skipped\n");
+		*flags &= ~SLAB_STORE_HISTORY;
+	}
 check_slabs:
 	if (*str == ',')
 		*slabs = ++str;
@@ -1969,7 +1987,7 @@ slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
 	 * but let the user enable it via the command line below.
 	 */
 	if (flags & SLAB_NOLEAKTRACE)
-		slub_debug_local &= ~SLAB_STORE_USER;
+		slub_debug_local &= ~(SLAB_STORE_USER | SLAB_STORE_HISTORY);
 
 	len = strlen(name);
 	next_block = slub_debug_string;
@@ -9223,6 +9241,13 @@ static ssize_t store_user_show(struct kmem_cache *s, char *buf)
 
 SLAB_ATTR_RO(store_user);
 
+static ssize_t store_history_show(struct kmem_cache *s, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_STORE_HISTORY));
+}
+
+SLAB_ATTR_RO(store_history);
+
 static ssize_t validate_show(struct kmem_cache *s, char *buf)
 {
 	return 0;
@@ -9442,6 +9467,7 @@ static const struct attribute *const slab_attrs[] = {
 	&red_zone_attr.attr,
 	&poison_attr.attr,
 	&store_user_attr.attr,
+	&store_history_attr.attr,
 	&validate_attr.attr,
 #endif
 #ifdef CONFIG_ZONE_DMA
-- 
2.50.1 (Apple Git-155)


^ permalink raw reply related

* [RFC PATCH 4/5] Documentation/mm: document SLUB previous lifetime tracking
From: Pengpeng Hou @ 2026-06-11  6:39 UTC (permalink / raw)
  To: Vlastimil Babka, Harry Yoo, Andrew Morton, linux-mm
  Cc: Hao Li, Christoph Lameter, David Rientjes, Roman Gushchin,
	David Hildenbrand, Lorenzo Stoakes, liam, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jonathan Corbet, Shuah Khan,
	linux-doc, linux-kernel, Pengpeng Hou
In-Reply-To: <20260611063926.38111-1-pengpeng@iscas.ac.cn>

Document the new slab_debug=H option, its store_history sysfs state file,
and the additional previous-lifetime lines in SLUB reports.

Spell out that the previous lifetime is diagnostic information only. It
can help with stale-pointer reports after object reuse, but it does not
identify semantic ownership or the root cause of a use-after-free.

Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
---
 Documentation/admin-guide/mm/slab.rst | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/mm/slab.rst b/Documentation/admin-guide/mm/slab.rst
index 14429ab90611..4f644c4e1baa 100644
--- a/Documentation/admin-guide/mm/slab.rst
+++ b/Documentation/admin-guide/mm/slab.rst
@@ -50,6 +50,8 @@ Possible debug options are::
 	Z		Red zoning
 	P		Poisoning (object and padding)
 	U		User tracking (free and alloc)
+	H		Previous lifetime tracking. Requires U and preserves one
+			previous completed alloc/free lifetime for each object.
 	T		Trace (please only use on single slabs)
 	A		Enable failslab filter mark for the cache
 	O		Switch debugging off for caches that would have
@@ -91,6 +93,17 @@ kmalloc. All other slabs will not get any debugging enabled::
 
 	slab_debug=Z,dentry;U,kmalloc-*
 
+Previous lifetime tracking can be enabled together with user tracking for
+selected caches. This keeps one previous completed alloc/free lifetime in
+addition to the normal user tracking records::
+
+	slab_debug=UH,kmalloc-128
+
+This can help debug reports where an object was freed, reallocated, and later
+accessed or freed again through a stale pointer. The previous lifetime is
+diagnostic information only; it does not identify semantic ownership or the
+root cause of a use-after-free.
+
 You can also enable options (e.g. sanity checks and poisoning) for all caches
 except some that are deemed too performance critical and don't need to be
 debugged by specifying global debug options followed by a list of slab names
@@ -110,6 +123,7 @@ options from the ``slab_debug`` parameter translate to the following files::
 	Z	red_zone
 	P	poison
 	U	store_user
+	H	store_history
 	T	trace
 	A	failslab
 
@@ -245,9 +259,15 @@ into the syslog:
 	cpu> pid=<pid of the process>
      INFO: Freed in <kernel function> age=<jiffies since free> cpu=<freed by cpu>
 	pid=<pid of the process>
+     INFO: Previous object lifetime:
+     INFO: Previous allocated in <kernel function> age=<jiffies since alloc> cpu=<allocated by
+	cpu> pid=<pid of the process>
+     INFO: Previous freed in <kernel function> age=<jiffies since free> cpu=<freed by cpu>
+	pid=<pid of the process>
 
    (Object allocation / free information is only available if SLAB_STORE_USER is
-   set for the slab. slab_debug sets that option)
+   set for the slab. slab_debug sets that option. Previous lifetime information
+   is only available if both SLAB_STORE_USER and SLAB_STORE_HISTORY are set.)
 
 2. The object contents if an object was involved.
 
-- 
2.50.1 (Apple Git-155)


^ permalink raw reply related

* [RFC PATCH 1/5] mm/slub: factor user tracking metadata size calculation
From: Pengpeng Hou @ 2026-06-11  6:39 UTC (permalink / raw)
  To: Vlastimil Babka, Harry Yoo, Andrew Morton, linux-mm
  Cc: Hao Li, Christoph Lameter, David Rientjes, Roman Gushchin,
	David Hildenbrand, Lorenzo Stoakes, liam, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jonathan Corbet, Shuah Khan,
	linux-doc, linux-kernel, Pengpeng Hou
In-Reply-To: <20260611063926.38111-1-pengpeng@iscas.ac.cn>

SLAB_STORE_USER stores one allocation track and one free track after the
object. Several offset and size calculations open-code that as
2 * sizeof(struct track).

Introduce helpers for the number and size of user tracking records, and
use them for the existing metadata layout calculations. This is a
preparatory cleanup for adding optional extra user tracking records
without changing the current layout or behavior.

Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
---
 mm/slub.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index a2bf3756ca7d..a9114dddc976 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -316,7 +316,20 @@ struct track {
 	unsigned long when;	/* When did the operation occur */
 };
 
-enum track_item { TRACK_ALLOC, TRACK_FREE };
+enum track_item { TRACK_ALLOC, TRACK_FREE, TRACK_NR };
+
+static inline unsigned int nr_user_tracks(struct kmem_cache *s)
+{
+	if (!(s->flags & SLAB_STORE_USER))
+		return 0;
+
+	return TRACK_NR;
+}
+
+static inline unsigned int user_tracking_size(struct kmem_cache *s)
+{
+	return nr_user_tracks(s) * sizeof(struct track);
+}
 
 #ifdef SLAB_SUPPORTS_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
@@ -740,7 +753,7 @@ static inline void set_orig_size(struct kmem_cache *s,
 		return;
 
 	p += get_info_end(s);
-	p += sizeof(struct track) * 2;
+	p += user_tracking_size(s);
 
 	*(unsigned long *)p = orig_size;
 }
@@ -756,7 +769,7 @@ static inline unsigned long get_orig_size(struct kmem_cache *s, void *object)
 		return s->object_size;
 
 	p += get_info_end(s);
-	p += sizeof(struct track) * 2;
+	p += user_tracking_size(s);
 
 	return *(unsigned long *)p;
 }
@@ -873,8 +886,7 @@ static unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
 {
 	unsigned int offset = get_info_end(s);
 
-	if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
-		offset += sizeof(struct track) * 2;
+	offset += user_tracking_size(s);
 
 	if (slub_debug_orig_size(s))
 		offset += sizeof(unsigned long);
@@ -1077,7 +1089,7 @@ static void init_tracking(struct kmem_cache *s, void *object)
 		return;
 
 	p = get_track(s, object, TRACK_ALLOC);
-	memset(p, 0, 2*sizeof(struct track));
+	memset(p, 0, user_tracking_size(s));
 }
 
 static void print_track(const char *s, struct track *t, unsigned long pr_time)
@@ -1185,8 +1197,7 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
 
 	off = get_info_end(s);
 
-	if (s->flags & SLAB_STORE_USER)
-		off += 2 * sizeof(struct track);
+	off += user_tracking_size(s);
 
 	if (slub_debug_orig_size(s))
 		off += sizeof(unsigned long);
@@ -1390,7 +1401,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
 
 	if (s->flags & SLAB_STORE_USER) {
 		/* We also have user information there */
-		off += 2 * sizeof(struct track);
+		off += user_tracking_size(s);
 
 		if (s->flags & SLAB_KMALLOC)
 			off += sizeof(unsigned long);
@@ -7845,7 +7856,7 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
 		 * Need to store information about allocs and frees after
 		 * the object.
 		 */
-		size += 2 * sizeof(struct track);
+		size += user_tracking_size(s);
 
 		/* Save the original kmalloc request size */
 		if (flags & SLAB_KMALLOC)
-- 
2.50.1 (Apple Git-155)


^ permalink raw reply related

* [RFC PATCH 3/5] mm/slub: print previous object lifetime in debug reports
From: Pengpeng Hou @ 2026-06-11  6:39 UTC (permalink / raw)
  To: Vlastimil Babka, Harry Yoo, Andrew Morton, linux-mm
  Cc: Hao Li, Christoph Lameter, David Rientjes, Roman Gushchin,
	David Hildenbrand, Lorenzo Stoakes, liam, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jonathan Corbet, Shuah Khan,
	linux-doc, linux-kernel, Pengpeng Hou
In-Reply-To: <20260611063926.38111-1-pengpeng@iscas.ac.cn>

When SLAB_STORE_USER is enabled, each object stores one allocation track
and one free track. Reusing the object overwrites the allocation track,
while a later stale free can overwrite the free track. This can leave a
report with the victim allocation and stale free but without the
previous completed lifetime that explains where the stale pointer came
from.

When SLAB_STORE_HISTORY is enabled, copy the current allocation/free pair
to a previous-lifetime pair before recording the new allocation. Keep
the existing TRACK_FREE semantics unchanged so current store_user and
free_traces behavior is preserved. On SLUB error reports, print the
previous completed lifetime when both records are available.

The extra records are diagnostic information only. They do not infer
semantic ownership or identify the root cause of a use-after-free.

Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
---
 mm/slub.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 803c597351ce..2dfa8af00a49 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1083,6 +1083,31 @@ static void set_track_update(struct kmem_cache *s, void *object,
 	p->when = jiffies;
 }
 
+static bool track_has_record(const struct track *track)
+{
+	return track->addr;
+}
+
+static void save_previous_lifetime(struct kmem_cache *s, void *object)
+{
+	struct track *alloc, *free;
+	struct track *prev_alloc, *prev_free;
+
+	if (!(s->flags & SLAB_STORE_HISTORY))
+		return;
+
+	alloc = get_track(s, object, TRACK_ALLOC);
+	free = get_track(s, object, TRACK_FREE);
+	if (!track_has_record(alloc) || !track_has_record(free))
+		return;
+
+	prev_alloc = get_track(s, object, TRACK_PREV_ALLOC);
+	prev_free = get_track(s, object, TRACK_PREV_FREE);
+
+	*prev_alloc = *alloc;
+	*prev_free = *free;
+}
+
 static __always_inline void set_track(struct kmem_cache *s, void *object,
 				      enum track_item alloc, unsigned long addr, gfp_t gfp_flags)
 {
@@ -1123,11 +1148,25 @@ static void print_track(const char *s, struct track *t, unsigned long pr_time)
 void print_tracking(struct kmem_cache *s, void *object)
 {
 	unsigned long pr_time = jiffies;
+	struct track *prev_alloc, *prev_free;
+
 	if (!(s->flags & SLAB_STORE_USER))
 		return;
 
 	print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
 	print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
+
+	if (!(s->flags & SLAB_STORE_HISTORY))
+		return;
+
+	prev_alloc = get_track(s, object, TRACK_PREV_ALLOC);
+	prev_free = get_track(s, object, TRACK_PREV_FREE);
+	if (!track_has_record(prev_alloc) || !track_has_record(prev_free))
+		return;
+
+	pr_err("Previous object lifetime:\n");
+	print_track("Previous allocated", prev_alloc, pr_time);
+	print_track("Previous freed", prev_free, pr_time);
 }
 
 static void print_slab_info(const struct slab *slab)
@@ -4505,8 +4544,10 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	return NULL;
 
 success:
-	if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
+	if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
+		save_previous_lifetime(s, object);
 		set_track(s, object, TRACK_ALLOC, addr, gfpflags);
+	}
 
 	return object;
 }
-- 
2.50.1 (Apple Git-155)


^ permalink raw reply related

* [RFC PATCH 5/5] mm/slub: sanitize previous lifetime tracking flags
From: Pengpeng Hou @ 2026-06-11  6:39 UTC (permalink / raw)
  To: Vlastimil Babka, Harry Yoo, Andrew Morton, linux-mm
  Cc: Hao Li, Christoph Lameter, David Rientjes, Roman Gushchin,
	David Hildenbrand, Lorenzo Stoakes, liam, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jonathan Corbet, Shuah Khan,
	linux-doc, linux-kernel, Pengpeng Hou
In-Reply-To: <20260611063926.38111-1-pengpeng@iscas.ac.cn>

SLAB_STORE_HISTORY only has meaning together with SLAB_STORE_USER because
the previous lifetime records live in the user tracking metadata area.

The slab_debug parser rejects H without U, but kmem_cache_create()
callers may also pass debug flags directly. Clear SLAB_STORE_HISTORY
whenever SLAB_STORE_USER is not present so caches cannot end up
reporting store_history without any user tracking storage.

Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
---
 mm/slub.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 2dfa8af00a49..931e6d04ba2b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -341,6 +341,14 @@ static inline unsigned int user_tracking_size(struct kmem_cache *s)
 	return nr_user_tracks(s) * sizeof(struct track);
 }
 
+static inline slab_flags_t sanitize_user_tracking_flags(slab_flags_t flags)
+{
+	if ((flags & SLAB_STORE_HISTORY) && !(flags & SLAB_STORE_USER))
+		flags &= ~SLAB_STORE_HISTORY;
+
+	return flags;
+}
+
 #ifdef SLAB_SUPPORTS_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
 #else
@@ -1910,7 +1918,7 @@ parse_slub_debug_flags(const char *str, slab_flags_t *flags, const char **slabs,
 	if ((*flags & SLAB_STORE_HISTORY) && !(*flags & SLAB_STORE_USER)) {
 		if (init)
 			pr_err("slab_debug option 'H' requires 'U'. skipped\n");
-		*flags &= ~SLAB_STORE_HISTORY;
+		*flags = sanitize_user_tracking_flags(*flags);
 	}
 check_slabs:
 	if (*str == ',')
@@ -2052,7 +2060,7 @@ slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
 
 			if (!strncmp(name, iter, cmplen)) {
 				flags |= block_flags;
-				return flags;
+				return sanitize_user_tracking_flags(flags);
 			}
 
 			if (!*end || *end == ';')
@@ -2061,7 +2069,7 @@ slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
 		}
 	}
 
-	return flags | slub_debug_local;
+	return sanitize_user_tracking_flags(flags | slub_debug_local);
 }
 #else /* !CONFIG_SLUB_DEBUG */
 static inline void setup_object_debug(struct kmem_cache *s, void *object) {}
-- 
2.50.1 (Apple Git-155)


^ permalink raw reply related

* [PATCH v2 4/4] docs/mm: update document for split i_mmap tree
From: Huang Shijie @ 2026-06-11  6:19 UTC (permalink / raw)
  To: akpm, viro, brauner, jack, muchun.song, osalvador, david
  Cc: surenb, mjguzik, liam, ljs, vbabka, shakeel.butt, rppt, mhocko,
	corbet, skhan, linux, dinguyen, schuster.simon, James.Bottomley,
	deller, djbw, willy, peterz, mingo, acme, namhyung, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, james.clark,
	mhiramat, oleg, ziy, baolin.wang, npache, ryan.roberts, dev.jain,
	baohua, lance.yang, linmiaohe, nao.horiguchi, jannh, pfalcato,
	riel, harry, will, brian.ruley, rmk+kernel, dave.anglin, linux-mm,
	linux-doc, linux-kernel, linux-arm-kernel, linux-parisc,
	linux-fsdevel, nvdimm, linux-perf-users, linux-trace-kernel,
	zhongyuan, fangbaoshun, yingzhiwei, Huang Shijie
In-Reply-To: <20260611061915.2354307-1-huangsj@hygon.cn>

Document the i_mmap locking changes introduced by the following patches:
- Use mapping_mapped() to simplify the code
- Use get_i_mmap_root() to access the file's i_mmap
- Split the file's i_mmap tree (CONFIG_SPLIT_I_MMAP)

Add documentation for:
- CONFIG_SPLIT_I_MMAP split i_mmap tree architecture with per-tree locks
- New per-tree lock helpers: i_mmap_tree_lock_write/unlock_write
- New vm_area_struct.tree_idx field for sibling tree selection
- Updated i_mmap_lock_read/write semantics acquiring all per-tree locks
- Updated lock ordering notes for split tree configuration
- Updated page table freeing section for split tree scenario

Signed-off-by: Huang Shijie <huangsj@hygon.cn>
---
 Documentation/mm/process_addrs.rst | 63 +++++++++++++++++++++++-------
 1 file changed, 49 insertions(+), 14 deletions(-)

diff --git a/Documentation/mm/process_addrs.rst b/Documentation/mm/process_addrs.rst
index 851680ead45f..4aed3100b249 100644
--- a/Documentation/mm/process_addrs.rst
+++ b/Documentation/mm/process_addrs.rst
@@ -60,6 +60,15 @@ Terminology
   :c:func:`!i_mmap_[try]lock_write` for file-backed memory. We refer to these
   locks as the reverse mapping locks, or 'rmap locks' for brevity.
 
+  When :c:macro:`!CONFIG_SPLIT_I_MMAP` is enabled, the file-backed i_mmap tree
+  is split into multiple sibling trees (one per NUMA node or a number based on
+  CPU count), each with its own :c:type:`!struct i_mmap_tree` containing a
+  red/black interval tree and a :c:type:`!struct rw_semaphore`. In this
+  configuration, :c:func:`!i_mmap_lock_read` and :c:func:`!i_mmap_lock_write`
+  acquire all per-tree locks, while VMA insert/remove operations use the
+  per-tree granularity :c:func:`!i_mmap_tree_lock_write` to lock only the
+  relevant sibling tree, significantly reducing lock contention.
+
 We discuss page table locks separately in the dedicated section below.
 
 The first thing **any** of these locks achieve is to **stabilise** the VMA
@@ -230,12 +239,16 @@ These are the core fields which describe the MM the VMA belongs to and its attri
                                                            Updated under mmap read lock by
                                                            :c:func:`!task_numa_work`.
    :c:member:`!vm_userfaultfd_ctx`   CONFIG_USERFAULTFD    Userfaultfd context wrapper object of    mmap write,
-                                                           type :c:type:`!vm_userfaultfd_ctx`,      VMA write.
-                                                           either of zero size if userfaultfd is
-                                                           disabled, or containing a pointer
-                                                           to an underlying
-                                                           :c:type:`!userfaultfd_ctx` object which
-                                                           describes userfaultfd metadata.
+                                                            type :c:type:`!vm_userfaultfd_ctx`,      VMA write.
+                                                            either of zero size if userfaultfd is
+                                                            disabled, or containing a pointer
+                                                            to an underlying
+                                                            :c:type:`!userfaultfd_ctx` object which
+                                                            describes userfaultfd metadata.
+   :c:member:`!tree_idx`             CONFIG_SPLIT_I_MMAP   The index of the sibling i_mmap tree     Written once on
+                                                            that this VMA belongs to, set at         initial map.
+                                                            VMA creation time based on the NUMA
+                                                            node or the smallest sibling tree.
    ================================= ===================== ======================================== ===============
 
 These fields are present or not depending on whether the relevant kernel
@@ -247,12 +260,18 @@ configuration option is set.
    Field                               Description                               Write lock
    =================================== ========================================= ============================
    :c:member:`!shared.rb`              A red/black tree node used, if the        mmap write, VMA write,
-                                       mapping is file-backed, to place the VMA  i_mmap write.
-                                       in the
-                                       :c:member:`!struct address_space->i_mmap`
-                                       red/black interval tree.
+                                        mapping is file-backed, to place the VMA  i_mmap write (or per-tree
+                                        in the                                    i_mmap write when
+                                        :c:member:`!struct address_space->i_mmap` :c:macro:`!CONFIG_SPLIT_I_MMAP`
+                                        red/black interval tree (or one of the    is set).
+                                        sibling trees when
+                                        :c:macro:`!CONFIG_SPLIT_I_MMAP`
+                                        is enabled).
    :c:member:`!shared.rb_subtree_last` Metadata used for management of the       mmap write, VMA write,
-                                       interval tree if the VMA is file-backed.  i_mmap write.
+                                        interval tree if the VMA is file-backed.  i_mmap write (or per-tree
+                                                                                  i_mmap write when
+                                                                                  :c:macro:`!CONFIG_SPLIT_I_MMAP`
+                                                                                  is set).
    :c:member:`!anon_vma_chain`         List of pointers to both forked/CoW’d     mmap read, anon_vma write.
                                        :c:type:`!anon_vma` objects and
                                        :c:member:`!vma->anon_vma` if it is
@@ -490,6 +509,16 @@ There is also a file-system specific lock ordering comment located at the top of
 Please check the current state of these comments which may have changed since
 the time of writing of this document.
 
+.. note:: When :c:macro:`!CONFIG_SPLIT_I_MMAP` is enabled, the single
+   ``mapping->i_mmap_rwsem`` is replaced by an array of per-tree locks
+   ``mapping->i_mmap[i]->rwsem``. The lock ordering positions of
+   ``mapping->i_mmap_rwsem`` above apply to each per-tree lock
+   equivalently. VMA insert/remove operations acquire only the relevant
+   per-tree lock via :c:func:`!i_mmap_tree_lock_write`, while operations
+   that require all trees to be locked (such as
+   :c:func:`!unmap_mapping_range`) acquire all per-tree locks via
+   :c:func:`!i_mmap_lock_write` or :c:func:`!i_mmap_lock_read`.
+
 ------------------------------
 Locking Implementation Details
 ------------------------------
@@ -704,11 +733,15 @@ traversed or referenced by concurrent tasks.
 
 It is insufficient to simply hold an mmap write lock and VMA lock (which will
 prevent racing faults, and rmap operations), as a file-backed mapping can be
-truncated under the :c:struct:`!struct address_space->i_mmap_rwsem` alone.
+truncated under the :c:struct:`!struct address_space->i_mmap_rwsem` alone
+(or, when :c:macro:`!CONFIG_SPLIT_I_MMAP` is enabled, under all per-tree
+``mapping->i_mmap[i]->rwsem`` locks acquired via
+:c:func:`!i_mmap_lock_write`).
 
 As a result, no VMA which can be accessed via the reverse mapping (either
 through the :c:struct:`!struct anon_vma->rb_root` or the :c:member:`!struct
-address_space->i_mmap` interval trees) can have its page tables torn down.
+address_space->i_mmap` interval trees, or the sibling trees when
+:c:macro:`!CONFIG_SPLIT_I_MMAP` is enabled) can have its page tables torn down.
 
 The operation is typically performed via :c:func:`!free_pgtables`, which assumes
 either the mmap write lock has been taken (as specified by its
@@ -729,7 +762,9 @@ cleared without page table locks (in the :c:func:`!pgd_clear`, :c:func:`!p4d_cle
 .. note:: It is possible for leaf page tables to be torn down independent of
           the page tables above it as is done by
           :c:func:`!retract_page_tables`, which is performed under the i_mmap
-          read lock, PMD, and PTE page table locks, without this level of care.
+          read lock (or all per-tree ``mapping->i_mmap[i]->rwsem`` locks in
+          read mode when :c:macro:`!CONFIG_SPLIT_I_MMAP` is enabled), PMD, and
+          PTE page table locks, without this level of care.
 
 Page table moving
 ^^^^^^^^^^^^^^^^^
-- 
2.53.0



^ permalink raw reply related

* [PATCH v2 3/4] mm/fs: split the file's i_mmap tree
From: Huang Shijie @ 2026-06-11  6:18 UTC (permalink / raw)
  To: akpm, viro, brauner, jack, muchun.song, osalvador, david
  Cc: surenb, mjguzik, liam, ljs, vbabka, shakeel.butt, rppt, mhocko,
	corbet, skhan, linux, dinguyen, schuster.simon, James.Bottomley,
	deller, djbw, willy, peterz, mingo, acme, namhyung, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, james.clark,
	mhiramat, oleg, ziy, baolin.wang, npache, ryan.roberts, dev.jain,
	baohua, lance.yang, linmiaohe, nao.horiguchi, jannh, pfalcato,
	riel, harry, will, brian.ruley, rmk+kernel, dave.anglin, linux-mm,
	linux-doc, linux-kernel, linux-arm-kernel, linux-parisc,
	linux-fsdevel, nvdimm, linux-perf-users, linux-trace-kernel,
	zhongyuan, fangbaoshun, yingzhiwei, Huang Shijie
In-Reply-To: <20260611061915.2354307-1-huangsj@hygon.cn>

In the UnixBench tests, there is a test "execl" which tests
the execve system call.
  For example, a Hygon server has 12 NUMA nodes and 384 CPUs.
When we test our server with "./Run -c 384 execl",
the test result is not good enough. The i_mmap locks are heavily contended on
"libc.so" and "ld.so". The i_mmap tree for "libc.so" can contain
over 6000 VMAs, and the VMAs can be on different NUMA nodes. The insert/remove
operations do not run quickly enough.

 In order to reduce contention on the i_mmap lock, this patch does the
following:
   1.) Split the single i_mmap tree into several sibling trees:
       Each tree has a lock. The CONFIG_SPLIT_I_MMAP is used to
       turn on/off this feature.
   2.) Introduce a new field "tree_idx" for vm_area_struct to save the
       sibling tree index for this VMA.
   3.) Introduce a new field "vma_count" for address_space.
       The new mapping_mapped() will use it.
   4.) Rewrite the vma_interval_tree_foreach()
   5.) Rewrite the lock functions.	

 After this patch, the VMA insert/remove operations will work faster,
and we can get over 400% performance improvement with the above test.

Signed-off-by: Huang Shijie <huangsj@hygon.cn>
---
 fs/Kconfig               |   8 ++
 fs/hugetlbfs/inode.c     |  20 ++++-
 fs/inode.c               |  75 ++++++++++++++++-
 include/linux/fs.h       | 174 ++++++++++++++++++++++++++++++++++++++-
 include/linux/mm.h       |  80 ++++++++++++++++++
 include/linux/mm_types.h |   3 +
 mm/internal.h            |   3 +-
 mm/mmap.c                |  11 ++-
 mm/nommu.c               |  23 ++++--
 mm/pagewalk.c            |   2 +-
 mm/vma.c                 |  72 +++++++++++-----
 mm/vma_init.c            |   3 +
 12 files changed, 436 insertions(+), 38 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 43cb06de297f..e24804f70432 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -9,6 +9,14 @@ menu "File systems"
 config DCACHE_WORD_ACCESS
        bool
 
+config SPLIT_I_MMAP
+	bool "Split the file's i_mmap to several trees"
+	default n
+	help
+	  Split the file's i_mmap into several trees, each with its own
+	  lock. This reduces lock contention on the file's i_mmap tree,
+	  but costs more memory per inode.
+
 config VALIDATE_FS_PARSER
 	bool "Validate filesystem parameter description"
 	help
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index da5b41ea5bdd..68d8308418dd 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -891,6 +891,23 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
  */
 static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;
 
+#ifdef CONFIG_SPLIT_I_MMAP
+static void hugetlbfs_lockdep_set_class(struct address_space *mapping)
+{
+	int i;
+
+	for (i = 0; i < split_tree_num; i++) {
+		lockdep_set_class(&mapping->i_mmap[i].rwsem,
+				&hugetlbfs_i_mmap_rwsem_key);
+	}
+}
+#else
+static void hugetlbfs_lockdep_set_class(struct address_space *mapping)
+{
+	lockdep_set_class(&mapping->i_mmap_rwsem, &hugetlbfs_i_mmap_rwsem_key);
+}
+#endif
+
 static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 					struct mnt_idmap *idmap,
 					struct inode *dir,
@@ -915,8 +932,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 
 		inode->i_ino = get_next_ino();
 		inode_init_owner(idmap, inode, dir, mode);
-		lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
-				&hugetlbfs_i_mmap_rwsem_key);
+		hugetlbfs_lockdep_set_class(inode->i_mapping);
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
 		simple_inode_init_ts(inode);
 		info->resv_map = resv_map;
diff --git a/fs/inode.c b/fs/inode.c
index 62c579a0cf7d..cb67ae83f5b3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -214,6 +214,70 @@ static int no_open(struct inode *inode, struct file *file)
 	return -ENXIO;
 }
 
+#ifdef CONFIG_SPLIT_I_MMAP
+int split_tree_num;
+static int split_tree_align __maybe_unused = 32;
+
+static void __init init_split_tree_num(void)
+{
+#ifdef CONFIG_NUMA
+	split_tree_num = nr_node_ids;
+#else
+	split_tree_num = ALIGN(nr_cpu_ids, split_tree_align);
+#endif
+}
+
+static void free_mapping_i_mmap(struct address_space *mapping)
+{
+	int i;
+
+	if (!mapping->i_mmap)
+		return;
+
+	for (i = 0; i < split_tree_num; i++)
+		kfree(mapping->i_mmap[i]);
+
+	kfree(mapping->i_mmap);
+	mapping->i_mmap = NULL;
+}
+
+static int init_mapping_i_mmap(struct address_space *mapping, gfp_t gfp)
+{
+	struct i_mmap_tree *tree;
+	int i;
+
+	/* The extra one is used as terminator in vma_interval_tree_foreach() */
+	mapping->i_mmap = kzalloc(sizeof(tree) * (split_tree_num + 1), gfp);
+	if (!mapping->i_mmap)
+		return -ENOMEM;
+
+	for (i = 0; i < split_tree_num; i++) {
+		tree = kzalloc_node(sizeof(*tree), gfp, i);
+		if (!tree)
+			goto nomem;
+
+		tree->root = RB_ROOT_CACHED;
+		init_rwsem(&tree->rwsem);
+
+		mapping->i_mmap[i] = tree;
+	}
+	return 0;
+nomem:
+	free_mapping_i_mmap(mapping);
+	return -ENOMEM;
+}
+#else
+static int init_mapping_i_mmap(struct address_space *mapping, gfp_t gfp)
+{
+	mapping->i_mmap = RB_ROOT_CACHED;
+	init_rwsem(&mapping->i_mmap_rwsem);
+	return 0;
+}
+
+static void free_mapping_i_mmap(struct address_space *mapping) { }
+static void __init init_split_tree_num(void) {}
+#endif
+
 /**
  * inode_init_always_gfp - perform inode structure initialisation
  * @sb: superblock inode belongs to
@@ -302,9 +366,14 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp
 #endif
 	inode->i_flctx = NULL;
 
-	if (unlikely(security_inode_alloc(inode, gfp)))
+	if (init_mapping_i_mmap(mapping, gfp))
 		return -ENOMEM;
 
+	if (unlikely(security_inode_alloc(inode, gfp))) {
+		free_mapping_i_mmap(mapping);
+		return -ENOMEM;
+	}
+
 	this_cpu_inc(nr_inodes);
 
 	return 0;
@@ -380,6 +449,7 @@ void __destroy_inode(struct inode *inode)
 	if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
 		posix_acl_release(inode->i_default_acl);
 #endif
+	free_mapping_i_mmap(&inode->i_data);
 	this_cpu_dec(nr_inodes);
 }
 EXPORT_SYMBOL(__destroy_inode);
@@ -480,9 +550,7 @@ EXPORT_SYMBOL(inc_nlink);
 static void __address_space_init_once(struct address_space *mapping)
 {
 	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
-	init_rwsem(&mapping->i_mmap_rwsem);
 	spin_lock_init(&mapping->i_private_lock);
-	mapping->i_mmap = RB_ROOT_CACHED;
 }
 
 void address_space_init_once(struct address_space *mapping)
@@ -2619,6 +2687,7 @@ void __init inode_init(void)
 					&i_hash_mask,
 					0,
 					0);
+	init_split_tree_num();
 }
 
 void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cd46615b8f53..f4b3645b61df 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -450,6 +450,25 @@ struct mapping_metadata_bhs {
 	struct list_head list;	/* The list of bhs (b_assoc_buffers) */
 };
 
+#ifdef CONFIG_SPLIT_I_MMAP
+/*
+ * struct i_mmap_tree - A single sibling tree of the file's split i_mmap.
+ * @root: The red/black interval tree root.
+ * @rwsem: Protects insert/remove operations on this sibling tree.
+ * @vma_count: Number of VMAs in this sibling tree.
+ *
+ * When CONFIG_SPLIT_I_MMAP is enabled, the file's single i_mmap tree is
+ * split into split_tree_num sibling trees, each with its own lock. This
+ * reduces lock contention by allowing concurrent VMA insert/remove
+ * operations on different sibling trees.
+ */
+struct i_mmap_tree {
+	struct rb_root_cached	root;
+	struct rw_semaphore	rwsem;
+	atomic_t		vma_count;
+};
+#endif
+
 /**
  * struct address_space - Contents of a cacheable, mappable object.
  * @host: Owner, either the inode or the block_device.
@@ -461,8 +480,13 @@ struct mapping_metadata_bhs {
  * @gfp_mask: Memory allocation flags to use for allocating pages.
  * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings.
  * @nr_thps: Number of THPs in the pagecache (non-shmem only).
- * @i_mmap: Tree of private and shared mappings.
- * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
+ * @i_mmap: Tree of private and shared mappings. When CONFIG_SPLIT_I_MMAP
+ *   is enabled, this is an array of split_tree_num struct i_mmap_tree
+ *   pointers (plus a NULL terminator).
+ * @vma_count: Total number of VMAs across all sibling trees (only when
+ *   CONFIG_SPLIT_I_MMAP is enabled). Used by mapping_mapped().
+ * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable (only when
+ *   CONFIG_SPLIT_I_MMAP is disabled; otherwise per-tree rwsem is used).
  * @nrpages: Number of page entries, protected by the i_pages lock.
  * @writeback_index: Writeback starts here.
  * @a_ops: Methods.
@@ -480,14 +504,19 @@ struct address_space {
 	/* number of thp, only for non-shmem files */
 	atomic_t		nr_thps;
 #endif
+#ifdef CONFIG_SPLIT_I_MMAP
+	struct i_mmap_tree	**i_mmap;
+	atomic_t		vma_count;
+#else
 	struct rb_root_cached	i_mmap;
+	struct rw_semaphore	i_mmap_rwsem;
+#endif
 	unsigned long		nrpages;
 	pgoff_t			writeback_index;
 	const struct address_space_operations *a_ops;
 	unsigned long		flags;
 	errseq_t		wb_err;
 	spinlock_t		i_private_lock;
-	struct rw_semaphore	i_mmap_rwsem;
 } __attribute__((aligned(sizeof(long)))) __randomize_layout;
 	/*
 	 * On most architectures that alignment is already the case; but
@@ -508,6 +537,133 @@ static inline bool mapping_tagged(const struct address_space *mapping, xa_mark_t
 	return xa_marked(&mapping->i_pages, tag);
 }
 
+#ifdef CONFIG_SPLIT_I_MMAP
+static inline int mapping_mapped(const struct address_space *mapping)
+{
+	return	atomic_read(&mapping->vma_count);
+}
+
+static inline void inc_mapping_vma(struct address_space *mapping,
+				struct vm_area_struct *vma)
+{
+	struct i_mmap_tree *tree = mapping->i_mmap[vma->tree_idx];
+
+	atomic_inc(&tree->vma_count);
+	atomic_inc(&mapping->vma_count);
+}
+
+static inline void dec_mapping_vma(struct address_space *mapping,
+				struct vm_area_struct *vma)
+{
+	struct i_mmap_tree *tree = mapping->i_mmap[vma->tree_idx];
+
+	atomic_dec(&tree->vma_count);
+	atomic_dec(&mapping->vma_count);
+}
+
+static inline struct rb_root_cached *get_i_mmap_root(struct address_space *mapping)
+{
+	return (struct rb_root_cached *)mapping->i_mmap;
+}
+
+static inline void i_mmap_tree_lock_write(struct address_space *mapping,
+					struct vm_area_struct *vma)
+{
+	struct i_mmap_tree *tree = mapping->i_mmap[vma->tree_idx];
+
+	down_write(&tree->rwsem);
+}
+
+static inline void i_mmap_tree_unlock_write(struct address_space *mapping,
+					struct vm_area_struct *vma)
+{
+	struct i_mmap_tree *tree = mapping->i_mmap[vma->tree_idx];
+
+	up_write(&tree->rwsem);
+}
+
+#define i_mmap_lock_write_prepare(mapping)
+#define i_mmap_unlock_write_complete(mapping)
+
+extern int split_tree_num;
+static inline void i_mmap_lock_write(struct address_space *mapping)
+{
+	int i;
+
+	for (i = 0; i < split_tree_num; i++)
+		down_write(&mapping->i_mmap[i]->rwsem);
+}
+
+static inline int i_mmap_trylock_write(struct address_space *mapping)
+{
+	int i;
+
+	for (i = 0; i < split_tree_num; i++) {
+		if (!down_write_trylock(&mapping->i_mmap[i]->rwsem)) {
+			while (i--)
+				up_write(&mapping->i_mmap[i]->rwsem);
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static inline void i_mmap_unlock_write(struct address_space *mapping)
+{
+	int i;
+
+	for (i = 0; i < split_tree_num; i++)
+		up_write(&mapping->i_mmap[i]->rwsem);
+}
+
+static inline int i_mmap_trylock_read(struct address_space *mapping)
+{
+	int i;
+
+	for (i = 0; i < split_tree_num; i++) {
+		if (!down_read_trylock(&mapping->i_mmap[i]->rwsem)) {
+			while (i--)
+				up_read(&mapping->i_mmap[i]->rwsem);
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static inline void i_mmap_lock_read(struct address_space *mapping)
+{
+	int i;
+
+	for (i = 0; i < split_tree_num; i++)
+		down_read(&mapping->i_mmap[i]->rwsem);
+}
+
+static inline void i_mmap_unlock_read(struct address_space *mapping)
+{
+	int i;
+
+	for (i = 0; i < split_tree_num; i++)
+		up_read(&mapping->i_mmap[i]->rwsem);
+}
+
+static inline void i_mmap_assert_locked(struct address_space *mapping)
+{
+	int i;
+
+	for (i = 0; i < split_tree_num; i++)
+		lockdep_assert_held(&mapping->i_mmap[i]->rwsem);
+}
+
+static inline void i_mmap_assert_write_locked(struct address_space *mapping)
+{
+	int i;
+
+	for (i = 0; i < split_tree_num; i++)
+		lockdep_assert_held_write(&mapping->i_mmap[i]->rwsem);
+}
+
+#else
+
 static inline void i_mmap_lock_write(struct address_space *mapping)
 {
 	down_write(&mapping->i_mmap_rwsem);
@@ -561,6 +717,18 @@ static inline struct rb_root_cached *get_i_mmap_root(struct address_space *mappi
 	return &mapping->i_mmap;
 }
 
+static inline void inc_mapping_vma(struct address_space *mapping,
+				struct vm_area_struct *vma) { }
+static inline void dec_mapping_vma(struct address_space *mapping,
+				struct vm_area_struct *vma) { }
+
+#define i_mmap_lock_write_prepare(mapping)	i_mmap_lock_write(mapping)
+#define i_mmap_unlock_write_complete(mapping)	i_mmap_unlock_write(mapping)
+#define i_mmap_tree_lock_write(mapping, vma)
+#define i_mmap_tree_unlock_write(mapping, vma)
+
+#endif
+
 /*
  * Might pages of this file have been modified in userspace?
  * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0a45c6a8b9f2..9aa8119fa9bf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4041,11 +4041,91 @@ struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root_cached *root,
 struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
 				unsigned long start, unsigned long last);
 
+#ifdef CONFIG_SPLIT_I_MMAP
+extern int split_tree_num;
+
+static inline int smallest_tree_idx(struct file *file)
+{
+	struct address_space *mapping = file->f_mapping;
+	int tmp = INT_MAX, count;
+	int i, j = 0;
+
+	/*
+	 * An approximate count is good enough for choosing a tree,
+	 * so no lock is needed here.
+	 */
+	for (i = 0; i < split_tree_num; i++) {
+		count = atomic_read(&mapping->i_mmap[i]->vma_count);
+		if (count < tmp) {
+			j = i;
+			tmp = count;
+			if (!tmp)
+				break;
+		}
+	}
+	return j;
+}
+
+static inline void vma_set_tree_idx(struct vm_area_struct *vma)
+{
+#ifdef CONFIG_NUMA
+	vma->tree_idx = numa_node_id();
+#else
+	vma->tree_idx = smallest_tree_idx(vma->vm_file);
+#endif
+}
+
+static inline struct rb_root_cached *get_rb_root(struct vm_area_struct *vma,
+					struct address_space *mapping)
+{
+	return &mapping->i_mmap[vma->tree_idx]->root;
+}
+
+/* Find the first valid VMA in the sibling trees */
+static inline struct vm_area_struct *first_vma(struct i_mmap_tree ***__r,
+				unsigned long start, unsigned long last)
+{
+	struct vm_area_struct *vma = NULL;
+	struct i_mmap_tree **tree = *__r;
+	struct rb_root_cached *root;
+
+	while (*tree) {
+		root = &(*tree)->root;
+		tree++;
+		vma = vma_interval_tree_iter_first(root, start, last);
+		if (vma)
+			break;
+	}
+
+	/* Save for the next loop */
+	*__r = tree;
+	return vma;
+}
+
+/*
+ * Please use get_i_mmap_root() to get the @root.
+ * @_tmp is referenced to avoid an unused-variable warning.
+ */
+#define vma_interval_tree_foreach(vma, root, start, last)		\
+	for (struct i_mmap_tree **_r = (struct i_mmap_tree **)(root),	\
+		**_tmp = (vma = first_vma(&_r, start, last)) ? _r : NULL;\
+	     ((_tmp && vma) || (vma = first_vma(&_r, start, last)));	\
+		vma = vma_interval_tree_iter_next(vma, start, last))
+#else
 /* Please use get_i_mmap_root() to get the @root */
 #define vma_interval_tree_foreach(vma, root, start, last)		\
 	for (vma = vma_interval_tree_iter_first(root, start, last);	\
 	     vma; vma = vma_interval_tree_iter_next(vma, start, last))
 
+static inline void vma_set_tree_idx(struct vm_area_struct *vma) { }
+
+static inline struct rb_root_cached *get_rb_root(struct vm_area_struct *vma,
+					struct address_space *mapping)
+{
+	return &mapping->i_mmap;
+}
+#endif
+
 void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
 				   struct rb_root_cached *root);
 void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a308e2c23b82..8d6aab3346ce 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1072,6 +1072,9 @@ struct vm_area_struct {
 #ifdef __HAVE_PFNMAP_TRACKING
 	struct pfnmap_track_ctx *pfnmap_track_ctx;
 #endif
+#ifdef CONFIG_SPLIT_I_MMAP
+	int tree_idx;			/* The sibling tree index for the VMA */
+#endif
 } __randomize_layout;
 
 /* Clears all bits in the VMA flags bitmap, non-atomically. */
diff --git a/mm/internal.h b/mm/internal.h
index 5a2ddcf68e0b..2d35cacffd19 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1888,7 +1888,8 @@ static inline void maybe_rmap_unlock_action(struct vm_area_struct *vma,
 
 	VM_WARN_ON_ONCE(vma_is_anonymous(vma));
 	file = vma->vm_file;
-	i_mmap_unlock_write(file->f_mapping);
+	i_mmap_tree_unlock_write(file->f_mapping, vma);
+	i_mmap_unlock_write_complete(file->f_mapping);
 	action->hide_from_rmap_until_complete = false;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index d714fdb357e5..70036ec9dcaa 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1825,15 +1825,20 @@ __latent_entropy int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 			struct address_space *mapping = file->f_mapping;
 
 			get_file(file);
-			i_mmap_lock_write(mapping);
+			i_mmap_lock_write_prepare(mapping);
+			i_mmap_tree_lock_write(mapping, mpnt);
+
 			if (vma_is_shared_maywrite(tmp))
 				mapping_allow_writable(mapping);
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
 			vma_interval_tree_insert_after(tmp, mpnt,
-					get_i_mmap_root(mapping));
+					get_rb_root(mpnt, mapping));
+			inc_mapping_vma(mapping, tmp);
 			flush_dcache_mmap_unlock(mapping);
-			i_mmap_unlock_write(mapping);
+
+			i_mmap_tree_unlock_write(mapping, mpnt);
+			i_mmap_unlock_write_complete(mapping);
 		}
 
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
diff --git a/mm/nommu.c b/mm/nommu.c
index 0f18ffc658e9..1f2c60a220f6 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -567,11 +567,16 @@ static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm)
 	if (vma->vm_file) {
 		struct address_space *mapping = vma->vm_file->f_mapping;
 
-		i_mmap_lock_write(mapping);
+		i_mmap_lock_write_prepare(mapping);
+		i_mmap_tree_lock_write(mapping, vma);
+
 		flush_dcache_mmap_lock(mapping);
-		vma_interval_tree_insert(vma, get_i_mmap_root(mapping));
+		vma_interval_tree_insert(vma, get_rb_root(vma, mapping));
+		inc_mapping_vma(mapping, vma);
 		flush_dcache_mmap_unlock(mapping);
-		i_mmap_unlock_write(mapping);
+
+		i_mmap_tree_unlock_write(mapping, vma);
+		i_mmap_unlock_write_complete(mapping);
 	}
 }
 
@@ -583,11 +588,16 @@ static void cleanup_vma_from_mm(struct vm_area_struct *vma)
 		struct address_space *mapping;
 		mapping = vma->vm_file->f_mapping;
 
-		i_mmap_lock_write(mapping);
+		i_mmap_lock_write_prepare(mapping);
+		i_mmap_tree_lock_write(mapping, vma);
+
 		flush_dcache_mmap_lock(mapping);
-		vma_interval_tree_remove(vma, get_i_mmap_root(mapping));
+		vma_interval_tree_remove(vma, get_rb_root(vma, mapping));
+		dec_mapping_vma(mapping, vma);
 		flush_dcache_mmap_unlock(mapping);
-		i_mmap_unlock_write(mapping);
+
+		i_mmap_tree_unlock_write(mapping, vma);
+		i_mmap_unlock_write_complete(mapping);
 	}
 }
 
@@ -1063,6 +1073,7 @@ unsigned long do_mmap(struct file *file,
 	if (file) {
 		region->vm_file = get_file(file);
 		vma->vm_file = get_file(file);
+		vma_set_tree_idx(vma);
 	}
 
 	down_write(&nommu_region_sem);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 8df1b5077951..d5745519d95a 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -809,7 +809,7 @@ int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
 	if (!check_ops_safe(ops))
 		return -EINVAL;
 
-	lockdep_assert_held(&mapping->i_mmap_rwsem);
+	i_mmap_assert_locked(mapping);
 	vma_interval_tree_foreach(vma, get_i_mmap_root(mapping), first_index,
 				  first_index + nr - 1) {
 		/* Clip to the vma */
diff --git a/mm/vma.c b/mm/vma.c
index 6159650c1b42..2055758064a9 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -234,22 +234,23 @@ static void __vma_link_file(struct vm_area_struct *vma,
 		mapping_allow_writable(mapping);
 
 	flush_dcache_mmap_lock(mapping);
-	vma_interval_tree_insert(vma, get_i_mmap_root(mapping));
+	vma_interval_tree_insert(vma, get_rb_root(vma, mapping));
+	inc_mapping_vma(mapping, vma);
 	flush_dcache_mmap_unlock(mapping);
 }
 
-/*
- * Requires inode->i_mapping->i_mmap_rwsem
- */
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 				      struct address_space *mapping)
 {
+	i_mmap_tree_lock_write(mapping, vma);
 	if (vma_is_shared_maywrite(vma))
 		mapping_unmap_writable(mapping);
 
 	flush_dcache_mmap_lock(mapping);
-	vma_interval_tree_remove(vma, get_i_mmap_root(mapping));
+	vma_interval_tree_remove(vma, get_rb_root(vma, mapping));
+	dec_mapping_vma(mapping, vma);
 	flush_dcache_mmap_unlock(mapping);
+	i_mmap_tree_unlock_write(mapping, vma);
 }
 
 /*
@@ -297,8 +298,9 @@ static void vma_prepare(struct vma_prepare *vp)
 			uprobe_munmap(vp->adj_next, vp->adj_next->vm_start,
 				      vp->adj_next->vm_end);
 
-		i_mmap_lock_write(vp->mapping);
+		i_mmap_lock_write_prepare(vp->mapping);
 		if (vp->insert && vp->insert->vm_file) {
+			i_mmap_tree_lock_write(vp->mapping, vp->insert);
 			/*
 			 * Put into interval tree now, so instantiated pages
 			 * are visible to arm/parisc __flush_dcache_page
@@ -307,6 +309,7 @@ static void vma_prepare(struct vma_prepare *vp)
 			 */
 			__vma_link_file(vp->insert,
 					vp->insert->vm_file->f_mapping);
+			i_mmap_tree_unlock_write(vp->mapping, vp->insert);
 		}
 	}
 
@@ -318,12 +321,17 @@ static void vma_prepare(struct vma_prepare *vp)
 	}
 
 	if (vp->file) {
+		i_mmap_tree_lock_write(vp->mapping, vp->vma);
 		flush_dcache_mmap_lock(vp->mapping);
 		vma_interval_tree_remove(vp->vma,
-					get_i_mmap_root(vp->mapping));
-		if (vp->adj_next)
+					get_rb_root(vp->vma, vp->mapping));
+		dec_mapping_vma(vp->mapping, vp->vma);
+		if (vp->adj_next) {
+			i_mmap_tree_lock_write(vp->mapping, vp->adj_next);
 			vma_interval_tree_remove(vp->adj_next,
-					get_i_mmap_root(vp->mapping));
+					get_rb_root(vp->adj_next, vp->mapping));
+			dec_mapping_vma(vp->mapping, vp->adj_next);
+		}
 	}
 
 }
@@ -340,12 +348,17 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
 			 struct mm_struct *mm)
 {
 	if (vp->file) {
-		if (vp->adj_next)
+		if (vp->adj_next) {
 			vma_interval_tree_insert(vp->adj_next,
-					get_i_mmap_root(vp->mapping));
+					get_rb_root(vp->adj_next, vp->mapping));
+			inc_mapping_vma(vp->mapping, vp->adj_next);
+			i_mmap_tree_unlock_write(vp->mapping, vp->adj_next);
+		}
 		vma_interval_tree_insert(vp->vma,
-					get_i_mmap_root(vp->mapping));
+					get_rb_root(vp->vma, vp->mapping));
+		inc_mapping_vma(vp->mapping, vp->vma);
 		flush_dcache_mmap_unlock(vp->mapping);
+		i_mmap_tree_unlock_write(vp->mapping, vp->vma);
 	}
 
 	if (vp->remove && vp->file) {
@@ -370,7 +383,7 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
 	}
 
 	if (vp->file) {
-		i_mmap_unlock_write(vp->mapping);
+		i_mmap_unlock_write_complete(vp->mapping);
 
 		if (!vp->skip_vma_uprobe) {
 			uprobe_mmap(vp->vma);
@@ -1799,12 +1812,12 @@ static void unlink_file_vma_batch_process(struct unlink_vma_file_batch *vb)
 	int i;
 
 	mapping = vb->vmas[0]->vm_file->f_mapping;
-	i_mmap_lock_write(mapping);
+	i_mmap_lock_write_prepare(mapping);
 	for (i = 0; i < vb->count; i++) {
 		VM_WARN_ON_ONCE(vb->vmas[i]->vm_file->f_mapping != mapping);
 		__remove_shared_vm_struct(vb->vmas[i], mapping);
 	}
-	i_mmap_unlock_write(mapping);
+	i_mmap_unlock_write_complete(mapping);
 
 	unlink_file_vma_batch_init(vb);
 }
@@ -1836,10 +1849,13 @@ static void vma_link_file(struct vm_area_struct *vma, bool hold_rmap_lock)
 
 	if (file) {
 		mapping = file->f_mapping;
-		i_mmap_lock_write(mapping);
+		i_mmap_lock_write_prepare(mapping);
+		i_mmap_tree_lock_write(mapping, vma);
 		__vma_link_file(vma, mapping);
-		if (!hold_rmap_lock)
-			i_mmap_unlock_write(mapping);
+		if (!hold_rmap_lock) {
+			i_mmap_tree_unlock_write(mapping, vma);
+			i_mmap_unlock_write_complete(mapping);
+		}
 	}
 }
 
@@ -2164,6 +2180,23 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
 	}
 }
 
+#ifdef CONFIG_SPLIT_I_MMAP
+static inline void i_mmap_nest_lock(struct address_space *mapping,
+				struct rw_semaphore *lock)
+{
+	int i;
+
+	for (i = 0; i < split_tree_num; i++)
+		down_write_nest_lock(&mapping->i_mmap[i]->rwsem, lock);
+}
+#else
+static inline void i_mmap_nest_lock(struct address_space *mapping,
+				struct rw_semaphore *lock)
+{
+	down_write_nest_lock(&mapping->i_mmap_rwsem, lock);
+}
+#endif
+
 static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
 {
 	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
@@ -2178,7 +2211,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
 		 */
 		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
 			BUG();
-		down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
+		i_mmap_nest_lock(mapping, &mm->mmap_lock);
 	}
 }
 
@@ -2489,6 +2522,7 @@ static int __mmap_new_file_vma(struct mmap_state *map,
 	int error;
 
 	vma->vm_file = map->file;
+	vma_set_tree_idx(vma);
 	if (!map->file_doesnt_need_get)
 		get_file(map->file);
 
diff --git a/mm/vma_init.c b/mm/vma_init.c
index 3c0b65950510..c115e33d4812 100644
--- a/mm/vma_init.c
+++ b/mm/vma_init.c
@@ -72,6 +72,9 @@ static void vm_area_init_from(const struct vm_area_struct *src,
 #ifdef CONFIG_NUMA
 	dest->vm_policy = src->vm_policy;
 #endif
+#ifdef CONFIG_SPLIT_I_MMAP
+	dest->tree_idx = src->tree_idx;
+#endif
 #ifdef __HAVE_PFNMAP_TRACKING
 	dest->pfnmap_track_ctx = NULL;
 #endif
-- 
2.53.0



^ permalink raw reply related

* Re: [PATCH net-next V3 2/7] netdevsim: Register devlink after device init
From: Mark Bloch @ 2026-06-11  6:02 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Eric Dumazet, Paolo Abeni, Andrew Lunn, David S. Miller,
	Jonathan Corbet, Shuah Khan, Jiri Pirko, Simon Horman,
	Sunil Goutham, Linu Cherian, Geetha sowjanya, hariprasad,
	Subbaraya Sundeep, Bharat Bhushan, Saeed Mahameed,
	Leon Romanovsky, Tariq Toukan, Ethan Nelson-Moore, linux-doc,
	netdev, linux-rdma
In-Reply-To: <20260610165053.7c91f331@kernel.org>



On 11/06/2026 2:50, Jakub Kicinski wrote:
> On Fri, 5 Jun 2026 21:10:25 +0300 Mark Bloch wrote:
>> devl_register() makes the devlink instance visible to userspace. A later
>> patch also makes registration the point where devlink core may call
>> eswitch_mode_set() to apply a boot-time default eswitch mode.
>>
>> Move netdevsim registration after all objects (resources, params, regions,
>> traps, debugfs etc) are initialized, and after the initial eswitch mode is
>> set to legacy.
>>
>> Move devl_unregister() to the beginning of nsim_drv_remove(), before those
>> devlink objects are torn down. This keeps devlink register/unregister as
>> the notification barrier and makes the later object teardown paths run
>> after devlink is no longer registered, so they do not emit their own
>> netlink DEL notifications.
> 
> This is going backwards. At some point someone from nVidia thought that
> we can order our way out of locking, so mlx5 is likely ordered this way,
> but this must not be required, or in any way normalized.
> We (syzbot) quickly discovered that it doesn't cover all corner cases.
> devl_lock() is exposed specifically to allow the driver to finish
> whatever init it needs without letting user space invoke callbacks, yet.
> Almost (?) all driver callbacks hold devl_lock(), so maybe the devlink
> instance is "visible" to user space but that should not matter.

Let me clarify.

No locking is changed here, and I don't want to make register/unregister
ordering a substitute for devl_lock().

The only requirement I have for this series is that devl_register() is called
only once the driver is ready for devlink core to call eswitch_mode_set().
That follows from the earlier direction to have the core apply the default
mode from devl_register() instead of adding an explicit driver call.

So if the objection is to the commit message wording, I can fix that and drop
the "notification barrier" language.

For unregister, I can probably leave the old ordering as-is. I moved it only
to mirror the register path, which felt cleaner, but it is not required for
the default-mode change and as the lock is held I see no issue with doing
that.

Mark


^ permalink raw reply

* [PATCH v3] kconfig: add optional warnings for changed input values
From: Pengpeng Hou @ 2026-06-11  6:00 UTC (permalink / raw)
  To: Masahiro Yamada, Nathan Chancellor, Nicolas Schier
  Cc: Jonathan Corbet, linux-kbuild, linux-doc, linux-kernel,
	Pengpeng Hou

When reading .config input, Kconfig stores user-provided values first
and then resolves the final value after applying dependencies, ranges,
and other constraints.

If the final value differs from the user input, Kconfig already tracks
that state internally, but it does not provide a focused diagnostic to
show which explicit inputs were adjusted. This is particularly confusing
for requested values that get forced down by unmet dependencies or
clamped by ranges.

Add an opt-in diagnostic controlled by KCONFIG_WARN_CHANGED_INPUT. Emit
the warnings from conf_write() and conf_write_defconfig() after value
resolution. Print the diagnostic to stderr directly, not through the
normal message callback, so it remains visible when conf is run with -s,
such as from make -s.

Keep the diagnostic out of the conf_message() formatting buffer so long
warning lists are not truncated, and mark processed symbols as written
before the SYMBOL_WRITE check so duplicate menu nodes cannot emit
duplicate warnings.

Document the new environment variable and add tests for olddefconfig,
savedefconfig, and the silent-conf path.

Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
---
Changes since v2:
https://lore.kernel.org/all/20260521022824.38591-1-pengpeng@iscas.ac.cn/
- print the changed-input diagnostic to stderr directly so it remains
  visible when conf is run with -s, as reported by Nathan
- move the selftest expectation from stdout to stderr
- add explicit silent-conf coverage for the KCONFIG_WARN_CHANGED_INPUT
  warning path
- keep Nicolas's v2 Reviewed-by/Tested-by out of this revision because
  the warning output path changed

 Documentation/kbuild/kconfig.rst              |   5 +
 scripts/kconfig/confdata.c                    | 106 +++++++++++++++++-
 scripts/kconfig/tests/conftest.py             |   8 +-
 .../kconfig/tests/warn_changed_input/Kconfig  |  40 +++++++
 .../tests/warn_changed_input/__init__.py      |  33 ++++++
 .../kconfig/tests/warn_changed_input/config   |   3 +
 .../tests/warn_changed_input/expected_config  |   6 +
 .../warn_changed_input/expected_defconfig     |   1 +
 .../tests/warn_changed_input/expected_stderr  |   4 +
 9 files changed, 200 insertions(+), 6 deletions(-)
 create mode 100644 scripts/kconfig/tests/warn_changed_input/Kconfig
 create mode 100644 scripts/kconfig/tests/warn_changed_input/__init__.py
 create mode 100644 scripts/kconfig/tests/warn_changed_input/config
 create mode 100644 scripts/kconfig/tests/warn_changed_input/expected_config
 create mode 100644 scripts/kconfig/tests/warn_changed_input/expected_defconfig
 create mode 100644 scripts/kconfig/tests/warn_changed_input/expected_stderr

diff --git a/Documentation/kbuild/kconfig.rst b/Documentation/kbuild/kconfig.rst
index fc4e845bc249..9b2625c768f0 100644
--- a/Documentation/kbuild/kconfig.rst
+++ b/Documentation/kbuild/kconfig.rst
@@ -59,6 +59,11 @@ Environment variables for ``*config``:
     This environment variable makes Kconfig warn about all unrecognized
     symbols in the config input.
 
+``KCONFIG_WARN_CHANGED_INPUT``
+    If set to a non-empty value, Kconfig prints optional warnings for
+    user-provided values that change after Kconfig resolves dependencies
+    or applies other constraints such as ranges.
+
 ``KCONFIG_WERROR``
     If set, Kconfig treats warnings as errors.
 
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index ac95661a1c9d..34be06d4b563 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -206,6 +206,78 @@ static void conf_message(const char *fmt, ...)
 	va_end(ap);
 }
 
+static void conf_changed_input_warning(const char *s)
+{
+	fputs(s, stderr);
+}
+
+static bool conf_warn_changed_input_enabled(void)
+{
+	const char *env = getenv("KCONFIG_WARN_CHANGED_INPUT");
+
+	return env && *env;
+}
+
+static const char *sym_get_user_value_string(struct symbol *sym)
+{
+	switch (sym->type) {
+	case S_BOOLEAN:
+	case S_TRISTATE:
+		switch (sym->def[S_DEF_USER].tri) {
+		case yes:
+			return "y";
+		case mod:
+			return "m";
+		default:
+			return "n";
+		}
+	default:
+		return sym->def[S_DEF_USER].val ?: "";
+	}
+}
+
+static bool sym_user_value_changed(struct symbol *sym)
+{
+	if (!sym_has_value(sym) || sym->type == S_UNKNOWN)
+		return false;
+
+	switch (sym->type) {
+	case S_BOOLEAN:
+	case S_TRISTATE:
+		return sym->def[S_DEF_USER].tri != sym_get_tristate_value(sym);
+	default:
+		return strcmp(sym_get_user_value_string(sym),
+			      sym_get_string_value(sym));
+	}
+}
+
+static void conf_clear_written_flags(void)
+{
+	struct symbol *sym;
+
+	for_all_symbols(sym)
+		sym->flags &= ~SYMBOL_WRITTEN;
+}
+
+static void conf_append_changed_input_warning(struct gstr *gs,
+					      struct symbol *sym,
+					      bool *changed_input_found)
+{
+	if (!sym_user_value_changed(sym))
+		return;
+
+	if (!*changed_input_found) {
+		str_printf(gs,
+			   "warning: user-provided values changed by Kconfig:\n");
+		*changed_input_found = true;
+	}
+
+	str_printf(gs, "  %s%s: %s -> %s\n",
+		   CONFIG_, sym->name,
+		   sym_get_user_value_string(sym),
+		   sym_get_string_value(sym));
+}
+
 const char *conf_get_configname(void)
 {
 	char *name = getenv("KCONFIG_CONFIG");
@@ -759,11 +831,15 @@ int conf_write_defconfig(const char *filename)
 {
 	struct symbol *sym;
 	struct menu *menu;
+	struct gstr gs;
 	FILE *out;
+	bool warn_changed_input = conf_warn_changed_input_enabled();
+	bool changed_input_found = false;
 
 	out = fopen(filename, "w");
 	if (!out)
 		return 1;
+	gs = str_new();
 
 	sym_clear_all_valid();
 
@@ -772,10 +848,14 @@ int conf_write_defconfig(const char *filename)
 
 		sym = menu->sym;
 
-		if (!sym || sym_is_choice(sym))
+		if (!sym || sym_is_choice(sym) || sym->flags & SYMBOL_WRITTEN)
 			continue;
 
 		sym_calc_value(sym);
+		if (warn_changed_input)
+			conf_append_changed_input_warning(&gs, sym,
+							  &changed_input_found);
+		sym->flags |= SYMBOL_WRITTEN;
 		if (!(sym->flags & SYMBOL_WRITE))
 			continue;
 		sym->flags &= ~SYMBOL_WRITE;
@@ -798,6 +878,13 @@ int conf_write_defconfig(const char *filename)
 		print_symbol_for_dotconfig(out, sym);
 	}
 	fclose(out);
+
+	conf_clear_written_flags();
+
+	if (changed_input_found)
+		conf_changed_input_warning(str_get(&gs));
+
+	str_free(&gs);
 	return 0;
 }
 
@@ -809,7 +896,10 @@ int conf_write(const char *name)
 	const char *str;
 	char tmpname[PATH_MAX + 1], oldname[PATH_MAX + 1];
 	char *env;
+	struct gstr gs;
 	bool need_newline = false;
+	bool warn_changed_input = conf_warn_changed_input_enabled();
+	bool changed_input_found = false;
 
 	if (!name)
 		name = conf_get_configname();
@@ -838,6 +928,7 @@ int conf_write(const char *name)
 	}
 	if (!out)
 		return 1;
+	gs = str_new();
 
 	conf_write_heading(out, &comment_style_pound);
 
@@ -859,13 +950,16 @@ int conf_write(const char *name)
 		} else if (!sym_is_choice(sym) &&
 			   !(sym->flags & SYMBOL_WRITTEN)) {
 			sym_calc_value(sym);
+			if (warn_changed_input)
+				conf_append_changed_input_warning(&gs, sym,
+								  &changed_input_found);
+			sym->flags |= SYMBOL_WRITTEN;
 			if (!(sym->flags & SYMBOL_WRITE))
 				goto next;
 			if (need_newline) {
 				fprintf(out, "\n");
 				need_newline = false;
 			}
-			sym->flags |= SYMBOL_WRITTEN;
 			print_symbol_for_dotconfig(out, sym);
 		}
 
@@ -892,8 +986,12 @@ int conf_write(const char *name)
 	}
 	fclose(out);
 
-	for_all_symbols(sym)
-		sym->flags &= ~SYMBOL_WRITTEN;
+	conf_clear_written_flags();
+
+	if (changed_input_found)
+		conf_changed_input_warning(str_get(&gs));
+
+	str_free(&gs);
 
 	if (*tmpname) {
 		if (is_same(name, tmpname)) {
diff --git a/scripts/kconfig/tests/conftest.py b/scripts/kconfig/tests/conftest.py
index 2a2a7e2da060..87860b1bfd9f 100644
--- a/scripts/kconfig/tests/conftest.py
+++ b/scripts/kconfig/tests/conftest.py
@@ -37,7 +37,8 @@ class Conf:
 
     # runners
     def _run_conf(self, mode, dot_config=None, out_file='.config',
-                  interactive=False, in_keys=None, extra_env={}):
+                  interactive=False, in_keys=None, extra_env={},
+                  silent=False):
         """Run text-based Kconfig executable and save the result.
 
         mode: input mode option (--oldaskconfig, --defconfig=<file> etc.)
@@ -48,7 +49,10 @@ class Conf:
         extra_env: additional environments
         returncode: exit status of the Kconfig executable
         """
-        command = [CONF_PATH, mode, 'Kconfig']
+        command = [CONF_PATH]
+        if silent:
+            command.append('-s')
+        command += [mode, 'Kconfig']
 
         # Override 'srctree' environment to make the test as the top directory
         extra_env['srctree'] = self._test_dir
diff --git a/scripts/kconfig/tests/warn_changed_input/Kconfig b/scripts/kconfig/tests/warn_changed_input/Kconfig
new file mode 100644
index 000000000000..69845e2f3fb3
--- /dev/null
+++ b/scripts/kconfig/tests/warn_changed_input/Kconfig
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config DEP
+	bool "DEP"
+	help
+	  Test dependency symbol for Kconfig warning coverage.
+	  This is used by the warn_changed_input selftest.
+	  It intentionally stays unset in the input fragment.
+	  The test checks how dependent user input is adjusted.
+
+config A
+	bool "A"
+	depends on DEP
+	help
+	  Test bool symbol for changed-input diagnostics.
+	  The input fragment requests this symbol as built-in.
+	  The unmet dependency on DEP forces the final value to n.
+	  The warning should report that downgrade.
+
+config NUM
+	int "NUM"
+	range 10 20
+	help
+	  Test integer symbol for changed-input diagnostics.
+	  The input fragment requests a value outside the allowed range.
+	  Kconfig resolves it to the constrained in-range value.
+	  The warning should report that adjustment.
+
+config DUP
+	bool "DUP"
+	depends on DEP
+	help
+	  Test duplicate-definition handling for changed-input diagnostics.
+	  The input fragment requests this symbol as built-in.
+	  The duplicate definition below must not produce a duplicate warning.
+	  This keeps the warning output stable for repeated menu entries.
+
+config DUP
+	bool
+	depends on DEP
diff --git a/scripts/kconfig/tests/warn_changed_input/__init__.py b/scripts/kconfig/tests/warn_changed_input/__init__.py
new file mode 100644
index 000000000000..4c3bca6af846
--- /dev/null
+++ b/scripts/kconfig/tests/warn_changed_input/__init__.py
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+"""
+Test optional warnings for user-provided values changed by Kconfig.
+
+Warnings should stay disabled by default, and should only appear when
+KCONFIG_WARN_CHANGED_INPUT is enabled.
+"""
+
+
+def test(conf):
+    assert conf.olddefconfig('config') == 0
+    assert 'user-provided values changed by Kconfig' not in conf.stderr
+
+    assert conf._run_conf('--olddefconfig', dot_config='config',
+                          extra_env={
+                              'KCONFIG_WARN_CHANGED_INPUT': '1',
+                          }) == 0
+    assert conf.stderr_contains('expected_stderr')
+    assert conf.config_matches('expected_config')
+
+    assert conf._run_conf('--olddefconfig', dot_config='config',
+                          extra_env={
+                              'KCONFIG_WARN_CHANGED_INPUT': '1',
+                          }, silent=True) == 0
+    assert conf.stderr_contains('expected_stderr')
+
+    assert conf._run_conf('--savedefconfig=defconfig', dot_config='config',
+                          out_file='defconfig',
+                          extra_env={
+                              'KCONFIG_WARN_CHANGED_INPUT': '1',
+                          }) == 0
+    assert conf.stderr_contains('expected_stderr')
+    assert conf.config_matches('expected_defconfig')
diff --git a/scripts/kconfig/tests/warn_changed_input/config b/scripts/kconfig/tests/warn_changed_input/config
new file mode 100644
index 000000000000..dbe93ff26408
--- /dev/null
+++ b/scripts/kconfig/tests/warn_changed_input/config
@@ -0,0 +1,3 @@
+CONFIG_A=y
+CONFIG_NUM=30
+CONFIG_DUP=y
diff --git a/scripts/kconfig/tests/warn_changed_input/expected_config b/scripts/kconfig/tests/warn_changed_input/expected_config
new file mode 100644
index 000000000000..fe8bbec66c53
--- /dev/null
+++ b/scripts/kconfig/tests/warn_changed_input/expected_config
@@ -0,0 +1,6 @@
+#
+# Automatically generated file; DO NOT EDIT.
+# Main menu
+#
+# CONFIG_DEP is not set
+CONFIG_NUM=20
diff --git a/scripts/kconfig/tests/warn_changed_input/expected_defconfig b/scripts/kconfig/tests/warn_changed_input/expected_defconfig
new file mode 100644
index 000000000000..af9e34851d2a
--- /dev/null
+++ b/scripts/kconfig/tests/warn_changed_input/expected_defconfig
@@ -0,0 +1 @@
+CONFIG_NUM=20
diff --git a/scripts/kconfig/tests/warn_changed_input/expected_stderr b/scripts/kconfig/tests/warn_changed_input/expected_stderr
new file mode 100644
index 000000000000..9ec8446b4ac2
--- /dev/null
+++ b/scripts/kconfig/tests/warn_changed_input/expected_stderr
@@ -0,0 +1,4 @@
+warning: user-provided values changed by Kconfig:
+  CONFIG_A: y -> n
+  CONFIG_NUM: 30 -> 20
+  CONFIG_DUP: y -> n
-- 
2.50.1 (Apple Git-155)


^ permalink raw reply related

* Re: [RFC V2 1/3] lib/vsprintf: Add support for pgtable entries
From: Anshuman Khandual @ 2026-06-11  5:15 UTC (permalink / raw)
  To: Usama Arif
  Cc: linux-mm, Andy Shevchenko, Rasmus Villemoes, Sergey Senozhatsky,
	Petr Mladek, Steven Rostedt, Jonathan Corbet, Andrew Morton,
	David Hildenbrand, linux-kernel, linux-doc, David Hildenbrand,
	Lorenzo Stoakes, Andy Whitcroft
In-Reply-To: <20260610111339.2465922-1-usama.arif@linux.dev>



On 10/06/26 4:43 PM, Usama Arif wrote:
> On Wed, 10 Jun 2026 05:35:43 +0100 Anshuman Khandual <anshuman.khandual@arm.com> wrote:
> 
>> Add some print formats for pgtable entries at any pgtable level. These new
>> formats are %pp[g|4|u|m|t][d|e] i.e %ppgd, %pp4d, %ppud, %ppmd, and %ppte.
>> These currently support both 32 bit and 64 bit pgtable entries that can be
>> extended up to 128 bit when required.
>>
>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>> ---
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: David Hildenbrand <david@kernel.org>
>> Cc: Lorenzo Stoakes <ljs@kernel.org>
>> Cc: Petr Mladek <pmladek@suse.com>
>> Cc: Steven Rostedt <rostedt@goodmis.org>
>> Cc: Jonathan Corbet <corbet@lwn.net>
>> Cc: Andy Whitcroft <apw@canonical.com>
>> Cc: linux-mm@kvack.org
>> Cc: linux-kernel@vger.kernel.org
>> Cc: linux-doc@vger.kernel.org
>>
>>  Documentation/core-api/printk-formats.rst | 19 ++++++++
>>  lib/vsprintf.c                            | 58 +++++++++++++++++++++++
>>  scripts/checkpatch.pl                     |  2 +-
>>  3 files changed, 78 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
>> index c0b1b6089307..e69f91a9dd9d 100644
>> --- a/Documentation/core-api/printk-formats.rst
>> +++ b/Documentation/core-api/printk-formats.rst
>> @@ -696,6 +696,25 @@ Rust
>>  Only intended to be used from Rust code to format ``core::fmt::Arguments``.
>>  Do *not* use it from C.
>>  
>> +Page Table Entry
>> +----------------
>> +
>> +::
>> +
>> +        %p[pgd|p4dp|pud|pmd|pte]
> 
> s/p4dp/p4d to match others

Will fix.
> 
> 
>> +
>> +Print page table entry at any level.
>> +
>> +Passed by reference.
>> +
>> +Examples for a 64 bit page table entry, given &(u64)0xc0ffee::
>> +
>> +        %ppte   0x0000000000c0ffee
>> +        %ppmd   0x0000000000c0ffee
>> +        %ppud   0x0000000000c0ffee
>> +        %pp4d   0x0000000000c0ffee
>> +        %ppgd   0x0000000000c0ffee
>> +
>>  Thanks
>>  ======
>>  
>> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
>> index 9f359b31c8d1..d4ad3048a4db 100644
>> --- a/lib/vsprintf.c
>> +++ b/lib/vsprintf.c
>> @@ -856,6 +856,59 @@ static char *default_pointer(char *buf, char *end, const void *ptr,
>>  	return ptr_to_id(buf, end, ptr, spec);
>>  }
>>  
>> +static char *pxd_pointer(char *buf, char *end, const void *ptr,
>> +			 struct printf_spec spec, const char *fmt)
>> +{
>> +	if (check_pointer(&buf, end, ptr, spec))
>> +		return buf;
>> +
>> +	if (fmt[1] == 't' && fmt[2] == 'e') {
>> +		pte_t *pte = (pte_t *)ptr;
>> +
>> +		static_assert(sizeof(pte_t) == 4 ||
>> +			      sizeof(pte_t) == 8,
>> +			      "pte_t size must be 4 or 8 bytes");
>> +		return special_hex_number(buf, end, pte_val(ptep_get(pte)), sizeof(pte_t));
>> +	}
>> +
>> +	if (fmt[1] == 'm' && fmt[2] == 'd') {
>> +		pmd_t *pmd = (pmd_t *)ptr;
>> +
>> +		static_assert(sizeof(pmd_t) == 4 ||
>> +			      sizeof(pmd_t) == 8,
>> +			      "pmd_t size must be 4 or 8 bytes");
>> +		return special_hex_number(buf, end, pmd_val(pmdp_get(pmd)), sizeof(pmd_t));
>> +	}
>> +
>> +	if (fmt[1] == 'u' && fmt[2] == 'd') {
>> +		pud_t *pud = (pud_t *)ptr;
>> +
>> +		static_assert(sizeof(pud_t) == 4 ||
>> +			      sizeof(pud_t) == 8,
>> +			      "pud_t size must be 4 or 8 bytes");
>> +		return special_hex_number(buf, end, pud_val(pudp_get(pud)), sizeof(pud_t));
>> +	}
>> +
>> +	if (fmt[1] == '4' && fmt[2] == 'd') {
>> +		p4d_t *p4d = (p4d_t *)ptr;
>> +
>> +		static_assert(sizeof(p4d_t) == 4 ||
>> +			      sizeof(p4d_t) == 8,
>> +			      "p4d_t size must be 4 or 8 bytes");
>> +		return special_hex_number(buf, end, p4d_val(p4dp_get(p4d)), sizeof(p4d_t));
>> +	}
>> +
>> +	if (fmt[1] == 'g' && fmt[2] == 'd') {
>> +		pgd_t *pgd = (pgd_t *)ptr;
>> +
>> +		static_assert(sizeof(pgd_t) == 4 ||
>> +			      sizeof(pgd_t) == 8,
>> +			      "pgd_t size must be 4 or 8 bytes");
>> +		return special_hex_number(buf, end, pgd_val(pgdp_get(pgd)), sizeof(pgd_t));
> 
> You mentioned in the cover letter that pgdp_get() is the reason arm32 builds don't work.
> Just wanted to check what the issue is?
> 
> I had a look at arch/arm/include/asm/pgtable.h and I couldn't understand why
> it reads pgdp_get(pgpd) instead of pgdp_get(pgdp)?

Right - that's a typo.

The following arm32 pgtable header change enables the build.

diff --git a/arch/arm/include/asm/pgtable-2level-types.h b/arch/arm/include/asm/pgtable-2level-types.h
index 650e793f4142..3f1d52402129 100644
--- a/arch/arm/include/asm/pgtable-2level-types.h
+++ b/arch/arm/include/asm/pgtable-2level-types.h
@@ -31,6 +31,7 @@ typedef struct { pteval_t pgprot; } pgprot_t;
 #define __pte(x)        ((pte_t) { (x) } )
 #define __pmd(x)        ((pmd_t) { (x) } )
 #define __pgprot(x)     ((pgprot_t) { (x) } )
+#define __pgd(x)        ((pgd_t) { { (x), 0 } })

 #else
 /*
@@ -49,6 +50,7 @@ typedef pteval_t pgprot_t;
 #define __pte(x)        (x)
 #define __pmd(x)        (x)
 #define __pgprot(x)     (x)
+#define __pgd(x)        { (x), 0 }

 #endif /* STRICT_MM_TYPECHECKS */

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 982795cf4563..349e1f819385 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -141,7 +141,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,

 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];

-#define pgdp_get(pgpd)         READ_ONCE(*pgdp)
+#define pgdp_get(pgdp)         READ_ONCE(*pgdp)

 #define pud_page(pud)          pmd_page(__pmd(pud_val(pud)))
 #define pud_write(pud)         pmd_write(__pmd(pud_val(pud)))
>    
> 
>> +	}
>> +	return default_pointer(buf, end, ptr, spec);
>> +}
>> +
>>  int kptr_restrict __read_mostly;
>>  
>>  static noinline_for_stack
>> @@ -2506,6 +2559,9 @@ early_param("no_hash_pointers", no_hash_pointers_enable);
>>   *		Without an option prints the full name of the node
>>   *		f full name
>>   *		P node name, including a possible unit address
>> + * - 'p[g|4|u|m|t|][d|e]' For a page table entry, this prints its
>> + *			  contents in a hexadecimal format
>> + *
>>   * - 'x' For printing the address unmodified. Equivalent to "%lx".
>>   *       Please read the documentation (path below) before using!
>>   * - '[ku]s' For a BPF/tracing related format specifier, e.g. used out of
>> @@ -2615,6 +2671,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
>>  		default:
>>  			return error_string(buf, end, "(einval)", spec);
>>  		}
>> +	case 'p':
>> +		return pxd_pointer(buf, end, ptr, spec, fmt);
>>  	default:
>>  		return default_pointer(buf, end, ptr, spec);
>>  	}
>> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
>> index 0492d6afc9a1..f68955858e29 100755
>> --- a/scripts/checkpatch.pl
>> +++ b/scripts/checkpatch.pl
>> @@ -6975,7 +6975,7 @@ sub process {
>>  				my $fmt = get_quoted_string($lines[$count - 1], raw_line($count, 0));
>>  				$fmt =~ s/%%//g;
>>  
>> -				while ($fmt =~ /(\%[\*\d\.]*p(\w)(\w*))/g) {
>> +				while ($fmt =~ /(\%[\*\d\.]*p(\w)(\w*)(pte|pmd|pud|p4d|pgd))/g) {
>>  					$specifier = $1;
>>  					$extension = $2;
>>  					$qualifier = $3;
>> -- 
>> 2.30.2
>>
>>


^ permalink raw reply related

* [PATCH v18 net-next 10/11] net/nebula-matrix: add common/ctrl dev init/reinit operation
From: illusion.wang @ 2026-06-11  4:49 UTC (permalink / raw)
  To: dimon.zhao, illusion.wang, alvin.wang, sam.chen, netdev
  Cc: andrew+netdev, corbet, kuba, horms, linux-doc, pabeni,
	vadim.fedorenko, lukas.bulwahn, edumazet, enelsonmoore, skhan,
	hkallweit1, open list
In-Reply-To: <20260611044916.2383-1-illusion.wang@nebula-matrix.com>

Common Device Setup: nbl_dev_setup_common_dev configures mailbox queues,
registers cleanup tasks, and initializes the MSI-X interrupt counter.
Control Device Setup (optional): nbl_dev_setup_ctrl_dev initializes
the chip and configures all channel queues.

Signed-off-by: illusion.wang <illusion.wang@nebula-matrix.com>
---
 .../nebula-matrix/nbl/nbl_core/nbl_dev.c      | 173 ++++++++++++++++++
 .../nebula-matrix/nbl/nbl_core/nbl_dev.h      |  31 ++++
 2 files changed, 204 insertions(+)

diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.c
index 5deb21e35f8e..b520b9e922dd 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.c
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.c
@@ -6,6 +6,160 @@
 #include <linux/pci.h>
 #include "nbl_dev.h"
 
+static void nbl_dev_init_msix_cnt(struct nbl_dev_mgt *dev_mgt)
+{
+	struct nbl_dev_common *dev_common = dev_mgt->common_dev;
+	struct nbl_msix_info *msix_info = &dev_common->msix_info;
+
+	msix_info->serv_info[NBL_MSIX_MAILBOX_TYPE].num = 1;
+}
+
+/* ----------  Channel config  ---------- */
+static void nbl_dev_setup_chan_qinfo(struct nbl_dev_mgt *dev_mgt, u8 chan_type)
+{
+	struct nbl_channel_ops *chan_ops = dev_mgt->chan_ops_tbl->ops;
+	struct nbl_channel_mgt *priv = dev_mgt->chan_ops_tbl->priv;
+
+	if (!chan_ops->check_queue_exist(priv, chan_type))
+		return;
+
+	chan_ops->cfg_chan_qinfo_map_table(priv);
+}
+
+static int nbl_dev_setup_chan_queue(struct nbl_dev_mgt *dev_mgt, u8 chan_type)
+{
+	struct nbl_channel_ops *chan_ops = dev_mgt->chan_ops_tbl->ops;
+	struct nbl_channel_mgt *priv = dev_mgt->chan_ops_tbl->priv;
+	int ret = 0;
+
+	if (chan_ops->check_queue_exist(priv, chan_type))
+		ret = chan_ops->setup_queue(priv, chan_type);
+
+	return ret;
+}
+
+static int nbl_dev_remove_chan_queue(struct nbl_dev_mgt *dev_mgt, u8 chan_type)
+{
+	struct nbl_channel_ops *chan_ops = dev_mgt->chan_ops_tbl->ops;
+	struct nbl_channel_mgt *priv = dev_mgt->chan_ops_tbl->priv;
+	int ret = 0;
+
+	if (chan_ops->check_queue_exist(priv, chan_type))
+		ret = chan_ops->teardown_queue(priv, chan_type);
+
+	return ret;
+}
+
+static void nbl_dev_register_chan_task(struct nbl_dev_mgt *dev_mgt,
+				       u8 chan_type, struct work_struct *task)
+{
+	struct nbl_channel_ops *chan_ops = dev_mgt->chan_ops_tbl->ops;
+
+	if (chan_ops->check_queue_exist(dev_mgt->chan_ops_tbl->priv, chan_type))
+		chan_ops->register_chan_task(dev_mgt->chan_ops_tbl->priv,
+					     chan_type, task);
+}
+
+/* ----------  Tasks config  ---------- */
+static void nbl_dev_clean_mailbox_task(struct work_struct *work)
+{
+	struct nbl_dev_common *common_dev =
+		container_of(work, struct nbl_dev_common, clean_mbx_task);
+	struct nbl_dev_mgt *dev_mgt = common_dev->dev_mgt;
+	struct nbl_channel_ops *chan_ops = dev_mgt->chan_ops_tbl->ops;
+
+	chan_ops->clean_queue_subtask(dev_mgt->chan_ops_tbl->priv,
+				      NBL_CHAN_TYPE_MAILBOX);
+}
+
+/* ----------  Dev init process  ---------- */
+static int nbl_dev_setup_common_dev(struct nbl_adapter *adapter)
+{
+	struct nbl_dev_mgt *dev_mgt = adapter->core.dev_mgt;
+	struct nbl_dispatch_ops *disp_ops = dev_mgt->disp_ops_tbl->ops;
+	struct nbl_dispatch_mgt *priv = dev_mgt->disp_ops_tbl->priv;
+	struct nbl_common_info *common = dev_mgt->common;
+	struct nbl_dev_common *common_dev;
+	int ret;
+
+	common_dev = devm_kzalloc(&adapter->pdev->dev, sizeof(*common_dev),
+				  GFP_KERNEL);
+	if (!common_dev)
+		return -ENOMEM;
+	common_dev->dev_mgt = dev_mgt;
+
+	ret = nbl_dev_setup_chan_queue(dev_mgt, NBL_CHAN_TYPE_MAILBOX);
+	if (ret)
+		return ret;
+
+	INIT_WORK(&common_dev->clean_mbx_task, nbl_dev_clean_mailbox_task);
+	common->vsi_id = disp_ops->get_vsi_id(priv, NBL_VSI_DATA);
+	if (common->vsi_id == U16_MAX) {
+		ret = -ENOENT;
+		goto err_cleanup;
+	}
+	ret = disp_ops->get_eth_id(priv, common->vsi_id, &common->eth_num,
+			     &common->eth_id, &common->logic_eth_id);
+	if (ret)
+		goto err_cleanup;
+	nbl_dev_register_chan_task(dev_mgt, NBL_CHAN_TYPE_MAILBOX,
+				   &common_dev->clean_mbx_task);
+
+	dev_mgt->common_dev = common_dev;
+	nbl_dev_init_msix_cnt(dev_mgt);
+
+	return 0;
+err_cleanup:
+	nbl_dev_remove_chan_queue(dev_mgt, NBL_CHAN_TYPE_MAILBOX);
+	return ret;
+}
+
+static void nbl_dev_remove_common_dev(struct nbl_adapter *adapter)
+{
+	struct nbl_dev_mgt *dev_mgt = adapter->core.dev_mgt;
+	struct nbl_dev_common *common_dev = dev_mgt->common_dev;
+
+	if (!common_dev)
+		return;
+	nbl_dev_register_chan_task(dev_mgt, NBL_CHAN_TYPE_MAILBOX, NULL);
+	cancel_work_sync(&common_dev->clean_mbx_task);
+	nbl_dev_remove_chan_queue(dev_mgt, NBL_CHAN_TYPE_MAILBOX);
+}
+
+static int nbl_dev_setup_ctrl_dev(struct nbl_adapter *adapter)
+{
+	struct nbl_dev_mgt *dev_mgt = adapter->core.dev_mgt;
+	struct nbl_dispatch_ops *disp_ops = dev_mgt->disp_ops_tbl->ops;
+	int ret;
+
+	ret = disp_ops->init_chip_module(dev_mgt->disp_ops_tbl->priv);
+	if (ret)
+		return ret;
+
+	nbl_dev_setup_chan_qinfo(dev_mgt, NBL_CHAN_TYPE_MAILBOX);
+
+	return 0;
+}
+
+/*
+ * There is intentionally no inverse of nbl_dev_setup_chan_qinfo() here.
+ * The qinfo registers are managed by the chip firmware, not by the
+ * driver.  Setting driver status to false is the designed teardown
+ * mechanism — it notifies the firmware, which then performs its own
+ * cleanup of all per-PF state including the qinfo registers.
+ * An inverse helper would duplicate work that the firmware already
+ * does, and would add error-path complexity for no benefit.  We keep
+ * the deinit path minimal and rely on the firmware cleanup for
+ * correctness, including in abnormal reset scenarios.
+ */
+static void nbl_dev_remove_ctrl_dev(struct nbl_adapter *adapter)
+{
+	struct nbl_dev_mgt *dev_mgt = adapter->core.dev_mgt;
+	struct nbl_dispatch_ops *disp_ops = dev_mgt->disp_ops_tbl->ops;
+
+	disp_ops->deinit_chip_module(dev_mgt->disp_ops_tbl->priv);
+}
+
 static struct nbl_dev_mgt *nbl_dev_setup_dev_mgt(struct nbl_common_info *common)
 {
 	struct nbl_dev_mgt *dev_mgt;
@@ -38,11 +192,30 @@ int nbl_dev_init(struct nbl_adapter *adapter)
 	dev_mgt->chan_ops_tbl = chan_ops_tbl;
 	adapter->core.dev_mgt = dev_mgt;
 
+	ret = nbl_dev_setup_common_dev(adapter);
+	if (ret)
+		return ret;
+
+	if (common->has_ctrl) {
+		ret = nbl_dev_setup_ctrl_dev(adapter);
+		if (ret)
+			goto setup_ctrl_dev_fail;
+	}
+
 	return 0;
+
+setup_ctrl_dev_fail:
+	nbl_dev_remove_common_dev(adapter);
+	return ret;
 }
 
 void nbl_dev_remove(struct nbl_adapter *adapter)
 {
+	struct nbl_common_info *common = &adapter->common;
+
+	if (common->has_ctrl)
+		nbl_dev_remove_ctrl_dev(adapter);
+	nbl_dev_remove_common_dev(adapter);
 }
 
 /* ----------  Dev start process  ---------- */
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.h
index 9b71092b99a0..b51c8a4424c5 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.h
@@ -18,10 +18,41 @@
 #include "../nbl_include/nbl_def_common.h"
 #include "../nbl_core.h"
 
+#define NBL_STRING_NAME_LEN			32
+
+enum nbl_msix_serv_type {
+	/* virtio_dev has a config vector_id, and that vector_id must be 0 */
+	NBL_MSIX_VIRTIO_TYPE = 0,
+	NBL_MSIX_NET_TYPE,
+	NBL_MSIX_MAILBOX_TYPE,
+	NBL_MSIX_TYPE_MAX
+};
+
+struct nbl_msix_serv_info {
+	char irq_name[NBL_STRING_NAME_LEN];
+	u16 num;
+	u16 base_vector_id;
+	/* true: hw report msix, hw need to mask actively */
+	bool hw_self_mask_en;
+};
+
+struct nbl_msix_info {
+	struct nbl_msix_serv_info serv_info[NBL_MSIX_TYPE_MAX];
+};
+
+struct nbl_dev_common {
+	struct nbl_dev_mgt *dev_mgt;
+	struct nbl_msix_info msix_info;
+	char mailbox_name[NBL_STRING_NAME_LEN];
+	/* for ctrl-dev/net-dev mailbox recv msg */
+	struct work_struct clean_mbx_task;
+};
+
 struct nbl_dev_mgt {
 	struct nbl_common_info *common;
 	struct nbl_dispatch_ops_tbl *disp_ops_tbl;
 	struct nbl_channel_ops_tbl *chan_ops_tbl;
+	struct nbl_dev_common *common_dev;
 };
 
 #endif
-- 
2.47.3


^ permalink raw reply related

* [PATCH v18 net-next 06/11] net/nebula-matrix: add common resource implementation
From: illusion.wang @ 2026-06-11  4:49 UTC (permalink / raw)
  To: dimon.zhao, illusion.wang, alvin.wang, sam.chen, netdev
  Cc: andrew+netdev, corbet, kuba, horms, linux-doc, pabeni,
	vadim.fedorenko, lukas.bulwahn, edumazet, enelsonmoore, skhan,
	hkallweit1, open list
In-Reply-To: <20260611044916.2383-1-illusion.wang@nebula-matrix.com>

The resource layer processes the entries/data of the various modules
within the processing chip to accomplish specific entry management
operations. It describes the module business capabilities of the chip
and the data it manages.
The resource layer comprises the following sub-modules: common,
interrupt, and vsi (txrx and queue are not included this time).

This patch provides the common part, including the conversion
relationships among vsi_id, func_id, eth_id, and pf_id. These
relationships may be utilized in the upper layer or the resource layer.

Key Assumptions:
- nbl_res_start() initializes VSI/Eth/PF data structures **only for
control devices** (`common->has_ctrl == true`).
- APIs like nbl_res_func_id_to_vsi_id() **are guaranteed to be called
 only on control devices** by the framework's dispatch layer.

Signed-off-by: illusion.wang <illusion.wang@nebula-matrix.com>
---
 .../net/ethernet/nebula-matrix/nbl/Makefile   |   1 +
 .../nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c  |  47 +++++
 .../nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h  |  14 ++
 .../nbl_hw_leonis/nbl_resource_leonis.c       | 186 ++++++++++++++++++
 .../nebula-matrix/nbl/nbl_hw/nbl_resource.c   | 134 +++++++++++++
 .../nebula-matrix/nbl/nbl_hw/nbl_resource.h   |  49 ++++-
 .../nbl/nbl_include/nbl_def_common.h          |  15 ++
 .../nbl/nbl_include/nbl_def_resource.h        |  15 ++
 .../nbl/nbl_include/nbl_include.h             |   8 +
 9 files changed, 468 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.c

diff --git a/drivers/net/ethernet/nebula-matrix/nbl/Makefile b/drivers/net/ethernet/nebula-matrix/nbl/Makefile
index c9bc060732e7..b03c20f9988e 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/Makefile
+++ b/drivers/net/ethernet/nebula-matrix/nbl/Makefile
@@ -8,6 +8,7 @@ nbl-objs +=       nbl_common/nbl_common.o \
 				nbl_hw/nbl_hw_leonis/nbl_hw_leonis.o \
 				nbl_hw/nbl_hw_leonis/nbl_resource_leonis.o \
 				nbl_hw/nbl_hw_leonis/nbl_hw_leonis_regs.o \
+				nbl_hw/nbl_resource.o \
 				nbl_core/nbl_dispatch.o \
 				nbl_core/nbl_dev.o \
 				nbl_main.o
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
index 1d25d7770d8d..f31f54d1f4f7 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
@@ -10,6 +10,18 @@
 #include <linux/bitfield.h>
 #include "nbl_hw_leonis.h"
 
+static void nbl_hw_read_mbx_regs(struct nbl_hw_mgt *hw_mgt, u64 reg, u32 *data,
+				 u32 len)
+{
+	u32 i;
+
+	if (len % 4)
+		return;
+
+	for (i = 0; i < len / 4; i++)
+		data[i] = nbl_mbx_rd32(hw_mgt, reg + i * sizeof(u32));
+}
+
 static void nbl_hw_write_mbx_regs(struct nbl_hw_mgt *hw_mgt, u64 reg,
 				  const u32 *data, u32 len)
 {
@@ -52,6 +64,15 @@ static void nbl_hw_wr_regs(struct nbl_hw_mgt *hw_mgt, u64 reg, const u32 *data,
 	spin_unlock(&hw_mgt->reg_lock);
 }
 
+static u32 nbl_hw_get_fw_eth_map(struct nbl_hw_mgt *hw_mgt)
+{
+	u32 data;
+
+	nbl_hw_read_mbx_regs(hw_mgt, NBL_FW_BOARD_DW6_OFFSET, &data,
+			     sizeof(data));
+	return FIELD_GET(NBL_FW_BOARD_DW6_ETH_BITMAP_MASK, data);
+}
+
 static void nbl_hw_update_mailbox_queue_tail_ptr(struct nbl_hw_mgt *hw_mgt,
 						 u16 tail_ptr, u8 txrx)
 {
@@ -133,6 +154,14 @@ static u32 nbl_hw_get_host_pf_mask(struct nbl_hw_mgt *hw_mgt)
 	return data;
 }
 
+static u8 nbl_hw_get_real_bus(struct nbl_hw_mgt *hw_mgt)
+{
+	u32 data;
+
+	data = nbl_hw_rd32(hw_mgt, NBL_PCIE_HOST_TL_CFG_BUSDEV);
+	return FIELD_GET(NBL_PCIE_BUS_MASK, data);
+}
+
 static void nbl_hw_cfg_mailbox_qinfo(struct nbl_hw_mgt *hw_mgt, u16 func_id,
 				     u8 bus, u8 devid, u8 function)
 {
@@ -145,6 +174,20 @@ static void nbl_hw_cfg_mailbox_qinfo(struct nbl_hw_mgt *hw_mgt, u16 func_id,
 		       sizeof(data));
 }
 
+static void nbl_hw_get_board_info(struct nbl_hw_mgt *hw_mgt,
+				  struct nbl_board_port_info *board_info)
+{
+	u32 data = 0;
+
+	nbl_hw_read_mbx_regs(hw_mgt, NBL_FW_BOARD_DW3_OFFSET, &data,
+			     sizeof(data));
+	board_info->eth_num = FIELD_GET(NBL_FW_BOARD_DW3_PORT_NUM_MASK, data);
+	board_info->eth_speed =
+		FIELD_GET(NBL_FW_BOARD_DW3_PORT_SPEED_MASK, data);
+	board_info->p4_version =
+		FIELD_GET(NBL_FW_BOARD_DW3_P4_VERSION_MASK, data);
+}
+
 static struct nbl_hw_ops hw_ops = {
 	.update_mailbox_queue_tail_ptr = nbl_hw_update_mailbox_queue_tail_ptr,
 	.config_mailbox_rxq = nbl_hw_config_mailbox_rxq,
@@ -152,8 +195,12 @@ static struct nbl_hw_ops hw_ops = {
 	.stop_mailbox_rxq = nbl_hw_stop_mailbox_rxq,
 	.stop_mailbox_txq = nbl_hw_stop_mailbox_txq,
 	.get_host_pf_mask = nbl_hw_get_host_pf_mask,
+	.get_real_bus = nbl_hw_get_real_bus,
+
 	.cfg_mailbox_qinfo = nbl_hw_cfg_mailbox_qinfo,
 
+	.get_fw_eth_map = nbl_hw_get_fw_eth_map,
+	.get_board_info = nbl_hw_get_board_info,
 };
 
 /* Structure starts here, adding an op should not modify anything below */
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h
index d2c85175554d..e32f740d8d3f 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h
@@ -69,4 +69,18 @@ struct nbl_mailbox_qinfo_cfg_table {
 #define NBL_PCIE_HOST_K_PF_MASK_REG (NBL_INTF_HOST_PCIE_BASE + 0x00001004)
 #define NBL_PCIE_HOST_TL_CFG_BUSDEV (NBL_INTF_HOST_PCIE_BASE + 0x11040)
 
+#define NBL_PCIE_BUS_MASK	GENMASK(12, 5)
+#define NBL_FW_BOARD_CONFIG			0x200
+#define NBL_FW_BOARD_DW3_OFFSET			(NBL_FW_BOARD_CONFIG + 12)
+#define NBL_FW_BOARD_DW6_OFFSET			(NBL_FW_BOARD_CONFIG + 24)
+
+#define NBL_FW_BOARD_DW3_PORT_TYPE_MASK BIT(0)
+#define NBL_FW_BOARD_DW3_PORT_NUM_MASK GENMASK(7, 1)
+#define NBL_FW_BOARD_DW3_PORT_SPEED_MASK GENMASK(9, 8)
+#define NBL_FW_BOARD_DW3_GPIO_TYPE_MASK GENMASK(12, 10)
+#define NBL_FW_BOARD_DW3_P4_VERSION_MASK GENMASK(13, 13)
+
+#define NBL_FW_BOARD_DW6_LANE_BITMAP_MASK GENMASK(7, 0)
+#define NBL_FW_BOARD_DW6_ETH_BITMAP_MASK GENMASK(15, 8)
+
 #endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.c
index 4b4f8e2e7fe7..dd9205ee7252 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.c
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.c
@@ -4,9 +4,12 @@
  */
 #include <linux/device.h>
 #include <linux/pci.h>
+#include <linux/bits.h>
 #include "nbl_resource_leonis.h"
 
 static struct nbl_resource_ops res_ops = {
+	.get_vsi_id = nbl_res_func_id_to_vsi_id,
+	.get_eth_id = nbl_res_get_eth_id,
 };
 
 static struct nbl_resource_mgt *
@@ -45,8 +48,191 @@ nbl_res_setup_ops(struct device *dev, struct nbl_resource_mgt *res_mgt)
 	return res_ops_tbl;
 }
 
+static int nbl_res_ctrl_dev_setup_eth_info(struct nbl_resource_mgt *res_mgt)
+{
+	struct nbl_hw_ops *hw_ops = res_mgt->hw_ops_tbl->ops;
+	struct device *dev = res_mgt->common->dev;
+	struct nbl_eth_info *eth_info;
+	u32 eth_bitmap, eth_id;
+	u32 eth_num = 0;
+	u32 fw_port_num;
+	int i;
+
+	eth_info = devm_kzalloc(dev, sizeof(*eth_info), GFP_KERNEL);
+	if (!eth_info)
+		return -ENOMEM;
+
+	res_mgt->resource_info->eth_info = eth_info;
+
+	fw_port_num = res_mgt->resource_info->board_info.eth_num;
+	eth_bitmap = hw_ops->get_fw_eth_map(res_mgt->hw_ops_tbl->priv);
+	if (eth_bitmap & ~((1 << NBL_MAX_ETHERNET) - 1)) {
+		dev_err(dev, "FW reported invalid eth_bitmap 0x%x\n",
+			eth_bitmap);
+		return -EINVAL;
+	}
+	if (fw_port_num !=  hweight32(eth_bitmap)) {
+		dev_err(dev, "FW inconsistency: port_num=%u, bitmap=0x%x\n",
+			fw_port_num, eth_bitmap);
+		return -EINVAL;
+	}
+
+	/* eth_num must be 1/2/4 so that NBL_VSI_ID_GAP() always hits one
+	 * of the explicit branches. A value of 3 or any other unsupported
+	 * count is rejected by the checks below.
+	 */
+	if (fw_port_num > NBL_MAX_ETHERNET || fw_port_num == 3) {
+		dev_warn(dev, "FW reports %u Ethernet ports, not supported\n",
+			 fw_port_num);
+		return -EINVAL;
+	}
+	eth_info->eth_num = fw_port_num;
+	if (res_mgt->resource_info->max_pf != eth_info->eth_num) {
+		dev_err(dev, "Invalid PF-to-port topology: max_pf=%u, eth_num=%u\n",
+			res_mgt->resource_info->max_pf, eth_info->eth_num);
+		return -EINVAL;
+	}
+
+	/* for 2 eth port board, the eth_id is 0, 2 */
+	for (i = 0; i < NBL_MAX_ETHERNET; i++) {
+		if ((1 << i) & eth_bitmap) {
+			set_bit(i, eth_info->eth_bitmap);
+			eth_info->eth_id[eth_num] = i;
+			eth_info->logic_eth_id[i] = eth_num;
+			eth_num++;
+		}
+	}
+
+	for (i = 0; i < res_mgt->resource_info->max_pf; i++) {
+		eth_id = eth_info->eth_id[i];
+		eth_info->pf_bitmap[eth_id] |= BIT(i);
+	}
+
+	return 0;
+}
+
+static int nbl_res_ctrl_dev_sriov_info_init(struct nbl_resource_mgt *res_mgt)
+{
+	struct nbl_hw_ops *hw_ops = res_mgt->hw_ops_tbl->ops;
+	struct nbl_hw_mgt *p = res_mgt->hw_ops_tbl->priv;
+	struct nbl_common_info *common = res_mgt->common;
+	struct nbl_sriov_info *sriov_info;
+	struct device *dev = common->dev;
+	u16 function;
+	u16 func_id;
+
+	sriov_info = devm_kcalloc(dev, res_mgt->resource_info->max_pf,
+				  sizeof(*sriov_info), GFP_KERNEL);
+	if (!sriov_info)
+		return -ENOMEM;
+
+	res_mgt->resource_info->sriov_info = sriov_info;
+	common->hw_bus = hw_ops->get_real_bus(p);
+	if (common->function + res_mgt->resource_info->max_pf > NBL_MAX_PF) {
+		dev_err(dev, "PF count exceeds available function space\n");
+		return -EINVAL;
+	}
+	for (func_id = 0; func_id < res_mgt->resource_info->max_pf; func_id++) {
+		sriov_info = res_mgt->resource_info->sriov_info + func_id;
+		function = common->function + func_id;
+		sriov_info->bdf = PCI_DEVID(common->hw_bus,
+					    PCI_DEVFN(common->devid, function));
+	}
+
+	return 0;
+}
+
+static int nbl_res_ctrl_dev_vsi_info_init(struct nbl_resource_mgt *res_mgt)
+{
+	struct nbl_eth_info *eth_info = res_mgt->resource_info->eth_info;
+	struct nbl_common_info *common = res_mgt->common;
+	struct device *dev = common->dev;
+	struct nbl_vsi_info *vsi_info;
+	int i;
+
+	vsi_info = devm_kzalloc(dev, sizeof(*vsi_info), GFP_KERNEL);
+	if (!vsi_info)
+		return -ENOMEM;
+
+	res_mgt->resource_info->vsi_info = vsi_info;
+	/*
+	 * case 1 one port(1pf)
+	 * pf0 (NBL_VSI_SERV_PF_DATA_TYPE) vsi is 0
+	 * case 2 two port(2pf)
+	 * pf0,pf1(NBL_VSI_SERV_PF_DATA_TYPE) vsi is 0,512
+	 * case 3 four port(4pf)
+	 * pf0,pf1,pf2,pf3(NBL_VSI_SERV_PF_DATA_TYPE) vsi is 0,256,512,768
+
+	 */
+
+	vsi_info->num = eth_info->eth_num;
+	for (i = 0; i < vsi_info->num; i++) {
+		vsi_info->serv_info[i][NBL_VSI_SERV_PF_DATA_TYPE].base_id =
+			i * NBL_VSI_ID_GAP(vsi_info->num);
+		vsi_info->serv_info[i][NBL_VSI_SERV_PF_DATA_TYPE].num = 1;
+	}
+
+	return 0;
+}
+
+static int nbl_res_init_pf_num(struct nbl_resource_mgt *res_mgt)
+{
+	struct nbl_hw_ops *hw_ops = res_mgt->hw_ops_tbl->ops;
+	u32 pf_num = 0;
+	u32 pf_mask;
+	int i;
+
+	pf_mask = hw_ops->get_host_pf_mask(res_mgt->hw_ops_tbl->priv);
+	/*
+	 * Hardware guarantees pf_mask has contiguous cleared bits
+	 * starting from bit 0 (e.g., 0b11111100, not 0b01010101).
+	 * This allows us to stop at the first set bit.
+	 */
+	for (i = 0; i < NBL_MAX_PF; i++) {
+		if (!(pf_mask & (1 << i)))
+			pf_num++;
+		else
+			break;
+	}
+	if (pf_num == 0)
+		return -EINVAL;
+	res_mgt->resource_info->max_pf = pf_num;
+
+	return 0;
+}
+
+static void nbl_res_init_board_info(struct nbl_resource_mgt *res_mgt)
+{
+	struct nbl_hw_ops *hw_ops = res_mgt->hw_ops_tbl->ops;
+
+	hw_ops->get_board_info(res_mgt->hw_ops_tbl->priv,
+			       &res_mgt->resource_info->board_info);
+}
+
 static int nbl_res_start(struct nbl_resource_mgt *res_mgt)
 {
+	struct nbl_common_info *common = res_mgt->common;
+	int ret = 0;
+
+	if (common->has_ctrl) {
+		nbl_res_init_board_info(res_mgt);
+
+		ret = nbl_res_init_pf_num(res_mgt);
+		if (ret)
+			return ret;
+
+		ret = nbl_res_ctrl_dev_sriov_info_init(res_mgt);
+		if (ret)
+			return ret;
+
+		ret = nbl_res_ctrl_dev_setup_eth_info(res_mgt);
+		if (ret)
+			return ret;
+
+		ret = nbl_res_ctrl_dev_vsi_info_init(res_mgt);
+		if (ret)
+			return ret;
+	}
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.c
new file mode 100644
index 000000000000..a8234038f1d7
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#include <linux/pci.h>
+#include "nbl_resource.h"
+
+u16 nbl_res_func_id_to_vsi_id(struct nbl_resource_mgt *res_mgt, u16 func_id,
+			      u16 type)
+{
+	struct nbl_vsi_info *vsi_info = res_mgt->resource_info->vsi_info;
+	enum nbl_vsi_serv_type dst_type = NBL_VSI_SERV_PF_DATA_TYPE;
+	struct nbl_common_info *common = res_mgt->common;
+	struct device *dev = res_mgt->common->dev;
+	u16 vsi_id = U16_MAX;
+	int pfid = func_id;
+	u32 diff;
+
+	if (!common->has_ctrl) {
+		dev_dbg(dev, "No control plane available\n");
+		return vsi_id;
+	}
+	diff = nbl_common_pf_id_subtraction_mgtpf_id(common, pfid);
+	if (diff == U32_MAX) {
+		dev_dbg(dev, "Invalid PF ID subtraction result\n");
+		return vsi_id;
+	}
+	if (diff >= vsi_info->num) {
+		dev_err(dev, "PF %d (diff=%u) exceeds vsi_info->num (%u)\n",
+			pfid, diff, vsi_info->num);
+		return U16_MAX;
+	}
+
+	nbl_res_pf_dev_vsi_type_to_hw_vsi_type(type, &dst_type);
+	vsi_id = vsi_info->serv_info[diff][dst_type].base_id;
+
+	return vsi_id;
+}
+
+int nbl_res_vsi_id_to_pf_id(struct nbl_resource_mgt *res_mgt, u16 vsi_id)
+{
+	struct nbl_vsi_info *vsi_info = res_mgt->resource_info->vsi_info;
+	struct nbl_common_info *common = res_mgt->common;
+	struct device *dev = res_mgt->common->dev;
+	int j = NBL_VSI_SERV_PF_DATA_TYPE;
+	int pf_id, i;
+
+	if (!common->has_ctrl) {
+		dev_dbg(dev, "No control plane available\n");
+		return -EINVAL;
+	}
+	for (i = 0; i < vsi_info->num; i++) {
+		if (vsi_id >= vsi_info->serv_info[i][j].base_id &&
+		    (vsi_id < vsi_info->serv_info[i][j].base_id +
+					vsi_info->serv_info[i][j].num)) {
+			pf_id = i + common->mgt_pf;
+			if (pf_id >= NBL_MAX_PF) {
+				dev_err(dev, "PF ID overflow\n");
+				return -ERANGE;
+			}
+			return pf_id;
+		}
+	}
+
+	dev_dbg(dev, "VSI ID %u not found\n", vsi_id);
+	return -ENOENT;
+}
+
+int nbl_res_func_id_to_bdf(struct nbl_resource_mgt *res_mgt, u16 func_id,
+			   u8 *bus, u8 *dev, u8 *function)
+{
+	struct nbl_common_info *common = res_mgt->common;
+	struct nbl_sriov_info *sriov_info;
+	int pfid = func_id;
+	u8 pf_bus, devfn;
+	u32 diff;
+
+	if (!common->has_ctrl || !bus || !dev || !function)
+		return -EINVAL;
+	diff = nbl_common_pf_id_subtraction_mgtpf_id(common, pfid);
+	if (diff == U32_MAX)
+		return -EINVAL;
+	if (diff >= res_mgt->resource_info->max_pf) {
+		dev_err(common->dev, "PF ID %u exceeds maximum supported PF count %u\n",
+			pfid, res_mgt->resource_info->max_pf);
+		return -ERANGE;
+	}
+	sriov_info = res_mgt->resource_info->sriov_info + diff;
+	pf_bus = PCI_BUS_NUM(sriov_info->bdf);
+	devfn = sriov_info->bdf & 0xff;
+	*bus = pf_bus;
+	*dev = PCI_SLOT(devfn);
+	*function = PCI_FUNC(devfn);
+
+	return 0;
+}
+
+int nbl_res_get_eth_id(struct nbl_resource_mgt *res_mgt, u16 vsi_id,
+		       u8 *eth_num, u8 *eth_id, u8 *logic_eth_id)
+{
+	struct nbl_eth_info *eth_info = res_mgt->resource_info->eth_info;
+	struct nbl_common_info *common = res_mgt->common;
+	struct device *dev = res_mgt->common->dev;
+	int rel_pf_id;
+	int abs_pf_id;
+
+	if (!common->has_ctrl || !eth_num || !eth_id || !logic_eth_id)
+		return -EINVAL;
+	abs_pf_id = nbl_res_vsi_id_to_pf_id(res_mgt, vsi_id);
+	if (abs_pf_id < 0) {
+		dev_err(dev, "Failed to get PF ID from VSI ID %u\n", vsi_id);
+		return -EINVAL;
+	}
+	rel_pf_id = abs_pf_id - common->mgt_pf;
+
+	if (rel_pf_id < 0 || rel_pf_id >= eth_info->eth_num) {
+		dev_err(dev, "rel_pf_id %d out of range [0, %u)\n",
+			rel_pf_id, eth_info->eth_num);
+		return -ERANGE;
+	}
+
+	*eth_num = eth_info->eth_num;
+	*eth_id = eth_info->eth_id[rel_pf_id];
+	*logic_eth_id = rel_pf_id;
+	return 0;
+}
+
+void nbl_res_pf_dev_vsi_type_to_hw_vsi_type(u16 src_type,
+					    enum nbl_vsi_serv_type *dst_type)
+{
+	if (src_type == NBL_VSI_DATA)
+		*dst_type = NBL_VSI_SERV_PF_DATA_TYPE;
+}
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.h
index 5bfd0ddd1cec..ec4749f6a23d 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.h
@@ -16,8 +16,46 @@
 #include "../nbl_include/nbl_def_common.h"
 #include "../nbl_core.h"
 
+struct nbl_resource_mgt;
+
+/* --------- INFO ---------- */
+/* Per-PF PCI address record, stored as an array in nbl_resource_info. */
+struct nbl_sriov_info {
+	/* PCI address word: bus via PCI_BUS_NUM(bdf), devfn in the low 8 bits */
+	unsigned int bdf;
+};
+
+/* Ethernet port bookkeeping consulted by nbl_res_get_eth_id(). */
+struct nbl_eth_info {
+	/* NOTE(review): presumably one bit per usable port - confirm */
+	DECLARE_BITMAP(eth_bitmap, NBL_MAX_ETHERNET);
+	/* NOTE(review): presumably a per-port PF mask - confirm */
+	u8 pf_bitmap[NBL_MAX_ETHERNET];
+	/* exclusive upper bound for the relative PF index in get_eth_id */
+	u8 eth_num;
+	u8 resv[3];
+	/* indexed by relative PF index (PF id minus the management PF id) */
+	u8 eth_id[NBL_MAX_PF];
+	/* NOTE(review): presumably also indexed per PF - no user visible here */
+	u8 logic_eth_id[NBL_MAX_PF];
+};
+
+/* Hardware-side VSI service types. */
+enum nbl_vsi_serv_type {
+	/* what NBL_VSI_DATA is translated to */
+	NBL_VSI_SERV_PF_DATA_TYPE,
+	/* number of service types; used as an array dimension */
+	NBL_VSI_SERV_MAX_TYPE,
+};
+
+/*
+ * VSI id range of one service type.
+ * NOTE(review): presumably base_id is the first VSI id and num the number
+ * of ids in the range - confirm against the code that fills this in.
+ */
+struct nbl_vsi_serv_info {
+	u16 base_id;
+	u16 num;
+};
+
+/* VSI layout table. */
+struct nbl_vsi_info {
+	/* NOTE(review): presumably the number of valid rows below - confirm */
+	u16 num;
+	/* first index bounded by NBL_MAX_ETHERNET, second by service type */
+	struct nbl_vsi_serv_info serv_info[NBL_MAX_ETHERNET]
+					  [NBL_VSI_SERV_MAX_TYPE];
+};
+
 struct nbl_resource_info {
-	void *reserved;  /* placeholder to be replaced in the future*/
+	/* ctrl-dev owned pfs */
+	DECLARE_BITMAP(func_bitmap, NBL_MAX_FUNC);
+	struct nbl_sriov_info *sriov_info;
+	struct nbl_eth_info *eth_info;
+	struct nbl_vsi_info *vsi_info;
+	u8 max_pf;
+	struct nbl_board_port_info board_info;
 };
 
 struct nbl_resource_mgt {
@@ -28,4 +66,13 @@ struct nbl_resource_mgt {
 	struct nbl_interrupt_mgt *intr_mgt;
 };
 
+int nbl_res_vsi_id_to_pf_id(struct nbl_resource_mgt *res_mgt, u16 vsi_id);
+u16 nbl_res_func_id_to_vsi_id(struct nbl_resource_mgt *res_mgt, u16 func_id,
+			      u16 type);
+int nbl_res_func_id_to_bdf(struct nbl_resource_mgt *res_mgt, u16 func_id,
+			   u8 *bus, u8 *dev, u8 *function);
+int nbl_res_get_eth_id(struct nbl_resource_mgt *res_mgt, u16 vsi_id,
+		       u8 *eth_num, u8 *eth_id, u8 *logic_eth_id);
+void nbl_res_pf_dev_vsi_type_to_hw_vsi_type(u16 src_type,
+					    enum nbl_vsi_serv_type *dst_type);
 #endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h
index 633f7100beb0..bc741b7df7b9 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h
@@ -12,6 +12,21 @@
 #include "nbl_include.h"
 
 struct nbl_hash_tbl_mgt;
+/* Ethernet port counts that select a non-default VSI id gap */
+#define NBL_TWO_ETHERNET_PORT			2
+#define NBL_FOUR_ETHERNET_PORT			4
+/* VSI id gap values selected by NBL_VSI_ID_GAP() */
+#define NBL_DEFAULT_VSI_ID_GAP			1024
+#define NBL_TWO_ETHERNET_VSI_ID_GAP		512
+#define NBL_FOUR_ETHERNET_VSI_ID_GAP		256
+
+/*
+ * Pick the VSI id gap for port count m: 256 when m is 4, 512 when m is 2,
+ * 1024 for every other value. m is evaluated exactly once.
+ */
+#define NBL_VSI_ID_GAP(m)					\
+	({							\
+		typeof(m) _m = (m);				\
+		_m == NBL_FOUR_ETHERNET_PORT ?			\
+			NBL_FOUR_ETHERNET_VSI_ID_GAP :		\
+			(_m == NBL_TWO_ETHERNET_PORT ?		\
+				 NBL_TWO_ETHERNET_VSI_ID_GAP :	\
+				 NBL_DEFAULT_VSI_ID_GAP);	\
+	})
 
 struct nbl_common_info {
 	struct pci_dev *pdev;
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_resource.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_resource.h
index d55934af5a9a..54717dbccde8 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_resource.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_resource.h
@@ -6,10 +6,25 @@
 #ifndef _NBL_DEF_RESOURCE_H_
 #define _NBL_DEF_RESOURCE_H_
 
+#include <linux/types.h>
+
 struct nbl_resource_mgt;
 struct nbl_adapter;
 
 struct nbl_resource_ops {
+	int (*init_chip_module)(struct nbl_resource_mgt *res_mgt);
+	void (*deinit_chip_module)(struct nbl_resource_mgt *res_mgt);
+
+	int (*configure_msix_map)(struct nbl_resource_mgt *res_mgt, u16 func_id,
+				  u16 num_net_msix, u16 num_others_msix,
+				  bool net_msix_mask_en);
+	int (*destroy_msix_map)(struct nbl_resource_mgt *res_mgt, u16 func_id);
+	int (*set_mailbox_irq)(struct nbl_resource_mgt *res_mgt, u16 func_id,
+			       u16 vector_id, bool enable_msix);
+	u16 (*get_vsi_id)(struct nbl_resource_mgt *res_mgt, u16 func_id,
+			  u16 type);
+	int (*get_eth_id)(struct nbl_resource_mgt *res_mgt, u16 vsi_id,
+			  u8 *eth_num, u8 *eth_id, u8 *logic_eth_id);
 };
 
 struct nbl_resource_ops_tbl {
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h
index 2a1ae9a1eb9d..a547ac596d9e 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h
@@ -17,11 +17,19 @@
 		((_id) == (max) ? 0 : (_id) + 1);	\
 	})
 
+#define NBL_MAX_FUNC					520
+#define NBL_MAX_ETHERNET				4
+
 enum nbl_product_type {
 	NBL_LEONIS_TYPE,
 	NBL_PRODUCT_MAX,
 };
 
+/* PF-device VSI types (the "src_type" of the VSI type translation). */
+enum {
+	NBL_VSI_DATA = 0,
+	/* number of VSI types */
+	NBL_VSI_MAX,
+};
+
 struct nbl_func_caps {
 	u32 has_ctrl:1;
 	u32 has_net:1;
-- 
2.47.3


^ permalink raw reply related

* [PATCH v18 net-next 05/11] net/nebula-matrix: add channel layer
From: illusion.wang @ 2026-06-11  4:49 UTC (permalink / raw)
  To: dimon.zhao, illusion.wang, alvin.wang, sam.chen, netdev
  Cc: andrew+netdev, corbet, kuba, horms, linux-doc, pabeni,
	vadim.fedorenko, lukas.bulwahn, edumazet, enelsonmoore, skhan,
	hkallweit1, open list
In-Reply-To: <20260611044916.2383-1-illusion.wang@nebula-matrix.com>

A channel management layer provides a structured approach to handle
communication between different components and drivers. Here's a summary
of its key functionalities:

1. Message Handling Framework
   Message Registration: Functions (nbl_chan_register_msg) allow dynamic
   registration of message handlers for specific message types, enabling
   extensible communication protocols.

   Message Sending/Acknowledgment: Core functions (nbl_chan_send_msg,
   nbl_chan_send_ack) handle message transmission, including asynchronous
   operations with acknowledgment (ACK) support. Received ACKs are
   processed via nbl_chan_recv_ack_msg.

   Hash-Based Handler Lookup: A hash table (`handle_hash_tbl`) stores
   message handlers for efficient O(1) lookup by message type. The
   entire table is removed via `nbl_chan_remove_msg_handler` during
   driver teardown (per-message-type removal is not implemented
   in this version).

2. Channel Types and Queue Management
   Mailbox Channel: For direct communication between PF0 and the other PFs.

   Queue Initialization: Functions (nbl_chan_init_queue,
   nbl_chan_init_tx_queue) allocate resources:
   - TX descriptors: dmam_alloc_coherent()
   - RX descriptors: dmam_alloc_coherent()
   - TX/RX buffer metadata arrays (txq->buf, rxq->buf): devm_kcalloc()

   Queue Teardown: nbl_chan_teardown_queue() stops queues, cancels
   pending work items (clean_task), and destroys mutexes. It does NOT
   free DMA memory, which is released automatically via devm on driver
   remove.

   IMPORTANT - Resource Lifecycle Design:
   DMA memory allocated with dmam_alloc_coherent() is intentionally NOT
   freed in nbl_chan_teardown_queue(). The queues are allocated once
   during driver probe and freed only during driver remove (when all
   devm_ resources are released). This assumes queues are NOT dynamically
   torn down and recreated per-PF during normal operation.

   Queue Configuration: Hardware-specific queue parameters (e.g., buffer
   sizes, entry counts) are set via nbl_chan_config_queue, with hardware
   interactions delegated to hw_ops.

3. Hardware Abstraction Layer (HW Ops)
   Hardware-Specific Operations: The nbl_hw_ops structure abstracts
   hardware interactions: queue configuration (config_mailbox_txq/rxq),
   tail pointer updates (update_mailbox_queue_tail_ptr).

Signed-off-by: illusion.wang <illusion.wang@nebula-matrix.com>
---
 .../net/ethernet/nebula-matrix/nbl/Makefile   |    3 +-
 .../nbl/nbl_channel/nbl_channel.c             | 1000 ++++++++++++++++-
 .../nbl/nbl_channel/nbl_channel.h             |  137 +++
 .../nebula-matrix/nbl/nbl_common/nbl_common.c |  193 ++++
 .../nebula-matrix/nbl/nbl_common/nbl_common.h |   33 +
 .../nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c  |  146 +++
 .../nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h  |   57 +
 .../nebula-matrix/nbl/nbl_hw/nbl_hw_reg.h     |    1 +
 .../nbl/nbl_include/nbl_def_channel.h         |   83 ++
 .../nbl/nbl_include/nbl_def_common.h          |   26 +
 .../nbl/nbl_include/nbl_def_hw.h              |   27 +
 .../nbl/nbl_include/nbl_include.h             |    6 +
 12 files changed, 1708 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_common/nbl_common.c
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_common/nbl_common.h

diff --git a/drivers/net/ethernet/nebula-matrix/nbl/Makefile b/drivers/net/ethernet/nebula-matrix/nbl/Makefile
index 63116d1d7043..c9bc060732e7 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/Makefile
+++ b/drivers/net/ethernet/nebula-matrix/nbl/Makefile
@@ -3,7 +3,8 @@
 
 obj-$(CONFIG_NBL) := nbl.o
 
-nbl-objs +=       nbl_channel/nbl_channel.o \
+nbl-objs +=       nbl_common/nbl_common.o \
+				nbl_channel/nbl_channel.o \
 				nbl_hw/nbl_hw_leonis/nbl_hw_leonis.o \
 				nbl_hw/nbl_hw_leonis/nbl_resource_leonis.o \
 				nbl_hw/nbl_hw_leonis/nbl_hw_leonis_regs.o \
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.c
index c7689f0e4029..de437e34b4c8 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.c
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.c
@@ -2,12 +2,970 @@
 /*
  * Copyright (c) 2025 Nebula Matrix Limited.
  */
-
+#include <linux/delay.h>
 #include <linux/device.h>
+#include <linux/bitfield.h>
 #include <linux/pci.h>
+#include <linux/bits.h>
+#include <linux/dma-mapping.h>
 #include "nbl_channel.h"
 
+/*
+ * Store @func and its @priv cookie in the handler hash table under the key
+ * @msg_type.
+ *
+ * Return: whatever nbl_common_alloc_hash_node() returns.
+ */
+static int nbl_chan_add_msg_handler(struct nbl_channel_mgt *chan_mgt,
+				    u16 msg_type, nbl_chan_resp func,
+				    void *priv)
+{
+	struct nbl_chan_msg_node_data node = {
+		.func = func,
+		.priv = priv,
+	};
+
+	return nbl_common_alloc_hash_node(chan_mgt->handle_hash_tbl, &msg_type,
+					  &node, NULL);
+}
+
+/*
+ * Create the hash table that maps a u16 message type to its
+ * struct nbl_chan_msg_node_data handler entry.
+ *
+ * Return: 0 on success, -ENOMEM if the table could not be created.
+ */
+static int nbl_chan_init_msg_handler(struct nbl_channel_mgt *chan_mgt)
+{
+	struct nbl_hash_tbl_key tbl_key = {
+		.dev = chan_mgt->common->dev,
+		.key_size = sizeof(u16),
+		.data_size = sizeof(struct nbl_chan_msg_node_data),
+		.bucket_size = NBL_CHAN_HANDLER_TBL_BUCKET_SIZE,
+	};
+
+	chan_mgt->handle_hash_tbl = nbl_common_init_hash_table(&tbl_key);
+
+	return chan_mgt->handle_hash_tbl ? 0 : -ENOMEM;
+}
+
+/*
+ * Tear down the whole message-handler hash table and clear the stale
+ * pointer. There is no per-message-type removal.
+ */
+static void nbl_chan_remove_msg_handler(struct nbl_channel_mgt *chan_mgt)
+{
+	nbl_common_remove_hash_table(chan_mgt->handle_hash_tbl, NULL);
+
+	chan_mgt->handle_hash_tbl = NULL;
+}
+
+/*
+ * Record the ring geometry (entry counts and per-entry buffer sizes) of a
+ * channel and initialise its txq_lock mutex.
+ */
+static void nbl_chan_init_queue_param(struct nbl_chan_info *chan_info,
+				      u16 num_txq_entries, u16 num_rxq_entries,
+				      u16 txq_buf_size, u16 rxq_buf_size)
+{
+	/* TX side */
+	chan_info->num_txq_entries = num_txq_entries;
+	chan_info->txq_buf_size = txq_buf_size;
+
+	/* RX side */
+	chan_info->num_rxq_entries = num_rxq_entries;
+	chan_info->rxq_buf_size = rxq_buf_size;
+
+	mutex_init(&chan_info->txq_lock);
+}
+
+/*
+ * Allocate the TX side of a channel: the coherent descriptor ring, the
+ * per-slot ACK wait contexts and the TX buffer metadata array.
+ *
+ * Every allocation is device-managed (dmam_/devm_), so the error paths just
+ * return and leave the release to devres.
+ *
+ * Return: 0 on success, -ENOMEM if any allocation fails.
+ */
+static int nbl_chan_init_tx_queue(struct nbl_common_info *common,
+				  struct nbl_chan_info *chan_info)
+{
+	struct nbl_chan_ring *txq = &chan_info->txq;
+	struct device *dev = common->dev;
+	size_t size =
+		chan_info->num_txq_entries * sizeof(struct nbl_chan_tx_desc);
+	int i;
+
+	txq->desc.tx_desc =
+		dmam_alloc_coherent(dev, size, &txq->dma, GFP_KERNEL);
+	if (!txq->desc.tx_desc)
+		return -ENOMEM;
+
+	chan_info->wait = devm_kcalloc(dev, chan_info->num_txq_entries,
+				       sizeof(*chan_info->wait), GFP_KERNEL);
+	if (!chan_info->wait)
+		return -ENOMEM;
+	for (i = 0; i < chan_info->num_txq_entries; i++) {
+		/*
+		 * status_lock is taken with spin_lock_irq() by the send, ACK
+		 * and teardown paths, so it needs a proper initialisation;
+		 * zeroed memory is not a valid spinlock when lock debugging
+		 * is enabled.
+		 */
+		spin_lock_init(&chan_info->wait[i].status_lock);
+		init_waitqueue_head(&chan_info->wait[i].wait_queue);
+		chan_info->wait[i].status = NBL_MBX_STATUS_IDLE;
+	}
+
+	txq->buf = devm_kcalloc(dev, chan_info->num_txq_entries,
+				sizeof(*txq->buf), GFP_KERNEL);
+	if (!txq->buf)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Allocate the RX side of a channel: the coherent descriptor ring and the
+ * RX buffer metadata array. Both allocations are device-managed.
+ *
+ * Return: 0 on success, -ENOMEM if either allocation fails.
+ */
+static int nbl_chan_init_rx_queue(struct nbl_common_info *common,
+				  struct nbl_chan_info *chan_info)
+{
+	struct device *dev = common->dev;
+	struct nbl_chan_ring *ring = &chan_info->rxq;
+	size_t ring_bytes;
+
+	ring_bytes =
+		chan_info->num_rxq_entries * sizeof(struct nbl_chan_rx_desc);
+	ring->desc.rx_desc = dmam_alloc_coherent(dev, ring_bytes, &ring->dma,
+						 GFP_KERNEL);
+	if (!ring->desc.rx_desc) {
+		dev_err(dev,
+			"Allocate DMA for chan rx descriptor ring failed\n");
+		return -ENOMEM;
+	}
+
+	ring->buf = devm_kcalloc(dev, chan_info->num_rxq_entries,
+				 sizeof(*ring->buf), GFP_KERNEL);
+
+	return ring->buf ? 0 : -ENOMEM;
+}
+
+/*
+ * Allocate the TX ring first and, only if that worked, the RX ring.
+ *
+ * Return: 0 on success or the first error reported by either helper.
+ */
+static int nbl_chan_init_queue(struct nbl_common_info *common,
+			       struct nbl_chan_info *chan_info)
+{
+	int ret = nbl_chan_init_tx_queue(common, chan_info);
+
+	if (ret)
+		return ret;
+
+	return nbl_chan_init_rx_queue(common, chan_info);
+}
+
+/*
+ * Hand one mailbox ring to the hardware: pass the ring's DMA address and
+ * ilog2() of its entry count to config_mailbox_txq() when @tx is true, or
+ * to config_mailbox_rxq() otherwise.
+ */
+static void nbl_chan_config_queue(struct nbl_channel_mgt *chan_mgt,
+				  struct nbl_chan_info *chan_info, bool tx)
+{
+	struct nbl_hw_ops *hw_ops = chan_mgt->hw_ops_tbl->ops;
+	struct nbl_hw_mgt *p = chan_mgt->hw_ops_tbl->priv;
+	int size_bwid;
+
+	if (tx) {
+		size_bwid = ilog2(chan_info->num_txq_entries);
+		hw_ops->config_mailbox_txq(p, chan_info->txq.dma, size_bwid);
+		return;
+	}
+
+	size_bwid = ilog2(chan_info->num_rxq_entries);
+	hw_ops->config_mailbox_rxq(p, chan_info->rxq.dma, size_bwid);
+}
+
+/*
+ * Give every TX slot a device-managed coherent buffer of txq_buf_size
+ * bytes, then reset the TX ring indices to zero.
+ *
+ * Return: 0 on success, -ENOMEM on the first failed allocation (the ring
+ * indices are not touched in that case).
+ */
+static int nbl_chan_alloc_all_tx_bufs(struct nbl_channel_mgt *chan_mgt,
+				      struct nbl_chan_info *chan_info)
+{
+	struct device *dev = chan_mgt->common->dev;
+	struct nbl_chan_ring *ring = &chan_info->txq;
+	u16 idx;
+
+	for (idx = 0; idx < chan_info->num_txq_entries; idx++) {
+		struct nbl_chan_buf *slot = &ring->buf[idx];
+
+		slot->va = dmam_alloc_coherent(dev, chan_info->txq_buf_size,
+					       &slot->pa, GFP_KERNEL);
+		if (slot->va)
+			continue;
+
+		dev_err(dev, "Allocate buffer for chan tx queue failed\n");
+		return -ENOMEM;
+	}
+
+	ring->tail_ptr = 0;
+	ring->next_to_use = 0;
+	ring->next_to_clean = 0;
+
+	return 0;
+}
+
+/*
+ * Program the mailbox qinfo entry of every function id below NBL_MAX_PF
+ * whose bit is clear in the mask returned by get_host_pf_mask().
+ *
+ * Each entry is written with this device's bus and device number and with
+ * the function number offset by the function id.
+ */
+static void nbl_chan_cfg_qinfo_map_table(struct nbl_channel_mgt *chan_mgt)
+{
+	struct nbl_hw_ops *hw_ops = chan_mgt->hw_ops_tbl->ops;
+	struct nbl_common_info *common = chan_mgt->common;
+	struct nbl_hw_mgt *p = chan_mgt->hw_ops_tbl->priv;
+	u8 func_id;
+	u32 pf_mask;
+
+	pf_mask = hw_ops->get_host_pf_mask(p);
+	for (func_id = 0; func_id < NBL_MAX_PF; func_id++) {
+		/* BIT() is unsigned, unlike the signed "1 << func_id" */
+		if (pf_mask & BIT(func_id))
+			continue;
+
+		hw_ops->cfg_mailbox_qinfo(p, func_id, common->hw_bus,
+					  common->devid,
+					  common->function + func_id);
+	}
+}
+
+/*
+ * Publish a new tail pointer for mailbox queue @qid through
+ * hw_ops->update_mailbox_queue_tail_ptr().
+ *
+ * @chan_info is accepted but unused (cast to void). Every other argument is
+ * evaluated exactly once.
+ */
+#define NBL_UPDATE_QUEUE_TAIL_PTR(chan_info, hw_ops, chan_mgt, tail_ptr, qid)\
+do {									\
+	(void)(chan_info);						\
+	typeof(hw_ops) _hw_ops = (hw_ops);				\
+	typeof(chan_mgt) _chan_mgt = (chan_mgt);			\
+	typeof(tail_ptr) _tail_ptr = (tail_ptr);			\
+	typeof(qid) _qid = (qid);					\
+	(_hw_ops)->update_mailbox_queue_tail_ptr(			\
+		_chan_mgt->hw_ops_tbl->priv, _tail_ptr, _qid);	\
+} while (0)
+
+/*
+ * Give every RX slot a device-managed coherent buffer of rxq_buf_size
+ * bytes and mark all descriptors except the last one as available.
+ *
+ * next_to_use and tail_ptr therefore start at num_rxq_entries - 1, and
+ * next_to_clean at 0.
+ *
+ * Return: 0 on success, -ENOMEM on the first failed allocation.
+ */
+static int nbl_chan_alloc_all_rx_bufs(struct nbl_channel_mgt *chan_mgt,
+				      struct nbl_chan_info *chan_info)
+{
+	struct device *dev = chan_mgt->common->dev;
+	struct nbl_chan_ring *ring = &chan_info->rxq;
+	struct nbl_chan_rx_desc *descs;
+	u16 idx;
+
+	for (idx = 0; idx < chan_info->num_rxq_entries; idx++) {
+		struct nbl_chan_buf *slot = &ring->buf[idx];
+
+		slot->va = dmam_alloc_coherent(dev, chan_info->rxq_buf_size,
+					       &slot->pa, GFP_KERNEL);
+		if (!slot->va) {
+			dev_err(dev,
+				"Allocate buffer for chan rx queue failed\n");
+			return -ENOMEM;
+		}
+	}
+
+	/* Arm all descriptors but the last one */
+	descs = ring->desc.rx_desc;
+	for (idx = 0; idx < chan_info->num_rxq_entries - 1; idx++) {
+		descs[idx].buf_addr = cpu_to_le64(ring->buf[idx].pa);
+		descs[idx].buf_len = cpu_to_le32(chan_info->rxq_buf_size);
+		descs[idx].flags = cpu_to_le16(BIT(NBL_CHAN_RX_DESC_AVAIL));
+	}
+
+	ring->next_to_clean = 0;
+	ring->next_to_use = chan_info->num_rxq_entries - 1;
+	ring->tail_ptr = chan_info->num_rxq_entries - 1;
+
+	return 0;
+}
+
+/*
+ * Allocate the TX buffers first and, only if that worked, the RX buffers.
+ *
+ * Return: 0 on success or the first error reported by either helper.
+ */
+static int nbl_chan_alloc_all_bufs(struct nbl_channel_mgt *chan_mgt,
+				   struct nbl_chan_info *chan_info)
+{
+	int ret = nbl_chan_alloc_all_tx_bufs(chan_mgt, chan_info);
+
+	if (ret)
+		return ret;
+
+	return nbl_chan_alloc_all_rx_bufs(chan_mgt, chan_info);
+}
+
+/* Ask the hardware to stop the mailbox RX queue, then the TX queue. */
+static void nbl_chan_stop_queue(struct nbl_channel_mgt *chan_mgt)
+{
+	struct nbl_hw_mgt *hw_priv = chan_mgt->hw_ops_tbl->priv;
+	struct nbl_hw_ops *ops = chan_mgt->hw_ops_tbl->ops;
+
+	ops->stop_mailbox_rxq(hw_priv);
+	ops->stop_mailbox_txq(hw_priv);
+}
+
+/*
+ * Quiesce one channel: flag it abnormal, stop the hardware mailbox queues,
+ * cancel the clean work, wake pending ACK waiters and destroy txq_lock.
+ * No memory is freed here.
+ *
+ * NOTE(review): NBL_CHAN_ABNORMAL is cleared again at the end, after
+ * txq_lock has been destroyed. Confirm that no sender can reach
+ * mutex_lock(&txq_lock) once this function has returned.
+ *
+ * Return: always 0.
+ */
+static int nbl_chan_teardown_queue(struct nbl_channel_mgt *chan_mgt,
+				   u8 chan_type)
+{
+	struct nbl_chan_info *chan_info = chan_mgt->chan_info[chan_type];
+	struct nbl_chan_waitqueue_head *wait_head;
+	u16 i;
+
+	/* Mark the channel as abnormal to prevent new operations */
+	set_bit(NBL_CHAN_ABNORMAL, chan_info->state);
+
+	/* Stop hardware queues */
+	nbl_chan_stop_queue(chan_mgt);
+
+	/* Cancel any pending cleanup work */
+	if (chan_info->clean_task)
+		cancel_work_sync(chan_info->clean_task);
+	/*
+	 * Wake up all threads waiting for ACKs to prevent use-after-free.
+	 * This is critical because these threads may be sleeping on waitqueues
+	 * that will be freed when the device is detached.
+	 */
+	for (i = 0; i < chan_info->num_txq_entries; i++) {
+		wait_head = &chan_info->wait[i];
+		spin_lock_irq(&wait_head->status_lock);
+		/* Only wake threads that are actually waiting */
+		if (wait_head->status == NBL_MBX_STATUS_WAITING) {
+			/* Mark as timeout so waking threads know to abort */
+			wait_head->status = NBL_MBX_STATUS_TIMEOUT;
+			/* Ensure status is written */
+			smp_wmb();
+		}
+		spin_unlock_irq(&wait_head->status_lock);
+		/*
+		 * NOTE(review): status is re-read here without status_lock
+		 * held, so slots that were already in TIMEOUT state before
+		 * this loop are woken as well.
+		 */
+		if (wait_head->status == NBL_MBX_STATUS_TIMEOUT)
+			wake_up(&wait_head->wait_queue);
+	}
+
+	/*
+	 * Lock and unlock to ensure all in-flight callers have released the
+	 * lock. After unlock returns, we're guaranteed no other thread is
+	 * using the lock, so it's safe to destroy it.
+	 */
+	mutex_lock(&chan_info->txq_lock);
+	mutex_unlock(&chan_info->txq_lock);
+	mutex_destroy(&chan_info->txq_lock);
+
+	/* Clear the abnormal flag */
+	clear_bit(NBL_CHAN_ABNORMAL, chan_info->state);
+	return 0;
+}
+
+/*
+ * Bring up one channel: set the ring geometry (NBL_CHAN_QUEUE_LEN entries
+ * of NBL_CHAN_BUF_LEN bytes in both directions), allocate rings and
+ * buffers, program both rings into the hardware and publish the initial RX
+ * tail pointer.
+ *
+ * Return: 0 on success; on allocation failure txq_lock is destroyed and
+ * the error is returned.
+ */
+static int nbl_chan_setup_queue(struct nbl_channel_mgt *chan_mgt, u8 chan_type)
+{
+	struct nbl_chan_info *chan_info = chan_mgt->chan_info[chan_type];
+	struct nbl_hw_ops *hw_ops = chan_mgt->hw_ops_tbl->ops;
+	int ret;
+
+	nbl_chan_init_queue_param(chan_info, NBL_CHAN_QUEUE_LEN,
+				  NBL_CHAN_QUEUE_LEN, NBL_CHAN_BUF_LEN,
+				  NBL_CHAN_BUF_LEN);
+
+	ret = nbl_chan_init_queue(chan_mgt->common, chan_info);
+	if (!ret)
+		ret = nbl_chan_alloc_all_bufs(chan_mgt, chan_info);
+	if (ret) {
+		mutex_destroy(&chan_info->txq_lock);
+		return ret;
+	}
+
+	nbl_chan_config_queue(chan_mgt, chan_info, true); /* tx */
+	nbl_chan_config_queue(chan_mgt, chan_info, false); /* rx */
+	NBL_UPDATE_QUEUE_TAIL_PTR(chan_info, hw_ops, chan_mgt,
+				  chan_info->rxq.tail_ptr, NBL_MB_RX_QID);
+
+	return 0;
+}
+
+/*
+ * Fill the TX descriptor at txq->next_to_use from @param and advance the
+ * software ring state. The hardware is not notified here.
+ *
+ * Arguments longer than NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN are copied into
+ * the slot's DMA buffer and referenced through buf_addr/buf_len; shorter
+ * ones are embedded in the descriptor's data[] field. @chan_mgt is unused.
+ *
+ * Return: 0 on success, -EINVAL if arg_len exceeds
+ * NBL_CHAN_BUF_LEN - sizeof(struct nbl_chan_tx_desc).
+ */
+static int nbl_chan_update_txqueue(struct nbl_channel_mgt *chan_mgt,
+				   struct nbl_chan_info *chan_info,
+				   struct nbl_chan_tx_param *param)
+{
+	struct nbl_chan_ring *txq = &chan_info->txq;
+	struct nbl_chan_tx_desc *tx_desc =
+		NBL_CHAN_TX_RING_TO_DESC(txq, txq->next_to_use);
+	struct nbl_chan_buf *tx_buf =
+		NBL_CHAN_TX_RING_TO_BUF(txq, txq->next_to_use);
+
+	if (param->arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc))
+		return -EINVAL;
+
+	tx_desc->dstid = cpu_to_le16(param->dstid);
+	tx_desc->msg_type = cpu_to_le16(param->msg_type);
+	tx_desc->msgid = cpu_to_le16(param->msgid);
+
+	if (param->arg_len > NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN) {
+		/* External payload: data_len == 0 marks "use buf_addr" */
+		memcpy(tx_buf->va, param->arg, param->arg_len);
+		tx_desc->buf_addr = cpu_to_le64(tx_buf->pa);
+		tx_desc->buf_len = cpu_to_le16(param->arg_len);
+		tx_desc->data_len = 0;
+		memset(tx_desc->data, 0, sizeof(tx_desc->data));
+	} else {
+		/* Embedded payload: zero-pad data[] before copying */
+		memset(tx_desc->data, 0, sizeof(tx_desc->data));
+		memcpy(tx_desc->data, param->arg, param->arg_len);
+		tx_desc->buf_len = 0;
+		tx_desc->data_len = cpu_to_le16(param->arg_len);
+	}
+	/* Ensure desc body is visible to DMA before writing AVAIL flag */
+	dma_wmb();
+	tx_desc->flags = cpu_to_le16(BIT(NBL_CHAN_TX_DESC_AVAIL));
+
+	/* next_to_use wraps to 0 after the last entry; tail_ptr only grows */
+	txq->next_to_use =
+		NBL_NEXT_ID(txq->next_to_use, chan_info->num_txq_entries - 1);
+	txq->tail_ptr++;
+
+	return 0;
+}
+
+/*
+ * Publish the TX tail pointer to the hardware and poll the descriptor at
+ * txq->next_to_clean until its USED flag is set.
+ *
+ * The poll sleeps between attempts (usleep_range), so this must be called
+ * from a context that may sleep. At most NBL_CHAN_TX_WAIT_TIMES attempts
+ * are made.
+ *
+ * Return: 0 once the descriptor is marked used, -ETIMEDOUT otherwise. On
+ * timeout next_to_clean is left unchanged.
+ */
+static int nbl_chan_kick_tx_ring(struct nbl_channel_mgt *chan_mgt,
+				 struct nbl_chan_info *chan_info)
+{
+	struct nbl_hw_ops *hw_ops = chan_mgt->hw_ops_tbl->ops;
+	struct nbl_chan_ring *txq = &chan_info->txq;
+	struct device *dev = chan_mgt->common->dev;
+	int max_retries = NBL_CHAN_TX_WAIT_TIMES;
+	struct nbl_chan_tx_desc *tx_desc;
+	int retry_count = 0;
+
+	NBL_UPDATE_QUEUE_TAIL_PTR(chan_info, hw_ops, chan_mgt, txq->tail_ptr,
+				  NBL_MB_TX_QID);
+
+	tx_desc = NBL_CHAN_TX_RING_TO_DESC(txq, txq->next_to_clean);
+	while (retry_count < max_retries) {
+		if (le16_to_cpu(READ_ONCE(tx_desc->flags)) &
+		    BIT(NBL_CHAN_TX_DESC_USED)) {
+			/* Order later descriptor reads after the flag read */
+			dma_rmb();
+			break;
+		}
+
+		retry_count++;
+		/* Give up without sleeping after the final attempt */
+		if (retry_count == max_retries) {
+			dev_err(dev, "chan send message type: %d timeout\n",
+				le16_to_cpu(READ_ONCE(tx_desc->msg_type)));
+			return -ETIMEDOUT;
+		}
+		usleep_range(NBL_CHAN_TX_WAIT_US, NBL_CHAN_TX_WAIT_US_MAX);
+	}
+
+	/* Everything up to next_to_use is treated as consumed */
+	txq->next_to_clean = txq->next_to_use;
+
+	return 0;
+}
+
+/*
+ * Message handler for ACKs (@priv is the struct nbl_channel_mgt).
+ *
+ * @data starts with NBL_CHAN_ACK_HEAD_LEN 32-bit header words (message
+ * type, message id and return code at their NBL_CHAN_*_POS offsets)
+ * followed by the response payload. The message id selects the wait slot;
+ * the ACK is accepted only if the source id, slot status, message type and
+ * message index all match what the sender recorded. An accepted ACK stores
+ * the return code, copies at most ack_data_len payload bytes into the
+ * sender's buffer, sets 'acked' and wakes the sender.
+ */
+static void nbl_chan_recv_ack_msg(void *priv, u16 srcid, u16 msgid, void *data,
+				  u32 data_len)
+{
+	struct nbl_channel_mgt *chan_mgt = (struct nbl_channel_mgt *)priv;
+	struct nbl_chan_waitqueue_head *wait_head = NULL;
+	struct device *dev = chan_mgt->common->dev;
+	struct nbl_chan_info *chan_info =
+		chan_mgt->chan_info[NBL_CHAN_TYPE_MAILBOX];
+	u32 ack_datalen, ack_msgtype = 0;
+	u32 *payload = data;
+	u16 ack_msgid = 0;
+	u32 copy_len;
+
+	/* The ACK must at least hold the header and fit in one buffer */
+	if (data_len > NBL_CHAN_BUF_LEN ||
+	    data_len < NBL_CHAN_ACK_HEAD_LEN * sizeof(u32)) {
+		dev_err(dev, "Invalid ACK data_len: %u\n", data_len);
+		return;
+	}
+	ack_datalen = data_len - NBL_CHAN_ACK_HEAD_LEN * sizeof(u32);
+	ack_msgtype = le16_to_cpu(*(__le16 *)(payload + NBL_CHAN_MSG_TYPE_POS));
+	ack_msgid = le16_to_cpu(*(__le16 *)(payload + NBL_CHAN_MSG_ID_POS));
+	/* The location field of the id indexes chan_info->wait[] */
+	if (FIELD_GET(NBL_CHAN_MSGID_LOC_MASK, ack_msgid) >=
+	    NBL_CHAN_QUEUE_LEN) {
+		dev_err(dev, "chan recv msg id: %d err\n", ack_msgid);
+		return;
+	}
+	wait_head =
+		&chan_info->wait[FIELD_GET(NBL_CHAN_MSGID_LOC_MASK, ack_msgid)];
+	spin_lock_irq(&wait_head->status_lock);
+	if (srcid != wait_head->dstid) {
+		/* Do not modify the status; the slot remains WAITING,
+		 * and the sender will time out normally
+		 */
+		spin_unlock_irq(&wait_head->status_lock);
+		dev_err(dev, "ACK srcid=%u != dstid=%u, rejecting\n", srcid,
+			wait_head->dstid);
+		return;
+	}
+	if (wait_head->status != NBL_MBX_STATUS_WAITING) {
+		spin_unlock_irq(&wait_head->status_lock);
+		dev_err(dev,
+			"Skip ack with invalid status, wait_head msgtype:%u msg_index:%u status:%d ack_data_len:%d, ack msgtype:%u msgid:%u datalen:%d\n",
+			wait_head->msg_type, wait_head->msg_index,
+			wait_head->status, wait_head->ack_data_len, ack_msgtype,
+			ack_msgid, ack_datalen);
+		return;
+	}
+
+	if (wait_head->msg_type != ack_msgtype) {
+		/*
+		 * Mismatched ACK. The status check above guarantees the slot
+		 * is already WAITING, so this store does not change it and
+		 * 'acked' stays clear.
+		 */
+		wait_head->status = NBL_MBX_STATUS_WAITING;
+
+		dev_err(dev,
+			"Skip ack msg type donot match, wait_head msgtype:%u msg_index:%u status:%d ack_data_len:%d, ack msgtype:%u msgid:%u datalen:%d\n",
+			wait_head->msg_type, wait_head->msg_index,
+			wait_head->status, wait_head->ack_data_len, ack_msgtype,
+			ack_msgid, ack_datalen);
+		spin_unlock_irq(&wait_head->status_lock);
+		/*
+		 * The sender is woken although nothing it waits on changed.
+		 * NOTE(review): presumably it re-checks its condition and
+		 * goes back to sleep - confirm in nbl_chan_send_msg().
+		 */
+		wake_up(&wait_head->wait_queue);
+		return;
+	}
+	if (FIELD_GET(NBL_CHAN_MSGID_INDEX_MASK, ack_msgid) !=
+	    wait_head->msg_index) {
+		/*
+		 * Stale ACK (index of an earlier use of this slot). As above,
+		 * the slot is already WAITING, so this store is a no-op and
+		 * 'acked' stays clear.
+		 */
+		wait_head->status = NBL_MBX_STATUS_WAITING;
+
+		dev_err(dev,
+			"Stale ACK: expected index=%u, got msgid %u\n",
+			wait_head->msg_index, ack_msgid);
+		spin_unlock_irq(&wait_head->status_lock);
+		/* Same wake-up as for a mismatched ACK; see the note above */
+		wake_up(&wait_head->wait_queue);
+		return;
+	}
+
+	wait_head->ack_err =
+		le32_to_cpu(*(__le32 *)(payload + NBL_CHAN_ACK_RET_POS));
+
+	/* Never copy more than the sender's buffer can take */
+	copy_len = min_t(u32, wait_head->ack_data_len, ack_datalen);
+	if (wait_head->ack_err >= 0 && copy_len > 0) {
+		if (!wait_head->ack_data) {
+			dev_err(dev, "ACK payload dropped: ack_data is NULL\n");
+			wait_head->ack_data_len = 0;
+			goto ack_done;
+		}
+		/* payload is a u32 pointer, so this skips the header words */
+		memcpy((char *)wait_head->ack_data,
+		       payload + NBL_CHAN_ACK_HEAD_LEN, copy_len);
+		wait_head->ack_data_len = (u16)copy_len;
+	} else {
+		wait_head->ack_data_len = 0;
+	}
+ack_done:
+	/*
+	 * Ensure all writes to ack_data and ack_data_len are completed
+	 * before setting the 'acked' flag. This prevents other threads
+	 * from observing stale or partially updated data.
+	 */
+	smp_wmb();
+	wait_head->acked = 1;
+	spin_unlock_irq(&wait_head->status_lock);
+	/* 'acked' is re-read here after status_lock has been dropped */
+	if (wait_head->acked)
+		wake_up(&wait_head->wait_queue);
+}
+
+/*
+ * Dispatch one received message.
+ *
+ * @data is interpreted as a struct nbl_chan_tx_desc header. A non-zero
+ * data_len means the payload is embedded in the header's data[] field;
+ * otherwise buf_len bytes of payload follow the header directly. Both
+ * lengths are bounds-checked before the handler registered for the message
+ * type is called.
+ *
+ * Messages with an out-of-range type, an oversized length or no registered
+ * handler are dropped.
+ */
+static void nbl_chan_recv_msg(struct nbl_channel_mgt *chan_mgt, void *data)
+{
+	struct device *dev = chan_mgt->common->dev;
+	struct nbl_chan_msg_node_data *msg_handler;
+	u16 msg_type, payload_len, srcid, msgid;
+	struct nbl_chan_tx_desc *tx_desc;
+	void *payload;
+
+	tx_desc = data;
+	msg_type = le16_to_cpu(tx_desc->msg_type);
+	dev_dbg(dev, "recv msg_type: %d\n", msg_type);
+
+	srcid = le16_to_cpu(tx_desc->srcid);
+	msgid = le16_to_cpu(tx_desc->msgid);
+	/* Only check if the value exceeds the maximum, relying on the hash
+	 * table to filter invalid message IDs.
+	 * The gap values are reserved for future protocol extensions.
+	 */
+	if (msg_type >= NBL_CHAN_MSG_MAILBOX_MAX)
+		return;
+
+	if (tx_desc->data_len) {
+		/* Embedded payload */
+		payload_len = le16_to_cpu(tx_desc->data_len);
+		if (payload_len > NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN) {
+			dev_err(dev,
+				"data_len=%u exceeds embedded buffer size=%u\n",
+				payload_len,
+				NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN);
+			return;
+		}
+		payload = tx_desc->data;
+	} else {
+		/* External payload located right after the header */
+		payload_len = le16_to_cpu(tx_desc->buf_len);
+		if (payload_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
+			dev_err(dev,
+				"buf_len=%u exceeds external buffer size=%zu\n",
+				payload_len,
+				NBL_CHAN_BUF_LEN - sizeof(*tx_desc));
+			return;
+		}
+		payload = tx_desc + 1;
+	}
+
+	msg_handler =
+		nbl_common_get_hash_node(chan_mgt->handle_hash_tbl, &msg_type);
+	if (!msg_handler || !msg_handler->func) {
+		dev_err(dev,
+			"No handler for msg_type: %u (srcid=%u, msgid=%u)\n",
+			msg_type, srcid, msgid);
+		return;
+	}
+	msg_handler->func(msg_handler->priv, srcid, msgid, payload,
+			  payload_len);
+}
+
+/*
+ * Re-arm the RX descriptor at rxq->next_to_use with its buffer, advance
+ * next_to_use (wrapping at num_rxq_entries) and tail_ptr, and publish the
+ * new tail pointer to the hardware.
+ */
+static void nbl_chan_advance_rx_ring(struct nbl_channel_mgt *chan_mgt,
+				     struct nbl_chan_info *chan_info,
+				     struct nbl_chan_ring *rxq)
+{
+	struct nbl_hw_ops *hw_ops = chan_mgt->hw_ops_tbl->ops;
+	struct nbl_chan_rx_desc *rx_desc;
+	struct nbl_chan_buf *rx_buf;
+	u16 next_to_use;
+
+	next_to_use = rxq->next_to_use;
+	rx_desc = NBL_CHAN_RX_RING_TO_DESC(rxq, next_to_use);
+	rx_buf = NBL_CHAN_RX_RING_TO_BUF(rxq, next_to_use);
+
+	rx_desc->buf_addr = cpu_to_le64(rx_buf->pa);
+	rx_desc->buf_len = cpu_to_le32(chan_info->rxq_buf_size);
+
+	/*
+	 * DMA Write Memory Barrier:
+	 * Ensures the buffer address/length stores above are completed
+	 * before the descriptor flags are updated.
+	 * This prevents hardware from seeing a partially updated descriptor
+	 * where flags are set but buffer info isn't ready yet.
+	 */
+	dma_wmb();
+
+	rx_desc->flags = cpu_to_le16(BIT(NBL_CHAN_RX_DESC_AVAIL));
+
+	/*
+	 * Full write barrier: orders the descriptor flags store above before
+	 * the tail pointer update issued through NBL_UPDATE_QUEUE_TAIL_PTR()
+	 * below, so the tail pointer is never published ahead of the
+	 * descriptor it covers.
+	 */
+	wmb();
+	rxq->next_to_use++;
+	if (rxq->next_to_use == chan_info->num_rxq_entries)
+		rxq->next_to_use = 0;
+	rxq->tail_ptr++;
+
+	NBL_UPDATE_QUEUE_TAIL_PTR(chan_info, hw_ops, chan_mgt, rxq->tail_ptr,
+				  NBL_MB_RX_QID);
+}
+
+/*
+ * Drain the RX ring: for every descriptor marked USED, starting at
+ * rxq->next_to_clean, dispatch the message in its buffer and re-arm one
+ * descriptor at next_to_use.
+ *
+ * Since the channel operates in either polling mode or interrupt mode
+ * (mutually exclusive, configured via set_queue_state), nbl_chan_clean_queue
+ * is always called in a serialized manner:
+ * 1. In polling mode: nbl_chan_clean_queue is called directly within
+ * nbl_chan_send_msg, in the same thread after txq_lock has been released.
+ * No other thread can call it concurrently.
+ * 2. In interrupt mode: nbl_chan_clean_queue is called from a workqueue
+ * (nbl_dev_clean_mailbox_task). Linux workqueue guarantees that the same
+ * work item never runs concurrently on multiple CPUs.
+ * Therefore, at any given time, only one execution context can be inside
+ * nbl_chan_clean_queue. There is no concurrency, and thus no need for
+ * locking.
+ */
+static void nbl_chan_clean_queue(struct nbl_channel_mgt *chan_mgt,
+				 struct nbl_chan_info *chan_info)
+{
+	struct nbl_chan_ring *rxq = &chan_info->rxq;
+	struct device *dev = chan_mgt->common->dev;
+	struct nbl_chan_rx_desc *rx_desc;
+	struct nbl_chan_buf *rx_buf;
+	u16 next_to_clean;
+
+	next_to_clean = rxq->next_to_clean;
+	rx_desc = NBL_CHAN_RX_RING_TO_DESC(rxq, next_to_clean);
+	rx_buf = NBL_CHAN_RX_RING_TO_BUF(rxq, next_to_clean);
+	while (le16_to_cpu(rx_desc->flags) & BIT(NBL_CHAN_RX_DESC_USED)) {
+		/* A missing WRITE flag is only logged; the message is kept */
+		if (!(le16_to_cpu(rx_desc->flags) &
+		      BIT(NBL_CHAN_RX_DESC_WRITE)))
+			dev_dbg(dev,
+				"mailbox rx flag 0x%x has no NBL_CHAN_RX_DESC_WRITE\n",
+				le16_to_cpu(rx_desc->flags));
+
+		/* Read the buffer contents only after the flags were read */
+		dma_rmb();
+		nbl_chan_recv_msg(chan_mgt, rx_buf->va);
+		nbl_chan_advance_rx_ring(chan_mgt, chan_info, rxq);
+		next_to_clean++;
+		if (next_to_clean == chan_info->num_rxq_entries)
+			next_to_clean = 0;
+		rx_desc = NBL_CHAN_RX_RING_TO_DESC(rxq, next_to_clean);
+		rx_buf = NBL_CHAN_RX_RING_TO_BUF(rxq, next_to_clean);
+	}
+	rxq->next_to_clean = next_to_clean;
+}
+
+/*
+ * Drain the RX ring of channel @chan_type, but only while the channel has
+ * NBL_CHAN_INTERRUPT_READY set in its state bits.
+ */
+static void nbl_chan_clean_queue_subtask(struct nbl_channel_mgt *chan_mgt,
+					 u8 chan_type)
+{
+	struct nbl_chan_info *chan_info = chan_mgt->chan_info[chan_type];
+
+	if (test_bit(NBL_CHAN_INTERRUPT_READY, chan_info->state))
+		nbl_chan_clean_queue(chan_mgt, chan_info);
+}
+
+/*
+ * Pick a free ACK wait slot and build the message id for it.
+ *
+ * The search starts at chan_info->wait_head_index and accepts the first
+ * slot whose status is IDLE or TIMEOUT. The slot's msg_index is advanced
+ * (wrapping after NBL_CHAN_MSG_INDEX_MAX) and combined with the slot
+ * location into *msgid; wait_head_index then moves to the following slot.
+ *
+ * The slot status is read without taking status_lock.
+ * NOTE(review): the loop runs NBL_CHAN_QUEUE_LEN times while the index
+ * wraps at num_txq_entries - 1; the two are equal as set up by
+ * nbl_chan_setup_queue().
+ *
+ * Return: 0 with *msgid set, or -EAGAIN if no slot is free.
+ */
+static int nbl_chan_get_msg_id(struct nbl_chan_info *chan_info,
+			       u16 *msgid)
+{
+	int valid_loc = chan_info->wait_head_index, i;
+	struct nbl_chan_waitqueue_head *wait = NULL;
+	int status;
+
+	for (i = 0; i < NBL_CHAN_QUEUE_LEN; i++) {
+		wait = &chan_info->wait[valid_loc];
+		status = wait->status;
+		if (status == NBL_MBX_STATUS_IDLE ||
+		    status == NBL_MBX_STATUS_TIMEOUT) {
+			wait->msg_index = NBL_NEXT_ID(wait->msg_index,
+						      NBL_CHAN_MSG_INDEX_MAX);
+			*msgid =
+				FIELD_PREP(NBL_CHAN_MSGID_INDEX_MASK,
+					   wait->msg_index) |
+				FIELD_PREP(NBL_CHAN_MSGID_LOC_MASK, valid_loc);
+			valid_loc = NBL_NEXT_ID(valid_loc,
+						chan_info->num_txq_entries - 1);
+			chan_info->wait_head_index = valid_loc;
+			return 0;
+		}
+
+		valid_loc =
+			NBL_NEXT_ID(valid_loc, chan_info->num_txq_entries - 1);
+	}
+
+	/*
+	 * The current NBL_CHAN_QUEUE_LEN configuration meets the design
+	 * requirements and should not normally lead here, but the waiting
+	 * queue can still become full in the following scenarios:
+	 * High-concurrency scenarios:
+	 * If the sender (calling nbl_chan_send_msg()) generates messages
+	 * at a rate far exceeding the receiver's ability to process
+	 * acknowledgments (ACKs), the waiting queue may become fully occupied.
+	 * Delayed or failed ACK handling by the receiver:
+	 * The receiver may fail to send ACKs in a timely manner due to
+	 * processing delays, blocking, or faults, causing the sender's
+	 * waiting queue slots to remain occupied for an extended period.
+	 */
+	return -EAGAIN;
+}
+
+/*
+ * Send a message on the mailbox channel and, if requested, wait for its ACK.
+ *
+ * A wait slot/msgid is reserved, the message is queued and the TX ring is
+ * kicked, all under txq_lock. When chan_send->ack is zero the function
+ * returns right after a successful kick. Otherwise it waits for the slot's
+ * 'acked' flag: by sleeping on the slot's wait queue when
+ * NBL_CHAN_INTERRUPT_READY is set, or by calling nbl_chan_clean_queue() in a
+ * polling loop of up to NBL_CHAN_TX_WAIT_ACK_TIMES iterations otherwise.
+ *
+ * Return: when an ACK is received, the ack_err value stored in the wait
+ * slot; 0 when no ACK was requested; -EIO if the channel is marked
+ * NBL_CHAN_ABNORMAL; -EINVAL if resp_len exceeds NBL_CHAN_BUF_LEN;
+ * -ETIMEDOUT if no ACK arrives in time; or the error returned by the
+ * msgid, queue-update or kick helpers.
+ */
+static int nbl_chan_send_msg(struct nbl_channel_mgt *chan_mgt,
+			     struct nbl_chan_send_info *chan_send)
+{
+	struct nbl_common_info *common = chan_mgt->common;
+	struct nbl_chan_waitqueue_head *wait_head;
+	struct nbl_chan_tx_param tx_param = { 0 };
+	u16 msgid = 0;
+	int i = NBL_CHAN_TX_WAIT_ACK_TIMES, ret;
+	struct nbl_chan_info *chan_info =
+		chan_mgt->chan_info[NBL_CHAN_TYPE_MAILBOX];
+	struct device *dev = common->dev;
+
+	if (test_bit(NBL_CHAN_ABNORMAL, chan_info->state))
+		return -EIO;
+	if (chan_send->resp_len > NBL_CHAN_BUF_LEN) {
+		dev_err(dev, "resp_len %zu exceeds max %d\n",
+			chan_send->resp_len, NBL_CHAN_BUF_LEN);
+		return -EINVAL;
+	}
+	mutex_lock(&chan_info->txq_lock);
+
+	ret = nbl_chan_get_msg_id(chan_info, &msgid);
+	if (ret) {
+		mutex_unlock(&chan_info->txq_lock);
+		dev_err(dev,
+			"Channel tx wait head full, send msgtype:%u to dstid:%u failed\n",
+			chan_send->msg_type, chan_send->dstid);
+		return ret;
+	}
+
+	tx_param.msg_type = chan_send->msg_type;
+	tx_param.arg = chan_send->arg;
+	tx_param.arg_len = chan_send->arg_len;
+	tx_param.dstid = chan_send->dstid;
+	tx_param.msgid = msgid;
+
+	ret = nbl_chan_update_txqueue(chan_mgt, chan_info, &tx_param);
+	if (ret) {
+		mutex_unlock(&chan_info->txq_lock);
+		dev_err(dev,
+			"Channel tx queue full, send msgtype:%u to dstid:%u failed\n",
+			chan_send->msg_type, chan_send->dstid);
+		return ret;
+	}
+
+	wait_head =
+		&chan_info->wait[FIELD_GET(NBL_CHAN_MSGID_LOC_MASK, msgid)];
+	spin_lock_irq(&wait_head->status_lock);
+	wait_head->acked = 0;
+	wait_head->ack_data = chan_send->resp;
+	wait_head->ack_data_len = chan_send->resp_len;
+	wait_head->msg_type = chan_send->msg_type;
+	wait_head->msg_index = FIELD_GET(NBL_CHAN_MSGID_INDEX_MASK, msgid);
+	wait_head->dstid = chan_send->dstid;
+	/* Ensure all fields above are visible before status update, so receiver
+	 * won't see WAITING with stale data
+	 */
+	smp_wmb();
+	wait_head->status = chan_send->ack ? NBL_MBX_STATUS_WAITING :
+					     NBL_MBX_STATUS_IDLE;
+	spin_unlock_irq(&wait_head->status_lock);
+
+	ret = nbl_chan_kick_tx_ring(chan_mgt, chan_info);
+	mutex_unlock(&chan_info->txq_lock);
+	if (ret) {
+		/*
+		 * The kick failed: mark the slot TIMEOUT so that
+		 * nbl_chan_get_msg_id() may hand it out again.
+		 */
+		mutex_lock(&chan_info->txq_lock);
+		spin_lock_irq(&wait_head->status_lock);
+		wait_head->status = NBL_MBX_STATUS_TIMEOUT;
+		spin_unlock_irq(&wait_head->status_lock);
+		mutex_unlock(&chan_info->txq_lock);
+		return ret;
+	}
+
+	if (!chan_send->ack)
+		return 0;
+
+	if (test_bit(NBL_CHAN_INTERRUPT_READY, chan_info->state)) {
+		spin_lock_irq(&wait_head->status_lock);
+		while (!wait_head->acked) {
+			spin_unlock_irq(&wait_head->status_lock);
+			/*
+			 * Sleep until 'acked' becomes non-zero or the timeout
+			 * expires. The condition must be the real flag: with
+			 * a constant-false condition wait_event_timeout()
+			 * always sleeps for the whole timeout and returns 0,
+			 * even when the ACK has already arrived.
+			 */
+			ret = wait_event_timeout(wait_head->wait_queue,
+						 READ_ONCE(wait_head->acked),
+						 NBL_CHAN_ACK_WAIT_TIME);
+			spin_lock_irq(&wait_head->status_lock);
+
+			if (ret == 0) {
+				/* timed out and still not acked */
+				if (wait_head->status ==
+				    NBL_MBX_STATUS_WAITING) {
+					wait_head->status =
+						NBL_MBX_STATUS_TIMEOUT;
+					wait_head->acked = 0;
+					wait_head->ack_data = NULL;
+					wait_head->ack_data_len = 0;
+				}
+				spin_unlock_irq(&wait_head->status_lock);
+				dev_err(dev,
+					"Channel waiting ack failed, message type: %d, msg id: %u\n",
+					chan_send->msg_type, msgid);
+				return -ETIMEDOUT;
+			}
+
+			if (wait_head->acked)
+				break;
+		}
+		/*
+		 * ensure that after observing 'acked == 1', all
+		 * subsequent reads (ack_data_len, ack_err) observe
+		 * the latest values written by the sender
+		 * (nbl_chan_recv_ack_msg()). This prevents stale reads
+		 * of ACK data or status.
+		 */
+		smp_rmb();
+		chan_send->ack_len = wait_head->ack_data_len;
+		ret = wait_head->ack_err;
+		wait_head->acked = 0;
+		wait_head->status = NBL_MBX_STATUS_IDLE;
+		spin_unlock_irq(&wait_head->status_lock);
+
+		return ret;
+	}
+
+	/* NBL_CHAN_INTERRUPT_READY is not set: poll for the mailbox ACK */
+
+	while (i--) {
+		nbl_chan_clean_queue(chan_mgt, chan_info);
+
+		spin_lock_irq(&wait_head->status_lock);
+		if (wait_head->acked) {
+			/*
+			 * ensure that after observing 'acked == 1', all
+			 * subsequent reads (ack_data_len, ack_err) observe
+			 * the latest values written by the sender
+			 * (nbl_chan_recv_ack_msg()). This prevents stale reads
+			 * of ACK data or status.
+			 */
+			smp_rmb();
+			chan_send->ack_len = wait_head->ack_data_len;
+			ret = wait_head->ack_err;
+			wait_head->acked = 0;
+			wait_head->status = NBL_MBX_STATUS_IDLE;
+			spin_unlock_irq(&wait_head->status_lock);
+			return ret;
+		}
+		spin_unlock_irq(&wait_head->status_lock);
+
+		usleep_range(NBL_CHAN_TX_WAIT_ACK_US_MIN,
+			     NBL_CHAN_TX_WAIT_ACK_US_MAX);
+	}
+
+	/* polling budget exhausted: give the slot up as timed out */
+	spin_lock_irq(&wait_head->status_lock);
+	wait_head->acked = 0;
+	wait_head->status = NBL_MBX_STATUS_TIMEOUT;
+	spin_unlock_irq(&wait_head->status_lock);
+
+	dev_err(dev,
+		"Channel polling ack failed, message type: %d msg id: %u\n",
+		chan_send->msg_type, msgid);
+	return -ETIMEDOUT;
+}
+
+/*
+ * Build an ACK payload (message type, message id, error code and optional
+ * data) and send it as an NBL_CHAN_MSG_ACK message that itself requests no
+ * ACK.
+ *
+ * Return: -EINVAL if the data does not fit next to the header and TX
+ * descriptor in NBL_CHAN_BUF_LEN, -ENOMEM if the temporary buffer cannot be
+ * allocated, otherwise the result of nbl_chan_send_msg().
+ */
+static int nbl_chan_send_ack(struct nbl_channel_mgt *chan_mgt,
+			     struct nbl_chan_ack_info *chan_ack)
+{
+	const size_t hdr_bytes = NBL_CHAN_ACK_HEAD_LEN * sizeof(u32);
+	size_t payload = chan_ack->data_len;
+	struct nbl_chan_send_info ack_msg;
+	size_t total;
+	__le32 *buf;
+	int err;
+
+	if (payload >
+	    NBL_CHAN_BUF_LEN - sizeof(struct nbl_chan_tx_desc) - hdr_bytes)
+		return -EINVAL;
+
+	total = hdr_bytes + payload;
+	buf = kzalloc(total, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* header words: type and id occupy the first two bytes of their word */
+	*(__le16 *)&buf[NBL_CHAN_MSG_TYPE_POS] =
+		cpu_to_le16(chan_ack->msg_type);
+	*(__le16 *)&buf[NBL_CHAN_MSG_ID_POS] = cpu_to_le16(chan_ack->msgid);
+	buf[NBL_CHAN_ACK_RET_POS] = cpu_to_le32(chan_ack->err);
+
+	/* optional data follows the header */
+	if (chan_ack->data && payload)
+		memcpy(&buf[NBL_CHAN_ACK_HEAD_LEN], chan_ack->data, payload);
+
+	NBL_CHAN_SEND(ack_msg, chan_ack->dstid, NBL_CHAN_MSG_ACK, buf, total,
+		      NULL, 0, 0);
+	err = nbl_chan_send_msg(chan_mgt, &ack_msg);
+	kfree(buf);
+
+	return err;
+}
+
+/*
+ * channel_ops.register_msg entry point: forwards its arguments unchanged to
+ * nbl_chan_add_msg_handler() and returns that function's result.
+ */
+static int nbl_chan_register_msg(struct nbl_channel_mgt *chan_mgt, u16 msg_type,
+				 nbl_chan_resp func, void *callback)
+{
+	return nbl_chan_add_msg_handler(chan_mgt, msg_type, func, callback);
+}
+
+/* Return true when chan_mgt->chan_info[chan_type] is non-NULL. */
+static bool nbl_chan_check_queue_exist(struct nbl_channel_mgt *chan_mgt,
+				       u8 chan_type)
+{
+	return !!chan_mgt->chan_info[chan_type];
+}
+
+/* Store @task as the clean_task of the channel selected by @chan_type. */
+static void nbl_chan_register_chan_task(struct nbl_channel_mgt *chan_mgt,
+					u8 chan_type, struct work_struct *task)
+{
+	chan_mgt->chan_info[chan_type]->clean_task = task;
+}
+
+/*
+ * Set (@set != 0) or clear (@set == 0) the @state bit in the state bitmap
+ * of the channel selected by @chan_type.
+ */
+static void nbl_chan_set_queue_state(struct nbl_channel_mgt *chan_mgt,
+				     enum nbl_chan_state state, u8 chan_type,
+				     u8 set)
+{
+	unsigned long *bits = chan_mgt->chan_info[chan_type]->state;
+
+	if (!set) {
+		clear_bit(state, bits);
+		return;
+	}
+
+	set_bit(state, bits);
+}
+
 static struct nbl_channel_ops chan_ops = {
+	.send_msg			= nbl_chan_send_msg,
+	.send_ack			= nbl_chan_send_ack,
+	.register_msg			= nbl_chan_register_msg,
+	.cfg_chan_qinfo_map_table	= nbl_chan_cfg_qinfo_map_table,
+	.check_queue_exist		= nbl_chan_check_queue_exist,
+	.setup_queue			= nbl_chan_setup_queue,
+	.teardown_queue			= nbl_chan_teardown_queue,
+	.clean_queue_subtask		= nbl_chan_clean_queue_subtask,
+	.register_chan_task		= nbl_chan_register_chan_task,
+	.set_queue_state		= nbl_chan_set_queue_state,
 };
 
 static struct nbl_channel_mgt *
@@ -18,6 +976,7 @@ nbl_chan_setup_chan_mgt(struct nbl_adapter *adapter)
 	struct device *dev = &adapter->pdev->dev;
 	struct nbl_channel_mgt *chan_mgt;
 	struct nbl_chan_info *mailbox;
+	int ret;
 
 	chan_mgt = devm_kzalloc(dev, sizeof(*chan_mgt), GFP_KERNEL);
 	if (!chan_mgt)
@@ -32,6 +991,10 @@ nbl_chan_setup_chan_mgt(struct nbl_adapter *adapter)
 	mailbox->chan_type = NBL_CHAN_TYPE_MAILBOX;
 	chan_mgt->chan_info[NBL_CHAN_TYPE_MAILBOX] = mailbox;
 
+	ret = nbl_chan_init_msg_handler(chan_mgt);
+	if (ret)
+		return ERR_PTR(-ENOMEM);
+
 	return chan_mgt;
 }
 
@@ -39,6 +1002,7 @@ static struct nbl_channel_ops_tbl *
 nbl_chan_setup_ops(struct device *dev, struct nbl_channel_mgt *chan_mgt)
 {
 	struct nbl_channel_ops_tbl *chan_ops_tbl;
+	int ret;
 
 	chan_ops_tbl = devm_kzalloc(dev, sizeof(*chan_ops_tbl), GFP_KERNEL);
 	if (!chan_ops_tbl)
@@ -47,6 +1011,11 @@ nbl_chan_setup_ops(struct device *dev, struct nbl_channel_mgt *chan_mgt)
 	chan_ops_tbl->ops = &chan_ops;
 	chan_ops_tbl->priv = chan_mgt;
 
+	ret = nbl_chan_register_msg(chan_mgt, NBL_CHAN_MSG_ACK,
+				    nbl_chan_recv_ack_msg, chan_mgt);
+	if (ret)
+		return ERR_PTR(-ENOMEM);
+
 	return chan_ops_tbl;
 }
 
@@ -57,22 +1026,47 @@ int nbl_chan_init_common(struct nbl_adapter *adap)
 	struct nbl_channel_mgt *chan_mgt;
 	int ret;
 
+	BUILD_BUG_ON(sizeof(struct nbl_chan_param_cfg_msix_map) != 8);
+	BUILD_BUG_ON(sizeof(struct nbl_chan_param_set_mailbox_irq) != 4);
+	BUILD_BUG_ON(sizeof(struct nbl_chan_param_get_vsi_id) != 4);
+	BUILD_BUG_ON(sizeof(struct nbl_chan_param_get_eth_id) != 8);
+	BUILD_BUG_ON(sizeof(struct nbl_board_port_info) != 8);
 	chan_mgt = nbl_chan_setup_chan_mgt(adap);
 	if (IS_ERR(chan_mgt)) {
 		ret = PTR_ERR(chan_mgt);
-		return ret;
+		goto exit;
 	}
 
 	chan_ops_tbl = nbl_chan_setup_ops(dev, chan_mgt);
 	if (IS_ERR(chan_ops_tbl)) {
 		ret = PTR_ERR(chan_ops_tbl);
-		return ret;
+		goto exit;
 	}
 	adap->intf.channel_ops_tbl = chan_ops_tbl;
 	adap->core.chan_mgt = chan_mgt;
 	return 0;
+
+exit:
+	if (!IS_ERR(chan_mgt)) {
+		nbl_chan_remove_msg_handler(chan_mgt);
+		adap->core.chan_mgt = NULL;
+	}
+	return ret;
 }
 
 void nbl_chan_remove_common(struct nbl_adapter *adap)
 {
+	struct nbl_channel_mgt *chan_mgt = adap->core.chan_mgt;
+
+	if (chan_mgt) {
+		nbl_chan_remove_msg_handler(chan_mgt);
+		adap->core.chan_mgt = NULL;
+	}
+	/*
+	 * teardown_queue() is intentionally not called here. Queue teardown,
+	 * including the cancel_work_sync() of the channel work, is done via
+	 * nbl_dev_remove() -> nbl_dev_remove_common_dev() ->
+	 * nbl_dev_remove_chan_queue() -> teardown_queue(), which is
+	 * guaranteed to run before this function.
+	 */
 }
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.h
index 637912d1e806..01e1a56ef0d1 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.h
@@ -15,10 +15,147 @@
 #include "../nbl_include/nbl_def_common.h"
 #include "../nbl_core.h"
 
+#define NBL_CHAN_TX_RING_TO_DESC(tx_ring, i) \
+	(&((((tx_ring)->desc.tx_desc))[i]))
+#define NBL_CHAN_RX_RING_TO_DESC(rx_ring, i) \
+	(&((((rx_ring)->desc.rx_desc))[i]))
+#define NBL_CHAN_TX_RING_TO_BUF(tx_ring, i) (&(((tx_ring)->buf)[i]))
+#define NBL_CHAN_RX_RING_TO_BUF(rx_ring, i) (&(((rx_ring)->buf)[i]))
+
+#define NBL_CHAN_TX_WAIT_US			100
+#define NBL_CHAN_TX_WAIT_US_MAX			120
+#define NBL_CHAN_TX_WAIT_TIMES			100
+#define NBL_CHAN_TX_WAIT_ACK_US_MIN		100
+#define NBL_CHAN_TX_WAIT_ACK_US_MAX		120
+#define NBL_CHAN_TX_WAIT_ACK_TIMES		50000
+#define NBL_CHAN_QUEUE_LEN			256
+#define NBL_CHAN_CLEAN_BATCH_SIZE		32
+#define NBL_CHAN_BUF_LEN			4096
+#define NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN	16
+
+#define NBL_CHAN_TX_DESC_AVAIL			0
+#define NBL_CHAN_TX_DESC_USED			1
+#define NBL_CHAN_RX_DESC_WRITE			1
+#define NBL_CHAN_RX_DESC_AVAIL			3
+#define NBL_CHAN_RX_DESC_USED			4
+
+#define NBL_CHAN_ACK_HEAD_LEN			3
+#define NBL_CHAN_ACK_RET_POS			2
+#define NBL_CHAN_MSG_ID_POS			1
+#define NBL_CHAN_MSG_TYPE_POS			0
+
+#define NBL_CHAN_ACK_WAIT_TIME			(3 * HZ)
+
+#define NBL_CHAN_HANDLER_TBL_BUCKET_SIZE	512
+
+enum {
+	NBL_MB_RX_QID = 0,
+	NBL_MB_TX_QID = 1,
+};
+
+enum {
+	NBL_MBX_STATUS_IDLE = 0,
+	NBL_MBX_STATUS_WAITING,
+	NBL_MBX_STATUS_TIMEOUT,
+};
+
+struct nbl_chan_tx_param {
+	enum nbl_chan_msg_type msg_type;
+	void *arg;
+	size_t arg_len;
+	u16 dstid;
+	u16 msgid;
+};
+
+struct nbl_chan_buf {
+	void *va;
+	dma_addr_t pa;
+	size_t size;
+};
+
+struct nbl_chan_tx_desc {
+	__le16 flags;
+	__le16 srcid;
+	__le16 dstid;
+	__le16 data_len;
+	__le16 buf_len;
+	__le64 buf_addr;
+	__le16 msg_type;
+	u8 data[16];
+	__le16 msgid;
+	u8 rsv[26];
+} __packed;
+
+struct nbl_chan_rx_desc {
+	__le16 flags;
+	__le32 buf_len;
+	__le16 buf_id;
+	__le64 buf_addr;
+} __packed;
+
+union nbl_chan_desc_ptr {
+	struct nbl_chan_tx_desc *tx_desc;
+	struct nbl_chan_rx_desc *rx_desc;
+};
+
+struct nbl_chan_ring {
+	union nbl_chan_desc_ptr desc;
+	struct nbl_chan_buf *buf;
+	u16 next_to_use;
+	u16 tail_ptr;
+	u16 next_to_clean;
+	dma_addr_t dma;
+};
+
+#define NBL_CHAN_MSG_INDEX_MAX 63
+
+#define NBL_CHAN_MSGID_INDEX_MASK GENMASK(5, 0)
+#define NBL_CHAN_MSGID_LOC_MASK GENMASK(13, 6)
+
+struct nbl_chan_waitqueue_head {
+	struct wait_queue_head wait_queue;
+	char *ack_data;
+	int acked;
+	int ack_err;
+	u16 ack_data_len;
+	u16 msg_type;
+	/*
+	 * Spinlock protecting all fields.
+	 * Must be held when reading/writing: status, acked, ack_err,
+	 * ack_data_len, etc.
+	 * The lock ensures atomic updates of these fields and
+	 * proper memory ordering with smp_wmb()/smp_rmb().
+	 */
+	spinlock_t status_lock;
+	int status;
+	u8 msg_index;
+	u16 dstid;
+};
+
 struct nbl_chan_info {
+	struct nbl_chan_ring txq;
+	struct nbl_chan_ring rxq;
+	struct nbl_chan_waitqueue_head *wait;
+	/*
+	 * Protects access to the TX queue (txq) and related metadata.
+	 * This mutex ensures exclusive access when updating the TX queue.
+	 */
+	struct mutex txq_lock;
+	struct work_struct *clean_task;
+	u16 wait_head_index;
+	u16 num_txq_entries;
+	u16 num_rxq_entries;
+	u16 txq_buf_size;
+	u16 rxq_buf_size;
+	DECLARE_BITMAP(state, NBL_CHAN_STATE_NBITS);
 	u8 chan_type;
 };
 
+struct nbl_chan_msg_node_data {
+	nbl_chan_resp func;
+	void *priv;
+};
+
 struct nbl_channel_mgt {
 	struct nbl_common_info *common;
 	struct nbl_hw_ops_tbl *hw_ops_tbl;
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_common/nbl_common.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_common/nbl_common.c
new file mode 100644
index 000000000000..2954cbbe0f95
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_common/nbl_common.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#include <linux/device.h>
+#include "nbl_common.h"
+
+/*
+ * Return @pf_id - common->mgt_pf, or U32_MAX when @pf_id is smaller than
+ * common->mgt_pf.
+ */
+u32 nbl_common_pf_id_subtraction_mgtpf_id(struct nbl_common_info *common,
+					  u32 pf_id)
+{
+	if (pf_id < common->mgt_pf)
+		return U32_MAX;
+
+	return pf_id - common->mgt_pf;
+}
+
+#define FNV_PRIME_32 0x01000193
+#define FNV_OFFSET_32 0x811C9DC5
+/*
+ * Map the @key_size bytes at @key to a bucket index below @bucket_size
+ * using a 32-bit FNV-1a style hash (xor each byte, then multiply by the
+ * prime). Tables with zero or NBL_HASH_TBL_LIST_BUCKET_SIZE buckets always
+ * map to bucket 0.
+ */
+static u32 nbl_common_calc_hash_key(void *key, u32 key_size, u32 bucket_size)
+{
+	const u8 *bytes = key;
+	u32 hash = FNV_OFFSET_32;
+	u32 n;
+
+	if (!bucket_size || bucket_size == NBL_HASH_TBL_LIST_BUCKET_SIZE)
+		return 0;
+
+	for (n = 0; n < key_size; n++)
+		hash = (hash ^ bytes[n]) * FNV_PRIME_32;
+
+	/* not a power of two: fall back to a real modulo */
+	if (bucket_size & (bucket_size - 1))
+		return hash % bucket_size;
+
+	/* power of two: the modulo reduces to a bitmask */
+	return hash & (bucket_size - 1);
+}
+
+/*
+ * Allocate and initialise a hash table described by @key.
+ *
+ * The management structure and the bucket array are devm-allocated on
+ * key->dev, every bucket head is initialised, and @key is copied into the
+ * table.
+ *
+ * Return: the new table, or NULL if key->bucket_size is 0 or an allocation
+ * fails.
+ */
+struct nbl_hash_tbl_mgt *
+nbl_common_init_hash_table(struct nbl_hash_tbl_key *key)
+{
+	struct nbl_hash_tbl_mgt *tbl_mgt;
+	u32 bucket_size = key->bucket_size;
+	u32 i;
+
+	/*
+	 * A table needs at least one bucket: the insert and lookup paths
+	 * access bucket 0 even when bucket_size is 0.
+	 */
+	if (!bucket_size)
+		return NULL;
+
+	tbl_mgt = devm_kzalloc(key->dev, sizeof(*tbl_mgt), GFP_KERNEL);
+	if (!tbl_mgt)
+		return NULL;
+
+	tbl_mgt->hash = devm_kcalloc(key->dev, bucket_size,
+				     sizeof(struct hlist_head), GFP_KERNEL);
+	if (!tbl_mgt->hash)
+		goto alloc_hash_failed;
+
+	for (i = 0; i < bucket_size; i++)
+		INIT_HLIST_HEAD(tbl_mgt->hash + i);
+
+	memcpy(&tbl_mgt->tbl_key, key, sizeof(struct nbl_hash_tbl_key));
+
+	return tbl_mgt;
+
+alloc_hash_failed:
+	/* do not keep the half-built table around until device detach */
+	devm_kfree(key->dev, tbl_mgt);
+	return NULL;
+}
+
+/*
+ * Allocate a hash node, copy @key and @data into it and link it at the head
+ * of the bucket selected by hashing @key. If @out_data is non-NULL it
+ * receives a pointer to the node's private copy of the data.
+ *
+ * Locking: this function takes no lock itself. It is meant to be called
+ * only during init (from nbl_chan_init_msg_handler()), where there is no
+ * concurrent mutation. After init the table is treated as read-only (there
+ * is no unregister API), so lookups take no lock either.
+ * NOTE(review): an earlier version of this comment also named the caller's
+ * txq_lock as protecting the table - confirm which rule actually applies.
+ *
+ * The tbl_mgt and bucket array are devm-allocated, so they are automatically
+ * freed on device detach. Only the hash nodes themselves need explicit cleanup.
+ *
+ * Return: 0 on success, -ENOMEM if any of the three allocations fails.
+ */
+int nbl_common_alloc_hash_node(struct nbl_hash_tbl_mgt *tbl_mgt, void *key,
+			       void *data, void **out_data)
+{
+	struct nbl_hash_entry_node *hash_node;
+	u16 data_size;
+	u32 hash_val;
+	u16 key_size;
+
+	hash_node = devm_kzalloc(tbl_mgt->tbl_key.dev, sizeof(*hash_node),
+				 GFP_KERNEL);
+	if (!hash_node)
+		return -ENOMEM;
+
+	key_size = tbl_mgt->tbl_key.key_size;
+	hash_node->key =
+		devm_kzalloc(tbl_mgt->tbl_key.dev, key_size, GFP_KERNEL);
+	if (!hash_node->key)
+		goto alloc_key_failed;
+
+	data_size = tbl_mgt->tbl_key.data_size;
+	hash_node->data =
+		devm_kzalloc(tbl_mgt->tbl_key.dev, data_size, GFP_KERNEL);
+	if (!hash_node->data)
+		goto alloc_data_failed;
+
+	memcpy(hash_node->key, key, key_size);
+	memcpy(hash_node->data, data, data_size);
+
+	hash_val = nbl_common_calc_hash_key(key, key_size,
+					    tbl_mgt->tbl_key.bucket_size);
+
+	hlist_add_head(&hash_node->node, tbl_mgt->hash + hash_val);
+	tbl_mgt->node_num++;
+	if (out_data)
+		*out_data = hash_node->data;
+
+	return 0;
+
+alloc_data_failed:
+	devm_kfree(tbl_mgt->tbl_key.dev, hash_node->key);
+alloc_key_failed:
+	devm_kfree(tbl_mgt->tbl_key.dev, hash_node);
+	return -ENOMEM;
+}
+
+/*
+ * Look up @key in the table.
+ *
+ * Return: the data pointer of the first node in the key's bucket whose
+ * stored key compares equal to @key, or NULL if there is none.
+ */
+void *nbl_common_get_hash_node(struct nbl_hash_tbl_mgt *tbl_mgt, void *key)
+{
+	u16 key_len = tbl_mgt->tbl_key.key_size;
+	struct nbl_hash_entry_node *entry;
+	struct hlist_head *bucket;
+	u32 idx;
+
+	idx = nbl_common_calc_hash_key(key, key_len,
+				       tbl_mgt->tbl_key.bucket_size);
+	bucket = tbl_mgt->hash + idx;
+
+	hlist_for_each_entry(entry, bucket, node) {
+		if (memcmp(entry->key, key, key_len) == 0)
+			return entry->data;
+	}
+
+	return NULL;
+}
+
+/*
+ * Unlink @hash_node from its bucket, drop it from the node count and free
+ * the node together with its key and data copies.
+ */
+static void nbl_common_detach_hash_node(struct nbl_hash_tbl_mgt *tbl_mgt,
+					struct nbl_hash_entry_node *hash_node)
+{
+	struct device *dev = tbl_mgt->tbl_key.dev;
+
+	hlist_del(&hash_node->node);
+	tbl_mgt->node_num--;
+
+	devm_kfree(dev, hash_node->key);
+	devm_kfree(dev, hash_node->data);
+	devm_kfree(dev, hash_node);
+}
+
+/*
+ * Remove and free every node of the table.
+ *
+ * When @key and key->action_func are both set, the callback is invoked with
+ * key->action_priv and the node's key and data before the node is freed.
+ *
+ * Only the per-node allocations (node, key, data) are released here. The
+ * table skeleton (tbl_mgt and the bucket array) is devm-allocated and is
+ * released by the devm framework on device detach.
+ */
+void nbl_common_remove_hash_table(struct nbl_hash_tbl_mgt *tbl_mgt,
+				  struct nbl_hash_tbl_del_key *key)
+{
+	struct nbl_hash_entry_node *entry;
+	struct hlist_node *next;
+	u32 bucket;
+
+	for (bucket = 0; bucket < tbl_mgt->tbl_key.bucket_size; bucket++) {
+		hlist_for_each_entry_safe(entry, next,
+					  tbl_mgt->hash + bucket, node) {
+			if (key && key->action_func)
+				key->action_func(key->action_priv, entry->key,
+						 entry->data);
+			nbl_common_detach_hash_node(tbl_mgt, entry);
+		}
+	}
+}
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_common/nbl_common.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_common/nbl_common.h
new file mode 100644
index 000000000000..7a91d4eca105
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_common/nbl_common.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_COMMON_H_
+#define _NBL_COMMON_H_
+
+#include <linux/types.h>
+
+#include "../nbl_include/nbl_include.h"
+#include "../nbl_include/nbl_def_common.h"
+
+/* a plain list needs only a single bucket */
+#define NBL_HASH_TBL_LIST_BUCKET_SIZE 1
+
+struct nbl_common_wq_mgt {
+	struct workqueue_struct *ctrl_dev_wq;
+};
+
+struct nbl_hash_tbl_mgt {
+	struct nbl_hash_tbl_key tbl_key;
+	struct hlist_head *hash;
+	u16 node_num;
+};
+
+struct nbl_hash_entry_node {
+	struct hlist_node node;
+	void *key;
+	void *data;
+};
+
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
index 08ddbf5b0eb2..1d25d7770d8d 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
@@ -7,9 +7,153 @@
 #include <linux/bits.h>
 #include <linux/io.h>
 #include <linux/spinlock.h>
+#include <linux/bitfield.h>
 #include "nbl_hw_leonis.h"
 
+/*
+ * Write @len bytes from @data, one 32-bit word at a time, through
+ * nbl_mbx_wr32() to consecutive register offsets starting at @reg.
+ * Nothing is written unless @len is a multiple of 4.
+ */
+static void nbl_hw_write_mbx_regs(struct nbl_hw_mgt *hw_mgt, u64 reg,
+				  const u32 *data, u32 len)
+{
+	u32 nr_words = len / 4;
+	u32 w;
+
+	if (len & 3)
+		return;
+
+	for (w = 0; w < nr_words; w++)
+		nbl_mbx_wr32(hw_mgt, reg + w * sizeof(u32), data[w]);
+}
+
+/*
+ * Read @len bytes into @data, one 32-bit word at a time, from consecutive
+ * register offsets starting at @reg, holding reg_lock for the whole burst.
+ * Nothing is read (and @data is left untouched) unless @len is a multiple
+ * of 4.
+ */
+static void nbl_hw_rd_regs(struct nbl_hw_mgt *hw_mgt, u64 reg, u32 *data,
+			   u32 len)
+{
+	u32 nr_words = len / 4;
+	u32 w;
+
+	if (len & 3)
+		return;
+
+	spin_lock(&hw_mgt->reg_lock);
+	for (w = 0; w < nr_words; w++)
+		data[w] = rd32(hw_mgt->hw_addr, reg + w * sizeof(u32));
+	spin_unlock(&hw_mgt->reg_lock);
+}
+
+/*
+ * Write @len bytes from @data, one 32-bit word at a time, to consecutive
+ * register offsets starting at @reg, holding reg_lock for the whole burst.
+ * Nothing is written unless @len is a multiple of 4.
+ */
+static void nbl_hw_wr_regs(struct nbl_hw_mgt *hw_mgt, u64 reg, const u32 *data,
+			   u32 len)
+{
+	u32 nr_words = len / 4;
+	u32 w;
+
+	if (len & 3)
+		return;
+
+	spin_lock(&hw_mgt->reg_lock);
+	for (w = 0; w < nr_words; w++)
+		wr32(hw_mgt->hw_addr, reg + w * sizeof(u32), data[w]);
+	spin_unlock(&hw_mgt->reg_lock);
+}
+
+/*
+ * Notify the mailbox of a new tail pointer: writes @tail_ptr in the upper
+ * 16 bits and the local queue id (@txrx) in the lower bits to
+ * NBL_MAILBOX_NOTIFY_ADDR.
+ */
+static void nbl_hw_update_mailbox_queue_tail_ptr(struct nbl_hw_mgt *hw_mgt,
+						 u16 tail_ptr, u8 txrx)
+{
+	/* local_qid 0 and 1 denote rx and tx queue respectively */
+	u32 local_qid = txrx;
+	u32 value = ((u32)tail_ptr << 16) | local_qid;
+
+	/*
+	 * Order all earlier memory writes before the doorbell register write
+	 * (presumably so the device sees fully written descriptors - confirm
+	 * against the ring-fill code).
+	 */
+	wmb();
+	nbl_mbx_wr32(hw_mgt, NBL_MAILBOX_NOTIFY_ADDR, value);
+}
+
+/*
+ * Program the mailbox RX queue config table: first write it with only the
+ * reset bit set, then write the ring DMA address, the size field and the
+ * enable bit with reset cleared.
+ */
+static void nbl_hw_config_mailbox_rxq(struct nbl_hw_mgt *hw_mgt,
+				      dma_addr_t dma_addr, int size_bwid)
+{
+	struct nbl_mailbox_qinfo_cfg_table tbl = {};
+
+	/* pass 1: everything zero except the reset bit */
+	tbl.data[3] = FIELD_PREP(NBL_MAILBOX_QINFO_CFG_QUEUE_RST_MASK, 1);
+	nbl_hw_write_mbx_regs(hw_mgt, NBL_MAILBOX_QINFO_CFG_RX_TABLE_ADDR,
+			      tbl.data, sizeof(tbl));
+
+	/* pass 2: ring address and size, reset cleared, queue enabled */
+	tbl.data[0] = lower_32_bits(dma_addr);
+	tbl.data[1] = upper_32_bits(dma_addr);
+	tbl.data[2] = FIELD_PREP(NBL_MAILBOX_QINFO_CFG_QUEUE_SIZE_BWID_MASK,
+				 size_bwid);
+	tbl.data[3] = FIELD_PREP(NBL_MAILBOX_QINFO_CFG_QUEUE_RST_MASK, 0) |
+		      FIELD_PREP(NBL_MAILBOX_QINFO_CFG_QUEUE_EN_MASK, 1);
+	nbl_hw_write_mbx_regs(hw_mgt, NBL_MAILBOX_QINFO_CFG_RX_TABLE_ADDR,
+			      tbl.data, sizeof(tbl));
+}
+
+/*
+ * Program the mailbox TX queue config table: first write it with only the
+ * reset bit set, then write the ring DMA address, the size field and the
+ * enable bit with reset cleared.
+ */
+static void nbl_hw_config_mailbox_txq(struct nbl_hw_mgt *hw_mgt,
+				      dma_addr_t dma_addr, int size_bwid)
+{
+	struct nbl_mailbox_qinfo_cfg_table tbl = {};
+
+	/* pass 1: everything zero except the reset bit */
+	tbl.data[3] = FIELD_PREP(NBL_MAILBOX_QINFO_CFG_QUEUE_RST_MASK, 1);
+	nbl_hw_write_mbx_regs(hw_mgt, NBL_MAILBOX_QINFO_CFG_TX_TABLE_ADDR,
+			      tbl.data, sizeof(tbl));
+
+	/* pass 2: ring address and size, reset cleared, queue enabled */
+	tbl.data[0] = lower_32_bits(dma_addr);
+	tbl.data[1] = upper_32_bits(dma_addr);
+	tbl.data[2] = FIELD_PREP(NBL_MAILBOX_QINFO_CFG_QUEUE_SIZE_BWID_MASK,
+				 size_bwid);
+	tbl.data[3] = FIELD_PREP(NBL_MAILBOX_QINFO_CFG_QUEUE_RST_MASK, 0) |
+		      FIELD_PREP(NBL_MAILBOX_QINFO_CFG_QUEUE_EN_MASK, 1);
+	nbl_hw_write_mbx_regs(hw_mgt, NBL_MAILBOX_QINFO_CFG_TX_TABLE_ADDR,
+			      tbl.data, sizeof(tbl));
+}
+
+/*
+ * Stop the mailbox RX queue by writing its config table with every field
+ * zero except the reset bit.
+ */
+static void nbl_hw_stop_mailbox_rxq(struct nbl_hw_mgt *hw_mgt)
+{
+	struct nbl_mailbox_qinfo_cfg_table tbl = {};
+
+	tbl.data[3] = FIELD_PREP(NBL_MAILBOX_QINFO_CFG_QUEUE_RST_MASK, 1);
+	nbl_hw_write_mbx_regs(hw_mgt, NBL_MAILBOX_QINFO_CFG_RX_TABLE_ADDR,
+			      tbl.data, sizeof(tbl));
+}
+
+/*
+ * Stop the mailbox TX queue by writing its config table with every field
+ * zero except the reset bit.
+ */
+static void nbl_hw_stop_mailbox_txq(struct nbl_hw_mgt *hw_mgt)
+{
+	struct nbl_mailbox_qinfo_cfg_table tbl = {};
+
+	tbl.data[3] = FIELD_PREP(NBL_MAILBOX_QINFO_CFG_QUEUE_RST_MASK, 1);
+	nbl_hw_write_mbx_regs(hw_mgt, NBL_MAILBOX_QINFO_CFG_TX_TABLE_ADDR,
+			      tbl.data, sizeof(tbl));
+}
+
+/* Read and return the 32-bit value of NBL_PCIE_HOST_K_PF_MASK_REG. */
+static u32 nbl_hw_get_host_pf_mask(struct nbl_hw_mgt *hw_mgt)
+{
+	u32 mask;
+
+	nbl_hw_rd_regs(hw_mgt, NBL_PCIE_HOST_K_PF_MASK_REG, &mask,
+		       sizeof(mask));
+
+	return mask;
+}
+
+/*
+ * Write the bus/device/function triple of @func_id into its entry of the
+ * mailbox qinfo map table.
+ */
+static void nbl_hw_cfg_mailbox_qinfo(struct nbl_hw_mgt *hw_mgt, u16 func_id,
+				     u8 bus, u8 devid, u8 function)
+{
+	u32 map = FIELD_PREP(NBL_MAILBOX_QINFO_MAP_BUS_MASK, bus) |
+		  FIELD_PREP(NBL_MAILBOX_QINFO_MAP_DEVID_MASK, devid) |
+		  FIELD_PREP(NBL_MAILBOX_QINFO_MAP_FUNCTION_MASK, function);
+
+	nbl_hw_wr_regs(hw_mgt, NBL_MAILBOX_QINFO_MAP_REG_ARR(func_id), &map,
+		       sizeof(map));
+}
+
 static struct nbl_hw_ops hw_ops = {
+	.update_mailbox_queue_tail_ptr = nbl_hw_update_mailbox_queue_tail_ptr,
+	.config_mailbox_rxq = nbl_hw_config_mailbox_rxq,
+	.config_mailbox_txq = nbl_hw_config_mailbox_txq,
+	.stop_mailbox_rxq = nbl_hw_stop_mailbox_rxq,
+	.stop_mailbox_txq = nbl_hw_stop_mailbox_txq,
+	.get_host_pf_mask = nbl_hw_get_host_pf_mask,
+	.cfg_mailbox_qinfo = nbl_hw_cfg_mailbox_qinfo,
+
 };
 
 /* Structure starts here, adding an op should not modify anything below */
@@ -105,6 +249,8 @@ int nbl_hw_init_leonis(struct nbl_adapter *adapter)
 		goto mailbox_ioremap_err;
 	}
 
+	spin_lock_init(&hw_mgt->reg_lock);
+
 	hw_ops_tbl = nbl_hw_setup_ops(common, hw_mgt);
 	if (IS_ERR(hw_ops_tbl)) {
 		ret = PTR_ERR(hw_ops_tbl);
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h
index a554900d9ca6..d2c85175554d 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h
@@ -12,4 +12,61 @@
 #include "../nbl_hw_reg.h"
 
 #define NBL_BYTES_IN_REG 4
+
+/*  ----------  REG BASE ADDR  ----------  */
+/* Interface modules base addr */
+#define NBL_INTF_HOST_PCOMPLETER_BASE		0x00f08000
+#define NBL_INTF_HOST_PADPT_BASE		0x00f4c000
+#define NBL_INTF_HOST_MAILBOX_BASE		0x00fb0000
+#define NBL_INTF_HOST_PCIE_BASE			0X01504000
+/* DP modules base addr */
+#define NBL_DP_USTORE_BASE			0x00104000
+#define NBL_DP_UQM_BASE				0x00114000
+#define NBL_DP_UPED_BASE			0x0015c000
+#define NBL_DP_UVN_BASE				0x00244000
+#define NBL_DP_DSCH_BASE			0x00404000
+#define NBL_DP_SHAPING_BASE			0x00504000
+#define NBL_DP_DVN_BASE				0x00514000
+#define NBL_DP_DSTORE_BASE			0x00704000
+#define NBL_DP_DQM_BASE				0x00714000
+#define NBL_DP_DPED_BASE			0x0075c000
+#define NBL_DP_DDMUX_BASE			0x00984000
+/*  --------  MAILBOX BAR2 -----  */
+#define NBL_MAILBOX_NOTIFY_ADDR			0x00000000
+#define NBL_MAILBOX_BAR_REG			0x00000000
+#define NBL_MAILBOX_QINFO_CFG_RX_TABLE_ADDR	0x10
+#define NBL_MAILBOX_QINFO_CFG_TX_TABLE_ADDR	0x20
+#define NBL_MAILBOX_QINFO_CFG_DBG_TABLE_ADDR	0x30
+
+/*  --------  MAILBOX  --------  */
+
+/* mailbox BAR qinfo_cfg_table */
+#define MAILBOX_QINFO_CFG_TABLE_DWLEN	4
+/* data[2] */
+#define NBL_MAILBOX_QINFO_CFG_QUEUE_SIZE_BWID_MASK	GENMASK(3, 0)
+/* data[3] */
+#define NBL_MAILBOX_QINFO_CFG_QUEUE_RST_MASK		BIT(0)
+#define NBL_MAILBOX_QINFO_CFG_QUEUE_EN_MASK		BIT(1)
+#define NBL_MAILBOX_QINFO_CFG_DIF_ERR_MASK		BIT(2)
+#define NBL_MAILBOX_QINFO_CFG_PTR_ERR_MASK		BIT(3)
+struct nbl_mailbox_qinfo_cfg_table {
+	u32 data[MAILBOX_QINFO_CFG_TABLE_DWLEN];
+};
+
+/*  --------  MAILBOX BAR0 -----  */
+/* mailbox qinfo_map_table */
+#define NBL_MAILBOX_QINFO_MAP_REG_ARR(func_id) \
+	(NBL_INTF_HOST_MAILBOX_BASE + 0x00001000 + (func_id) * sizeof(u32))
+
+/* MAILBOX qinfo_map_table */
+#define NBL_MAILBOX_QINFO_MAP_FUNCTION_MASK		GENMASK(2, 0)
+#define NBL_MAILBOX_QINFO_MAP_DEVID_MASK		GENMASK(7, 3)
+#define NBL_MAILBOX_QINFO_MAP_BUS_MASK			GENMASK(15, 8)
+#define NBL_MAILBOX_QINFO_MAP_MSIX_IDX_MASK		GENMASK(28, 16)
+#define NBL_MAILBOX_QINFO_MAP_MSIX_IDX_VALID_MASK	BIT(29)
+
+/*  --------  HOST_PCIE  --------  */
+#define NBL_PCIE_HOST_K_PF_MASK_REG (NBL_INTF_HOST_PCIE_BASE + 0x00001004)
+#define NBL_PCIE_HOST_TL_CFG_BUSDEV (NBL_INTF_HOST_PCIE_BASE + 0x11040)
+
 #endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_reg.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_reg.h
index 1828251e8c2a..18f3fa078758 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_reg.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_reg.h
@@ -26,6 +26,7 @@ struct nbl_hw_mgt {
 	u8 __iomem *mailbox_bar_hw_addr;
 	u64 notify_offset;
 	resource_size_t hw_size;
+	spinlock_t reg_lock; /* Protect reg access */
 };
 
 static inline u32 rd32(u8 __iomem *addr, u64 reg)
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_channel.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_channel.h
index f82926e2152c..1a4b3e81e231 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_channel.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_channel.h
@@ -10,6 +10,38 @@
 
 struct nbl_channel_mgt;
 struct nbl_adapter;
+#define NBL_CHAN_SEND(chan_send, dst_id, mesg_type, argument, arg_length,\
+		      response, resp_length, need_ack)			\
+do {									\
+	typeof(chan_send)	*__chan_send = &(chan_send);		\
+	__chan_send->dstid	= (dst_id);				\
+	__chan_send->msg_type	= (mesg_type);				\
+	__chan_send->arg	= (argument);				\
+	__chan_send->arg_len	= (arg_length);				\
+	__chan_send->resp	= (response);				\
+	__chan_send->resp_len	= (resp_length);			\
+	__chan_send->ack	= (need_ack);				\
+} while (0)
+
+#define NBL_CHAN_ACK(chan_ack, dst_id, mesg_type, msg_id, err_code, ack_data, \
+		     data_length)					\
+do {									\
+	typeof(chan_ack)	*__chan_ack = &(chan_ack);		\
+	__chan_ack->dstid	= (dst_id);				\
+	__chan_ack->msg_type	= (mesg_type);				\
+	__chan_ack->msgid	= (msg_id);				\
+	__chan_ack->err		= (err_code);				\
+	__chan_ack->data	= (ack_data);				\
+	__chan_ack->data_len	= (data_length);			\
+} while (0)
+
+typedef void (*nbl_chan_resp)(void *, u16, u16, void *, u32);
+
+enum {
+	NBL_CHAN_RESP_OK,
+	NBL_CHAN_RESP_ERR,
+};
+
 enum nbl_chan_msg_type {
 	NBL_CHAN_MSG_ACK,
 	NBL_CHAN_MSG_ADD_MACVLAN,
@@ -233,6 +265,12 @@ enum nbl_chan_msg_type {
 	NBL_CHAN_MSG_MAILBOX_MAX,
 };
 
+enum nbl_chan_state {
+	NBL_CHAN_INTERRUPT_READY,
+	NBL_CHAN_ABNORMAL,
+	NBL_CHAN_STATE_NBITS
+};
+
 struct nbl_chan_param_cfg_msix_map {
 	__le16 num_net_msix;
 	__le16 num_others_msix;
@@ -259,12 +297,57 @@ struct nbl_chan_param_get_eth_id {
 	u8 rsvd[3];
 };
 
+struct nbl_board_port_info {
+	u8 eth_num;
+	u8 eth_speed;
+	u8 p4_version;
+	u8 rsv[5];
+};
+
+struct nbl_chan_send_info {
+	void *arg;
+	size_t arg_len;
+	void *resp;
+	size_t resp_len;
+	u16 dstid;
+	u16 msg_type;
+	u16 ack;
+	u16 ack_len;
+};
+
+struct nbl_chan_ack_info {
+	void *data;
+	int err;
+	u32 data_len;
+	u16 dstid;
+	u16 msg_type;
+	u16 msgid;
+};
+
 enum nbl_channel_type {
 	NBL_CHAN_TYPE_MAILBOX,
 	NBL_CHAN_TYPE_MAX
 };
 
 struct nbl_channel_ops {
+	int (*send_msg)(struct nbl_channel_mgt *chan_mgt,
+			struct nbl_chan_send_info *chan_send);
+	int (*send_ack)(struct nbl_channel_mgt *chan_mgt,
+			struct nbl_chan_ack_info *chan_ack);
+	int (*register_msg)(struct nbl_channel_mgt *chan_mgt, u16 msg_type,
+			    nbl_chan_resp func, void *callback_priv);
+	void (*cfg_chan_qinfo_map_table)(struct nbl_channel_mgt *chan_mgt);
+	bool (*check_queue_exist)(struct nbl_channel_mgt *chan_mgt,
+				  u8 chan_type);
+	int (*setup_queue)(struct nbl_channel_mgt *chan_mgt, u8 chan_type);
+	int (*teardown_queue)(struct nbl_channel_mgt *chan_mgt, u8 chan_type);
+	void (*clean_queue_subtask)(struct nbl_channel_mgt *chan_mgt,
+				    u8 chan_type);
+	void (*register_chan_task)(struct nbl_channel_mgt *chan_mgt,
+				   u8 chan_type, struct work_struct *task);
+	void (*set_queue_state)(struct nbl_channel_mgt *chan_mgt,
+				enum nbl_chan_state state, u8 chan_type,
+				u8 set);
 };
 
 struct nbl_channel_ops_tbl {
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h
index 03c19e1c8c3c..633f7100beb0 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h
@@ -11,6 +11,8 @@
 #include <linux/device.h>
 #include "nbl_include.h"
 
+struct nbl_hash_tbl_mgt;
+
 struct nbl_common_info {
 	struct pci_dev *pdev;
 	struct device *dev;
@@ -31,4 +33,28 @@ struct nbl_common_info {
 	u8 has_net;
 };
 
+struct nbl_hash_tbl_key {
+	struct device *dev;
+	u16 key_size;
+	u16 data_size; /* does not include the key or node member */
+	u16 bucket_size;
+	u16 resv;
+};
+
+struct nbl_hash_tbl_del_key {
+	void *action_priv;
+	void (*action_func)(void *priv, void *key, void *data);
+};
+
+u32 nbl_common_pf_id_subtraction_mgtpf_id(struct nbl_common_info *common,
+					  u32 pf_id);
+
+struct nbl_hash_tbl_mgt *
+nbl_common_init_hash_table(struct nbl_hash_tbl_key *key);
+void nbl_common_remove_hash_table(struct nbl_hash_tbl_mgt *tbl_mgt,
+				  struct nbl_hash_tbl_del_key *key);
+int nbl_common_alloc_hash_node(struct nbl_hash_tbl_mgt *tbl_mgt, void *key,
+			       void *data, void **out_data);
+void *nbl_common_get_hash_node(struct nbl_hash_tbl_mgt *tbl_mgt, void *key);
+
 #endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_hw.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_hw.h
index 168504b30973..db737157b603 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_hw.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_hw.h
@@ -6,9 +6,36 @@
 #ifndef _NBL_DEF_HW_H_
 #define _NBL_DEF_HW_H_
 
+#include <linux/types.h>
+
 struct nbl_hw_mgt;
 struct nbl_adapter;
 struct nbl_hw_ops {
+	void (*configure_msix_map)(struct nbl_hw_mgt *hw_mgt, u16 func_id,
+				   bool valid, dma_addr_t dma_addr, u8 bus,
+				   u8 devid, u8 function);
+	void (*configure_msix_info)(struct nbl_hw_mgt *hw_mgt, u16 func_id,
+				    bool valid, u16 interrupt_id, u8 bus,
+				    u8 devid, u8 function,
+				    bool net_msix_mask_en);
+	void (*update_mailbox_queue_tail_ptr)(struct nbl_hw_mgt *hw_mgt,
+					      u16 tail_ptr, u8 txrx);
+	void (*config_mailbox_rxq)(struct nbl_hw_mgt *hw_mgt,
+				   dma_addr_t dma_addr, int size_bwid);
+	void (*config_mailbox_txq)(struct nbl_hw_mgt *hw_mgt,
+				   dma_addr_t dma_addr, int size_bwid);
+	void (*stop_mailbox_rxq)(struct nbl_hw_mgt *hw_mgt);
+	void (*stop_mailbox_txq)(struct nbl_hw_mgt *hw_mgt);
+	u32 (*get_host_pf_mask)(struct nbl_hw_mgt *hw_mgt);
+	u8 (*get_real_bus)(struct nbl_hw_mgt *hw_mgt);
+
+	void (*cfg_mailbox_qinfo)(struct nbl_hw_mgt *hw_mgt, u16 func_id,
+				  u8 bus, u8 devid, u8 function);
+	void (*set_mailbox_irq)(struct nbl_hw_mgt *hw_mgt, u16 func_id,
+				bool enable_msix, u16 global_vec_id);
+	u32 (*get_fw_eth_map)(struct nbl_hw_mgt *hw_mgt);
+	void (*get_board_info)(struct nbl_hw_mgt *hw_mgt,
+			       struct nbl_board_port_info *board);
 };
 
 struct nbl_hw_ops_tbl {
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h
index 65fe9a42ee2c..2a1ae9a1eb9d 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h
@@ -10,6 +10,12 @@
 
 /*  ------  Basic definitions  -------  */
 #define NBL_DRIVER_NAME					"nbl"
+#define NBL_MAX_PF					8
+#define NBL_NEXT_ID(id, max)				\
+	({						\
+		typeof(id) _id = (id);			\
+		((_id) == (max) ? 0 : (_id) + 1);	\
+	})
 
 enum nbl_product_type {
 	NBL_LEONIS_TYPE,
-- 
2.47.3


^ permalink raw reply related

* [PATCH v18 net-next 02/11] net/nebula-matrix: add our driver architecture
From: illusion.wang @ 2026-06-11  4:49 UTC (permalink / raw)
  To: dimon.zhao, illusion.wang, alvin.wang, sam.chen, netdev
  Cc: andrew+netdev, corbet, kuba, horms, linux-doc, pabeni,
	vadim.fedorenko, lukas.bulwahn, edumazet, enelsonmoore, skhan,
	hkallweit1, open list
In-Reply-To: <20260611044916.2383-1-illusion.wang@nebula-matrix.com>

This commit introduces the baseline driver architecture for the
nebula-matrix networking device. It establishes the Hardware, Channel,
Resource, Dispatch, and Device layers for device management.

Our driver architecture:
Hardware (HW), Channel, Resource, Dispatch, and Device layers; struct
initialization/deinitialization; and operation set registration/
unregistration.

Our driver architecture is relatively complex because the code is highly
reusable and designed to support multiple features. Additionally, the
codebase supports multiple chip variants, each with distinct
hardware-software interactions.
To ensure compatibility, our architecture is divided into the following
layers:

1. Dev Layer (Device Layer)
The top-level business logic layer where all operations are
device-centric. Every operation is performed relative to the device
context. The integration of base functions encompasses:
management (ctrl, only for Leonis PF0), network (net_dev, not included
at this time), and common.

2. Dispatch Layer
Distributes service requests to specific data operations in one of two
ways: direct pass-through, or handling by the management PF. It shields
the upper layer from differences in where the underlying operation is
actually carried out.
It describes the processing locations and paths of the services.

3. Resource Layer
Handles tasks dispatched from Dispatch Layer. These tasks fall into two
categories:
3.1 Hardware control
The Resource Layer further invokes the HW Layer when hardware access is
needed, as only the HW Layer has OS-level privileges.
3.2 Software resource management
Operations like packet statistics collection that don't require hardware
access.

4. HW Layer (Hardware Layer)
Serves the Resource Layer by interacting with different hardware
chipsets. Writes to hardware registers to drive the hardware based on
Resource Layer directives.

5. Channel Layer

Handles communication between PF0 (which has the ctrl function) and the
other PFs, and provides basic interaction channels.

6. Common Layer
Provides fundamental services.

Signed-off-by: illusion.wang <illusion.wang@nebula-matrix.com>
---
 .../net/ethernet/nebula-matrix/nbl/Makefile   |   7 +-
 .../nbl/nbl_channel/nbl_channel.c             |  78 +++++++
 .../nbl/nbl_channel/nbl_channel.h             |  29 +++
 .../net/ethernet/nebula-matrix/nbl/nbl_core.h |  43 ++++
 .../nebula-matrix/nbl/nbl_core/nbl_dev.c      |  56 +++++
 .../nebula-matrix/nbl/nbl_core/nbl_dev.h      |  27 +++
 .../nebula-matrix/nbl/nbl_core/nbl_dispatch.c |  78 +++++++
 .../nebula-matrix/nbl/nbl_core/nbl_dispatch.h |  25 +++
 .../nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c  | 144 +++++++++++++
 .../nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h  |  14 ++
 .../nbl_hw_leonis/nbl_resource_leonis.c       |  87 ++++++++
 .../nbl_hw_leonis/nbl_resource_leonis.h       |  10 +
 .../nebula-matrix/nbl/nbl_hw/nbl_hw_reg.h     |  73 +++++++
 .../nebula-matrix/nbl/nbl_hw/nbl_resource.h   |  31 +++
 .../nbl/nbl_include/nbl_def_channel.h         |  26 +++
 .../nbl/nbl_include/nbl_def_common.h          |  34 ++++
 .../nbl/nbl_include/nbl_def_dev.h             |  16 ++
 .../nbl/nbl_include/nbl_def_dispatch.h        |  30 +++
 .../nbl/nbl_include/nbl_def_hw.h              |  22 ++
 .../nbl/nbl_include/nbl_def_resource.h        |  22 ++
 .../nbl/nbl_include/nbl_include.h             |  10 +
 .../nbl/nbl_include/nbl_product_base.h        |  19 ++
 .../net/ethernet/nebula-matrix/nbl/nbl_main.c | 191 ++++++++++++++++++
 23 files changed, 1071 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.c
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.c
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dispatch.c
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dispatch.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.c
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_reg.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_channel.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_dev.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_dispatch.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_hw.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_resource.h
 create mode 100644 drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_product_base.h

diff --git a/drivers/net/ethernet/nebula-matrix/nbl/Makefile b/drivers/net/ethernet/nebula-matrix/nbl/Makefile
index b90fba239401..271605920396 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/Makefile
+++ b/drivers/net/ethernet/nebula-matrix/nbl/Makefile
@@ -3,4 +3,9 @@
 
 obj-$(CONFIG_NBL) := nbl.o
 
-nbl-objs +=      nbl_main.o
+nbl-objs +=       nbl_channel/nbl_channel.o \
+				nbl_hw/nbl_hw_leonis/nbl_hw_leonis.o \
+				nbl_hw/nbl_hw_leonis/nbl_resource_leonis.o \
+				nbl_core/nbl_dispatch.o \
+				nbl_core/nbl_dev.o \
+				nbl_main.o
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.c
new file mode 100644
index 000000000000..c7689f0e4029
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include "nbl_channel.h"
+
+static struct nbl_channel_ops chan_ops = {
+};
+
+static struct nbl_channel_mgt *
+nbl_chan_setup_chan_mgt(struct nbl_adapter *adapter)
+{
+	struct nbl_hw_ops_tbl *hw_ops_tbl = adapter->intf.hw_ops_tbl;
+	struct nbl_common_info *common = &adapter->common;
+	struct device *dev = &adapter->pdev->dev;
+	struct nbl_channel_mgt *chan_mgt;
+	struct nbl_chan_info *mailbox;
+
+	chan_mgt = devm_kzalloc(dev, sizeof(*chan_mgt), GFP_KERNEL);
+	if (!chan_mgt)
+		return ERR_PTR(-ENOMEM);
+
+	chan_mgt->common = common;
+	chan_mgt->hw_ops_tbl = hw_ops_tbl;
+
+	mailbox = devm_kzalloc(dev, sizeof(*mailbox), GFP_KERNEL);
+	if (!mailbox)
+		return ERR_PTR(-ENOMEM);
+	mailbox->chan_type = NBL_CHAN_TYPE_MAILBOX;
+	chan_mgt->chan_info[NBL_CHAN_TYPE_MAILBOX] = mailbox;
+
+	return chan_mgt;
+}
+
+static struct nbl_channel_ops_tbl *
+nbl_chan_setup_ops(struct device *dev, struct nbl_channel_mgt *chan_mgt)
+{
+	struct nbl_channel_ops_tbl *chan_ops_tbl;
+
+	chan_ops_tbl = devm_kzalloc(dev, sizeof(*chan_ops_tbl), GFP_KERNEL);
+	if (!chan_ops_tbl)
+		return ERR_PTR(-ENOMEM);
+
+	chan_ops_tbl->ops = &chan_ops;
+	chan_ops_tbl->priv = chan_mgt;
+
+	return chan_ops_tbl;
+}
+
+int nbl_chan_init_common(struct nbl_adapter *adap)
+{
+	struct nbl_channel_ops_tbl *chan_ops_tbl;
+	struct device *dev = &adap->pdev->dev;
+	struct nbl_channel_mgt *chan_mgt;
+	int ret;
+
+	chan_mgt = nbl_chan_setup_chan_mgt(adap);
+	if (IS_ERR(chan_mgt)) {
+		ret = PTR_ERR(chan_mgt);
+		return ret;
+	}
+
+	chan_ops_tbl = nbl_chan_setup_ops(dev, chan_mgt);
+	if (IS_ERR(chan_ops_tbl)) {
+		ret = PTR_ERR(chan_ops_tbl);
+		return ret;
+	}
+	adap->intf.channel_ops_tbl = chan_ops_tbl;
+	adap->core.chan_mgt = chan_mgt;
+	return 0;
+}
+
+void nbl_chan_remove_common(struct nbl_adapter *adap)
+{
+}
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.h
new file mode 100644
index 000000000000..637912d1e806
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_channel/nbl_channel.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_CHANNEL_H_
+#define _NBL_CHANNEL_H_
+
+#include <linux/types.h>
+
+#include "../nbl_include/nbl_include.h"
+#include "../nbl_include/nbl_product_base.h"
+#include "../nbl_include/nbl_def_channel.h"
+#include "../nbl_include/nbl_def_hw.h"
+#include "../nbl_include/nbl_def_common.h"
+#include "../nbl_core.h"
+
+struct nbl_chan_info {
+	u8 chan_type;
+};
+
+struct nbl_channel_mgt {
+	struct nbl_common_info *common;
+	struct nbl_hw_ops_tbl *hw_ops_tbl;
+	struct nbl_chan_info *chan_info[NBL_CHAN_TYPE_MAX];
+	struct nbl_hash_tbl_mgt *handle_hash_tbl;
+};
+
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_core.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core.h
index c525114297b4..8c186d95d3e7 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_core.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core.h
@@ -6,6 +6,20 @@
 #ifndef _NBL_CORE_H_
 #define _NBL_CORE_H_
 
+#include <linux/pci.h>
+#include "nbl_include/nbl_include.h"
+#include "nbl_include/nbl_def_common.h"
+
+struct nbl_hw_mgt;
+struct nbl_hw_ops_tbl;
+struct nbl_resource_mgt;
+struct nbl_resource_ops_tbl;
+struct nbl_dispatch_mgt;
+struct nbl_dispatch_ops_tbl;
+struct nbl_channel_ops_tbl;
+struct nbl_channel_mgt;
+struct nbl_dev_mgt;
+
 enum {
 	NBL_CAP_HAS_CTRL_BIT,
 	NBL_CAP_HAS_NET_BIT,
@@ -13,4 +27,33 @@ enum {
 	NBL_CAP_IS_LEONIS_BIT,
 };
 
+struct nbl_interface {
+	struct nbl_hw_ops_tbl *hw_ops_tbl;
+	struct nbl_resource_ops_tbl *resource_ops_tbl;
+	struct nbl_dispatch_ops_tbl *dispatch_ops_tbl;
+	struct nbl_channel_ops_tbl *channel_ops_tbl;
+};
+
+struct nbl_core {
+	struct nbl_hw_mgt *hw_mgt;
+	struct nbl_resource_mgt *res_mgt;
+	struct nbl_dispatch_mgt *disp_mgt;
+	struct nbl_dev_mgt *dev_mgt;
+	struct nbl_channel_mgt *chan_mgt;
+};
+
+struct nbl_adapter {
+	struct pci_dev *pdev;
+	struct nbl_core core;
+	struct nbl_interface intf;
+	struct nbl_common_info common;
+	struct nbl_product_base_ops *product_base_ops;
+};
+
+struct nbl_adapter *nbl_core_init(struct pci_dev *pdev,
+				  struct nbl_init_param *param);
+void nbl_core_remove(struct nbl_adapter *adapter);
+int nbl_core_start(struct nbl_adapter *adapter);
+void nbl_core_stop(struct nbl_adapter *adapter);
+
 #endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.c
new file mode 100644
index 000000000000..5deb21e35f8e
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+#include <linux/device.h>
+#include <linux/pci.h>
+#include "nbl_dev.h"
+
+static struct nbl_dev_mgt *nbl_dev_setup_dev_mgt(struct nbl_common_info *common)
+{
+	struct nbl_dev_mgt *dev_mgt;
+
+	dev_mgt = devm_kzalloc(common->dev, sizeof(*dev_mgt), GFP_KERNEL);
+	if (!dev_mgt)
+		return ERR_PTR(-ENOMEM);
+
+	dev_mgt->common = common;
+	return dev_mgt;
+}
+
+int nbl_dev_init(struct nbl_adapter *adapter)
+{
+	struct nbl_common_info *common = &adapter->common;
+	struct nbl_dispatch_ops_tbl *disp_ops_tbl =
+		adapter->intf.dispatch_ops_tbl;
+	struct nbl_channel_ops_tbl *chan_ops_tbl =
+		adapter->intf.channel_ops_tbl;
+	struct nbl_dev_mgt *dev_mgt;
+	int ret;
+
+	dev_mgt = nbl_dev_setup_dev_mgt(common);
+	if (IS_ERR(dev_mgt)) {
+		ret = PTR_ERR(dev_mgt);
+		return ret;
+	}
+
+	dev_mgt->disp_ops_tbl = disp_ops_tbl;
+	dev_mgt->chan_ops_tbl = chan_ops_tbl;
+	adapter->core.dev_mgt = dev_mgt;
+
+	return 0;
+}
+
+void nbl_dev_remove(struct nbl_adapter *adapter)
+{
+}
+
+/* ----------  Dev start process  ---------- */
+int nbl_dev_start(struct nbl_adapter *adapter)
+{
+	return 0;
+}
+
+void nbl_dev_stop(struct nbl_adapter *adapter)
+{
+}
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.h
new file mode 100644
index 000000000000..9b71092b99a0
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dev.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_DEV_H_
+#define _NBL_DEV_H_
+
+#include <linux/types.h>
+
+#include "../nbl_include/nbl_include.h"
+#include "../nbl_include/nbl_product_base.h"
+#include "../nbl_include/nbl_def_channel.h"
+#include "../nbl_include/nbl_def_hw.h"
+#include "../nbl_include/nbl_def_resource.h"
+#include "../nbl_include/nbl_def_dispatch.h"
+#include "../nbl_include/nbl_def_dev.h"
+#include "../nbl_include/nbl_def_common.h"
+#include "../nbl_core.h"
+
+struct nbl_dev_mgt {
+	struct nbl_common_info *common;
+	struct nbl_dispatch_ops_tbl *disp_ops_tbl;
+	struct nbl_channel_ops_tbl *chan_ops_tbl;
+};
+
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dispatch.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dispatch.c
new file mode 100644
index 000000000000..f0b4406ca560
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dispatch.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+#include <linux/device.h>
+#include <linux/pci.h>
+#include "nbl_dispatch.h"
+
+static struct nbl_dispatch_mgt *
+nbl_disp_setup_disp_mgt(struct nbl_common_info *common)
+{
+	struct nbl_dispatch_mgt *disp_mgt;
+	struct device *dev = common->dev;
+
+	disp_mgt = devm_kzalloc(dev, sizeof(*disp_mgt), GFP_KERNEL);
+	if (!disp_mgt)
+		return ERR_PTR(-ENOMEM);
+
+	disp_mgt->common = common;
+	return disp_mgt;
+}
+
+static struct nbl_dispatch_ops_tbl *
+nbl_disp_setup_ops(struct device *dev, struct nbl_dispatch_mgt *disp_mgt)
+{
+	struct nbl_dispatch_ops_tbl *disp_ops_tbl;
+	struct nbl_dispatch_ops *disp_ops;
+
+	disp_ops_tbl = devm_kzalloc(dev, sizeof(*disp_ops_tbl), GFP_KERNEL);
+	if (!disp_ops_tbl)
+		return ERR_PTR(-ENOMEM);
+
+	disp_ops = devm_kzalloc(dev, sizeof(*disp_ops), GFP_KERNEL);
+	if (!disp_ops)
+		return ERR_PTR(-ENOMEM);
+
+	disp_ops_tbl->ops = disp_ops;
+	disp_ops_tbl->priv = disp_mgt;
+
+	return disp_ops_tbl;
+}
+
+int nbl_disp_init(struct nbl_adapter *adapter)
+{
+	struct nbl_common_info *common = &adapter->common;
+	struct nbl_dispatch_ops_tbl *disp_ops_tbl;
+	struct nbl_resource_ops_tbl *res_ops_tbl =
+		adapter->intf.resource_ops_tbl;
+	struct nbl_channel_ops_tbl *chan_ops_tbl =
+		adapter->intf.channel_ops_tbl;
+	struct device *dev = &adapter->pdev->dev;
+	struct nbl_dispatch_mgt *disp_mgt;
+	int ret;
+
+	disp_mgt = nbl_disp_setup_disp_mgt(common);
+	if (IS_ERR(disp_mgt)) {
+		ret = PTR_ERR(disp_mgt);
+		return ret;
+	}
+
+	disp_ops_tbl = nbl_disp_setup_ops(dev, disp_mgt);
+	if (IS_ERR(disp_ops_tbl)) {
+		ret = PTR_ERR(disp_ops_tbl);
+		return ret;
+	}
+
+	disp_mgt->res_ops_tbl = res_ops_tbl;
+	disp_mgt->chan_ops_tbl = chan_ops_tbl;
+	disp_mgt->disp_ops_tbl = disp_ops_tbl;
+	adapter->core.disp_mgt = disp_mgt;
+	adapter->intf.dispatch_ops_tbl = disp_ops_tbl;
+
+	return 0;
+}
+
+void nbl_disp_remove(struct nbl_adapter *adapter)
+{
+}
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dispatch.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dispatch.h
new file mode 100644
index 000000000000..fa7f4597febe
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_core/nbl_dispatch.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_DISPATCH_H_
+#define _NBL_DISPATCH_H_
+#include "../nbl_include/nbl_include.h"
+#include "../nbl_include/nbl_product_base.h"
+#include "../nbl_include/nbl_def_channel.h"
+#include "../nbl_include/nbl_def_hw.h"
+#include "../nbl_include/nbl_def_resource.h"
+#include "../nbl_include/nbl_def_dispatch.h"
+#include "../nbl_include/nbl_def_common.h"
+#include "../nbl_core.h"
+
+struct nbl_dispatch_mgt {
+	struct nbl_common_info *common;
+	struct nbl_resource_ops_tbl *res_ops_tbl;
+	struct nbl_channel_ops_tbl *chan_ops_tbl;
+	struct nbl_dispatch_ops_tbl *disp_ops_tbl;
+	DECLARE_BITMAP(ctrl_lvl, NBL_DISP_CTRL_LVL_MAX);
+};
+
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
new file mode 100644
index 000000000000..08ddbf5b0eb2
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/bits.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include "nbl_hw_leonis.h"
+
+static struct nbl_hw_ops hw_ops = {
+};
+
+/* Structure starts here, adding an op should not modify anything below */
+static struct nbl_hw_mgt *nbl_hw_setup_hw_mgt(struct nbl_common_info *common)
+{
+	struct device *dev = common->dev;
+	struct nbl_hw_mgt *hw_mgt;
+
+	hw_mgt = devm_kzalloc(dev, sizeof(*hw_mgt), GFP_KERNEL);
+	if (!hw_mgt)
+		return ERR_PTR(-ENOMEM);
+
+	hw_mgt->common = common;
+
+	return hw_mgt;
+}
+
+static struct nbl_hw_ops_tbl *nbl_hw_setup_ops(struct nbl_common_info *common,
+					       struct nbl_hw_mgt *hw_mgt)
+{
+	struct nbl_hw_ops_tbl *hw_ops_tbl;
+	struct device *dev;
+
+	dev = common->dev;
+	hw_ops_tbl = devm_kzalloc(dev, sizeof(*hw_ops_tbl), GFP_KERNEL);
+	if (!hw_ops_tbl)
+		return ERR_PTR(-ENOMEM);
+
+	hw_ops_tbl->ops = &hw_ops;
+	hw_ops_tbl->priv = hw_mgt;
+
+	return hw_ops_tbl;
+}
+
+int nbl_hw_init_leonis(struct nbl_adapter *adapter)
+{
+	struct nbl_common_info *common = &adapter->common;
+	struct pci_dev *pdev = common->pdev;
+	struct nbl_hw_ops_tbl *hw_ops_tbl;
+	struct nbl_hw_mgt *hw_mgt;
+	resource_size_t bar_start;
+	resource_size_t bar_len;
+	int bar_mask;
+	int ret;
+
+	hw_mgt = nbl_hw_setup_hw_mgt(common);
+	if (IS_ERR(hw_mgt)) {
+		ret = PTR_ERR(hw_mgt);
+		goto setup_mgt_fail;
+	}
+	bar_mask = BIT(NBL_MEMORY_BAR) | BIT(NBL_MAILBOX_BAR);
+	ret = pci_request_selected_regions(pdev, bar_mask, NBL_DRIVER_NAME);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Request memory bar and mailbox bar failed, err = %d\n",
+			ret);
+		goto request_bar_region_fail;
+	}
+
+	bar_len = pci_resource_len(pdev, NBL_MEMORY_BAR);
+	bar_start = pci_resource_start(pdev, NBL_MEMORY_BAR);
+	if (!(pci_resource_flags(pdev, NBL_MEMORY_BAR) & IORESOURCE_MEM) ||
+	    bar_len < NBL_RDMA_NOTIFY_OFF) {
+		dev_err(&pdev->dev,
+			"Invalid BAR: unassigned or length too small\n");
+		ret = -EINVAL;
+		goto ioremap_err;
+	}
+	if (common->has_ctrl) {
+		if (bar_len < NBL_HW_REG_SPACE_SIZE) {
+			dev_err(&pdev->dev,
+				"Invalid BAR: unassigned or length too small\n");
+			ret = -EINVAL;
+			goto ioremap_err;
+		}
+		hw_mgt->hw_addr =
+			ioremap(bar_start, bar_len - NBL_RDMA_NOTIFY_OFF);
+		hw_mgt->hw_size = bar_len - NBL_RDMA_NOTIFY_OFF;
+	} else {
+		hw_mgt->hw_addr = ioremap(bar_start, NBL_RDMA_NOTIFY_OFF);
+		hw_mgt->hw_size = NBL_RDMA_NOTIFY_OFF;
+	}
+	if (!hw_mgt->hw_addr) {
+		dev_err(&pdev->dev, "Memory bar ioremap failed\n");
+		ret = -EIO;
+		goto ioremap_err;
+	}
+
+	hw_mgt->mailbox_bar_hw_addr = pci_ioremap_bar(pdev, NBL_MAILBOX_BAR);
+	if (!hw_mgt->mailbox_bar_hw_addr) {
+		dev_err(&pdev->dev, "Mailbox bar ioremap failed\n");
+		ret = -EIO;
+		goto mailbox_ioremap_err;
+	}
+
+	hw_ops_tbl = nbl_hw_setup_ops(common, hw_mgt);
+	if (IS_ERR(hw_ops_tbl)) {
+		ret = PTR_ERR(hw_ops_tbl);
+		goto setup_ops_fail;
+	}
+	hw_mgt->notify_offset = 0;
+	adapter->intf.hw_ops_tbl = hw_ops_tbl;
+	adapter->core.hw_mgt = hw_mgt;
+
+	return 0;
+
+setup_ops_fail:
+	iounmap(hw_mgt->mailbox_bar_hw_addr);
+mailbox_ioremap_err:
+	iounmap(hw_mgt->hw_addr);
+ioremap_err:
+	pci_release_selected_regions(pdev, bar_mask);
+request_bar_region_fail:
+setup_mgt_fail:
+	return ret;
+}
+
+void nbl_hw_remove_leonis(struct nbl_adapter *adapter)
+{
+	int bar_mask = BIT(NBL_MEMORY_BAR) | BIT(NBL_MAILBOX_BAR);
+	struct nbl_common_info *common = &adapter->common;
+	struct nbl_hw_mgt *hw_mgt = adapter->core.hw_mgt;
+	u8 __iomem *hw_addr = hw_mgt->hw_addr;
+	struct pci_dev *pdev = common->pdev;
+	u8 __iomem *mailbox_bar_hw_addr;
+
+	mailbox_bar_hw_addr = hw_mgt->mailbox_bar_hw_addr;
+
+	iounmap(mailbox_bar_hw_addr);
+	iounmap(hw_addr);
+	pci_release_selected_regions(pdev, bar_mask);
+}
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h
new file mode 100644
index 000000000000..77c67b67ba31
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_hw_leonis.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_HW_LEONIS_H_
+#define _NBL_HW_LEONIS_H_
+
+#include <linux/types.h>
+
+#include "../../nbl_include/nbl_include.h"
+#include "../nbl_hw_reg.h"
+
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.c
new file mode 100644
index 000000000000..4b4f8e2e7fe7
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+#include <linux/device.h>
+#include <linux/pci.h>
+#include "nbl_resource_leonis.h"
+
+static struct nbl_resource_ops res_ops = {
+};
+
+static struct nbl_resource_mgt *
+nbl_res_setup_res_mgt(struct nbl_common_info *common)
+{
+	struct nbl_resource_info *resource_info;
+	struct nbl_resource_mgt *res_mgt;
+	struct device *dev = common->dev;
+
+	res_mgt = devm_kzalloc(dev, sizeof(*res_mgt), GFP_KERNEL);
+	if (!res_mgt)
+		return ERR_PTR(-ENOMEM);
+	res_mgt->common = common;
+
+	resource_info =
+		devm_kzalloc(dev, sizeof(*resource_info), GFP_KERNEL);
+	if (!resource_info)
+		return ERR_PTR(-ENOMEM);
+	res_mgt->resource_info = resource_info;
+
+	return res_mgt;
+}
+
+static struct nbl_resource_ops_tbl *
+nbl_res_setup_ops(struct device *dev, struct nbl_resource_mgt *res_mgt)
+{
+	struct nbl_resource_ops_tbl *res_ops_tbl;
+
+	res_ops_tbl = devm_kzalloc(dev, sizeof(*res_ops_tbl), GFP_KERNEL);
+	if (!res_ops_tbl)
+		return ERR_PTR(-ENOMEM);
+
+	res_ops_tbl->ops = &res_ops;
+	res_ops_tbl->priv = res_mgt;
+
+	return res_ops_tbl;
+}
+
+static int nbl_res_start(struct nbl_resource_mgt *res_mgt)
+{
+	return 0;
+}
+
+int nbl_res_init_leonis(struct nbl_adapter *adap)
+{
+	struct nbl_channel_ops_tbl *chan_ops_tbl = adap->intf.channel_ops_tbl;
+	struct nbl_hw_ops_tbl *hw_ops_tbl = adap->intf.hw_ops_tbl;
+	struct nbl_common_info *common = &adap->common;
+	struct nbl_resource_ops_tbl *res_ops_tbl;
+	struct device *dev = &adap->pdev->dev;
+	struct nbl_resource_mgt *res_mgt;
+	int ret;
+
+	res_mgt = nbl_res_setup_res_mgt(common);
+	if (IS_ERR(res_mgt)) {
+		ret = PTR_ERR(res_mgt);
+		return ret;
+	}
+	res_mgt->chan_ops_tbl = chan_ops_tbl;
+	res_mgt->hw_ops_tbl = hw_ops_tbl;
+
+	ret = nbl_res_start(res_mgt);
+	if (ret)
+		return ret;
+
+	res_ops_tbl = nbl_res_setup_ops(dev, res_mgt);
+	if (IS_ERR(res_ops_tbl)) {
+		ret = PTR_ERR(res_ops_tbl);
+		return ret;
+	}
+	adap->intf.resource_ops_tbl = res_ops_tbl;
+	adap->core.res_mgt = res_mgt;
+	return 0;
+}
+
+void nbl_res_remove_leonis(struct nbl_adapter *adap)
+{
+}
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.h
new file mode 100644
index 000000000000..4e61a5c141e5
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_leonis/nbl_resource_leonis.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_RESOURCE_LEONIS_H_
+#define _NBL_RESOURCE_LEONIS_H_
+
+#include "../nbl_resource.h"
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_reg.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_reg.h
new file mode 100644
index 000000000000..1828251e8c2a
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_hw_reg.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_HW_REG_H_
+#define _NBL_HW_REG_H_
+
+#include <linux/types.h>
+
+#include "../nbl_include/nbl_product_base.h"
+#include "../nbl_include/nbl_def_channel.h"
+#include "../nbl_include/nbl_def_hw.h"
+#include "../nbl_include/nbl_def_common.h"
+#include "../nbl_core.h"
+
+#define NBL_MEMORY_BAR				0
+#define NBL_MAILBOX_BAR				2
+#define NBL_RDMA_NOTIFY_OFF			8192
+#define NBL_HW_DUMMY_REG			0x1300904
+#define NBL_HW_REG_SPACE_SIZE (32 * 1024 * 1024)
+
+struct nbl_hw_mgt {
+	struct nbl_common_info *common;
+	u8 __iomem *hw_addr;
+	u8 __iomem *mailbox_bar_hw_addr;
+	u64 notify_offset;
+	resource_size_t hw_size;
+};
+
+static inline u32 rd32(u8 __iomem *addr, u64 reg)
+{
+	return readl(addr + reg);
+}
+
+static inline void wr32(u8 __iomem *addr, u64 reg, u32 value)
+{
+	writel(value, addr + reg);
+}
+
+static inline void nbl_hw_wr32(struct nbl_hw_mgt *hw_mgt, u64 reg, u32 value)
+{
+	/* Used for emu; NOTE(review): no write throttling is visible here */
+	wr32(hw_mgt->hw_addr, reg, value);
+}
+
+static inline u32 nbl_hw_rd32(struct nbl_hw_mgt *hw_mgt, u64 reg)
+{
+	return rd32(hw_mgt->hw_addr, reg);
+}
+
+static inline void nbl_mbx_wr32(struct nbl_hw_mgt *hw_mgt, u64 reg, u32 value)
+{
+	writel(value, hw_mgt->mailbox_bar_hw_addr + reg);
+}
+
+/*
+ * Only call this when has_ctrl=true, which maps enough space
+ * (bar_len - 8192) to cover NBL_HW_DUMMY_REG (0x1300904).
+ * The flow/design guarantees this is only called in the
+ * has_ctrl path.
+ */
+static inline void nbl_flush_writes(struct nbl_hw_mgt *hw_mgt)
+{
+	nbl_hw_rd32(hw_mgt, NBL_HW_DUMMY_REG);
+}
+
+static inline u32 nbl_mbx_rd32(struct nbl_hw_mgt *hw_mgt, u64 reg)
+{
+	return readl(hw_mgt->mailbox_bar_hw_addr + reg);
+}
+
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.h
new file mode 100644
index 000000000000..5bfd0ddd1cec
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_hw/nbl_resource.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_RESOURCE_H_
+#define _NBL_RESOURCE_H_
+
+#include <linux/types.h>
+
+#include "../nbl_include/nbl_include.h"
+#include "../nbl_include/nbl_product_base.h"
+#include "../nbl_include/nbl_def_channel.h"
+#include "../nbl_include/nbl_def_hw.h"
+#include "../nbl_include/nbl_def_resource.h"
+#include "../nbl_include/nbl_def_common.h"
+#include "../nbl_core.h"
+
+struct nbl_resource_info {
+	void *reserved;  /* placeholder to be replaced in the future */
+};
+
+struct nbl_resource_mgt {
+	struct nbl_common_info *common;
+	struct nbl_resource_info *resource_info;
+	struct nbl_channel_ops_tbl *chan_ops_tbl;
+	struct nbl_hw_ops_tbl *hw_ops_tbl;
+	struct nbl_interrupt_mgt *intr_mgt;
+};
+
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_channel.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_channel.h
new file mode 100644
index 000000000000..ff03a53b9f5d
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_channel.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_DEF_CHANNEL_H_
+#define _NBL_DEF_CHANNEL_H_
+
+struct nbl_channel_mgt;
+struct nbl_adapter;
+enum nbl_channel_type {
+	NBL_CHAN_TYPE_MAILBOX,
+	NBL_CHAN_TYPE_MAX
+};
+
+struct nbl_channel_ops {
+};
+
+struct nbl_channel_ops_tbl {
+	struct nbl_channel_ops *ops;
+	struct nbl_channel_mgt *priv;
+};
+
+int nbl_chan_init_common(struct nbl_adapter *adapter);
+void nbl_chan_remove_common(struct nbl_adapter *adapter);
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h
new file mode 100644
index 000000000000..03c19e1c8c3c
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_common.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_DEF_COMMON_H_
+#define _NBL_DEF_COMMON_H_
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include "nbl_include.h"
+
+/*
+ * Common per-adapter information. The PCI identity, product type and
+ * capability fields commented below are filled in by nbl_core_init(); the
+ * remaining fields are not written by the code in this patch.
+ */
+struct nbl_common_info {
+	struct pci_dev *pdev;
+	struct device *dev;	/* &pdev->dev */
+	u32 msg_enable;
+	u16 vsi_id;
+	u8 eth_id;
+	u8 logic_eth_id;
+	u8 eth_num;
+
+	u8 function;	/* PCI_FUNC(pdev->devfn) */
+	u8 devid;	/* PCI_SLOT(pdev->devfn) */
+	u8 bus;		/* pdev->bus->number */
+	u8 hw_bus;
+	u16 mgt_pf;
+
+	enum nbl_product_type product_type;
+	u8 has_ctrl;	/* copied from nbl_init_param.caps.has_ctrl */
+	u8 has_net;	/* copied from nbl_init_param.caps.has_net */
+};
+
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_dev.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_dev.h
new file mode 100644
index 000000000000..32e6cce38d39
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_dev.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_DEF_DEV_H_
+#define _NBL_DEF_DEV_H_
+
+struct nbl_adapter;
+
+/*
+ * Dev-layer entry points. nbl_main.c calls nbl_dev_init() as the last step
+ * of nbl_core_init() and nbl_dev_remove() as the first step of
+ * nbl_core_remove(); nbl_dev_start()/nbl_dev_stop() back
+ * nbl_core_start()/nbl_core_stop().
+ */
+int nbl_dev_init(struct nbl_adapter *adapter);
+void nbl_dev_remove(struct nbl_adapter *adapter);
+int nbl_dev_start(struct nbl_adapter *adapter);
+void nbl_dev_stop(struct nbl_adapter *adapter);
+
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_dispatch.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_dispatch.h
new file mode 100644
index 000000000000..f8d40cf3e029
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_dispatch.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_DEF_DISPATCH_H_
+#define _NBL_DEF_DISPATCH_H_
+
+struct nbl_dispatch_mgt;
+struct nbl_adapter;
+/*
+ * Dispatch control levels; NBL_DISP_CTRL_LVL_MAX is the number of levels.
+ * NOTE(review): what each level selects is not visible in this patch;
+ * document it where the levels are first used.
+ */
+enum {
+	NBL_DISP_CTRL_LVL_NEVER = 0,
+	NBL_DISP_CTRL_LVL_MGT,
+	NBL_DISP_CTRL_LVL_NET,
+	NBL_DISP_CTRL_LVL_ALWAYS,
+	NBL_DISP_CTRL_LVL_MAX,
+};
+
+/* Dispatch-layer operations; currently holds only a placeholder member. */
+struct nbl_dispatch_ops {
+	void *reserved;  /* placeholder to be replaced in the future */
+};
+
+/*
+ * Pairs the dispatch ops with a dispatch management object.
+ * NOTE(review): priv is presumably the context handed to the ops; confirm
+ * once the ops have callbacks.
+ */
+struct nbl_dispatch_ops_tbl {
+	struct nbl_dispatch_ops *ops;
+	struct nbl_dispatch_mgt *priv;
+};
+
+/*
+ * Dispatch-layer init/remove for an adapter. nbl_core_init() in nbl_main.c
+ * calls nbl_disp_init() after the resource layer and before the dev layer.
+ */
+int nbl_disp_init(struct nbl_adapter *adapter);
+void nbl_disp_remove(struct nbl_adapter *adapter);
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_hw.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_hw.h
new file mode 100644
index 000000000000..168504b30973
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_hw.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_DEF_HW_H_
+#define _NBL_DEF_HW_H_
+
+struct nbl_hw_mgt;
+struct nbl_adapter;
+/* Hardware-layer operations; no callbacks are defined yet. */
+struct nbl_hw_ops {
+};
+
+/*
+ * Pairs the hw ops with a hw management object.
+ * NOTE(review): priv is presumably the context handed to the ops; confirm
+ * once the ops have callbacks.
+ */
+struct nbl_hw_ops_tbl {
+	struct nbl_hw_ops *ops;
+	struct nbl_hw_mgt *priv;
+};
+
+/*
+ * Hardware-layer init/remove for the Leonis product; used as the hw_init
+ * and hw_remove product ops in nbl_main.c.
+ */
+int nbl_hw_init_leonis(struct nbl_adapter *adapter);
+void nbl_hw_remove_leonis(struct nbl_adapter *adapter);
+
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_resource.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_resource.h
new file mode 100644
index 000000000000..d55934af5a9a
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_def_resource.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_DEF_RESOURCE_H_
+#define _NBL_DEF_RESOURCE_H_
+
+struct nbl_resource_mgt;
+struct nbl_adapter;
+
+/* Resource-layer operations; no callbacks are defined yet. */
+struct nbl_resource_ops {
+};
+
+/*
+ * Pairs the resource ops with a resource management object.
+ * NOTE(review): priv is presumably the context handed to the ops; confirm
+ * once the ops have callbacks.
+ */
+struct nbl_resource_ops_tbl {
+	struct nbl_resource_ops *ops;
+	struct nbl_resource_mgt *priv;
+};
+
+/*
+ * Resource-layer init/remove for the Leonis product; used as the res_init
+ * and res_remove product ops in nbl_main.c.
+ */
+int nbl_res_init_leonis(struct nbl_adapter *adapter);
+void nbl_res_remove_leonis(struct nbl_adapter *adapter);
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h
index 1046e6517b15..65fe9a42ee2c 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_include.h
@@ -11,6 +11,11 @@
 /*  ------  Basic definitions  -------  */
 #define NBL_DRIVER_NAME					"nbl"
 
+/*
+ * Supported products. NBL_PRODUCT_MAX sizes the product ops table in
+ * nbl_main.c and is also what nbl_get_func_param() reports when no
+ * supported product matches.
+ */
+enum nbl_product_type {
+	NBL_LEONIS_TYPE,
+	NBL_PRODUCT_MAX,
+};
+
 struct nbl_func_caps {
 	u32 has_ctrl:1;
 	u32 has_net:1;
@@ -18,4 +23,9 @@ struct nbl_func_caps {
 	u32 rsv:29;
 };
 
+/*
+ * Parameters handed to nbl_core_init(): the function capabilities and the
+ * product type, both derived from the PCI match data by
+ * nbl_get_func_param().
+ */
+struct nbl_init_param {
+	struct nbl_func_caps caps;
+	enum nbl_product_type product_type;
+};
+
 #endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_product_base.h b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_product_base.h
new file mode 100644
index 000000000000..fe4245d0ca99
--- /dev/null
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_include/nbl_product_base.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Nebula Matrix Limited.
+ */
+
+#ifndef _NBL_DEF_PRODUCT_BASE_H_
+#define _NBL_DEF_PRODUCT_BASE_H_
+
+struct nbl_adapter;
+/*
+ * Per-product init/remove callbacks for the hw, resource and channel
+ * layers. The init callbacks return 0 on success; nbl_core_init() treats any
+ * non-zero return as an error code.
+ */
+struct nbl_product_base_ops {
+	int (*hw_init)(struct nbl_adapter *p);
+	void (*hw_remove)(struct nbl_adapter *p);
+	int (*res_init)(struct nbl_adapter *p);
+	void (*res_remove)(struct nbl_adapter *p);
+	int (*chan_init)(struct nbl_adapter *p);
+	void (*chan_remove)(struct nbl_adapter *p);
+};
+
+#endif
diff --git a/drivers/net/ethernet/nebula-matrix/nbl/nbl_main.c b/drivers/net/ethernet/nebula-matrix/nbl/nbl_main.c
index 10c3536b327b..d4cbbe9da9cc 100644
--- a/drivers/net/ethernet/nebula-matrix/nbl/nbl_main.c
+++ b/drivers/net/ethernet/nebula-matrix/nbl/nbl_main.c
@@ -6,17 +6,208 @@
 #include <linux/device.h>
 #include <linux/pci.h>
 #include <linux/module.h>
+#include <linux/bits.h>
 #include "nbl_include/nbl_include.h"
+#include "nbl_include/nbl_product_base.h"
+#include "nbl_include/nbl_def_channel.h"
+#include "nbl_include/nbl_def_hw.h"
+#include "nbl_include/nbl_def_resource.h"
+#include "nbl_include/nbl_def_dispatch.h"
+#include "nbl_include/nbl_def_dev.h"
+#include "nbl_include/nbl_def_common.h"
 #include "nbl_core.h"
 
+/*
+ * Per-product init/remove callbacks, indexed by enum nbl_product_type.
+ * Each entry is keyed by its enumerator so the table stays correct if the
+ * enum is reordered or extended.
+ */
+static struct nbl_product_base_ops nbl_product_base_ops[NBL_PRODUCT_MAX] = {
+	[NBL_LEONIS_TYPE] = {
+		.hw_init	= nbl_hw_init_leonis,
+		.hw_remove	= nbl_hw_remove_leonis,
+		.res_init	= nbl_res_init_leonis,
+		.res_remove	= nbl_res_remove_leonis,
+		.chan_init	= nbl_chan_init_common,
+		.chan_remove	= nbl_chan_remove_common,
+	},
+};
+
+/* Start the adapter; thin wrapper that returns the result of nbl_dev_start(). */
+int nbl_core_start(struct nbl_adapter *adapter)
+{
+	return nbl_dev_start(adapter);
+}
+
+/* Stop the adapter; thin wrapper around nbl_dev_stop(). */
+void nbl_core_stop(struct nbl_adapter *adapter)
+{
+	nbl_dev_stop(adapter);
+}
+
+/*
+ * Select the product ops for the product type in @param and record them in
+ * @adapter.
+ *
+ * Returns the selected ops. If the product type is out of range, an error
+ * is logged, @adapter is left untouched and NULL is returned.
+ */
+static struct nbl_product_base_ops *
+nbl_core_setup_product_ops(struct nbl_adapter *adapter,
+			   struct nbl_init_param *param)
+{
+	enum nbl_product_type type = param->product_type;
+	struct nbl_product_base_ops *ops;
+
+	if (type >= NBL_PRODUCT_MAX) {
+		dev_err(&adapter->pdev->dev, "Unsupported product type\n");
+		return NULL;
+	}
+
+	ops = &nbl_product_base_ops[type];
+	adapter->product_base_ops = ops;
+
+	return ops;
+}
+
+/*
+ * Allocate the adapter for @pdev and bring up its layers.
+ *
+ * The adapter is allocated with devm_kzalloc(), so no path here frees it
+ * explicitly. Layers are initialized in the order hw, channel, resource,
+ * dispatch, dev; if one fails, the layers already brought up are removed in
+ * reverse order.
+ *
+ * Returns the adapter on success or an ERR_PTR() on failure: -ENOMEM if the
+ * allocation fails, -ENOENT for an unsupported product type, otherwise the
+ * error returned by the failing layer init.
+ */
+struct nbl_adapter *nbl_core_init(struct pci_dev *pdev,
+				  struct nbl_init_param *param)
+{
+	struct device *dev = &pdev->dev;
+	struct nbl_product_base_ops *ops;
+	struct nbl_common_info *info;
+	struct nbl_adapter *adapter;
+	int err;
+
+	adapter = devm_kzalloc(dev, sizeof(*adapter), GFP_KERNEL);
+	if (!adapter)
+		return ERR_PTR(-ENOMEM);
+	adapter->pdev = pdev;
+
+	/* Record the PCI identity and capabilities of this function. */
+	info = &adapter->common;
+	info->pdev = pdev;
+	info->dev = dev;
+	info->bus = pdev->bus->number;
+	info->devid = PCI_SLOT(pdev->devfn);
+	info->function = PCI_FUNC(pdev->devfn);
+	info->product_type = param->product_type;
+	info->has_ctrl = param->caps.has_ctrl;
+	info->has_net = param->caps.has_net;
+
+	ops = nbl_core_setup_product_ops(adapter, param);
+	if (!ops)
+		return ERR_PTR(-ENOENT);
+
+	/*
+	 * The hw, channel and resource layers differ a lot between
+	 * products, so each product supplies its own init ops.
+	 */
+	err = ops->hw_init(adapter);
+	if (err)
+		return ERR_PTR(err);
+
+	err = ops->chan_init(adapter);
+	if (err)
+		goto err_remove_hw;
+
+	err = ops->res_init(adapter);
+	if (err)
+		goto err_remove_chan;
+
+	err = nbl_disp_init(adapter);
+	if (err)
+		goto err_remove_res;
+
+	err = nbl_dev_init(adapter);
+	if (err)
+		goto err_remove_disp;
+
+	return adapter;
+
+err_remove_disp:
+	nbl_disp_remove(adapter);
+err_remove_res:
+	ops->res_remove(adapter);
+err_remove_chan:
+	ops->chan_remove(adapter);
+err_remove_hw:
+	ops->hw_remove(adapter);
+	return ERR_PTR(err);
+}
+
+/*
+ * Remove the layers set up by nbl_core_init(), in reverse order of
+ * initialization: dev, dispatch, resource, channel, hw. The adapter itself
+ * is not freed here.
+ */
+void nbl_core_remove(struct nbl_adapter *adapter)
+{
+	struct nbl_product_base_ops *ops = adapter->product_base_ops;
+
+	nbl_dev_remove(adapter);
+	nbl_disp_remove(adapter);
+
+	ops->res_remove(adapter);
+	ops->chan_remove(adapter);
+	ops->hw_remove(adapter);
+}
+
+/*
+ * Fill @param from the PCI match data of @pdev.
+ *
+ * The has_ctrl, has_net and is_nic capabilities mirror the corresponding
+ * NBL_CAP_* bits of @driver_data. The product type is NBL_LEONIS_TYPE when
+ * the Leonis bit is set and NBL_PRODUCT_MAX (no supported product)
+ * otherwise.
+ */
+static void nbl_get_func_param(struct pci_dev *pdev, kernel_ulong_t driver_data,
+			       struct nbl_init_param *param)
+{
+	struct nbl_func_caps *caps = &param->caps;
+	bool is_leonis = driver_data & BIT(NBL_CAP_IS_LEONIS_BIT);
+
+	caps->has_ctrl = (driver_data & BIT(NBL_CAP_HAS_CTRL_BIT)) ? 1 : 0;
+	caps->has_net = (driver_data & BIT(NBL_CAP_HAS_NET_BIT)) ? 1 : 0;
+	caps->is_nic = (driver_data & BIT(NBL_CAP_IS_NIC_BIT)) ? 1 : 0;
+
+	param->product_type = is_leonis ? NBL_LEONIS_TYPE : NBL_PRODUCT_MAX;
+
+	/*
+	 * On Leonis only PF0 has the ctrl capability, but PF0 shares its
+	 * PCI device ID with the other PFs, so the match data cannot tell
+	 * them apart. Grant the capability to physical function 0 here.
+	 */
+	if (is_leonis && !pdev->is_virtfn && PCI_FUNC(pdev->devfn) == 0)
+		caps->has_ctrl = 1;
+}
+
 static int nbl_probe(struct pci_dev *pdev,
 		     const struct pci_device_id *id)
 {
+	struct nbl_init_param param = { { 0 } };
+	struct device *dev = &pdev->dev;
+	struct nbl_adapter *adapter;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to enable PCI dev, err=%d\n", err);
+		return err;
+	}
+
+	nbl_get_func_param(pdev, id->driver_data, &param);
+
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_dbg(dev, "Configure DMA 64 bit mask failed, err = %d\n",
+			err);
+		err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(dev,
+				"Configure DMA 32 bit mask failed, err = %d\n",
+				err);
+			goto configure_dma_err;
+		}
+	}
+	pci_set_master(pdev);
+
+	adapter = nbl_core_init(pdev, &param);
+	if (IS_ERR(adapter)) {
+		dev_err(dev, "Nbl adapter init fail: %pe\n", adapter);
+		err = PTR_ERR(adapter);
+		goto adapter_init_err;
+	}
+	pci_set_drvdata(pdev, adapter);
+	err = nbl_core_start(adapter);
+	if (err)
+		goto core_start_err;
+
 	return 0;
+core_start_err:
+	pci_set_drvdata(pdev, NULL);
+	nbl_core_remove(adapter);
+adapter_init_err:
+	pci_clear_master(pdev);
+configure_dma_err:
+	pci_disable_device(pdev);
+	return err;
 }
 
+/*
+ * Counterpart of nbl_probe(): stop and remove the adapter stored as the PCI
+ * driver data, then clear bus mastering and disable the PCI device.
+ */
 static void nbl_remove(struct pci_dev *pdev)
 {
+	struct nbl_adapter *adapter = pci_get_drvdata(pdev);
+
+	nbl_core_stop(adapter);
+	nbl_core_remove(adapter);
+
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
 }
 
 /*
-- 
2.47.3


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox