public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform
@ 2026-05-05 16:09 Lizhi Hou
  2026-05-05 16:09 ` [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF Lizhi Hou
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	karol.wachowski
  Cc: Lizhi Hou, linux-kernel, max.zhen, sonal.santan

This series extends amdxdna with initial support for AIE4 Virtual Functions
and the user-visible execution path.

The series also moves message buffer allocation and device metadata
collection into aie.c so that AIE2 and AIE4 devices share common code.

Testing:
  PF + VF: Probe AIE4 PF, enable SR-IOV, bind VF devices, and run
  end-to-end workloads from a userspace application.

  Regression: Run the existing AIE2 regression tests to check for regressions.

David Zhang (5):
  accel/amdxdna: Add initial support for AIE4 VF
  accel/amdxdna: Init AIE4 device partition
  accel/amdxdna: Add AIE4 VF hardware context create and destroy
  accel/amdxdna: Add command doorbell and wait support
  accel/amdxdna: Add AIE4 metadata query support

Nishad Saraf (1):
  accel/amdxdna: Add AIE4 work buffer initialization

 drivers/accel/amdxdna/Makefile          |   1 +
 drivers/accel/amdxdna/aie.c             |  79 ++++++
 drivers/accel/amdxdna/aie.h             |  31 +++
 drivers/accel/amdxdna/aie2_ctx.c        |   4 +-
 drivers/accel/amdxdna/aie2_error.c      |   7 +-
 drivers/accel/amdxdna/aie2_message.c    |  51 +---
 drivers/accel/amdxdna/aie2_pci.c        |  54 +---
 drivers/accel/amdxdna/aie2_pci.h        |  28 --
 drivers/accel/amdxdna/aie4_ctx.c        | 333 ++++++++++++++++++++++++
 drivers/accel/amdxdna/aie4_host_queue.h |  24 ++
 drivers/accel/amdxdna/aie4_message.c    |  55 ++++
 drivers/accel/amdxdna/aie4_msg_priv.h   |  98 +++++++
 drivers/accel/amdxdna/aie4_pci.c        | 310 +++++++++++++++++-----
 drivers/accel/amdxdna/aie4_pci.h        |  37 ++-
 drivers/accel/amdxdna/amdxdna_ctx.c     |  40 +++
 drivers/accel/amdxdna/amdxdna_ctx.h     |   7 +-
 drivers/accel/amdxdna/amdxdna_gem.c     |   5 +-
 drivers/accel/amdxdna/amdxdna_pci_drv.c |  25 +-
 drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +
 drivers/accel/amdxdna/npu3_regs.c       |  25 +-
 include/uapi/drm/amdxdna_accel.h        |  24 +-
 21 files changed, 1042 insertions(+), 200 deletions(-)
 create mode 100644 drivers/accel/amdxdna/aie4_ctx.c
 create mode 100644 drivers/accel/amdxdna/aie4_host_queue.h

-- 
2.34.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF
  2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
  2026-05-05 19:37   ` Mario Limonciello
  2026-05-05 16:09 ` [PATCH V1 2/6] accel/amdxdna: Init AIE4 device partition Lizhi Hou
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue, Lizhi Hou

From: David Zhang <yidong.zhang@amd.com>

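Add basic device initialization support for AIE4 Virtual Functions (PCI
device IDs 0x17F3 and 0x1B0C).

Split aie4_ops into aie4_pf_ops and aie4_vf_ops. The PF path still starts
firmware before bringing up the mailbox, while the VF path does not load
firmware, has no SMU or PSP, and only initializes the mailbox. VF devices
use the new AMDXDNA_DEV_TYPE_UMQ device type.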

Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie4_pci.c        | 160 +++++++++++++-----------
 drivers/accel/amdxdna/aie4_pci.h        |   3 +-
 drivers/accel/amdxdna/amdxdna_pci_drv.c |   4 +
 drivers/accel/amdxdna/amdxdna_pci_drv.h |   1 +
 drivers/accel/amdxdna/npu3_regs.c       |  20 ++-
 include/uapi/drm/amdxdna_accel.h        |   1 +
 6 files changed, 113 insertions(+), 76 deletions(-)

diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 87f80f804f91..a967e2db7ebd 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -196,8 +196,9 @@ static int aie4_mailbox_start(struct amdxdna_dev *xdna,
 	return ret;
 }
 
-static int aie4_mailbox_init(struct amdxdna_dev *xdna)
+static int aie4_mailbox_init(struct amdxdna_dev_hdl *ndev)
 {
+	struct amdxdna_dev *xdna = ndev->aie.xdna;
 	struct mailbox_info mbox_info;
 	int ret;
 
@@ -208,13 +209,13 @@ static int aie4_mailbox_init(struct amdxdna_dev *xdna)
 	return aie4_mailbox_start(xdna, &mbox_info);
 }
 
-static void aie4_fw_unload(struct amdxdna_dev_hdl *ndev)
+static void aie4_fw_stop(struct amdxdna_dev_hdl *ndev)
 {
 	aie_psp_stop(ndev->aie.psp_hdl);
 	aie_smu_fini(ndev->aie.smu_hdl);
 }
 
-static int aie4_fw_load(struct amdxdna_dev_hdl *ndev)
+static int aie4_fw_start(struct amdxdna_dev_hdl *ndev)
 {
 	int ret;
 
@@ -233,49 +234,49 @@ static int aie4_fw_load(struct amdxdna_dev_hdl *ndev)
 	return ret;
 }
 
-static int aie4_hw_start(struct amdxdna_dev *xdna)
+static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
 {
-	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
 	int ret;
 
-	ret = aie4_fw_load(ndev);
+	ret = aie4_fw_start(ndev);
 	if (ret)
 		return ret;
 
-	ret = aie4_mailbox_init(xdna);
+	ret = aie4_mailbox_init(ndev);
 	if (ret)
-		goto fw_unload;
+		goto stop_fw;
 
 	return 0;
 
-fw_unload:
-	aie4_fw_unload(ndev);
+stop_fw:
+	aie4_fw_stop(ndev);
 
 	return ret;
 }
 
-static void aie4_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev)
+static void aie4_pf_hw_stop(struct amdxdna_dev_hdl *ndev)
 {
-	int ret;
+	struct amdxdna_dev *xdna = ndev->aie.xdna;
 
-	/* No paired resume needed, fw is stateless */
-	ret = aie4_suspend_fw(ndev);
-	if (ret)
-		XDNA_ERR(ndev->aie.xdna, "suspend_fw failed, ret %d", ret);
-	else
-		XDNA_DBG(ndev->aie.xdna, "npu firmware suspended");
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+	aie4_suspend_fw(ndev);
+	aie4_mailbox_fini(ndev);
+	aie4_fw_stop(ndev);
 }
 
-static void aie4_hw_stop(struct amdxdna_dev *xdna)
+static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
 {
-	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+	return aie4_mailbox_init(ndev);
+}
+
+static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
+{
+	struct amdxdna_dev *xdna = ndev->aie.xdna;
 
 	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
 
-	aie4_mgmt_fw_fini(ndev);
 	aie4_mailbox_fini(ndev);
-
-	aie4_fw_unload(ndev);
 }
 
 static int aie4_request_firmware(struct amdxdna_dev_hdl *ndev,
@@ -365,15 +366,41 @@ static int aie4_prepare_firmware(struct amdxdna_dev_hdl *ndev,
 	return 0;
 }
 
-static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
+static int aie4_load_fw(struct amdxdna_dev_hdl *ndev,
+			void __iomem *tbl[PCI_NUM_RESOURCES])
+{
+	const struct firmware *npufw, *certfw;
+	int ret;
+
+	if (!ndev->priv->npufw_path && !ndev->priv->certfw_path)
+		return 0;
+
+	ret = aie4_request_firmware(ndev, &npufw, &certfw);
+	if (ret)
+		return ret;
+
+	ret = aie4_prepare_firmware(ndev, npufw, certfw, tbl);
+	aie4_release_firmware(ndev, npufw, certfw);
+
+	return ret;
+}
+
+static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
 {
-	struct amdxdna_dev *xdna = ndev->aie.xdna;
 	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+	struct amdxdna_dev_hdl *ndev;
 	void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
-	const struct firmware *npufw, *certfw;
 	unsigned long bars = 0;
 	int ret, i;
 
+	ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
+	if (!ndev)
+		return -ENOMEM;
+
+	ndev->priv = xdna->dev_info->dev_priv;
+	ndev->aie.xdna = xdna;
+	xdna->dev_handle = ndev;
+
 	/* Enable managed PCI device */
 	ret = pcim_enable_device(pdev);
 	if (ret) {
@@ -409,75 +436,60 @@ static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
 
 	pci_set_master(pdev);
 
-	ret = aie4_request_firmware(ndev, &npufw, &certfw);
-	if (ret)
-		goto clear_master;
-
-	ret = aie4_prepare_firmware(ndev, npufw, certfw, tbl);
-	aie4_release_firmware(ndev, npufw, certfw);
+	ret = aie4_load_fw(ndev, tbl);
 	if (ret)
-		goto clear_master;
+		return ret;
 
 	ret = aie4_irq_init(xdna);
 	if (ret)
-		goto clear_master;
+		return ret;
 
-	ret = aie4_hw_start(xdna);
-	if (ret)
-		goto clear_master;
+	amdxdna_vbnv_init(xdna);
+	XDNA_DBG(xdna, "init finished");
 
 	return 0;
-
-clear_master:
-	pci_clear_master(pdev);
-
-	return ret;
 }
 
-static void aie4_pcidev_fini(struct amdxdna_dev_hdl *ndev)
+static int aie4_pf_init(struct amdxdna_dev *xdna)
 {
-	struct amdxdna_dev *xdna = ndev->aie.xdna;
-	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
-
-	aie4_hw_stop(xdna);
-
-	pci_clear_master(pdev);
-}
+	int ret;
 
-static void aie4_fini(struct amdxdna_dev *xdna)
-{
-	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+	ret = aie4m_pcidev_init(xdna);
+	if (ret)
+		return ret;
 
-	aie4_sriov_stop(ndev);
-	aie4_pcidev_fini(ndev);
+	return aie4_pf_hw_start(xdna->dev_handle);
 }
 
-static int aie4_init(struct amdxdna_dev *xdna)
+static int aie4_vf_init(struct amdxdna_dev *xdna)
 {
-	struct amdxdna_dev_hdl *ndev;
 	int ret;
 
-	ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
-	if (!ndev)
-		return -ENOMEM;
+	ret = aie4m_pcidev_init(xdna);
+	if (ret)
+		return ret;
 
-	ndev->priv = xdna->dev_info->dev_priv;
-	ndev->aie.xdna = xdna;
-	xdna->dev_handle = ndev;
+	return aie4_vf_hw_start(xdna->dev_handle);
+}
 
-	ret = aie4_pcidev_init(ndev);
-	if (ret) {
-		XDNA_ERR(xdna, "Setup PCI device failed, ret %d", ret);
-		return ret;
-	}
+static void aie4_pf_fini(struct amdxdna_dev *xdna)
+{
+	aie4_sriov_stop(xdna->dev_handle);
+	aie4_pf_hw_stop(xdna->dev_handle);
+}
 
-	amdxdna_vbnv_init(xdna);
-	XDNA_DBG(xdna, "aie4 init finished");
-	return 0;
+static void aie4_vf_fini(struct amdxdna_dev *xdna)
+{
+	aie4_vf_hw_stop(xdna->dev_handle);
 }
 
-const struct amdxdna_dev_ops aie4_ops = {
-	.init			= aie4_init,
-	.fini			= aie4_fini,
+const struct amdxdna_dev_ops aie4_pf_ops = {
+	.init			= aie4_pf_init,
+	.fini			= aie4_pf_fini,
 	.sriov_configure        = aie4_sriov_configure,
 };
+
+const struct amdxdna_dev_ops aie4_vf_ops = {
+	.init			= aie4_vf_init,
+	.fini			= aie4_vf_fini,
+};
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index aa1495c3370b..cbf3424a4341 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -48,6 +48,7 @@ static inline int aie4_sriov_stop(struct amdxdna_dev_hdl *ndev)
 }
 #endif
 
-extern const struct amdxdna_dev_ops aie4_ops;
+extern const struct amdxdna_dev_ops aie4_pf_ops;
+extern const struct amdxdna_dev_ops aie4_vf_ops;
 
 #endif /* _AIE4_PCI_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 1b08a08343cf..39ad081ac082 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -53,7 +53,9 @@ static const struct pci_device_id pci_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1502) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f0) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f2) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1B0B) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1B0C) },
 	{0}
 };
 
@@ -65,7 +67,9 @@ static const struct amdxdna_device_id amdxdna_ids[] = {
 	{ 0x17f0, 0x11, &dev_npu5_info },
 	{ 0x17f0, 0x20, &dev_npu6_info },
 	{ 0x17f2, 0x10, &dev_npu3_pf_info },
+	{ 0x17f3, 0x10, &dev_npu3_vf_info },
 	{ 0x1B0B, 0x10, &dev_npu3_pf_info },
+	{ 0x1B0C, 0x10, &dev_npu3_vf_info },
 	{0}
 };
 
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index b1548cf16f59..caed11c09e55 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -167,6 +167,7 @@ struct amdxdna_client {
 /* Add device info below */
 extern const struct amdxdna_dev_info dev_npu1_info;
 extern const struct amdxdna_dev_info dev_npu3_pf_info;
+extern const struct amdxdna_dev_info dev_npu3_vf_info;
 extern const struct amdxdna_dev_info dev_npu4_info;
 extern const struct amdxdna_dev_info dev_npu5_info;
 extern const struct amdxdna_dev_info dev_npu6_info;
diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
index acece0faddf2..6d5da779232b 100644
--- a/drivers/accel/amdxdna/npu3_regs.c
+++ b/drivers/accel/amdxdna/npu3_regs.c
@@ -64,6 +64,14 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
 	},
 };
 
+static const struct amdxdna_dev_priv npu3_dev_vf_priv = {
+	/* vf device does not load firmware */
+	.mbox_bar		= NPU3_MBOX_BAR,
+	.mbox_rbuf_bar		= NPU3_MBOX_BUFFER_BAR,
+	.mbox_info_off		= NPU3_MBOX_INFO_OFF,
+	/* vf device does not have smu and psp */
+};
+
 const struct amdxdna_dev_info dev_npu3_pf_info = {
 	.mbox_bar		= NPU3_MBOX_BAR,
 	.sram_bar		= NPU3_MBOX_BUFFER_BAR,
@@ -73,5 +81,15 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
 	.device_type		= AMDXDNA_DEV_TYPE_PF,
 	.dev_priv		= &npu3_dev_priv,
 	.fw_feature_tbl		= npu3_fw_feature_table,
-	.ops			= &aie4_ops,
+	.ops			= &aie4_pf_ops,
+};
+
+const struct amdxdna_dev_info dev_npu3_vf_info = {
+	.mbox_bar		= NPU3_MBOX_BAR,
+	.sram_bar		= NPU3_MBOX_BUFFER_BAR,
+	.default_vbnv		= "RyzenAI-npu3-vf",
+	.device_type		= AMDXDNA_DEV_TYPE_UMQ,
+	.dev_priv		= &npu3_dev_vf_priv,
+	.fw_feature_tbl		= npu3_fw_feature_table,
+	.ops			= &aie4_vf_ops,
 };
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index 0b11e8e3ea5d..34212feee15c 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -30,6 +30,7 @@ extern "C" {
 enum amdxdna_device_type {
 	AMDXDNA_DEV_TYPE_UNKNOWN = -1,
 	AMDXDNA_DEV_TYPE_KMQ = 0,
+	AMDXDNA_DEV_TYPE_UMQ = 1,
 	AMDXDNA_DEV_TYPE_PF = 2,
 };
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH V1 2/6] accel/amdxdna: Init AIE4 device partition
  2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
  2026-05-05 16:09 ` [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
  2026-05-05 19:53   ` Mario Limonciello
  2026-05-05 16:09 ` [PATCH V1 3/6] accel/amdxdna: Add AIE4 VF hardware context create and destroy Lizhi Hou
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue, Lizhi Hou

From: David Zhang <yidong.zhang@amd.com>

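Send the partition creation command to firmware during VF initialization
and the matching partition destroy command when the VF hardware is
stopped. The partition always covers AIE4_TOTAL_COLUMN (3) columns
starting at column 0.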

Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie4_msg_priv.h | 21 +++++++++++
 drivers/accel/amdxdna/aie4_pci.c      | 52 ++++++++++++++++++++++++++-
 drivers/accel/amdxdna/aie4_pci.h      |  1 +
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
index 88463cc3a98a..cada53257921 100644
--- a/drivers/accel/amdxdna/aie4_msg_priv.h
+++ b/drivers/accel/amdxdna/aie4_msg_priv.h
@@ -13,6 +13,9 @@ enum aie4_msg_opcode {
 
 	AIE4_MSG_OP_CREATE_VFS                       = 0x20001,
 	AIE4_MSG_OP_DESTROY_VFS                      = 0x20002,
+
+	AIE4_MSG_OP_CREATE_PARTITION                 = 0x30001,
+	AIE4_MSG_OP_DESTROY_PARTITION                = 0x30002,
 };
 
 enum aie4_msg_status {
@@ -46,4 +49,22 @@ struct aie4_msg_destroy_vfs_resp {
 	enum aie4_msg_status status;
 } __packed;
 
+struct aie4_msg_create_partition_req {
+	__u32 partition_col_start;
+	__u32 partition_col_count;
+} __packed;
+
+struct aie4_msg_create_partition_resp {
+	enum aie4_msg_status status;
+	__u32 partition_id;
+} __packed;
+
+struct aie4_msg_destroy_partition_req {
+	__u32 partition_id;
+} __packed;
+
+struct aie4_msg_destroy_partition_resp {
+	enum aie4_msg_status status;
+} __packed;
+
 #endif /* _AIE4_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index a967e2db7ebd..13f5d45e388d 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -9,11 +9,16 @@
 #include <linux/firmware.h>
 #include <linux/sizes.h>
 
+#include "aie.h"
+#include "aie4_msg_priv.h"
 #include "aie4_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
 #include "amdxdna_pci_drv.h"
 
 #define NO_IOHUB		0
 #define PSP_NOTIFY_INTR		0xD007BE11
+#define AIE4_TOTAL_COLUMN	3
 
 /*
  * The management mailbox channel is allocated by firmware.
@@ -234,6 +239,36 @@ static int aie4_fw_start(struct amdxdna_dev_hdl *ndev)
 	return ret;
 }
 
+static int aie4_partition_init(struct amdxdna_dev_hdl *ndev)
+{
+	DECLARE_AIE_MSG(aie4_msg_create_partition, AIE4_MSG_OP_CREATE_PARTITION);
+	struct amdxdna_dev *xdna = ndev->aie.xdna;
+	int ret;
+
+	req.partition_col_start = 0;
+	req.partition_col_count = AIE4_TOTAL_COLUMN;
+	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+	if (ret) {
+		XDNA_ERR(xdna, "partition init failed: %d", ret);
+		return ret;
+	}
+
+	ndev->partition_id = resp.partition_id;
+	return 0;
+}
+
+static void aie4_partition_fini(struct amdxdna_dev_hdl *ndev)
+{
+	DECLARE_AIE_MSG(aie4_msg_destroy_partition, AIE4_MSG_OP_DESTROY_PARTITION);
+	struct amdxdna_dev *xdna = ndev->aie.xdna;
+	int ret;
+
+	req.partition_id = ndev->partition_id;
+	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+	if (ret)
+		XDNA_ERR(xdna, "partition fini failed: %d", ret);
+}
+
 static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
 {
 	int ret;
@@ -267,7 +302,21 @@ static void aie4_pf_hw_stop(struct amdxdna_dev_hdl *ndev)
 
 static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
 {
-	return aie4_mailbox_init(ndev);
+	int ret;
+
+	ret = aie4_mailbox_init(ndev);
+	if (ret)
+		return ret;
+
+	ret = aie4_partition_init(ndev);
+	if (ret)
+		goto mailbox_fini;
+
+	return 0;
+
+mailbox_fini:
+	aie4_mailbox_fini(ndev);
+	return ret;
 }
 
 static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
@@ -276,6 +325,7 @@ static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
 
 	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
 
+	aie4_partition_fini(ndev);
 	aie4_mailbox_fini(ndev);
 }
 
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index cbf3424a4341..620fb5bd23e4 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -31,6 +31,7 @@ struct amdxdna_dev_hdl {
 	void			__iomem *rbuf_base;
 
 	struct mailbox			*mbox;
+	u32				partition_id;
 };
 
 /* aie4_message.c */
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH V1 3/6] accel/amdxdna: Add AIE4 VF hardware context create and destroy
  2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
  2026-05-05 16:09 ` [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF Lizhi Hou
  2026-05-05 16:09 ` [PATCH V1 2/6] accel/amdxdna: Init AIE4 device partition Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
  2026-05-05 20:28   ` Mario Limonciello
  2026-05-05 16:09 ` [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support Lizhi Hou
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue, Lizhi Hou

From: David Zhang <yidong.zhang@amd.com>

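Implement hardware context creation and destruction for AIE4 VF devices.

A context is created by sending firmware the device address of the
user-supplied UMQ buffer object. The doorbell offset returned by firmware
is passed back to userspace. Job completion interrupts are tracked per
MSI-X index and shared, with reference counting, by contexts that are
assigned the same index.

Creating a hardware context now returns -EOPNOTSUPP on devices whose ops
do not provide hwctx_init.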

Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/Makefile          |   1 +
 drivers/accel/amdxdna/aie4_ctx.c        | 258 ++++++++++++++++++++++++
 drivers/accel/amdxdna/aie4_host_queue.h |  22 ++
 drivers/accel/amdxdna/aie4_msg_priv.h   |  29 +++
 drivers/accel/amdxdna/aie4_pci.c        |   5 +
 drivers/accel/amdxdna/aie4_pci.h        |  24 +++
 drivers/accel/amdxdna/amdxdna_ctx.c     |   6 +
 drivers/accel/amdxdna/amdxdna_ctx.h     |   3 +
 include/uapi/drm/amdxdna_accel.h        |   1 +
 9 files changed, 349 insertions(+)
 create mode 100644 drivers/accel/amdxdna/aie4_ctx.c
 create mode 100644 drivers/accel/amdxdna/aie4_host_queue.h

diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
index d7720c8c8a98..05cce0a38692 100644
--- a/drivers/accel/amdxdna/Makefile
+++ b/drivers/accel/amdxdna/Makefile
@@ -10,6 +10,7 @@ amdxdna-y := \
 	aie2_pci.o \
 	aie2_pm.o \
 	aie2_solver.o \
+	aie4_ctx.o \
 	aie4_message.o \
 	aie4_pci.o \
 	amdxdna_cbuf.o \
diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c
new file mode 100644
index 000000000000..84ac706d0ffb
--- /dev/null
+++ b/drivers/accel/amdxdna/aie4_ctx.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/types.h>
+
+#include "aie.h"
+#include "aie4_host_queue.h"
+#include "aie4_msg_priv.h"
+#include "aie4_pci.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
+#include "amdxdna_pci_drv.h"
+
+static irqreturn_t cert_comp_isr(int irq, void *p)
+{
+	struct cert_comp *cert_comp = p;
+
+	wake_up_all(&cert_comp->waitq);
+	return IRQ_HANDLED;
+}
+
+static struct cert_comp *aie4_lookup_cert_comp(struct amdxdna_dev_hdl *ndev, u32 msix_idx)
+{
+	struct amdxdna_dev *xdna = ndev->aie.xdna;
+	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+	struct cert_comp *cert_comp;
+	int ret;
+
+	guard(mutex)(&ndev->cert_comp_lock);
+
+	cert_comp = xa_load(&ndev->cert_comp_xa, msix_idx);
+	if (cert_comp) {
+		kref_get(&cert_comp->kref);
+		return cert_comp;
+	}
+
+	cert_comp = kzalloc_obj(*cert_comp);
+	if (!cert_comp)
+		return NULL;
+
+	cert_comp->ndev = ndev;
+	cert_comp->msix_idx = msix_idx;
+	init_waitqueue_head(&cert_comp->waitq);
+	kref_init(&cert_comp->kref);
+
+	ret = pci_irq_vector(pdev, cert_comp->msix_idx);
+	if (ret < 0) {
+		XDNA_ERR(xdna, "MSI-X idx %u is invalid, ret:%d", msix_idx, ret);
+		goto free_cert_comp;
+	}
+	cert_comp->irq = ret;
+
+	ret = request_irq(cert_comp->irq, cert_comp_isr, 0, "xdna_hsa", cert_comp);
+	if (ret) {
+		XDNA_ERR(xdna, "request irq %d failed %d", cert_comp->irq, ret);
+		goto free_cert_comp;
+	}
+
+	ret = xa_err(xa_store(&ndev->cert_comp_xa, msix_idx, cert_comp, GFP_KERNEL));
+	if (ret) {
+		XDNA_ERR(xdna, "store cert_comp for msix idx %d failed %d", msix_idx, ret);
+		goto free_irq;
+	}
+
+	return cert_comp;
+
+free_irq:
+	free_irq(cert_comp->irq, cert_comp);
+free_cert_comp:
+	kfree(cert_comp);
+	return NULL;
+}
+
+static void cert_comp_release(struct kref *kref)
+{
+	struct cert_comp *cert_comp = container_of(kref, struct cert_comp, kref);
+	struct amdxdna_dev_hdl *ndev = cert_comp->ndev;
+
+	drm_WARN_ON(&ndev->aie.xdna->ddev, !mutex_is_locked(&ndev->cert_comp_lock));
+
+	xa_erase(&ndev->cert_comp_xa, cert_comp->msix_idx);
+	free_irq(cert_comp->irq, cert_comp);
+	kfree(cert_comp);
+}
+
+static void aie4_put_cert_comp(struct cert_comp *cert_comp)
+{
+	struct amdxdna_dev_hdl *ndev;
+
+	ndev = cert_comp->ndev;
+	guard(mutex)(&ndev->cert_comp_lock);
+	kref_put(&cert_comp->kref, cert_comp_release);
+}
+
+static int aie4_msg_destroy_context(struct amdxdna_dev_hdl *ndev, u32 hw_context_id)
+{
+	DECLARE_AIE_MSG(aie4_msg_destroy_hw_context, AIE4_MSG_OP_DESTROY_HW_CONTEXT);
+
+	req.hw_context_id = hw_context_id;
+	return aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+}
+
+static int aie4_hwctx_create(struct amdxdna_hwctx *hwctx)
+{
+	DECLARE_AIE_MSG(aie4_msg_create_hw_context, AIE4_MSG_OP_CREATE_HW_CONTEXT);
+	struct amdxdna_client *client = hwctx->client;
+	struct amdxdna_hwctx_priv *priv = hwctx->priv;
+	struct amdxdna_dev *xdna = hwctx->client->xdna;
+	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+	int ret;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+	if (!ndev->partition_id || !hwctx->num_tiles) {
+		XDNA_ERR(xdna, "invalid request partition_id %d, num_tiles %d",
+			 ndev->partition_id, hwctx->num_tiles);
+		return -EINVAL;
+	}
+
+	req.partition_id = ndev->partition_id;
+	req.request_num_tiles = hwctx->num_tiles;
+	req.pasid = FIELD_PREP(AIE4_MSG_PASID, client->pasid) |
+		FIELD_PREP(AIE4_MSG_PASID_VLD, 1);
+	req.priority_band = hwctx->qos.priority;
+
+	req.hsa_addr_high = upper_32_bits(amdxdna_gem_dev_addr(priv->umq_bo));
+	req.hsa_addr_low = lower_32_bits(amdxdna_gem_dev_addr(priv->umq_bo));
+
+	XDNA_DBG(xdna, "pasid 0x%x, num_tiles %d, hsa[0x%x 0x%x]",
+		 req.pasid, req.request_num_tiles, req.hsa_addr_high, req.hsa_addr_low);
+
+	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+	if (ret) {
+		XDNA_ERR(xdna, "create ctx failed: %d", ret);
+		return ret;
+	}
+
+	XDNA_DBG(xdna, "resp msix: %d, ctx id: %d, doorbell: %d",
+		 resp.job_complete_msix_idx,
+		 resp.hw_context_id,
+		 resp.doorbell_offset);
+
+	/* setup interrupt completion per msix index */
+	priv->cert_comp = aie4_lookup_cert_comp(ndev, resp.job_complete_msix_idx);
+	if (!priv->cert_comp) {
+		aie4_msg_destroy_context(ndev, resp.hw_context_id);
+		return -EINVAL;
+	}
+
+	priv->hw_ctx_id = resp.hw_context_id;
+	hwctx->doorbell_offset = resp.doorbell_offset;
+
+	return 0;
+}
+
+static void aie4_hwctx_destroy(struct amdxdna_hwctx *hwctx)
+{
+	struct amdxdna_client *client = hwctx->client;
+	struct amdxdna_hwctx_priv *priv = hwctx->priv;
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+	aie4_msg_destroy_context(ndev, priv->hw_ctx_id);
+	aie4_put_cert_comp(priv->cert_comp);
+}
+
+static void aie4_hwctx_umq_fini(struct amdxdna_hwctx *hwctx)
+{
+	if (hwctx->priv && hwctx->priv->umq_bo)
+		amdxdna_gem_put_obj(hwctx->priv->umq_bo);
+}
+
+static int aie4_hwctx_umq_init(struct amdxdna_hwctx *hwctx)
+{
+	struct amdxdna_hwctx_priv *priv = hwctx->priv;
+	struct amdxdna_dev *xdna = hwctx->client->xdna;
+	struct amdxdna_gem_obj *umq_bo;
+	struct host_queue_header *qhdr;
+	int ret;
+
+	umq_bo = amdxdna_gem_get_obj(hwctx->client, hwctx->umq_bo_hdl, AMDXDNA_BO_SHARE);
+	if (!umq_bo) {
+		XDNA_ERR(xdna, "cannot find umq_bo handle %d", hwctx->umq_bo_hdl);
+		return -ENOENT;
+	}
+	if (umq_bo->mem.size < sizeof(*qhdr)) {
+		XDNA_ERR(xdna, "umq_bo size is too small");
+		ret = -EINVAL;
+		goto put_umq_bo;
+	}
+
+	/* get kva address for host queue read index and write index */
+	qhdr = amdxdna_gem_vmap(umq_bo);
+	if (!qhdr) {
+		ret = -ENOMEM;
+		goto put_umq_bo;
+	}
+
+	priv->umq_bo = umq_bo;
+	priv->umq_read_index = &qhdr->read_index;
+	priv->umq_write_index = &qhdr->write_index;
+
+	return 0;
+
+put_umq_bo:
+	amdxdna_gem_put_obj(umq_bo);
+	return ret;
+}
+
+int aie4_hwctx_init(struct amdxdna_hwctx *hwctx)
+{
+	struct amdxdna_client *client = hwctx->client;
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_hwctx_priv *priv;
+	int ret;
+
+	priv = kzalloc_obj(*priv);
+	if (!priv)
+		return -ENOMEM;
+	hwctx->priv = priv;
+
+	ret = aie4_hwctx_umq_init(hwctx);
+	if (ret)
+		goto free_priv;
+
+	ret = aie4_hwctx_create(hwctx);
+	if (ret)
+		goto umq_fini;
+
+	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
+	return 0;
+
+umq_fini:
+	aie4_hwctx_umq_fini(hwctx);
+free_priv:
+	kfree(priv);
+	hwctx->priv = NULL;
+	return ret;
+}
+
+void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
+{
+	aie4_hwctx_destroy(hwctx);
+	aie4_hwctx_umq_fini(hwctx);
+	kfree(hwctx->priv);
+}
diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h
new file mode 100644
index 000000000000..eb6a38dfb53e
--- /dev/null
+++ b/drivers/accel/amdxdna/aie4_host_queue.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE4_HOST_QUEUE_H_
+#define _AIE4_HOST_QUEUE_H_
+
+#include <linux/types.h>
+
+struct host_queue_header {
+	__u64 read_index;
+	struct {
+		__u16 major;
+		__u16 minor;
+	} version;
+	__u32 capacity; /* Queue capacity, must be power of two. */
+	__u64 write_index;
+	__u64 data_address; /* The xdna dev addr for payload. */
+};
+
+#endif /* _AIE4_HOST_QUEUE_H_ */
diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
index cada53257921..7faa01ca3436 100644
--- a/drivers/accel/amdxdna/aie4_msg_priv.h
+++ b/drivers/accel/amdxdna/aie4_msg_priv.h
@@ -16,6 +16,8 @@ enum aie4_msg_opcode {
 
 	AIE4_MSG_OP_CREATE_PARTITION                 = 0x30001,
 	AIE4_MSG_OP_DESTROY_PARTITION                = 0x30002,
+	AIE4_MSG_OP_CREATE_HW_CONTEXT                = 0x30003,
+	AIE4_MSG_OP_DESTROY_HW_CONTEXT               = 0x30004,
 };
 
 enum aie4_msg_status {
@@ -67,4 +69,31 @@ struct aie4_msg_destroy_partition_resp {
 	enum aie4_msg_status status;
 } __packed;
 
+struct aie4_msg_create_hw_context_req {
+	__u32 partition_id;
+	__u32 request_num_tiles;
+	__u32 hsa_addr_high;
+	__u32 hsa_addr_low;
+#define AIE4_MSG_PASID GENMASK(19, 0)
+#define AIE4_MSG_PASID_VLD GENMASK(31, 31)
+	__u32 pasid;
+	__u32 priority_band;
+} __packed;
+
+struct aie4_msg_create_hw_context_resp {
+	enum aie4_msg_status status;
+	__u32 hw_context_id;
+	__u32 doorbell_offset;
+	__u32 job_complete_msix_idx;
+} __packed;
+
+struct aie4_msg_destroy_hw_context_req {
+	__u32 hw_context_id;
+	__u32 resvd1;
+} __packed;
+
+struct aie4_msg_destroy_hw_context_resp {
+	enum aie4_msg_status status;
+} __packed;
+
 #endif /* _AIE4_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 13f5d45e388d..3be9066b7178 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -451,6 +451,9 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
 	ndev->aie.xdna = xdna;
 	xdna->dev_handle = ndev;
 
+	xa_init_flags(&ndev->cert_comp_xa, XA_FLAGS_ALLOC);
+	mutex_init(&ndev->cert_comp_lock);
+
 	/* Enable managed PCI device */
 	ret = pcim_enable_device(pdev);
 	if (ret) {
@@ -542,4 +545,6 @@ const struct amdxdna_dev_ops aie4_pf_ops = {
 const struct amdxdna_dev_ops aie4_vf_ops = {
 	.init			= aie4_vf_init,
 	.fini			= aie4_vf_fini,
+	.hwctx_init		= aie4_hwctx_init,
+	.hwctx_fini		= aie4_hwctx_fini,
 };
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index 620fb5bd23e4..6103007e6d2f 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -13,6 +13,23 @@
 #include "aie.h"
 #include "amdxdna_mailbox.h"
 
+struct cert_comp {
+	struct amdxdna_dev_hdl          *ndev;
+	u32                             msix_idx;
+	int                             irq;
+	struct kref                     kref;
+	wait_queue_head_t               waitq;
+};
+
+struct amdxdna_hwctx_priv {
+	struct amdxdna_gem_obj          *umq_bo;
+	u64                             *umq_read_index;
+	u64                             *umq_write_index;
+
+	struct cert_comp                *cert_comp;
+	u32                             hw_ctx_id;
+};
+
 struct amdxdna_dev_priv {
 	const char              *npufw_path;
 	const char              *certfw_path;
@@ -32,11 +49,18 @@ struct amdxdna_dev_hdl {
 
 	struct mailbox			*mbox;
 	u32				partition_id;
+
+	struct xarray                   cert_comp_xa; /* device level indexed by msix id */
+	struct mutex                    cert_comp_lock; /* protects cert_comp operations*/
 };
 
 /* aie4_message.c */
 int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
 
+/* aie4_ctx.c */
+int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
+void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
+
 /* aie4_sriov.c */
 #if IS_ENABLED(CONFIG_PCI_IOV)
 int aie4_sriov_configure(struct amdxdna_dev *xdna, int num_vfs);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 2c2c21992c87..b5ad60d4b734 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -207,6 +207,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
 	if (args->ext || args->ext_flags)
 		return -EINVAL;
 
+	if (!xdna->dev_info->ops->hwctx_init)
+		return -EOPNOTSUPP;
+
 	hwctx = kzalloc_obj(*hwctx);
 	if (!hwctx)
 		return -ENOMEM;
@@ -220,6 +223,8 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
 	hwctx->client = client;
 	hwctx->fw_ctx_id = -1;
 	hwctx->num_tiles = args->num_tiles;
+	hwctx->umq_bo_hdl = args->umq_bo;
+	hwctx->doorbell_offset = AMDXDNA_INVALID_DOORBELL_OFFSET;
 	hwctx->mem_size = args->mem_size;
 	hwctx->max_opc = args->max_opc;
 
@@ -252,6 +257,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
 
 	args->handle = hwctx->id;
 	args->syncobj_handle = hwctx->syncobj_hdl;
+	args->umq_doorbell = hwctx->doorbell_offset;
 
 	atomic64_set(&hwctx->job_submit_cnt, 0);
 	atomic64_set(&hwctx->job_free_cnt, 0);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index 355798687376..c5622718b4d5 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -14,6 +14,7 @@ struct amdxdna_hwctx_priv;
 
 enum ert_cmd_opcode {
 	ERT_START_CU = 0,
+	ERT_START_DPU = 18,
 	ERT_CMD_CHAIN = 19,
 	ERT_START_NPU = 20,
 	ERT_START_NPU_PREEMPT = 21,
@@ -105,6 +106,8 @@ struct amdxdna_hwctx {
 	u32				*col_list;
 	u32				start_col;
 	u32				num_col;
+	u32				umq_bo_hdl;
+	u32				doorbell_offset;
 	u32				num_unused_col;
 
 	struct amdxdna_qos_info		     qos;
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index 34212feee15c..ad9b33dd7b13 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -18,6 +18,7 @@ extern "C" {
 #define AMDXDNA_INVALID_CTX_HANDLE	0
 #define AMDXDNA_INVALID_BO_HANDLE	0
 #define AMDXDNA_INVALID_FENCE_HANDLE	0
+#define AMDXDNA_INVALID_DOORBELL_OFFSET	(~0U)
 
 /*
  * Define hardware context priority
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support
  2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
                   ` (2 preceding siblings ...)
  2026-05-05 16:09 ` [PATCH V1 3/6] accel/amdxdna: Add AIE4 VF hardware context create and destroy Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
  2026-05-05 20:31   ` Mario Limonciello
  2026-05-05 16:09 ` [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support Lizhi Hou
  2026-05-05 16:09 ` [PATCH V1 6/6] accel/amdxdna: Add AIE4 work buffer initialization Lizhi Hou
  5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue, Lizhi Hou

From: David Zhang <yidong.zhang@amd.com>

Expose the command doorbell register to userspace on a per-hardware
context basis, enabling applications to notify the firmware of pending
commands via doorbell writes.

Introduce DRM_IOCTL_AMDXDNA_WAIT_CMD to allow userspace to wait for
completion of individual commands.

Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie4_ctx.c        | 75 +++++++++++++++++++++++++
 drivers/accel/amdxdna/aie4_host_queue.h |  2 +
 drivers/accel/amdxdna/aie4_pci.c        | 34 +++++++++++
 drivers/accel/amdxdna/aie4_pci.h        |  3 +
 drivers/accel/amdxdna/amdxdna_ctx.c     | 34 +++++++++++
 drivers/accel/amdxdna/amdxdna_ctx.h     |  4 +-
 drivers/accel/amdxdna/amdxdna_gem.c     |  5 +-
 drivers/accel/amdxdna/amdxdna_pci_drv.c | 18 +++++-
 drivers/accel/amdxdna/amdxdna_pci_drv.h |  3 +
 drivers/accel/amdxdna/npu3_regs.c       |  5 ++
 include/uapi/drm/amdxdna_accel.h        | 22 +++++++-
 11 files changed, 198 insertions(+), 7 deletions(-)

diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c
index 84ac706d0ffb..8408b0d2696f 100644
--- a/drivers/accel/amdxdna/aie4_ctx.c
+++ b/drivers/accel/amdxdna/aie4_ctx.c
@@ -256,3 +256,78 @@ void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
 	aie4_hwctx_umq_fini(hwctx);
 	kfree(hwctx->priv);
 }
+
+static inline bool valid_queue_index(u64 read, u64 write, u32 capacity)
+{
+	return write >= read && write - read <= capacity; /* ordered, gap fits */
+}
+
+static u64 get_read_index(struct amdxdna_hwctx *hwctx)
+{
+	u64 wi = READ_ONCE(*hwctx->priv->umq_write_index);
+	u64 ri = READ_ONCE(*hwctx->priv->umq_read_index);
+	struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+	/*
+	 * CERT cannot update the 64-bit read index atomically, so the driver may
+	 * see a half-updated value once the upper 32 bits are in use. When the
+	 * pair looks invalid, sleep briefly and re-read the read index once.
+	 * NOTE(review): reached from a wait_event condition; confirm sleeping is OK.
+	 */
+	if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
+		XDNA_WARN(xdna, "Invalid index, ri %llu, wi %llu", ri, wi);
+		usleep_range(100, 200);
+		ri = READ_ONCE(*hwctx->priv->umq_read_index);
+		if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
+			XDNA_ERR(xdna, "Invalid index after retry, ri %llu, wi %llu", ri, wi);
+			ri = 0; /* still invalid after the retry: fall back to 0 */
+		}
+	}
+
+	return ri;
+}
+
+static inline bool check_cmd_done(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+	u64 consumed = get_read_index(hwctx);
+
+	return seq < consumed;	/* done once the read index is past @seq */
+}
+
+int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
+{
+	struct cert_comp *cert_comp = hwctx->priv->cert_comp;
+	unsigned long wait_jifs;
+	long ret;
+
+	/* A zero timeout means wait without a deadline. */
+	wait_jifs = timeout ? msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT;
+
+	ret = wait_event_interruptible_timeout(cert_comp->waitq,
+					       check_cmd_done(hwctx, seq),
+					       wait_jifs);
+
+	if (!ret)
+		return -ETIME;
+
+	/* Positive means the command completed; negative is passed through. */
+	return ret < 0 ? ret : 0;
+}
+
+int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff)
+{
+	struct amdxdna_hwctx *hwctx;
+	unsigned long hwctx_id;
+	int idx, found = 0;
+
+	idx = srcu_read_lock(&client->hwctx_srcu);
+	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+		if (hwctx->doorbell_offset != AMDXDNA_INVALID_DOORBELL_OFFSET &&
+		    vm_pgoff == (hwctx->doorbell_offset >> PAGE_SHIFT)) {
+			found = 1;	/* only an assigned doorbell may match */
+			break;
+		}
+	}
+	srcu_read_unlock(&client->hwctx_srcu, idx);
+	return found;
+}
diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h
index eb6a38dfb53e..1b33eda3f727 100644
--- a/drivers/accel/amdxdna/aie4_host_queue.h
+++ b/drivers/accel/amdxdna/aie4_host_queue.h
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#define CTX_MAX_CMDS                    32
+
 struct host_queue_header {
 	__u64 read_index;
 	struct {
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 3be9066b7178..9ff34ce57fcb 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -503,6 +503,38 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
 	return 0;
 }
 
+static int aie4_doorbell_mmap(struct amdxdna_client *client, struct vm_area_struct *vma)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+	const struct amdxdna_dev_priv *npriv = xdna->dev_info->dev_priv;
+	phys_addr_t res_start;
+	unsigned long pfn;
+	int ret;
+
+	/* Only a doorbell page of one of this client's contexts may be mapped. */
+	if (!aie4_hwctx_valid_doorbell(client, vma->vm_pgoff)) {
+		XDNA_ERR(xdna, "Invalid doorbell page offset 0x%lx", vma->vm_pgoff);
+		return -EINVAL;
+	}
+
+	if (vma_pages(vma) != 1) {
+		XDNA_ERR(xdna, "can only map one page, got %lu", vma_pages(vma));
+		return -EINVAL;
+	}
+
+	/* vm_pgoff is a page index relative to the doorbell region of the BAR. */
+	res_start = pci_resource_start(pdev, xdna->dev_info->doorbell_bar) + npriv->doorbell_off;
+	pfn = PHYS_PFN(res_start) + vma->vm_pgoff;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
+	ret = io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
+				 vma->vm_page_prot);
+
+	XDNA_DBG(xdna, "doorbell ret %d", ret);
+	return ret;
+}
+
 static int aie4_pf_init(struct amdxdna_dev *xdna)
 {
 	int ret;
@@ -547,4 +579,6 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
 	.fini			= aie4_vf_fini,
 	.hwctx_init		= aie4_hwctx_init,
 	.hwctx_fini		= aie4_hwctx_fini,
+	.mmap			= aie4_doorbell_mmap,
+	.cmd_wait		= aie4_cmd_wait,
 };
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index 6103007e6d2f..b69489acd53d 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -36,6 +36,7 @@ struct amdxdna_dev_priv {
 	u32			mbox_bar;
 	u32			mbox_rbuf_bar;
 	u64			mbox_info_off;
+	u32			doorbell_off;
 
 	struct aie_bar_off_pair	psp_regs_off[PSP_MAX_REGS];
 	struct aie_bar_off_pair	smu_regs_off[SMU_MAX_REGS];
@@ -60,6 +61,8 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
 /* aie4_ctx.c */
 int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
 void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
+int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
+int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff);
 
 /* aie4_sriov.c */
 #if IS_ENABLED(CONFIG_PCI_IOV)
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index b5ad60d4b734..b79229a63af3 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -627,3 +627,37 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
 	XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
 	return -EINVAL;
 }
+
+int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct amdxdna_client *client = filp->driver_priv;
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	struct amdxdna_drm_wait_cmd *args = data;
+	struct amdxdna_hwctx *hwctx;
+	int ret, idx;
+
+	XDNA_DBG(xdna, "PID %d ctx %u timeout set %u ms for cmd %llu",
+		 client->pid, args->hwctx, args->timeout, args->seq);
+
+	if (!xdna->dev_info->ops->cmd_wait)
+		return -EOPNOTSUPP;
+
+	idx = srcu_read_lock(&client->hwctx_srcu);
+	hwctx = xa_load(&client->hwctx_xa, args->hwctx);
+	if (!hwctx) {
+		XDNA_DBG(xdna, "PID %d failed to get ctx %u", client->pid, args->hwctx);
+		ret = -EINVAL;
+		goto unlock_ctx_srcu;
+	}
+
+	/* The SRCU read lock stays held across the whole, possibly unbounded, wait. */
+	ret = xdna->dev_info->ops->cmd_wait(hwctx, args->seq, args->timeout);
+
+	XDNA_DBG(xdna, "PID %d ctx %u cmd %llu wait finished, ret %d",
+		 client->pid, args->hwctx, args->seq, ret);
+	trace_amdxdna_debug_point(current->comm, args->seq, "job returned to user");
+
+unlock_ctx_srcu:
+	srcu_read_unlock(&client->hwctx_srcu, idx);
+	return ret;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index c5622718b4d5..6e3c6371a088 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -211,12 +211,10 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
 		       u32 *arg_bo_hdls, u32 arg_bo_cnt,
 		       u32 hwctx_hdl, u64 *seq);
 
-int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
-		     u64 seq, u32 timeout);
-
 int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 
 #endif /* _AMDXDNA_CTX_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index ebfc472aa9e7..319d2064fafa 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -212,7 +212,8 @@ static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
 	mmu_interval_set_seq(&mapp->notifier, cur_seq);
 	up_write(&xdna->notifier_lock);
 
-	xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
+	if (xdna->dev_info->ops->hmm_invalidate)
+		xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
 
 	if (range->event == MMU_NOTIFY_UNMAP) {
 		down_write(&xdna->notifier_lock);
@@ -295,7 +296,7 @@ static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo,
 	u32 nr_pages;
 	int ret;
 
-	if (!xdna->dev_info->ops->hmm_invalidate)
+	if (!amdxdna_pasid_on(abo->client))
 		return 0;
 
 	mapp = kzalloc_obj(*mapp);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 39ad081ac082..c0d00db25cde 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -224,6 +224,21 @@ static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struc
 	return ret;
 }
 
+static int amdxdna_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *drm_filp = filp->private_data;
+	struct amdxdna_client *client = drm_filp->driver_priv;
+	struct amdxdna_dev *xdna = client->xdna;
+
+	if (likely(vma->vm_pgoff >= DRM_FILE_PAGE_OFFSET_START))
+		return drm_gem_mmap(filp, vma);
+
+	/* Lower offsets go to the device-specific mmap hook, when one exists. */
+	if (xdna->dev_info->ops->mmap)
+		return xdna->dev_info->ops->mmap(client, vma);
+	return -EOPNOTSUPP;
+}
+
 static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
 	/* Context */
 	DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
@@ -235,6 +250,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
 	/* Execution */
 	DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(AMDXDNA_WAIT_CMD, amdxdna_drm_wait_cmd_ioctl, 0),
 	/* AIE hardware */
 	DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, amdxdna_drm_get_array_ioctl, 0),
@@ -281,7 +297,7 @@ static const struct file_operations amdxdna_fops = {
 	.poll		= drm_poll,
 	.read		= drm_read,
 	.llseek		= noop_llseek,
-	.mmap		= drm_gem_mmap,
+	.mmap		= amdxdna_drm_gem_mmap,
 	.show_fdinfo	= drm_show_fdinfo,
 	.fop_flags	= FOP_UNSIGNED_OFFSET,
 };
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index caed11c09e55..471b72299aee 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -56,12 +56,14 @@ struct amdxdna_dev_ops {
 	int (*resume)(struct amdxdna_dev *xdna);
 	int (*suspend)(struct amdxdna_dev *xdna);
 	int (*sriov_configure)(struct amdxdna_dev *xdna, int num_vfs);
+	int (*mmap)(struct amdxdna_client *client, struct vm_area_struct *vma);
 	int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
 	void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
 	int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
 	int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
 	void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
 	int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+	int (*cmd_wait)(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
 	int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
 	int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
 	int (*get_array)(struct amdxdna_client *client, struct amdxdna_drm_get_array *args);
@@ -85,6 +87,7 @@ struct amdxdna_dev_info {
 	int				sram_bar;
 	int				psp_bar;
 	int				smu_bar;
+	int				doorbell_bar;
 	int				device_type;
 	int				first_col;
 	u32				dev_mem_buf_shift;
diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
index 6d5da779232b..d76b2e99c308 100644
--- a/drivers/accel/amdxdna/npu3_regs.c
+++ b/drivers/accel/amdxdna/npu3_regs.c
@@ -14,6 +14,9 @@
 #define NPU3_MBOX_BUFFER_BAR	2
 #define NPU3_MBOX_INFO_OFF	0x0
 
+#define NPU3_DOORBELL_BAR       2
+#define NPU3_DOORBELL_OFF       0x0
+
 /* PCIe BAR Index for NPU3 */
 #define NPU3_REG_BAR_INDEX	0
 #define NPU3_PSP_BAR_INDEX      4
@@ -45,6 +48,7 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
 	.mbox_bar		= NPU3_MBOX_BAR,
 	.mbox_rbuf_bar		= NPU3_MBOX_BUFFER_BAR,
 	.mbox_info_off		= NPU3_MBOX_INFO_OFF,
+	.doorbell_off		= NPU3_DOORBELL_OFF,
 	.psp_regs_off   = {
 		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
 		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
@@ -87,6 +91,7 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
 const struct amdxdna_dev_info dev_npu3_vf_info = {
 	.mbox_bar		= NPU3_MBOX_BAR,
 	.sram_bar		= NPU3_MBOX_BUFFER_BAR,
+	.doorbell_bar		= NPU3_DOORBELL_BAR,
 	.default_vbnv		= "RyzenAI-npu3-vf",
 	.device_type		= AMDXDNA_DEV_TYPE_UMQ,
 	.dev_priv		= &npu3_dev_vf_priv,
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index ad9b33dd7b13..51a507561df6 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -45,7 +45,8 @@ enum amdxdna_drm_ioctl_id {
 	DRM_AMDXDNA_EXEC_CMD,
 	DRM_AMDXDNA_GET_INFO,
 	DRM_AMDXDNA_SET_STATE,
-	DRM_AMDXDNA_GET_ARRAY = 10,
+	DRM_AMDXDNA_WAIT_CMD,
+	DRM_AMDXDNA_GET_ARRAY,
 };
 
 /**
@@ -274,6 +275,21 @@ struct amdxdna_drm_exec_cmd {
 	__u64 seq;
 };
 
+/**
+ * struct amdxdna_drm_wait_cmd - Wait for an execution command to complete.
+ *
+ * @hwctx: Context handle.
+ * @timeout: Timeout in ms; 0 means wait indefinitely.
+ * @seq: Sequence number of the command, as returned by the execute command.
+ *
+ * Wait for the command identified by @seq to complete.
+ */
+struct amdxdna_drm_wait_cmd {
+	__u32 hwctx;
+	__u32 timeout;
+	__u64 seq;
+};
+
 /**
  * struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware
  * @buffer: The user space buffer that will return the AIE status.
@@ -739,6 +755,10 @@ struct amdxdna_drm_set_power_mode {
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \
 		 struct amdxdna_drm_get_array)
 
+#define DRM_IOCTL_AMDXDNA_WAIT_CMD \
+	DRM_IOW(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, \
+		struct amdxdna_drm_wait_cmd)
+
 #if defined(__cplusplus)
 } /* extern c end */
 #endif
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support
  2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
                   ` (3 preceding siblings ...)
  2026-05-05 16:09 ` [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
  2026-05-05 17:14   ` Mario Limonciello
  2026-05-05 16:09 ` [PATCH V1 6/6] accel/amdxdna: Add AIE4 work buffer initialization Lizhi Hou
  5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue, Lizhi Hou

From: David Zhang <yidong.zhang@amd.com>

Add support for querying device metadata on AIE4 via a mailbox message.
Refactor aie2_get_aie_metadata() into a common helper by moving it to
aie.c and renaming it to amdxdna_get_metadata(), allowing both AIE2
and AIE4 to reuse the implementation.

Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie.c           | 45 ++++++++++++++++++++++
 drivers/accel/amdxdna/aie.h           | 27 ++++++++++++++
 drivers/accel/amdxdna/aie2_ctx.c      |  4 +-
 drivers/accel/amdxdna/aie2_message.c  |  2 +-
 drivers/accel/amdxdna/aie2_pci.c      | 54 ++-------------------------
 drivers/accel/amdxdna/aie2_pci.h      | 24 ------------
 drivers/accel/amdxdna/aie4_message.c  | 37 ++++++++++++++++++
 drivers/accel/amdxdna/aie4_msg_priv.h | 34 +++++++++++++++++
 drivers/accel/amdxdna/aie4_pci.c      | 30 +++++++++++++++
 drivers/accel/amdxdna/aie4_pci.h      |  1 +
 10 files changed, 181 insertions(+), 77 deletions(-)

diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c
index 66849ba9026a..a31051cc1ec8 100644
--- a/drivers/accel/amdxdna/aie.c
+++ b/drivers/accel/amdxdna/aie.c
@@ -117,3 +117,48 @@ void amdxdna_vbnv_init(struct amdxdna_dev *xdna)
 
 	amdxdna_update_vbnv(xdna, info->rev_vbnv_tbl, rev);
 }
+
+int amdxdna_get_metadata(struct aie_device *aie,
+			 struct amdxdna_client *client, /* not referenced in this body */
+			 struct amdxdna_drm_get_info *args)
+{
+	struct amdxdna_drm_query_aie_metadata *meta;
+	int ret = 0;
+	u32 buf_sz;
+
+	meta = kzalloc_obj(*meta); /* staging copy, freed before returning */
+	if (!meta)
+		return -ENOMEM;
+
+	meta->col_size = aie->metadata.size; /* cached "size" is exported as col_size */
+	meta->cols = aie->metadata.cols;
+	meta->rows = aie->metadata.rows;
+
+	meta->version.major = aie->metadata.version.major;
+	meta->version.minor = aie->metadata.version.minor;
+
+	meta->core.row_count = aie->metadata.core.row_count;
+	meta->core.row_start = aie->metadata.core.row_start;
+	meta->core.dma_channel_count = aie->metadata.core.dma_channel_count;
+	meta->core.lock_count = aie->metadata.core.lock_count;
+	meta->core.event_reg_count = aie->metadata.core.event_reg_count;
+
+	meta->mem.row_count = aie->metadata.mem.row_count;
+	meta->mem.row_start = aie->metadata.mem.row_start;
+	meta->mem.dma_channel_count = aie->metadata.mem.dma_channel_count;
+	meta->mem.lock_count = aie->metadata.mem.lock_count;
+	meta->mem.event_reg_count = aie->metadata.mem.event_reg_count;
+
+	meta->shim.row_count = aie->metadata.shim.row_count;
+	meta->shim.row_start = aie->metadata.shim.row_start;
+	meta->shim.dma_channel_count = aie->metadata.shim.dma_channel_count;
+	meta->shim.lock_count = aie->metadata.shim.lock_count;
+	meta->shim.event_reg_count = aie->metadata.shim.event_reg_count;
+
+	buf_sz = min(args->buffer_size, sizeof(*meta)); /* never exceed the user's buffer */
+	if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
+		ret = -EFAULT;
+
+	kfree(meta);
+	return ret;
+}
diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
index 7a68b114f235..4bb3719ee0c0 100644
--- a/drivers/accel/amdxdna/aie.h
+++ b/drivers/accel/amdxdna/aie.h
@@ -14,6 +14,29 @@
 struct psp_device;
 struct smu_device;
 
+struct aie_version {
+	u16 major;
+	u16 minor;
+};
+
+struct aie_tile_metadata {
+	u16 row_count;
+	u16 row_start;
+	u16 dma_channel_count;
+	u16 lock_count;
+	u16 event_reg_count;
+};
+
+struct aie_metadata {
+	u32 size;
+	u16 cols;
+	u16 rows;
+	struct aie_version version;
+	struct aie_tile_metadata core;
+	struct aie_tile_metadata mem;
+	struct aie_tile_metadata shim;
+};
+
 struct aie_device {
 	struct amdxdna_dev *xdna;
 	struct mailbox_channel *mgmt_chann;
@@ -26,6 +49,8 @@ struct aie_device {
 
 	struct psp_device *psp_hdl;
 	struct smu_device *smu_hdl;
+
+	struct aie_metadata metadata;
 };
 
 #define DECLARE_AIE_MSG(name, op) \
@@ -94,6 +119,8 @@ void aie_destroy_chann(struct aie_device *aie, struct mailbox_channel **chann);
 int aie_send_mgmt_msg_wait(struct aie_device *aie, struct xdna_mailbox_msg *msg);
 int aie_check_protocol(struct aie_device *aie, u32 fw_major, u32 fw_minor);
 void amdxdna_vbnv_init(struct amdxdna_dev *xdna);
+int amdxdna_get_metadata(struct aie_device *aie, struct amdxdna_client *client,
+			 struct amdxdna_drm_get_info *args);
 
 /* aie_psp.c */
 struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf);
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 139825ac8515..7d6094aefb6f 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -489,12 +489,12 @@ static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
 	}
 
 	ndev = xdna->dev_handle;
-	if (unlikely(!ndev->metadata.core.row_count)) {
+	if (unlikely(!ndev->aie.metadata.core.row_count)) {
 		XDNA_WARN(xdna, "Core tile row count is zero");
 		return -EINVAL;
 	}
 
-	hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
+	hwctx->num_col = hwctx->num_tiles / ndev->aie.metadata.core.row_count;
 	if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
 		XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
 		return -EINVAL;
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 6e98af7b74db..f555ffecea6f 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -375,7 +375,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
 	u8 *buff_addr;
 	int ret;
 
-	buf_sz = ndev->metadata.cols * ndev->metadata.size;
+	buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size;
 	buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
 	if (IS_ERR(buff_addr))
 		return PTR_ERR(buff_addr);
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index f0ddb843eb21..6c8a0f70b73d 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -219,13 +219,13 @@ static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
 		return ret;
 	}
 
-	ret = aie2_query_aie_metadata(ndev, &ndev->metadata);
+	ret = aie2_query_aie_metadata(ndev, &ndev->aie.metadata);
 	if (ret) {
 		XDNA_ERR(ndev->aie.xdna, "Query AIE metadata failed");
 		return ret;
 	}
 
-	ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
+	ndev->total_col = min(aie2_max_col, ndev->aie.metadata.cols);
 
 	return 0;
 }
@@ -658,53 +658,6 @@ static int aie2_get_aie_status(struct amdxdna_client *client,
 	return 0;
 }
 
-static int aie2_get_aie_metadata(struct amdxdna_client *client,
-				 struct amdxdna_drm_get_info *args)
-{
-	struct amdxdna_drm_query_aie_metadata *meta;
-	struct amdxdna_dev *xdna = client->xdna;
-	struct amdxdna_dev_hdl *ndev;
-	int ret = 0;
-	u32 buf_sz;
-
-	ndev = xdna->dev_handle;
-	meta = kzalloc_obj(*meta);
-	if (!meta)
-		return -ENOMEM;
-
-	meta->col_size = ndev->metadata.size;
-	meta->cols = ndev->metadata.cols;
-	meta->rows = ndev->metadata.rows;
-
-	meta->version.major = ndev->metadata.version.major;
-	meta->version.minor = ndev->metadata.version.minor;
-
-	meta->core.row_count = ndev->metadata.core.row_count;
-	meta->core.row_start = ndev->metadata.core.row_start;
-	meta->core.dma_channel_count = ndev->metadata.core.dma_channel_count;
-	meta->core.lock_count = ndev->metadata.core.lock_count;
-	meta->core.event_reg_count = ndev->metadata.core.event_reg_count;
-
-	meta->mem.row_count = ndev->metadata.mem.row_count;
-	meta->mem.row_start = ndev->metadata.mem.row_start;
-	meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count;
-	meta->mem.lock_count = ndev->metadata.mem.lock_count;
-	meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count;
-
-	meta->shim.row_count = ndev->metadata.shim.row_count;
-	meta->shim.row_start = ndev->metadata.shim.row_start;
-	meta->shim.dma_channel_count = ndev->metadata.shim.dma_channel_count;
-	meta->shim.lock_count = ndev->metadata.shim.lock_count;
-	meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count;
-
-	buf_sz = min(args->buffer_size, sizeof(*meta));
-	if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
-		ret = -EFAULT;
-
-	kfree(meta);
-	return ret;
-}
-
 static int aie2_get_aie_version(struct amdxdna_client *client,
 				struct amdxdna_drm_get_info *args)
 {
@@ -1039,6 +992,7 @@ static int aie2_get_preempt_state(struct amdxdna_client *client,
 static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
 {
 	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
 	int ret, idx;
 
 	if (!drm_dev_enter(&xdna->ddev, &idx))
@@ -1053,7 +1007,7 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
 		ret = aie2_get_aie_status(client, args);
 		break;
 	case DRM_AMDXDNA_QUERY_AIE_METADATA:
-		ret = aie2_get_aie_metadata(client, args);
+		ret = amdxdna_get_metadata(&ndev->aie, client, args);
 		break;
 	case DRM_AMDXDNA_QUERY_AIE_VERSION:
 		ret = aie2_get_aie_version(client, args);
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index f12073175676..c884fed610f9 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -77,29 +77,6 @@ struct amdxdna_fw_ver;
 struct amdxdna_hwctx;
 struct amdxdna_sched_job;
 
-struct aie_version {
-	u16 major;
-	u16 minor;
-};
-
-struct aie_tile_metadata {
-	u16 row_count;
-	u16 row_start;
-	u16 dma_channel_count;
-	u16 lock_count;
-	u16 event_reg_count;
-};
-
-struct aie_metadata {
-	u32 size;
-	u16 cols;
-	u16 rows;
-	struct aie_version version;
-	struct aie_tile_metadata core;
-	struct aie_tile_metadata mem;
-	struct aie_tile_metadata shim;
-};
-
 enum rt_config_category {
 	AIE2_RT_CFG_INIT,
 	AIE2_RT_CFG_CLK_GATING,
@@ -178,7 +155,6 @@ struct amdxdna_dev_hdl {
 
 	u32				total_col;
 	struct aie_version		version;
-	struct aie_metadata		metadata;
 	struct aie2_exec_msg_ops	*exec_msg_ops;
 
 	/* power management and clock*/
diff --git a/drivers/accel/amdxdna/aie4_message.c b/drivers/accel/amdxdna/aie4_message.c
index d621dd32ac40..ac89a9a842b2 100644
--- a/drivers/accel/amdxdna/aie4_message.c
+++ b/drivers/accel/amdxdna/aie4_message.c
@@ -25,3 +25,40 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev)
 
 	return ret;
 }
+
+int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata)
+{
+	DECLARE_AIE_MSG(aie4_msg_aie4_tile_info, AIE4_MSG_OP_AIE_TILE_INFO);
+	int ret;
+
+	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg); /* msg, resp: from DECLARE_AIE_MSG */
+	if (ret)
+		return ret; /* @metadata is left untouched on failure */
+
+	metadata->size = resp.info.size;
+	metadata->cols = resp.info.cols;
+	metadata->rows = resp.info.rows;
+
+	metadata->version.major = resp.info.major;
+	metadata->version.minor = resp.info.minor;
+
+	metadata->core.row_count = resp.info.core_rows;
+	metadata->core.row_start = resp.info.core_row_start;
+	metadata->core.dma_channel_count = resp.info.core_dma_channels;
+	metadata->core.lock_count = resp.info.core_locks;
+	metadata->core.event_reg_count = resp.info.core_events;
+
+	metadata->mem.row_count = resp.info.mem_rows;
+	metadata->mem.row_start = resp.info.mem_row_start;
+	metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
+	metadata->mem.lock_count = resp.info.mem_locks;
+	metadata->mem.event_reg_count = resp.info.mem_events;
+
+	metadata->shim.row_count = resp.info.shim_rows;
+	metadata->shim.row_start = resp.info.shim_row_start;
+	metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
+	metadata->shim.lock_count = resp.info.shim_locks;
+	metadata->shim.event_reg_count = resp.info.shim_events;
+
+	return 0;
+}
diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
index 7faa01ca3436..69e220e40900 100644
--- a/drivers/accel/amdxdna/aie4_msg_priv.h
+++ b/drivers/accel/amdxdna/aie4_msg_priv.h
@@ -18,6 +18,7 @@ enum aie4_msg_opcode {
 	AIE4_MSG_OP_DESTROY_PARTITION                = 0x30002,
 	AIE4_MSG_OP_CREATE_HW_CONTEXT                = 0x30003,
 	AIE4_MSG_OP_DESTROY_HW_CONTEXT               = 0x30004,
+	AIE4_MSG_OP_AIE_TILE_INFO                    = 0x30006,
 };
 
 enum aie4_msg_status {
@@ -96,4 +97,37 @@ struct aie4_msg_destroy_hw_context_resp {
 	enum aie4_msg_status status;
 } __packed;
 
+struct aie4_tile_info {
+	__u32 size;
+	__u16 major;
+	__u16 minor;
+	__u16 cols;
+	__u16 rows;
+	__u16 core_rows;
+	__u16 mem_rows;
+	__u16 shim_rows;
+	__u16 core_row_start;
+	__u16 mem_row_start;
+	__u16 shim_row_start;
+	__u16 core_dma_channels;
+	__u16 mem_dma_channels;
+	__u16 shim_dma_channels;
+	__u16 core_locks;
+	__u16 mem_locks;
+	__u16 shim_locks;
+	__u16 core_events;
+	__u16 mem_events;
+	__u16 shim_events;
+	__u16 resvd;
+} __packed;
+
+struct aie4_msg_aie4_tile_info_req {
+	__u32 resvd;
+} __packed;
+
+struct aie4_msg_aie4_tile_info_resp {
+	enum aie4_msg_status status;
+	struct aie4_tile_info info;
+} __packed;
+
 #endif /* _AIE4_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 9ff34ce57fcb..8b5eff0e45c1 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -269,6 +269,11 @@ static void aie4_partition_fini(struct amdxdna_dev_hdl *ndev)
 		XDNA_ERR(xdna, "partition fini failed: %d", ret);
 }
 
+static int aie4_query(struct amdxdna_dev_hdl *ndev)
+{
+	return aie4_query_aie_metadata(ndev, &ndev->aie.metadata);
+}
+
 static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
 {
 	int ret;
@@ -308,6 +313,10 @@ static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
 	if (ret)
 		return ret;
 
+	ret = aie4_query(ndev);
+	if (ret)
+		goto mailbox_fini;
+
 	ret = aie4_partition_init(ndev);
 	if (ret)
 		goto mailbox_fini;
@@ -535,6 +544,26 @@ static int aie4_doorbell_mmap(struct amdxdna_client *client, struct vm_area_stru
 	return ret;
 }
 
+static int aie4_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+	int ret;
+
+	switch (args->param) {
+	case DRM_AMDXDNA_QUERY_AIE_METADATA:
+		ret = amdxdna_get_metadata(&ndev->aie, client, args);
+		break;
+	default:
+		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+		ret = -EOPNOTSUPP;
+	}
+
+	XDNA_DBG(xdna, "Got param %d", args->param);
+
+	return ret;
+}
+
 static int aie4_pf_init(struct amdxdna_dev *xdna)
 {
 	int ret;
@@ -581,4 +610,5 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
 	.hwctx_fini		= aie4_hwctx_fini,
 	.mmap			= aie4_doorbell_mmap,
 	.cmd_wait		= aie4_cmd_wait,
+	.get_aie_info		= aie4_get_info,
 };
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index b69489acd53d..1886cffc62db 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -56,6 +56,7 @@ struct amdxdna_dev_hdl {
 };
 
 /* aie4_message.c */
+int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
 int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
 
 /* aie4_ctx.c */
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH V1 6/6] accel/amdxdna: Add AIE4 work buffer initialization
  2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
                   ` (4 preceding siblings ...)
  2026-05-05 16:09 ` [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
  2026-05-05 20:36   ` Mario Limonciello
  5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	karol.wachowski
  Cc: Nishad Saraf, linux-kernel, max.zhen, sonal.santan, Lizhi Hou

From: Nishad Saraf <nishads@amd.com>

NPU firmware requires a host-allocated work buffer for hardware contexts.
Allocate a 4 MB host buffer and attach it to the device during device init.

Refactor aie2_alloc_msg_buffer() and aie2_free_msg_buffer() into common
helpers by moving them to aie.c and renaming them to
amdxdna_alloc_msg_buffer() and amdxdna_free_msg_buffer(), allowing both
AIE2 and AIE4 to reuse the implementation.

Signed-off-by: Nishad Saraf <nishads@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie.c             | 34 +++++++++++++++
 drivers/accel/amdxdna/aie.h             |  4 ++
 drivers/accel/amdxdna/aie2_error.c      |  7 ++--
 drivers/accel/amdxdna/aie2_message.c    | 49 +++-------------------
 drivers/accel/amdxdna/aie2_pci.h        |  4 --
 drivers/accel/amdxdna/aie4_message.c    | 18 ++++++++
 drivers/accel/amdxdna/aie4_msg_priv.h   | 14 +++++++
 drivers/accel/amdxdna/aie4_pci.c        | 55 ++++++++++++++++++++++++-
 drivers/accel/amdxdna/aie4_pci.h        |  5 +++
 drivers/accel/amdxdna/amdxdna_pci_drv.c |  3 +-
 10 files changed, 141 insertions(+), 52 deletions(-)

diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c
index a31051cc1ec8..4db2fd80a032 100644
--- a/drivers/accel/amdxdna/aie.c
+++ b/drivers/accel/amdxdna/aie.c
@@ -162,3 +162,37 @@ int amdxdna_get_metadata(struct aie_device *aie,
 	kfree(meta);
 	return ret;
 }
+
+void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size,
+			       dma_addr_t *dma_addr)
+{
+	void *vaddr;
+	int order;
+
+	*size = max_t(u32, *size, SZ_8K);
+	order = get_order(*size);
+	if (order > MAX_PAGE_ORDER)
+		return ERR_PTR(-EINVAL);
+	*size = PAGE_SIZE << order;
+
+	if (amdxdna_iova_on(xdna))
+		return amdxdna_iommu_alloc(xdna, *size, dma_addr);
+
+	vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr,
+				      DMA_FROM_DEVICE, GFP_KERNEL);
+	if (!vaddr)
+		return ERR_PTR(-ENOMEM);
+
+	return vaddr;
+}
+
+void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size,
+			     void *cpu_addr, dma_addr_t dma_addr)
+{
+	if (amdxdna_iova_on(xdna)) {
+		amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr);
+		return;
+	}
+
+	dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE);
+}
diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
index 4bb3719ee0c0..70618204c0ab 100644
--- a/drivers/accel/amdxdna/aie.h
+++ b/drivers/accel/amdxdna/aie.h
@@ -121,6 +121,10 @@ int aie_check_protocol(struct aie_device *aie, u32 fw_major, u32 fw_minor);
 void amdxdna_vbnv_init(struct amdxdna_dev *xdna);
 int amdxdna_get_metadata(struct aie_device *aie, struct amdxdna_client *client,
 			 struct amdxdna_drm_get_info *args);
+void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size,
+			       dma_addr_t *dma_addr);
+void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size,
+			     void *cpu_addr, dma_addr_t dma_addr);
 
 /* aie_psp.c */
 struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf);
diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
index 70007b4363cd..babdac0157ab 100644
--- a/drivers/accel/amdxdna/aie2_error.c
+++ b/drivers/accel/amdxdna/aie2_error.c
@@ -11,6 +11,7 @@
 #include <linux/kthread.h>
 #include <linux/kernel.h>
 
+#include "aie.h"
 #include "aie2_msg_priv.h"
 #include "aie2_pci.h"
 #include "amdxdna_error.h"
@@ -338,7 +339,7 @@ void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
 	destroy_workqueue(events->wq);
 	mutex_lock(&xdna->dev_lock);
 
-	aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr);
+	amdxdna_free_msg_buffer(xdna, events->size, events->buf, events->addr);
 	kfree(events);
 }
 
@@ -354,7 +355,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
 	if (!events)
 		return -ENOMEM;
 
-	events->buf = aie2_alloc_msg_buffer(ndev, &total_size, &events->addr);
+	events->buf = amdxdna_alloc_msg_buffer(xdna, &total_size, &events->addr);
 	if (IS_ERR(events->buf)) {
 		ret = PTR_ERR(events->buf);
 		goto free_events;
@@ -394,7 +395,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
 free_wq:
 	destroy_workqueue(events->wq);
 free_buf:
-	aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr);
+	amdxdna_free_msg_buffer(xdna, events->size, events->buf, events->addr);
 free_events:
 	kfree(events);
 	return ret;
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index f555ffecea6f..0417c6a4c80a 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -27,43 +27,6 @@
 
 #define EXEC_MSG_OPS(xdna)	((xdna)->dev_handle->exec_msg_ops)
 
-void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
-			    dma_addr_t *dma_addr)
-{
-	struct amdxdna_dev *xdna = ndev->aie.xdna;
-	void *vaddr;
-	int order;
-
-	*size = max(*size, SZ_8K);
-	order = get_order(*size);
-	if (order > MAX_PAGE_ORDER)
-		return ERR_PTR(-EINVAL);
-	*size = PAGE_SIZE << order;
-
-	if (amdxdna_iova_on(xdna))
-		return amdxdna_iommu_alloc(xdna, *size, dma_addr);
-
-	vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr,
-				      DMA_FROM_DEVICE, GFP_KERNEL);
-	if (!vaddr)
-		return ERR_PTR(-ENOMEM);
-
-	return vaddr;
-}
-
-void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
-			  void *cpu_addr, dma_addr_t dma_addr)
-{
-	struct amdxdna_dev *xdna = ndev->aie.xdna;
-
-	if (amdxdna_iova_on(xdna)) {
-		amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr);
-		return;
-	}
-
-	dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE);
-}
-
 int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
 {
 	DECLARE_AIE_MSG(suspend, MSG_OP_SUSPEND);
@@ -376,7 +339,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
 	int ret;
 
 	buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size;
-	buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
+	buff_addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr);
 	if (IS_ERR(buff_addr))
 		return PTR_ERR(buff_addr);
 
@@ -415,7 +378,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
 	*cols_filled = aie_bitmap;
 
 fail:
-	aie2_free_msg_buffer(ndev, buf_sz, buff_addr, dma_addr);
+	amdxdna_free_msg_buffer(xdna, buf_sz, buff_addr, dma_addr);
 	return ret;
 }
 
@@ -434,7 +397,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
 		return -EINVAL;
 
 	buf_sz = min(size, SZ_4M);
-	addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
+	addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr);
 	if (IS_ERR(addr))
 		return PTR_ERR(addr);
 
@@ -466,7 +429,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
 	header->minor = resp.minor;
 
 free_buf:
-	aie2_free_msg_buffer(ndev, buf_sz, addr, dma_addr);
+	amdxdna_free_msg_buffer(xdna, buf_sz, addr, dma_addr);
 	return ret;
 }
 
@@ -1176,7 +1139,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
 	}
 
 	buf_size = sizeof(*report);
-	buf = aie2_alloc_msg_buffer(ndev, &buf_size, &dma_addr);
+	buf = amdxdna_alloc_msg_buffer(xdna, &buf_size, &dma_addr);
 	if (IS_ERR(buf)) {
 		XDNA_ERR(xdna, "Failed to allocate buffer for app health");
 		return PTR_ERR(buf);
@@ -1197,7 +1160,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
 	memcpy(report, buf, sizeof(*report));
 
 free_buf:
-	aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
+	amdxdna_free_msg_buffer(xdna, buf_size, buf, dma_addr);
 	return ret;
 }
 
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index c884fed610f9..33b6c84e8b6e 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -290,10 +290,6 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 			 int (*notify_cb)(void *, void __iomem *, size_t));
 int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us);
-void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
-			    dma_addr_t *dma_addr);
-void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
-			  void *cpu_addr, dma_addr_t dma_addr);
 
 /* aie2_hwctx.c */
 int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
diff --git a/drivers/accel/amdxdna/aie4_message.c b/drivers/accel/amdxdna/aie4_message.c
index ac89a9a842b2..d85df04c5f6b 100644
--- a/drivers/accel/amdxdna/aie4_message.c
+++ b/drivers/accel/amdxdna/aie4_message.c
@@ -62,3 +62,21 @@ int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *m
 
 	return 0;
 }
+
+int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev)
+{
+	DECLARE_AIE_MSG(aie4_msg_attach_work_buffer, AIE4_MSG_OP_ATTACH_WORK_BUFFER);
+	struct amdxdna_dev *xdna = ndev->aie.xdna;
+	int ret;
+
+	req.buff_addr = ndev->work_buf_addr;
+	req.buff_size = AIE4_WORK_BUFFER_MIN_SIZE;
+
+	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+	if (ret)
+		XDNA_ERR(xdna, "Failed to attach work buffer, ret %d", ret);
+	else
+		XDNA_DBG(xdna, "Attached work buffer");
+
+	return ret;
+}
diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
index 69e220e40900..af0866045b91 100644
--- a/drivers/accel/amdxdna/aie4_msg_priv.h
+++ b/drivers/accel/amdxdna/aie4_msg_priv.h
@@ -6,10 +6,12 @@
 #ifndef _AIE4_MSG_PRIV_H_
 #define _AIE4_MSG_PRIV_H_
 
+#include <linux/sizes.h>
 #include <linux/types.h>
 
 enum aie4_msg_opcode {
 	AIE4_MSG_OP_SUSPEND                          = 0x10003,
+	AIE4_MSG_OP_ATTACH_WORK_BUFFER               = 0x1000D,
 
 	AIE4_MSG_OP_CREATE_VFS                       = 0x20001,
 	AIE4_MSG_OP_DESTROY_VFS                      = 0x20002,
@@ -130,4 +132,16 @@ struct aie4_msg_aie4_tile_info_resp {
 	struct aie4_tile_info info;
 } __packed;
 
+#define AIE4_WORK_BUFFER_MIN_SIZE      SZ_4M
+
+struct aie4_msg_attach_work_buffer_req {
+	__u64 buff_addr;
+	__u32 reserved;
+	__u32 buff_size;
+} __packed;
+
+struct aie4_msg_attach_work_buffer_resp {
+	enum aie4_msg_status status;
+} __packed;
+
 #endif /* _AIE4_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 8b5eff0e45c1..a58a83af42a4 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -286,8 +286,14 @@ static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
 	if (ret)
 		goto stop_fw;
 
+	ret = aie4_attach_work_buffer(ndev);
+	if (ret)
+		goto mbox_fini;
+
 	return 0;
 
+mbox_fini:
+	aie4_mailbox_fini(ndev);
 stop_fw:
 	aie4_fw_stop(ndev);
 
@@ -564,6 +570,40 @@ static int aie4_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
 	return ret;
 }
 
+static int aie4_alloc_work_buffer(struct amdxdna_dev_hdl *ndev)
+{
+	struct amdxdna_dev *xdna = ndev->aie.xdna;
+	u32 buf_size = AIE4_WORK_BUFFER_MIN_SIZE;
+
+	ndev->work_buf = amdxdna_alloc_msg_buffer(xdna, &buf_size,
+						  &ndev->work_buf_addr);
+	if (IS_ERR(ndev->work_buf)) {
+		int ret = PTR_ERR(ndev->work_buf);
+
+		XDNA_ERR(xdna, "Failed to alloc work buffer, size 0x%x",
+			 AIE4_WORK_BUFFER_MIN_SIZE);
+		ndev->work_buf = NULL;
+		return ret;
+	}
+
+	ndev->work_buf_size = buf_size;
+	XDNA_DBG(xdna, "Work buffer allocated: size 0x%x", buf_size);
+
+	return 0;
+}
+
+static void aie4_free_work_buffer(struct amdxdna_dev_hdl *ndev)
+{
+	struct amdxdna_dev *xdna = ndev->aie.xdna;
+
+	if (!ndev->work_buf)
+		return;
+
+	amdxdna_free_msg_buffer(xdna, ndev->work_buf_size, ndev->work_buf,
+				ndev->work_buf_addr);
+	ndev->work_buf = NULL;
+}
+
 static int aie4_pf_init(struct amdxdna_dev *xdna)
 {
 	int ret;
@@ -572,7 +612,19 @@ static int aie4_pf_init(struct amdxdna_dev *xdna)
 	if (ret)
 		return ret;
 
-	return aie4_pf_hw_start(xdna->dev_handle);
+	ret = aie4_alloc_work_buffer(xdna->dev_handle);
+	if (ret)
+		return ret;
+
+	ret = aie4_pf_hw_start(xdna->dev_handle);
+	if (ret)
+		goto free_work_buf;
+
+	return 0;
+
+free_work_buf:
+	aie4_free_work_buffer(xdna->dev_handle);
+	return ret;
 }
 
 static int aie4_vf_init(struct amdxdna_dev *xdna)
@@ -590,6 +642,7 @@ static void aie4_pf_fini(struct amdxdna_dev *xdna)
 {
 	aie4_sriov_stop(xdna->dev_handle);
 	aie4_pf_hw_stop(xdna->dev_handle);
+	aie4_free_work_buffer(xdna->dev_handle);
 }
 
 static void aie4_vf_fini(struct amdxdna_dev *xdna)
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index 1886cffc62db..390864876ca5 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -53,11 +53,16 @@ struct amdxdna_dev_hdl {
 
 	struct xarray                   cert_comp_xa; /* device level indexed by msix id */
 	struct mutex                    cert_comp_lock; /* protects cert_comp operations*/
+
+	void				*work_buf;
+	dma_addr_t			work_buf_addr;
+	u32				work_buf_size;
 };
 
 /* aie4_message.c */
 int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
 int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
+int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev);
 
 /* aie4_ctx.c */
 int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index c0d00db25cde..a6e9be7960c2 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -40,9 +40,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
  * 0.7: Support getting power and utilization data
  * 0.8: Support BO usage query
  * 0.9: Add new device type AMDXDNA_DEV_TYPE_PF
+ * 0.10: Support AIE4 UMQ
  */
 #define AMDXDNA_DRIVER_MAJOR		0
-#define AMDXDNA_DRIVER_MINOR		9
+#define AMDXDNA_DRIVER_MINOR		10
 
 /*
  * Bind the driver base on (vendor_id, device_id) pair and later use the
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support
  2026-05-05 16:09 ` [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support Lizhi Hou
@ 2026-05-05 17:14   ` Mario Limonciello
  2026-05-05 18:03     ` Lizhi Hou
  0 siblings, 1 reply; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 17:14 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue



On 5/5/26 11:09, Lizhi Hou wrote:
> From: David Zhang <yidong.zhang@amd.com>
> 
> Add support for querying device metadata on AIE4 via a mailbox message.
> Refactor aie2_get_aie_metadata() into a common helper by moving it to
> aie.c and renaming it to amdxdna_get_metadata(), allowing both AIE2
> and AIE4 to reuse the implementation.
> 
> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: David Zhang <yidong.zhang@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
>   drivers/accel/amdxdna/aie.c           | 45 ++++++++++++++++++++++
>   drivers/accel/amdxdna/aie.h           | 27 ++++++++++++++
>   drivers/accel/amdxdna/aie2_ctx.c      |  4 +-
>   drivers/accel/amdxdna/aie2_message.c  |  2 +-
>   drivers/accel/amdxdna/aie2_pci.c      | 54 ++-------------------------
>   drivers/accel/amdxdna/aie2_pci.h      | 24 ------------
>   drivers/accel/amdxdna/aie4_message.c  | 37 ++++++++++++++++++
>   drivers/accel/amdxdna/aie4_msg_priv.h | 34 +++++++++++++++++
>   drivers/accel/amdxdna/aie4_pci.c      | 30 +++++++++++++++
>   drivers/accel/amdxdna/aie4_pci.h      |  1 +
>   10 files changed, 181 insertions(+), 77 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c
> index 66849ba9026a..a31051cc1ec8 100644
> --- a/drivers/accel/amdxdna/aie.c
> +++ b/drivers/accel/amdxdna/aie.c
> @@ -117,3 +117,48 @@ void amdxdna_vbnv_init(struct amdxdna_dev *xdna)
>   
>   	amdxdna_update_vbnv(xdna, info->rev_vbnv_tbl, rev);
>   }
> +
> +int amdxdna_get_metadata(struct aie_device *aie,
> +			 struct amdxdna_client *client,
> +			 struct amdxdna_drm_get_info *args)
> +{
> +	struct amdxdna_drm_query_aie_metadata *meta;
> +	int ret = 0;
> +	u32 buf_sz;
> +
> +	meta = kzalloc_obj(*meta);
> +	if (!meta)
> +		return -ENOMEM;
> +
> +	meta->col_size = aie->metadata.size;
> +	meta->cols = aie->metadata.cols;
> +	meta->rows = aie->metadata.rows;
> +
> +	meta->version.major = aie->metadata.version.major;
> +	meta->version.minor = aie->metadata.version.minor;
> +
> +	meta->core.row_count = aie->metadata.core.row_count;
> +	meta->core.row_start = aie->metadata.core.row_start;
> +	meta->core.dma_channel_count = aie->metadata.core.dma_channel_count;
> +	meta->core.lock_count = aie->metadata.core.lock_count;
> +	meta->core.event_reg_count = aie->metadata.core.event_reg_count;
> +
> +	meta->mem.row_count = aie->metadata.mem.row_count;
> +	meta->mem.row_start = aie->metadata.mem.row_start;
> +	meta->mem.dma_channel_count = aie->metadata.mem.dma_channel_count;
> +	meta->mem.lock_count = aie->metadata.mem.lock_count;
> +	meta->mem.event_reg_count = aie->metadata.mem.event_reg_count;
> +
> +	meta->shim.row_count = aie->metadata.shim.row_count;
> +	meta->shim.row_start = aie->metadata.shim.row_start;
> +	meta->shim.dma_channel_count = aie->metadata.shim.dma_channel_count;
> +	meta->shim.lock_count = aie->metadata.shim.lock_count;
> +	meta->shim.event_reg_count = aie->metadata.shim.event_reg_count;

Looking at the code, the structures

struct amdxdna_drm_query_aie_metadata
and
struct aie_metadata

look identical.  Rather than copying every member, can you just copy 
everything from aie->metadata to args->buffer directly?

That could let you save the kzalloc/kfree call.


> +
> +	buf_sz = min(args->buffer_size, sizeof(*meta));
> +	if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
> +		ret = -EFAULT;
> +
> +	kfree(meta);
> +	return ret;
> +}
> diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
> index 7a68b114f235..4bb3719ee0c0 100644
> --- a/drivers/accel/amdxdna/aie.h
> +++ b/drivers/accel/amdxdna/aie.h
> @@ -14,6 +14,29 @@
>   struct psp_device;
>   struct smu_device;
>   
> +struct aie_version {
> +	u16 major;
> +	u16 minor;
> +};
> +
> +struct aie_tile_metadata {
> +	u16 row_count;
> +	u16 row_start;
> +	u16 dma_channel_count;
> +	u16 lock_count;
> +	u16 event_reg_count;
> +};
> +
> +struct aie_metadata {
> +	u32 size;
> +	u16 cols;
> +	u16 rows;
> +	struct aie_version version;
> +	struct aie_tile_metadata core;
> +	struct aie_tile_metadata mem;
> +	struct aie_tile_metadata shim;
> +};
> +
>   struct aie_device {
>   	struct amdxdna_dev *xdna;
>   	struct mailbox_channel *mgmt_chann;
> @@ -26,6 +49,8 @@ struct aie_device {
>   
>   	struct psp_device *psp_hdl;
>   	struct smu_device *smu_hdl;
> +
> +	struct aie_metadata metadata;
>   };
>   
>   #define DECLARE_AIE_MSG(name, op) \
> @@ -94,6 +119,8 @@ void aie_destroy_chann(struct aie_device *aie, struct mailbox_channel **chann);
>   int aie_send_mgmt_msg_wait(struct aie_device *aie, struct xdna_mailbox_msg *msg);
>   int aie_check_protocol(struct aie_device *aie, u32 fw_major, u32 fw_minor);
>   void amdxdna_vbnv_init(struct amdxdna_dev *xdna);
> +int amdxdna_get_metadata(struct aie_device *aie, struct amdxdna_client *client,
> +			 struct amdxdna_drm_get_info *args);
>   
>   /* aie_psp.c */
>   struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf);
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> index 139825ac8515..7d6094aefb6f 100644
> --- a/drivers/accel/amdxdna/aie2_ctx.c
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -489,12 +489,12 @@ static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
>   	}
>   
>   	ndev = xdna->dev_handle;
> -	if (unlikely(!ndev->metadata.core.row_count)) {
> +	if (unlikely(!ndev->aie.metadata.core.row_count)) {
>   		XDNA_WARN(xdna, "Core tile row count is zero");
>   		return -EINVAL;
>   	}
>   
> -	hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
> +	hwctx->num_col = hwctx->num_tiles / ndev->aie.metadata.core.row_count;
>   	if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
>   		XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
>   		return -EINVAL;
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 6e98af7b74db..f555ffecea6f 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -375,7 +375,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
>   	u8 *buff_addr;
>   	int ret;
>   
> -	buf_sz = ndev->metadata.cols * ndev->metadata.size;
> +	buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size;
>   	buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
>   	if (IS_ERR(buff_addr))
>   		return PTR_ERR(buff_addr);
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index f0ddb843eb21..6c8a0f70b73d 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -219,13 +219,13 @@ static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
>   		return ret;
>   	}
>   
> -	ret = aie2_query_aie_metadata(ndev, &ndev->metadata);
> +	ret = aie2_query_aie_metadata(ndev, &ndev->aie.metadata);
>   	if (ret) {
>   		XDNA_ERR(ndev->aie.xdna, "Query AIE metadata failed");
>   		return ret;
>   	}
>   
> -	ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
> +	ndev->total_col = min(aie2_max_col, ndev->aie.metadata.cols);
>   
>   	return 0;
>   }
> @@ -658,53 +658,6 @@ static int aie2_get_aie_status(struct amdxdna_client *client,
>   	return 0;
>   }
>   
> -static int aie2_get_aie_metadata(struct amdxdna_client *client,
> -				 struct amdxdna_drm_get_info *args)
> -{
> -	struct amdxdna_drm_query_aie_metadata *meta;
> -	struct amdxdna_dev *xdna = client->xdna;
> -	struct amdxdna_dev_hdl *ndev;
> -	int ret = 0;
> -	u32 buf_sz;
> -
> -	ndev = xdna->dev_handle;
> -	meta = kzalloc_obj(*meta);
> -	if (!meta)
> -		return -ENOMEM;
> -
> -	meta->col_size = ndev->metadata.size;
> -	meta->cols = ndev->metadata.cols;
> -	meta->rows = ndev->metadata.rows;
> -
> -	meta->version.major = ndev->metadata.version.major;
> -	meta->version.minor = ndev->metadata.version.minor;
> -
> -	meta->core.row_count = ndev->metadata.core.row_count;
> -	meta->core.row_start = ndev->metadata.core.row_start;
> -	meta->core.dma_channel_count = ndev->metadata.core.dma_channel_count;
> -	meta->core.lock_count = ndev->metadata.core.lock_count;
> -	meta->core.event_reg_count = ndev->metadata.core.event_reg_count;
> -
> -	meta->mem.row_count = ndev->metadata.mem.row_count;
> -	meta->mem.row_start = ndev->metadata.mem.row_start;
> -	meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count;
> -	meta->mem.lock_count = ndev->metadata.mem.lock_count;
> -	meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count;
> -
> -	meta->shim.row_count = ndev->metadata.shim.row_count;
> -	meta->shim.row_start = ndev->metadata.shim.row_start;
> -	meta->shim.dma_channel_count = ndev->metadata.shim.dma_channel_count;
> -	meta->shim.lock_count = ndev->metadata.shim.lock_count;
> -	meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count;
> -
> -	buf_sz = min(args->buffer_size, sizeof(*meta));
> -	if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
> -		ret = -EFAULT;
> -
> -	kfree(meta);
> -	return ret;
> -}
> -
>   static int aie2_get_aie_version(struct amdxdna_client *client,
>   				struct amdxdna_drm_get_info *args)
>   {
> @@ -1039,6 +992,7 @@ static int aie2_get_preempt_state(struct amdxdna_client *client,
>   static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
>   {
>   	struct amdxdna_dev *xdna = client->xdna;
> +	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>   	int ret, idx;
>   
>   	if (!drm_dev_enter(&xdna->ddev, &idx))
> @@ -1053,7 +1007,7 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
>   		ret = aie2_get_aie_status(client, args);
>   		break;
>   	case DRM_AMDXDNA_QUERY_AIE_METADATA:
> -		ret = aie2_get_aie_metadata(client, args);
> +		ret = amdxdna_get_metadata(&ndev->aie, client, args);
>   		break;
>   	case DRM_AMDXDNA_QUERY_AIE_VERSION:
>   		ret = aie2_get_aie_version(client, args);
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index f12073175676..c884fed610f9 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -77,29 +77,6 @@ struct amdxdna_fw_ver;
>   struct amdxdna_hwctx;
>   struct amdxdna_sched_job;
>   
> -struct aie_version {
> -	u16 major;
> -	u16 minor;
> -};
> -
> -struct aie_tile_metadata {
> -	u16 row_count;
> -	u16 row_start;
> -	u16 dma_channel_count;
> -	u16 lock_count;
> -	u16 event_reg_count;
> -};
> -
> -struct aie_metadata {
> -	u32 size;
> -	u16 cols;
> -	u16 rows;
> -	struct aie_version version;
> -	struct aie_tile_metadata core;
> -	struct aie_tile_metadata mem;
> -	struct aie_tile_metadata shim;
> -};
> -
>   enum rt_config_category {
>   	AIE2_RT_CFG_INIT,
>   	AIE2_RT_CFG_CLK_GATING,
> @@ -178,7 +155,6 @@ struct amdxdna_dev_hdl {
>   
>   	u32				total_col;
>   	struct aie_version		version;
> -	struct aie_metadata		metadata;
>   	struct aie2_exec_msg_ops	*exec_msg_ops;
>   
>   	/* power management and clock*/
> diff --git a/drivers/accel/amdxdna/aie4_message.c b/drivers/accel/amdxdna/aie4_message.c
> index d621dd32ac40..ac89a9a842b2 100644
> --- a/drivers/accel/amdxdna/aie4_message.c
> +++ b/drivers/accel/amdxdna/aie4_message.c
> @@ -25,3 +25,40 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev)
>   
>   	return ret;
>   }
> +
> +int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata)
> +{
> +	DECLARE_AIE_MSG(aie4_msg_aie4_tile_info, AIE4_MSG_OP_AIE_TILE_INFO);
> +	int ret;
> +
> +	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> +	if (ret)
> +		return ret;
> +
> +	metadata->size = resp.info.size;
> +	metadata->cols = resp.info.cols;
> +	metadata->rows = resp.info.rows;
> +
> +	metadata->version.major = resp.info.major;
> +	metadata->version.minor = resp.info.minor;
> +
> +	metadata->core.row_count = resp.info.core_rows;
> +	metadata->core.row_start = resp.info.core_row_start;
> +	metadata->core.dma_channel_count = resp.info.core_dma_channels;
> +	metadata->core.lock_count = resp.info.core_locks;
> +	metadata->core.event_reg_count = resp.info.core_events;
> +
> +	metadata->mem.row_count = resp.info.mem_rows;
> +	metadata->mem.row_start = resp.info.mem_row_start;
> +	metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
> +	metadata->mem.lock_count = resp.info.mem_locks;
> +	metadata->mem.event_reg_count = resp.info.mem_events;
> +
> +	metadata->shim.row_count = resp.info.shim_rows;
> +	metadata->shim.row_start = resp.info.shim_row_start;
> +	metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
> +	metadata->shim.lock_count = resp.info.shim_locks;
> +	metadata->shim.event_reg_count = resp.info.shim_events;
> +
> +	return 0;
> +}
> diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
> index 7faa01ca3436..69e220e40900 100644
> --- a/drivers/accel/amdxdna/aie4_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie4_msg_priv.h
> @@ -18,6 +18,7 @@ enum aie4_msg_opcode {
>   	AIE4_MSG_OP_DESTROY_PARTITION                = 0x30002,
>   	AIE4_MSG_OP_CREATE_HW_CONTEXT                = 0x30003,
>   	AIE4_MSG_OP_DESTROY_HW_CONTEXT               = 0x30004,
> +	AIE4_MSG_OP_AIE_TILE_INFO                    = 0x30006,
>   };
>   
>   enum aie4_msg_status {
> @@ -96,4 +97,37 @@ struct aie4_msg_destroy_hw_context_resp {
>   	enum aie4_msg_status status;
>   } __packed;
>   
> +struct aie4_tile_info {
> +	__u32 size;
> +	__u16 major;
> +	__u16 minor;
> +	__u16 cols;
> +	__u16 rows;
> +	__u16 core_rows;
> +	__u16 mem_rows;
> +	__u16 shim_rows;
> +	__u16 core_row_start;
> +	__u16 mem_row_start;
> +	__u16 shim_row_start;
> +	__u16 core_dma_channels;
> +	__u16 mem_dma_channels;
> +	__u16 shim_dma_channels;
> +	__u16 core_locks;
> +	__u16 mem_locks;
> +	__u16 shim_locks;
> +	__u16 core_events;
> +	__u16 mem_events;
> +	__u16 shim_events;
> +	__u16 resvd;
> +} __packed;
> +
> +struct aie4_msg_aie4_tile_info_req {
> +	__u32 resvd;
> +} __packed;
> +
> +struct aie4_msg_aie4_tile_info_resp {
> +	enum aie4_msg_status status;
> +	struct aie4_tile_info info;
> +} __packed;
> +
>   #endif /* _AIE4_MSG_PRIV_H_ */
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index 9ff34ce57fcb..8b5eff0e45c1 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -269,6 +269,11 @@ static void aie4_partition_fini(struct amdxdna_dev_hdl *ndev)
>   		XDNA_ERR(xdna, "partition fini failed: %d", ret);
>   }
>   
> +static int aie4_query(struct amdxdna_dev_hdl *ndev)
> +{
> +	return aie4_query_aie_metadata(ndev, &ndev->aie.metadata);
> +}
> +
>   static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
>   {
>   	int ret;
> @@ -308,6 +313,10 @@ static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
>   	if (ret)
>   		return ret;
>   
> +	ret = aie4_query(ndev);
> +	if (ret)
> +		goto mailbox_fini;
> +
>   	ret = aie4_partition_init(ndev);
>   	if (ret)
>   		goto mailbox_fini;
> @@ -535,6 +544,26 @@ static int aie4_doorbell_mmap(struct amdxdna_client *client, struct vm_area_stru
>   	return ret;
>   }
>   
> +static int aie4_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
> +{
> +	struct amdxdna_dev *xdna = client->xdna;
> +	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> +	int ret;
> +
> +	switch (args->param) {
> +	case DRM_AMDXDNA_QUERY_AIE_METADATA:
> +		ret = amdxdna_get_metadata(&ndev->aie, client, args);
> +		break;
> +	default:
> +		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
> +		ret = -EOPNOTSUPP;
> +	}
> +
> +	XDNA_DBG(xdna, "Got param %d", args->param);
> +
> +	return ret;
> +}
> +
>   static int aie4_pf_init(struct amdxdna_dev *xdna)
>   {
>   	int ret;
> @@ -581,4 +610,5 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
>   	.hwctx_fini		= aie4_hwctx_fini,
>   	.mmap			= aie4_doorbell_mmap,
>   	.cmd_wait		= aie4_cmd_wait,
> +	.get_aie_info		= aie4_get_info,
>   };
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index b69489acd53d..1886cffc62db 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -56,6 +56,7 @@ struct amdxdna_dev_hdl {
>   };
>   
>   /* aie4_message.c */
> +int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
>   int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
>   
>   /* aie4_ctx.c */


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support
  2026-05-05 17:14   ` Mario Limonciello
@ 2026-05-05 18:03     ` Lizhi Hou
  0 siblings, 0 replies; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 18:03 UTC (permalink / raw)
  To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel,
	karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue


On 5/5/26 10:14, Mario Limonciello wrote:
>
>
> On 5/5/26 11:09, Lizhi Hou wrote:
>> From: David Zhang <yidong.zhang@amd.com>
>>
>> Add support for querying device metadata on AIE4 via a mailbox message.
>> Refactor aie2_get_aie_metadata() into a common helper by moving it to
>> aie.c and renaming it to amdxdna_get_metadata(), allowing both AIE2
>> and AIE4 to reuse the implementation.
>>
>> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>> Signed-off-by: David Zhang <yidong.zhang@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>>   drivers/accel/amdxdna/aie.c           | 45 ++++++++++++++++++++++
>>   drivers/accel/amdxdna/aie.h           | 27 ++++++++++++++
>>   drivers/accel/amdxdna/aie2_ctx.c      |  4 +-
>>   drivers/accel/amdxdna/aie2_message.c  |  2 +-
>>   drivers/accel/amdxdna/aie2_pci.c      | 54 ++-------------------------
>>   drivers/accel/amdxdna/aie2_pci.h      | 24 ------------
>>   drivers/accel/amdxdna/aie4_message.c  | 37 ++++++++++++++++++
>>   drivers/accel/amdxdna/aie4_msg_priv.h | 34 +++++++++++++++++
>>   drivers/accel/amdxdna/aie4_pci.c      | 30 +++++++++++++++
>>   drivers/accel/amdxdna/aie4_pci.h      |  1 +
>>   10 files changed, 181 insertions(+), 77 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c
>> index 66849ba9026a..a31051cc1ec8 100644
>> --- a/drivers/accel/amdxdna/aie.c
>> +++ b/drivers/accel/amdxdna/aie.c
>> @@ -117,3 +117,48 @@ void amdxdna_vbnv_init(struct amdxdna_dev *xdna)
>>         amdxdna_update_vbnv(xdna, info->rev_vbnv_tbl, rev);
>>   }
>> +
>> +int amdxdna_get_metadata(struct aie_device *aie,
>> +             struct amdxdna_client *client,
>> +             struct amdxdna_drm_get_info *args)
>> +{
>> +    struct amdxdna_drm_query_aie_metadata *meta;
>> +    int ret = 0;
>> +    u32 buf_sz;
>> +
>> +    meta = kzalloc_obj(*meta);
>> +    if (!meta)
>> +        return -ENOMEM;
>> +
>> +    meta->col_size = aie->metadata.size;
>> +    meta->cols = aie->metadata.cols;
>> +    meta->rows = aie->metadata.rows;
>> +
>> +    meta->version.major = aie->metadata.version.major;
>> +    meta->version.minor = aie->metadata.version.minor;
>> +
>> +    meta->core.row_count = aie->metadata.core.row_count;
>> +    meta->core.row_start = aie->metadata.core.row_start;
>> +    meta->core.dma_channel_count = 
>> aie->metadata.core.dma_channel_count;
>> +    meta->core.lock_count = aie->metadata.core.lock_count;
>> +    meta->core.event_reg_count = aie->metadata.core.event_reg_count;
>> +
>> +    meta->mem.row_count = aie->metadata.mem.row_count;
>> +    meta->mem.row_start = aie->metadata.mem.row_start;
>> +    meta->mem.dma_channel_count = aie->metadata.mem.dma_channel_count;
>> +    meta->mem.lock_count = aie->metadata.mem.lock_count;
>> +    meta->mem.event_reg_count = aie->metadata.mem.event_reg_count;
>> +
>> +    meta->shim.row_count = aie->metadata.shim.row_count;
>> +    meta->shim.row_start = aie->metadata.shim.row_start;
>> +    meta->shim.dma_channel_count = 
>> aie->metadata.shim.dma_channel_count;
>> +    meta->shim.lock_count = aie->metadata.shim.lock_count;
>> +    meta->shim.event_reg_count = aie->metadata.shim.event_reg_count;
>
> Looking at the code the structures for
>
> struct amdxdna_drm_query_aie_metadata
> and
> struct aie_metadata
>
> look identical.  Rather than copying every member, can you just copy
> everything from aie->metadata to args->buffer directly?
>
> That could let you save the kzalloc/kfree call.

Agree. I will just remove the redundant structures in V2.

Thanks,

Lizhi

>
>
>> +
>> +    buf_sz = min(args->buffer_size, sizeof(*meta));
>> +    if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
>> +        ret = -EFAULT;
>> +
>> +    kfree(meta);
>> +    return ret;
>> +}
>> diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
>> index 7a68b114f235..4bb3719ee0c0 100644
>> --- a/drivers/accel/amdxdna/aie.h
>> +++ b/drivers/accel/amdxdna/aie.h
>> @@ -14,6 +14,29 @@
>>   struct psp_device;
>>   struct smu_device;
>>   +struct aie_version {
>> +    u16 major;
>> +    u16 minor;
>> +};
>> +
>> +struct aie_tile_metadata {
>> +    u16 row_count;
>> +    u16 row_start;
>> +    u16 dma_channel_count;
>> +    u16 lock_count;
>> +    u16 event_reg_count;
>> +};
>> +
>> +struct aie_metadata {
>> +    u32 size;
>> +    u16 cols;
>> +    u16 rows;
>> +    struct aie_version version;
>> +    struct aie_tile_metadata core;
>> +    struct aie_tile_metadata mem;
>> +    struct aie_tile_metadata shim;
>> +};
>> +
>>   struct aie_device {
>>       struct amdxdna_dev *xdna;
>>       struct mailbox_channel *mgmt_chann;
>> @@ -26,6 +49,8 @@ struct aie_device {
>>         struct psp_device *psp_hdl;
>>       struct smu_device *smu_hdl;
>> +
>> +    struct aie_metadata metadata;
>>   };
>>     #define DECLARE_AIE_MSG(name, op) \
>> @@ -94,6 +119,8 @@ void aie_destroy_chann(struct aie_device *aie, 
>> struct mailbox_channel **chann);
>>   int aie_send_mgmt_msg_wait(struct aie_device *aie, struct 
>> xdna_mailbox_msg *msg);
>>   int aie_check_protocol(struct aie_device *aie, u32 fw_major, u32 
>> fw_minor);
>>   void amdxdna_vbnv_init(struct amdxdna_dev *xdna);
>> +int amdxdna_get_metadata(struct aie_device *aie, struct 
>> amdxdna_client *client,
>> +             struct amdxdna_drm_get_info *args);
>>     /* aie_psp.c */
>>   struct psp_device *aiem_psp_create(struct drm_device *ddev, struct 
>> psp_config *conf);
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c 
>> b/drivers/accel/amdxdna/aie2_ctx.c
>> index 139825ac8515..7d6094aefb6f 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -489,12 +489,12 @@ static int aie2_hwctx_col_list(struct 
>> amdxdna_hwctx *hwctx)
>>       }
>>         ndev = xdna->dev_handle;
>> -    if (unlikely(!ndev->metadata.core.row_count)) {
>> +    if (unlikely(!ndev->aie.metadata.core.row_count)) {
>>           XDNA_WARN(xdna, "Core tile row count is zero");
>>           return -EINVAL;
>>       }
>>   -    hwctx->num_col = hwctx->num_tiles / 
>> ndev->metadata.core.row_count;
>> +    hwctx->num_col = hwctx->num_tiles / 
>> ndev->aie.metadata.core.row_count;
>>       if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
>>           XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
>>           return -EINVAL;
>> diff --git a/drivers/accel/amdxdna/aie2_message.c 
>> b/drivers/accel/amdxdna/aie2_message.c
>> index 6e98af7b74db..f555ffecea6f 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -375,7 +375,7 @@ int aie2_query_status(struct amdxdna_dev_hdl 
>> *ndev, char __user *buf,
>>       u8 *buff_addr;
>>       int ret;
>>   -    buf_sz = ndev->metadata.cols * ndev->metadata.size;
>> +    buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size;
>>       buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
>>       if (IS_ERR(buff_addr))
>>           return PTR_ERR(buff_addr);
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index f0ddb843eb21..6c8a0f70b73d 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -219,13 +219,13 @@ static int aie2_mgmt_fw_query(struct 
>> amdxdna_dev_hdl *ndev)
>>           return ret;
>>       }
>>   -    ret = aie2_query_aie_metadata(ndev, &ndev->metadata);
>> +    ret = aie2_query_aie_metadata(ndev, &ndev->aie.metadata);
>>       if (ret) {
>>           XDNA_ERR(ndev->aie.xdna, "Query AIE metadata failed");
>>           return ret;
>>       }
>>   -    ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>> +    ndev->total_col = min(aie2_max_col, ndev->aie.metadata.cols);
>>         return 0;
>>   }
>> @@ -658,53 +658,6 @@ static int aie2_get_aie_status(struct 
>> amdxdna_client *client,
>>       return 0;
>>   }
>>   -static int aie2_get_aie_metadata(struct amdxdna_client *client,
>> -                 struct amdxdna_drm_get_info *args)
>> -{
>> -    struct amdxdna_drm_query_aie_metadata *meta;
>> -    struct amdxdna_dev *xdna = client->xdna;
>> -    struct amdxdna_dev_hdl *ndev;
>> -    int ret = 0;
>> -    u32 buf_sz;
>> -
>> -    ndev = xdna->dev_handle;
>> -    meta = kzalloc_obj(*meta);
>> -    if (!meta)
>> -        return -ENOMEM;
>> -
>> -    meta->col_size = ndev->metadata.size;
>> -    meta->cols = ndev->metadata.cols;
>> -    meta->rows = ndev->metadata.rows;
>> -
>> -    meta->version.major = ndev->metadata.version.major;
>> -    meta->version.minor = ndev->metadata.version.minor;
>> -
>> -    meta->core.row_count = ndev->metadata.core.row_count;
>> -    meta->core.row_start = ndev->metadata.core.row_start;
>> -    meta->core.dma_channel_count = 
>> ndev->metadata.core.dma_channel_count;
>> -    meta->core.lock_count = ndev->metadata.core.lock_count;
>> -    meta->core.event_reg_count = ndev->metadata.core.event_reg_count;
>> -
>> -    meta->mem.row_count = ndev->metadata.mem.row_count;
>> -    meta->mem.row_start = ndev->metadata.mem.row_start;
>> -    meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count;
>> -    meta->mem.lock_count = ndev->metadata.mem.lock_count;
>> -    meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count;
>> -
>> -    meta->shim.row_count = ndev->metadata.shim.row_count;
>> -    meta->shim.row_start = ndev->metadata.shim.row_start;
>> -    meta->shim.dma_channel_count = 
>> ndev->metadata.shim.dma_channel_count;
>> -    meta->shim.lock_count = ndev->metadata.shim.lock_count;
>> -    meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count;
>> -
>> -    buf_sz = min(args->buffer_size, sizeof(*meta));
>> -    if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
>> -        ret = -EFAULT;
>> -
>> -    kfree(meta);
>> -    return ret;
>> -}
>> -
>>   static int aie2_get_aie_version(struct amdxdna_client *client,
>>                   struct amdxdna_drm_get_info *args)
>>   {
>> @@ -1039,6 +992,7 @@ static int aie2_get_preempt_state(struct 
>> amdxdna_client *client,
>>   static int aie2_get_info(struct amdxdna_client *client, struct 
>> amdxdna_drm_get_info *args)
>>   {
>>       struct amdxdna_dev *xdna = client->xdna;
>> +    struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>>       int ret, idx;
>>         if (!drm_dev_enter(&xdna->ddev, &idx))
>> @@ -1053,7 +1007,7 @@ static int aie2_get_info(struct amdxdna_client 
>> *client, struct amdxdna_drm_get_i
>>           ret = aie2_get_aie_status(client, args);
>>           break;
>>       case DRM_AMDXDNA_QUERY_AIE_METADATA:
>> -        ret = aie2_get_aie_metadata(client, args);
>> +        ret = amdxdna_get_metadata(&ndev->aie, client, args);
>>           break;
>>       case DRM_AMDXDNA_QUERY_AIE_VERSION:
>>           ret = aie2_get_aie_version(client, args);
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h 
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index f12073175676..c884fed610f9 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -77,29 +77,6 @@ struct amdxdna_fw_ver;
>>   struct amdxdna_hwctx;
>>   struct amdxdna_sched_job;
>>   -struct aie_version {
>> -    u16 major;
>> -    u16 minor;
>> -};
>> -
>> -struct aie_tile_metadata {
>> -    u16 row_count;
>> -    u16 row_start;
>> -    u16 dma_channel_count;
>> -    u16 lock_count;
>> -    u16 event_reg_count;
>> -};
>> -
>> -struct aie_metadata {
>> -    u32 size;
>> -    u16 cols;
>> -    u16 rows;
>> -    struct aie_version version;
>> -    struct aie_tile_metadata core;
>> -    struct aie_tile_metadata mem;
>> -    struct aie_tile_metadata shim;
>> -};
>> -
>>   enum rt_config_category {
>>       AIE2_RT_CFG_INIT,
>>       AIE2_RT_CFG_CLK_GATING,
>> @@ -178,7 +155,6 @@ struct amdxdna_dev_hdl {
>>         u32                total_col;
>>       struct aie_version        version;
>> -    struct aie_metadata        metadata;
>>       struct aie2_exec_msg_ops    *exec_msg_ops;
>>         /* power management and clock*/
>> diff --git a/drivers/accel/amdxdna/aie4_message.c 
>> b/drivers/accel/amdxdna/aie4_message.c
>> index d621dd32ac40..ac89a9a842b2 100644
>> --- a/drivers/accel/amdxdna/aie4_message.c
>> +++ b/drivers/accel/amdxdna/aie4_message.c
>> @@ -25,3 +25,40 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev)
>>         return ret;
>>   }
>> +
>> +int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct 
>> aie_metadata *metadata)
>> +{
>> +    DECLARE_AIE_MSG(aie4_msg_aie4_tile_info, 
>> AIE4_MSG_OP_AIE_TILE_INFO);
>> +    int ret;
>> +
>> +    ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>> +    if (ret)
>> +        return ret;
>> +
>> +    metadata->size = resp.info.size;
>> +    metadata->cols = resp.info.cols;
>> +    metadata->rows = resp.info.rows;
>> +
>> +    metadata->version.major = resp.info.major;
>> +    metadata->version.minor = resp.info.minor;
>> +
>> +    metadata->core.row_count = resp.info.core_rows;
>> +    metadata->core.row_start = resp.info.core_row_start;
>> +    metadata->core.dma_channel_count = resp.info.core_dma_channels;
>> +    metadata->core.lock_count = resp.info.core_locks;
>> +    metadata->core.event_reg_count = resp.info.core_events;
>> +
>> +    metadata->mem.row_count = resp.info.mem_rows;
>> +    metadata->mem.row_start = resp.info.mem_row_start;
>> +    metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
>> +    metadata->mem.lock_count = resp.info.mem_locks;
>> +    metadata->mem.event_reg_count = resp.info.mem_events;
>> +
>> +    metadata->shim.row_count = resp.info.shim_rows;
>> +    metadata->shim.row_start = resp.info.shim_row_start;
>> +    metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
>> +    metadata->shim.lock_count = resp.info.shim_locks;
>> +    metadata->shim.event_reg_count = resp.info.shim_events;
>> +
>> +    return 0;
>> +}
>> diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h 
>> b/drivers/accel/amdxdna/aie4_msg_priv.h
>> index 7faa01ca3436..69e220e40900 100644
>> --- a/drivers/accel/amdxdna/aie4_msg_priv.h
>> +++ b/drivers/accel/amdxdna/aie4_msg_priv.h
>> @@ -18,6 +18,7 @@ enum aie4_msg_opcode {
>>       AIE4_MSG_OP_DESTROY_PARTITION                = 0x30002,
>>       AIE4_MSG_OP_CREATE_HW_CONTEXT                = 0x30003,
>>       AIE4_MSG_OP_DESTROY_HW_CONTEXT               = 0x30004,
>> +    AIE4_MSG_OP_AIE_TILE_INFO                    = 0x30006,
>>   };
>>     enum aie4_msg_status {
>> @@ -96,4 +97,37 @@ struct aie4_msg_destroy_hw_context_resp {
>>       enum aie4_msg_status status;
>>   } __packed;
>>   +struct aie4_tile_info {
>> +    __u32 size;
>> +    __u16 major;
>> +    __u16 minor;
>> +    __u16 cols;
>> +    __u16 rows;
>> +    __u16 core_rows;
>> +    __u16 mem_rows;
>> +    __u16 shim_rows;
>> +    __u16 core_row_start;
>> +    __u16 mem_row_start;
>> +    __u16 shim_row_start;
>> +    __u16 core_dma_channels;
>> +    __u16 mem_dma_channels;
>> +    __u16 shim_dma_channels;
>> +    __u16 core_locks;
>> +    __u16 mem_locks;
>> +    __u16 shim_locks;
>> +    __u16 core_events;
>> +    __u16 mem_events;
>> +    __u16 shim_events;
>> +    __u16 resvd;
>> +} __packed;
>> +
>> +struct aie4_msg_aie4_tile_info_req {
>> +    __u32 resvd;
>> +} __packed;
>> +
>> +struct aie4_msg_aie4_tile_info_resp {
>> +    enum aie4_msg_status status;
>> +    struct aie4_tile_info info;
>> +} __packed;
>> +
>>   #endif /* _AIE4_MSG_PRIV_H_ */
>> diff --git a/drivers/accel/amdxdna/aie4_pci.c 
>> b/drivers/accel/amdxdna/aie4_pci.c
>> index 9ff34ce57fcb..8b5eff0e45c1 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.c
>> +++ b/drivers/accel/amdxdna/aie4_pci.c
>> @@ -269,6 +269,11 @@ static void aie4_partition_fini(struct 
>> amdxdna_dev_hdl *ndev)
>>           XDNA_ERR(xdna, "partition fini failed: %d", ret);
>>   }
>>   +static int aie4_query(struct amdxdna_dev_hdl *ndev)
>> +{
>> +    return aie4_query_aie_metadata(ndev, &ndev->aie.metadata);
>> +}
>> +
>>   static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
>>   {
>>       int ret;
>> @@ -308,6 +313,10 @@ static int aie4_vf_hw_start(struct 
>> amdxdna_dev_hdl *ndev)
>>       if (ret)
>>           return ret;
>>   +    ret = aie4_query(ndev);
>> +    if (ret)
>> +        goto mailbox_fini;
>> +
>>       ret = aie4_partition_init(ndev);
>>       if (ret)
>>           goto mailbox_fini;
>> @@ -535,6 +544,26 @@ static int aie4_doorbell_mmap(struct 
>> amdxdna_client *client, struct vm_area_stru
>>       return ret;
>>   }
>>   +static int aie4_get_info(struct amdxdna_client *client, struct 
>> amdxdna_drm_get_info *args)
>> +{
>> +    struct amdxdna_dev *xdna = client->xdna;
>> +    struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>> +    int ret;
>> +
>> +    switch (args->param) {
>> +    case DRM_AMDXDNA_QUERY_AIE_METADATA:
>> +        ret = amdxdna_get_metadata(&ndev->aie, client, args);
>> +        break;
>> +    default:
>> +        XDNA_ERR(xdna, "Not supported request parameter %u", 
>> args->param);
>> +        ret = -EOPNOTSUPP;
>> +    }
>> +
>> +    XDNA_DBG(xdna, "Got param %d", args->param);
>> +
>> +    return ret;
>> +}
>> +
>>   static int aie4_pf_init(struct amdxdna_dev *xdna)
>>   {
>>       int ret;
>> @@ -581,4 +610,5 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
>>       .hwctx_fini        = aie4_hwctx_fini,
>>       .mmap            = aie4_doorbell_mmap,
>>       .cmd_wait        = aie4_cmd_wait,
>> +    .get_aie_info        = aie4_get_info,
>>   };
>> diff --git a/drivers/accel/amdxdna/aie4_pci.h 
>> b/drivers/accel/amdxdna/aie4_pci.h
>> index b69489acd53d..1886cffc62db 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.h
>> +++ b/drivers/accel/amdxdna/aie4_pci.h
>> @@ -56,6 +56,7 @@ struct amdxdna_dev_hdl {
>>   };
>>     /* aie4_message.c */
>> +int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct 
>> aie_metadata *metadata);
>>   int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
>>     /* aie4_ctx.c */
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF
  2026-05-05 16:09 ` [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF Lizhi Hou
@ 2026-05-05 19:37   ` Mario Limonciello
  0 siblings, 0 replies; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 19:37 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue



On 5/5/26 11:09, Lizhi Hou wrote:
> From: David Zhang <yidong.zhang@amd.com>
> 
> Add basic device initialization support for AIE4 Virtual Functions (PCI
> device IDs 0x17F3 and 0x1B0C).
> 
> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: David Zhang <yidong.zhang@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
>   drivers/accel/amdxdna/aie4_pci.c        | 160 +++++++++++++-----------
>   drivers/accel/amdxdna/aie4_pci.h        |   3 +-
>   drivers/accel/amdxdna/amdxdna_pci_drv.c |   4 +
>   drivers/accel/amdxdna/amdxdna_pci_drv.h |   1 +
>   drivers/accel/amdxdna/npu3_regs.c       |  20 ++-
>   include/uapi/drm/amdxdna_accel.h        |   1 +
>   6 files changed, 113 insertions(+), 76 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index 87f80f804f91..a967e2db7ebd 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -196,8 +196,9 @@ static int aie4_mailbox_start(struct amdxdna_dev *xdna,
>   	return ret;
>   }
>   
> -static int aie4_mailbox_init(struct amdxdna_dev *xdna)
> +static int aie4_mailbox_init(struct amdxdna_dev_hdl *ndev)
>   {
> +	struct amdxdna_dev *xdna = ndev->aie.xdna;
>   	struct mailbox_info mbox_info;
>   	int ret;
>   
> @@ -208,13 +209,13 @@ static int aie4_mailbox_init(struct amdxdna_dev *xdna)
>   	return aie4_mailbox_start(xdna, &mbox_info);
>   }
>   
> -static void aie4_fw_unload(struct amdxdna_dev_hdl *ndev)
> +static void aie4_fw_stop(struct amdxdna_dev_hdl *ndev)
>   {
>   	aie_psp_stop(ndev->aie.psp_hdl);
>   	aie_smu_fini(ndev->aie.smu_hdl);
>   }
>   
> -static int aie4_fw_load(struct amdxdna_dev_hdl *ndev)
> +static int aie4_fw_start(struct amdxdna_dev_hdl *ndev)
>   {
>   	int ret;
>   
> @@ -233,49 +234,49 @@ static int aie4_fw_load(struct amdxdna_dev_hdl *ndev)
>   	return ret;
>   }
>   
> -static int aie4_hw_start(struct amdxdna_dev *xdna)
> +static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
>   {
> -	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>   	int ret;
>   
> -	ret = aie4_fw_load(ndev);
> +	ret = aie4_fw_start(ndev);
>   	if (ret)
>   		return ret;
>   
> -	ret = aie4_mailbox_init(xdna);
> +	ret = aie4_mailbox_init(ndev);
>   	if (ret)
> -		goto fw_unload;
> +		goto stop_fw;
>   
>   	return 0;
>   
> -fw_unload:
> -	aie4_fw_unload(ndev);
> +stop_fw:
> +	aie4_fw_stop(ndev);
>   
>   	return ret;
>   }
>   
> -static void aie4_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev)
> +static void aie4_pf_hw_stop(struct amdxdna_dev_hdl *ndev)
>   {
> -	int ret;
> +	struct amdxdna_dev *xdna = ndev->aie.xdna;
>   
> -	/* No paired resume needed, fw is stateless */
> -	ret = aie4_suspend_fw(ndev);
> -	if (ret)
> -		XDNA_ERR(ndev->aie.xdna, "suspend_fw failed, ret %d", ret);
> -	else
> -		XDNA_DBG(ndev->aie.xdna, "npu firmware suspended");
> +	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
> +
> +	aie4_suspend_fw(ndev);
> +	aie4_mailbox_fini(ndev);
> +	aie4_fw_stop(ndev);
>   }
>   
> -static void aie4_hw_stop(struct amdxdna_dev *xdna)
> +static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
>   {
> -	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> +	return aie4_mailbox_init(ndev);
> +}
> +
> +static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
> +{
> +	struct amdxdna_dev *xdna = ndev->aie.xdna;
>   
>   	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>   
> -	aie4_mgmt_fw_fini(ndev);
>   	aie4_mailbox_fini(ndev);
> -
> -	aie4_fw_unload(ndev);
>   }
>   
>   static int aie4_request_firmware(struct amdxdna_dev_hdl *ndev,
> @@ -365,15 +366,41 @@ static int aie4_prepare_firmware(struct amdxdna_dev_hdl *ndev,
>   	return 0;
>   }
>   
> -static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
> +static int aie4_load_fw(struct amdxdna_dev_hdl *ndev,
> +			void __iomem *tbl[PCI_NUM_RESOURCES])
> +{
> +	const struct firmware *npufw, *certfw;
> +	int ret;
> +
> +	if (!ndev->priv->npufw_path && !ndev->priv->certfw_path)
> +		return 0;
> +
> +	ret = aie4_request_firmware(ndev, &npufw, &certfw);
> +	if (ret)
> +		return ret;
> +
> +	ret = aie4_prepare_firmware(ndev, npufw, certfw, tbl);
> +	aie4_release_firmware(ndev, npufw, certfw);
> +
> +	return ret;
> +}
> +
> +static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
>   {
> -	struct amdxdna_dev *xdna = ndev->aie.xdna;
>   	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> +	struct amdxdna_dev_hdl *ndev;
>   	void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
> -	const struct firmware *npufw, *certfw;
>   	unsigned long bars = 0;
>   	int ret, i;
>   
> +	ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
> +	if (!ndev)
> +		return -ENOMEM;
> +
> +	ndev->priv = xdna->dev_info->dev_priv;
> +	ndev->aie.xdna = xdna;
> +	xdna->dev_handle = ndev;
> +
>   	/* Enable managed PCI device */
>   	ret = pcim_enable_device(pdev);
>   	if (ret) {
> @@ -409,75 +436,60 @@ static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
>   
>   	pci_set_master(pdev);
>   
> -	ret = aie4_request_firmware(ndev, &npufw, &certfw);
> -	if (ret)
> -		goto clear_master;
> -
> -	ret = aie4_prepare_firmware(ndev, npufw, certfw, tbl);
> -	aie4_release_firmware(ndev, npufw, certfw);
> +	ret = aie4_load_fw(ndev, tbl);
>   	if (ret)
> -		goto clear_master;
> +		return ret;
>   
>   	ret = aie4_irq_init(xdna);
>   	if (ret)
> -		goto clear_master;
> +		return ret;
>   
> -	ret = aie4_hw_start(xdna);
> -	if (ret)
> -		goto clear_master;
> +	amdxdna_vbnv_init(xdna);
> +	XDNA_DBG(xdna, "init finished");
>   
>   	return 0;
> -
> -clear_master:
> -	pci_clear_master(pdev);
> -
> -	return ret;
>   }
>   
> -static void aie4_pcidev_fini(struct amdxdna_dev_hdl *ndev)
> +static int aie4_pf_init(struct amdxdna_dev *xdna)
>   {
> -	struct amdxdna_dev *xdna = ndev->aie.xdna;
> -	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> -
> -	aie4_hw_stop(xdna);
> -
> -	pci_clear_master(pdev);
> -}
> +	int ret;
>   
> -static void aie4_fini(struct amdxdna_dev *xdna)
> -{
> -	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> +	ret = aie4m_pcidev_init(xdna);
> +	if (ret)
> +		return ret;
>   
> -	aie4_sriov_stop(ndev);
> -	aie4_pcidev_fini(ndev);
> +	return aie4_pf_hw_start(xdna->dev_handle);
>   }
>   
> -static int aie4_init(struct amdxdna_dev *xdna)
> +static int aie4_vf_init(struct amdxdna_dev *xdna)
>   {
> -	struct amdxdna_dev_hdl *ndev;
>   	int ret;
>   
> -	ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
> -	if (!ndev)
> -		return -ENOMEM;
> +	ret = aie4m_pcidev_init(xdna);
> +	if (ret)
> +		return ret;
>   
> -	ndev->priv = xdna->dev_info->dev_priv;
> -	ndev->aie.xdna = xdna;
> -	xdna->dev_handle = ndev;
> +	return aie4_vf_hw_start(xdna->dev_handle);
> +}
>   
> -	ret = aie4_pcidev_init(ndev);
> -	if (ret) {
> -		XDNA_ERR(xdna, "Setup PCI device failed, ret %d", ret);
> -		return ret;
> -	}
> +static void aie4_pf_fini(struct amdxdna_dev *xdna)
> +{
> +	aie4_sriov_stop(xdna->dev_handle);
> +	aie4_pf_hw_stop(xdna->dev_handle);
> +}
>   
> -	amdxdna_vbnv_init(xdna);
> -	XDNA_DBG(xdna, "aie4 init finished");
> -	return 0;
> +static void aie4_vf_fini(struct amdxdna_dev *xdna)
> +{
> +	aie4_vf_hw_stop(xdna->dev_handle);
>   }
>   
> -const struct amdxdna_dev_ops aie4_ops = {
> -	.init			= aie4_init,
> -	.fini			= aie4_fini,
> +const struct amdxdna_dev_ops aie4_pf_ops = {
> +	.init			= aie4_pf_init,
> +	.fini			= aie4_pf_fini,
>   	.sriov_configure        = aie4_sriov_configure,
>   };
> +
> +const struct amdxdna_dev_ops aie4_vf_ops = {
> +	.init			= aie4_vf_init,
> +	.fini			= aie4_vf_fini,
> +};
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index aa1495c3370b..cbf3424a4341 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -48,6 +48,7 @@ static inline int aie4_sriov_stop(struct amdxdna_dev_hdl *ndev)
>   }
>   #endif
>   
> -extern const struct amdxdna_dev_ops aie4_ops;
> +extern const struct amdxdna_dev_ops aie4_pf_ops;
> +extern const struct amdxdna_dev_ops aie4_vf_ops;
>   
>   #endif /* _AIE4_PCI_H_ */
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index 1b08a08343cf..39ad081ac082 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -53,7 +53,9 @@ static const struct pci_device_id pci_ids[] = {
>   	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1502) },
>   	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f0) },
>   	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f2) },
> +	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f3) },
>   	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1B0B) },
> +	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1B0C) },
>   	{0}
>   };
>   
> @@ -65,7 +67,9 @@ static const struct amdxdna_device_id amdxdna_ids[] = {
>   	{ 0x17f0, 0x11, &dev_npu5_info },
>   	{ 0x17f0, 0x20, &dev_npu6_info },
>   	{ 0x17f2, 0x10, &dev_npu3_pf_info },
> +	{ 0x17f3, 0x10, &dev_npu3_vf_info },
>   	{ 0x1B0B, 0x10, &dev_npu3_pf_info },
> +	{ 0x1B0C, 0x10, &dev_npu3_vf_info },
>   	{0}
>   };
>   
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> index b1548cf16f59..caed11c09e55 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> @@ -167,6 +167,7 @@ struct amdxdna_client {
>   /* Add device info below */
>   extern const struct amdxdna_dev_info dev_npu1_info;
>   extern const struct amdxdna_dev_info dev_npu3_pf_info;
> +extern const struct amdxdna_dev_info dev_npu3_vf_info;
>   extern const struct amdxdna_dev_info dev_npu4_info;
>   extern const struct amdxdna_dev_info dev_npu5_info;
>   extern const struct amdxdna_dev_info dev_npu6_info;
> diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
> index acece0faddf2..6d5da779232b 100644
> --- a/drivers/accel/amdxdna/npu3_regs.c
> +++ b/drivers/accel/amdxdna/npu3_regs.c
> @@ -64,6 +64,14 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
>   	},
>   };
>   
> +static const struct amdxdna_dev_priv npu3_dev_vf_priv = {
> +	/* vf device does not load firmware */
> +	.mbox_bar		= NPU3_MBOX_BAR,
> +	.mbox_rbuf_bar		= NPU3_MBOX_BUFFER_BAR,
> +	.mbox_info_off		= NPU3_MBOX_INFO_OFF,
> +	/* vf device does not have smu and psp */
> +};
> +
>   const struct amdxdna_dev_info dev_npu3_pf_info = {
>   	.mbox_bar		= NPU3_MBOX_BAR,
>   	.sram_bar		= NPU3_MBOX_BUFFER_BAR,
> @@ -73,5 +81,15 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
>   	.device_type		= AMDXDNA_DEV_TYPE_PF,
>   	.dev_priv		= &npu3_dev_priv,
>   	.fw_feature_tbl		= npu3_fw_feature_table,
> -	.ops			= &aie4_ops,
> +	.ops			= &aie4_pf_ops,
> +};
> +
> +const struct amdxdna_dev_info dev_npu3_vf_info = {
> +	.mbox_bar		= NPU3_MBOX_BAR,
> +	.sram_bar		= NPU3_MBOX_BUFFER_BAR,
> +	.default_vbnv		= "RyzenAI-npu3-vf",
> +	.device_type		= AMDXDNA_DEV_TYPE_UMQ,
> +	.dev_priv		= &npu3_dev_vf_priv,
> +	.fw_feature_tbl		= npu3_fw_feature_table,
> +	.ops			= &aie4_vf_ops,
>   };
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index 0b11e8e3ea5d..34212feee15c 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -30,6 +30,7 @@ extern "C" {
>   enum amdxdna_device_type {
>   	AMDXDNA_DEV_TYPE_UNKNOWN = -1,
>   	AMDXDNA_DEV_TYPE_KMQ = 0,
> +	AMDXDNA_DEV_TYPE_UMQ = 1,
>   	AMDXDNA_DEV_TYPE_PF = 2,
>   };
>   


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V1 2/6] accel/amdxdna: Init AIE4 device partition
  2026-05-05 16:09 ` [PATCH V1 2/6] accel/amdxdna: Init AIE4 device partition Lizhi Hou
@ 2026-05-05 19:53   ` Mario Limonciello
  0 siblings, 0 replies; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 19:53 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue



On 5/5/26 11:09, Lizhi Hou wrote:
> From: David Zhang <yidong.zhang@amd.com>
> 
> Send partition creation command to firmware during VF initialization.
> 
> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: David Zhang <yidong.zhang@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>   drivers/accel/amdxdna/aie4_msg_priv.h | 21 +++++++++++
>   drivers/accel/amdxdna/aie4_pci.c      | 52 ++++++++++++++++++++++++++-
>   drivers/accel/amdxdna/aie4_pci.h      |  1 +
>   3 files changed, 73 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
> index 88463cc3a98a..cada53257921 100644
> --- a/drivers/accel/amdxdna/aie4_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie4_msg_priv.h
> @@ -13,6 +13,9 @@ enum aie4_msg_opcode {
>   
>   	AIE4_MSG_OP_CREATE_VFS                       = 0x20001,
>   	AIE4_MSG_OP_DESTROY_VFS                      = 0x20002,
> +
> +	AIE4_MSG_OP_CREATE_PARTITION                 = 0x30001,
> +	AIE4_MSG_OP_DESTROY_PARTITION                = 0x30002,
>   };
>   
>   enum aie4_msg_status {
> @@ -46,4 +49,22 @@ struct aie4_msg_destroy_vfs_resp {
>   	enum aie4_msg_status status;
>   } __packed;
>   
> +struct aie4_msg_create_partition_req {
> +	__u32 partition_col_start;
> +	__u32 partition_col_count;
> +} __packed;
> +
> +struct aie4_msg_create_partition_resp {
> +	enum aie4_msg_status status;
> +	__u32 partition_id;
> +} __packed;
> +
> +struct aie4_msg_destroy_partition_req {
> +	__u32 partition_id;
> +} __packed;
> +
> +struct aie4_msg_destroy_partition_resp {
> +	enum aie4_msg_status status;
> +} __packed;
> +
>   #endif /* _AIE4_MSG_PRIV_H_ */
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index a967e2db7ebd..13f5d45e388d 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -9,11 +9,16 @@
>   #include <linux/firmware.h>
>   #include <linux/sizes.h>
>   
> +#include "aie.h"
> +#include "aie4_msg_priv.h"
>   #include "aie4_pci.h"
> +#include "amdxdna_mailbox.h"
> +#include "amdxdna_mailbox_helper.h"
>   #include "amdxdna_pci_drv.h"
>   
>   #define NO_IOHUB		0
>   #define PSP_NOTIFY_INTR		0xD007BE11
> +#define AIE4_TOTAL_COLUMN	3
>   
>   /*
>    * The management mailbox channel is allocated by firmware.
> @@ -234,6 +239,36 @@ static int aie4_fw_start(struct amdxdna_dev_hdl *ndev)
>   	return ret;
>   }
>   
> +static int aie4_partition_init(struct amdxdna_dev_hdl *ndev)
> +{
> +	DECLARE_AIE_MSG(aie4_msg_create_partition, AIE4_MSG_OP_CREATE_PARTITION);
> +	struct amdxdna_dev *xdna = ndev->aie.xdna;
> +	int ret;
> +
> +	req.partition_col_start = 0;
> +	req.partition_col_count = AIE4_TOTAL_COLUMN;
> +	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> +	if (ret) {
> +		XDNA_ERR(xdna, "partition init failed: %d", ret);
> +		return ret;
> +	}
> +
> +	ndev->partition_id = resp.partition_id;
> +	return 0;
> +}
> +
> +static void aie4_partition_fini(struct amdxdna_dev_hdl *ndev)
> +{
> +	DECLARE_AIE_MSG(aie4_msg_destroy_partition, AIE4_MSG_OP_DESTROY_PARTITION);
> +	struct amdxdna_dev *xdna = ndev->aie.xdna;
> +	int ret;
> +
> +	req.partition_id = ndev->partition_id;
> +	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> +	if (ret)
> +		XDNA_ERR(xdna, "partition fini failed: %d", ret);
> +}
> +
>   static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
>   {
>   	int ret;
> @@ -267,7 +302,21 @@ static void aie4_pf_hw_stop(struct amdxdna_dev_hdl *ndev)
>   
>   static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
>   {
> -	return aie4_mailbox_init(ndev);
> +	int ret;
> +
> +	ret = aie4_mailbox_init(ndev);
> +	if (ret)
> +		return ret;
> +
> +	ret = aie4_partition_init(ndev);
> +	if (ret)
> +		goto mailbox_fini;
> +
> +	return 0;
> +
> +mailbox_fini:
> +	aie4_mailbox_fini(ndev);
> +	return ret;
>   }
>   
>   static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
> @@ -276,6 +325,7 @@ static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
>   
>   	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>   
> +	aie4_partition_fini(ndev);
>   	aie4_mailbox_fini(ndev);
>   }
>   
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index cbf3424a4341..620fb5bd23e4 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -31,6 +31,7 @@ struct amdxdna_dev_hdl {
>   	void			__iomem *rbuf_base;
>   
>   	struct mailbox			*mbox;
> +	u32				partition_id;
>   };
>   
>   /* aie4_message.c */


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V1 3/6] accel/amdxdna: Add AIE4 VF hardware context create and destroy
  2026-05-05 16:09 ` [PATCH V1 3/6] accel/amdxdna: Add AIE4 VF hardware context create and destroy Lizhi Hou
@ 2026-05-05 20:28   ` Mario Limonciello
  0 siblings, 0 replies; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 20:28 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue



On 5/5/26 11:09, Lizhi Hou wrote:
> From: David Zhang <yidong.zhang@amd.com>
> 
> Implement hardware context creation and destruction for AIE4 VF devices.
> 
> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: David Zhang <yidong.zhang@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
>   drivers/accel/amdxdna/Makefile          |   1 +
>   drivers/accel/amdxdna/aie4_ctx.c        | 258 ++++++++++++++++++++++++
>   drivers/accel/amdxdna/aie4_host_queue.h |  22 ++
>   drivers/accel/amdxdna/aie4_msg_priv.h   |  29 +++
>   drivers/accel/amdxdna/aie4_pci.c        |   5 +
>   drivers/accel/amdxdna/aie4_pci.h        |  24 +++
>   drivers/accel/amdxdna/amdxdna_ctx.c     |   6 +
>   drivers/accel/amdxdna/amdxdna_ctx.h     |   3 +
>   include/uapi/drm/amdxdna_accel.h        |   1 +
>   9 files changed, 349 insertions(+)
>   create mode 100644 drivers/accel/amdxdna/aie4_ctx.c
>   create mode 100644 drivers/accel/amdxdna/aie4_host_queue.h
> 
> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
> index d7720c8c8a98..05cce0a38692 100644
> --- a/drivers/accel/amdxdna/Makefile
> +++ b/drivers/accel/amdxdna/Makefile
> @@ -10,6 +10,7 @@ amdxdna-y := \
>   	aie2_pci.o \
>   	aie2_pm.o \
>   	aie2_solver.o \
> +	aie4_ctx.o \
>   	aie4_message.o \
>   	aie4_pci.o \
>   	amdxdna_cbuf.o \
> diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c
> new file mode 100644
> index 000000000000..84ac706d0ffb
> --- /dev/null
> +++ b/drivers/accel/amdxdna/aie4_ctx.c
> @@ -0,0 +1,258 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
> + */
> +
> +#include <drm/amdxdna_accel.h>
> +#include <drm/drm_device.h>
> +#include <drm/drm_gem.h>
> +#include <drm/drm_gem_shmem_helper.h>
> +#include <drm/drm_print.h>
> +#include <drm/gpu_scheduler.h>
> +#include <linux/types.h>
> +
> +#include "aie.h"
> +#include "aie4_host_queue.h"
> +#include "aie4_msg_priv.h"
> +#include "aie4_pci.h"
> +#include "amdxdna_ctx.h"
> +#include "amdxdna_gem.h"
> +#include "amdxdna_mailbox.h"
> +#include "amdxdna_mailbox_helper.h"
> +#include "amdxdna_pci_drv.h"
> +
> +static irqreturn_t cert_comp_isr(int irq, void *p)
> +{
> +	struct cert_comp *cert_comp = p;
> +
> +	wake_up_all(&cert_comp->waitq);
> +	return IRQ_HANDLED;
> +}
> +
> +static struct cert_comp *aie4_lookup_cert_comp(struct amdxdna_dev_hdl *ndev, u32 msix_idx)
> +{
> +	struct amdxdna_dev *xdna = ndev->aie.xdna;
> +	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> +	struct cert_comp *cert_comp;
> +	int ret;
> +
> +	guard(mutex)(&ndev->cert_comp_lock);
> +
> +	cert_comp = xa_load(&ndev->cert_comp_xa, msix_idx);
> +	if (cert_comp) {
> +		kref_get(&cert_comp->kref);
> +		return cert_comp;
> +	}
> +
> +	cert_comp = kzalloc_obj(*cert_comp);
> +	if (!cert_comp)
> +		return NULL;
> +
> +	cert_comp->ndev = ndev;
> +	cert_comp->msix_idx = msix_idx;
> +	init_waitqueue_head(&cert_comp->waitq);
> +	kref_init(&cert_comp->kref);
> +
> +	ret = pci_irq_vector(pdev, cert_comp->msix_idx);
> +	if (ret < 0) {
> +		XDNA_ERR(xdna, "MSI-X idx %u is invalid, ret:%d", msix_idx, ret);
> +		goto free_cert_comp;
> +	}
> +	cert_comp->irq = ret;
> +
> +	ret = request_irq(cert_comp->irq, cert_comp_isr, 0, "xdna_hsa", cert_comp);
> +	if (ret) {
> +		XDNA_ERR(xdna, "request irq %d failed %d", cert_comp->irq, ret);
> +		goto free_cert_comp;
> +	}
> +
> +	ret = xa_err(xa_store(&ndev->cert_comp_xa, msix_idx, cert_comp, GFP_KERNEL));
> +	if (ret) {
> +		XDNA_ERR(xdna, "store cert_comp for msix idx %d failed %d", msix_idx, ret);
> +		goto free_irq;
> +	}
> +
> +	return cert_comp;
> +
> +free_irq:
> +	free_irq(cert_comp->irq, cert_comp);
> +free_cert_comp:
> +	kfree(cert_comp);
> +	return NULL;
> +}
> +
> +static void cert_comp_release(struct kref *kref)
> +{
> +	struct cert_comp *cert_comp = container_of(kref, struct cert_comp, kref);
> +	struct amdxdna_dev_hdl *ndev = cert_comp->ndev;
> +
> +	drm_WARN_ON(&ndev->aie.xdna->ddev, !mutex_is_locked(&ndev->cert_comp_lock));
> +
> +	xa_erase(&ndev->cert_comp_xa, cert_comp->msix_idx);
> +	free_irq(cert_comp->irq, cert_comp);
> +	kfree(cert_comp);
> +}
> +
> +static void aie4_put_cert_comp(struct cert_comp *cert_comp)
> +{
> +	struct amdxdna_dev_hdl *ndev;
> +
> +	ndev = cert_comp->ndev;
> +	guard(mutex)(&ndev->cert_comp_lock);
> +	kref_put(&cert_comp->kref, cert_comp_release);
> +}
> +
> +static int aie4_msg_destroy_context(struct amdxdna_dev_hdl *ndev, u32 hw_context_id)
> +{
> +	DECLARE_AIE_MSG(aie4_msg_destroy_hw_context, AIE4_MSG_OP_DESTROY_HW_CONTEXT);
> +
> +	req.hw_context_id = hw_context_id;
> +	return aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> +}
> +
> +static int aie4_hwctx_create(struct amdxdna_hwctx *hwctx)
> +{
> +	DECLARE_AIE_MSG(aie4_msg_create_hw_context, AIE4_MSG_OP_CREATE_HW_CONTEXT);
> +	struct amdxdna_client *client = hwctx->client;
> +	struct amdxdna_hwctx_priv *priv = hwctx->priv;
> +	struct amdxdna_dev *xdna = hwctx->client->xdna;
> +	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> +	int ret;
> +
> +	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
> +
> +	if (!ndev->partition_id || !hwctx->num_tiles) {
> +		XDNA_ERR(xdna, "invalid request partition_id %d, num_tiles %d",
> +			 ndev->partition_id, hwctx->num_tiles);
> +		return -EINVAL;
> +	}
> +
> +	req.partition_id = ndev->partition_id;
> +	req.request_num_tiles = hwctx->num_tiles;
> +	req.pasid = FIELD_PREP(AIE4_MSG_PASID, client->pasid) |
> +		FIELD_PREP(AIE4_MSG_PASID_VLD, 1);
> +	req.priority_band = hwctx->qos.priority;
> +
> +	req.hsa_addr_high = upper_32_bits(amdxdna_gem_dev_addr(priv->umq_bo));
> +	req.hsa_addr_low = lower_32_bits(amdxdna_gem_dev_addr(priv->umq_bo));
> +
> +	XDNA_DBG(xdna, "pasid 0x%x, num_tiles %d, hsa[0x%x 0x%x]",
> +		 req.pasid, req.request_num_tiles, req.hsa_addr_high, req.hsa_addr_low);
> +
> +	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> +	if (ret) {
> +		XDNA_ERR(xdna, "create ctx failed: %d", ret);
> +		return ret;
> +	}
> +
> +	XDNA_DBG(xdna, "resp msix: %d, ctx id: %d, doorbell: %d",
> +		 resp.job_complete_msix_idx,
> +		 resp.hw_context_id,
> +		 resp.doorbell_offset);
> +
> +	/* setup interrupt completion per msix index */
> +	priv->cert_comp = aie4_lookup_cert_comp(ndev, resp.job_complete_msix_idx);
> +	if (!priv->cert_comp) {
> +		aie4_msg_destroy_context(ndev, resp.hw_context_id);
> +		return -EINVAL;
> +	}
> +
> +	priv->hw_ctx_id = resp.hw_context_id;
> +	hwctx->doorbell_offset = resp.doorbell_offset;
> +
> +	return 0;
> +}
> +
> +static void aie4_hwctx_destroy(struct amdxdna_hwctx *hwctx)
> +{
> +	struct amdxdna_client *client = hwctx->client;
> +	struct amdxdna_hwctx_priv *priv = hwctx->priv;
> +	struct amdxdna_dev *xdna = client->xdna;
> +	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> +
> +	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
> +
> +	aie4_msg_destroy_context(ndev, priv->hw_ctx_id);
> +	aie4_put_cert_comp(priv->cert_comp);
> +}
> +
> +static void aie4_hwctx_umq_fini(struct amdxdna_hwctx *hwctx)
> +{
> +	if (hwctx->priv && hwctx->priv->umq_bo)
> +		amdxdna_gem_put_obj(hwctx->priv->umq_bo);
> +}
> +
> +static int aie4_hwctx_umq_init(struct amdxdna_hwctx *hwctx)
> +{
> +	struct amdxdna_hwctx_priv *priv = hwctx->priv;
> +	struct amdxdna_dev *xdna = hwctx->client->xdna;
> +	struct amdxdna_gem_obj *umq_bo;
> +	struct host_queue_header *qhdr;
> +	int ret;
> +
> +	umq_bo = amdxdna_gem_get_obj(hwctx->client, hwctx->umq_bo_hdl, AMDXDNA_BO_SHARE);
> +	if (!umq_bo) {
> +		XDNA_ERR(xdna, "cannot find umq_bo handle %d", hwctx->umq_bo_hdl);
> +		return -ENOENT;
> +	}
> +	if (umq_bo->mem.size < sizeof(*qhdr)) {
> +		XDNA_ERR(xdna, "umq_bo size is too small");
> +		ret = -EINVAL;
> +		goto put_umq_bo;
> +	}
> +
> +	/* get kva address for host queue read index and write index */
> +	qhdr = amdxdna_gem_vmap(umq_bo);
> +	if (!qhdr) {
> +		ret = -ENOMEM;
> +		goto put_umq_bo;
> +	}
> +
> +	priv->umq_bo = umq_bo;
> +	priv->umq_read_index = &qhdr->read_index;
> +	priv->umq_write_index = &qhdr->write_index;
> +
> +	return 0;
> +
> +put_umq_bo:
> +	amdxdna_gem_put_obj(umq_bo);
> +	return ret;
> +}
> +
> +int aie4_hwctx_init(struct amdxdna_hwctx *hwctx)
> +{
> +	struct amdxdna_client *client = hwctx->client;
> +	struct amdxdna_dev *xdna = client->xdna;
> +	struct amdxdna_hwctx_priv *priv;
> +	int ret;
> +
> +	priv = kzalloc_obj(*priv);
> +	if (!priv)
> +		return -ENOMEM;
> +	hwctx->priv = priv;
> +
> +	ret = aie4_hwctx_umq_init(hwctx);
> +	if (ret)
> +		goto free_priv;
> +
> +	ret = aie4_hwctx_create(hwctx);
> +	if (ret)
> +		goto umq_fini;
> +
> +	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
> +	return 0;
> +
> +umq_fini:
> +	aie4_hwctx_umq_fini(hwctx);
> +free_priv:
> +	kfree(priv);
> +	hwctx->priv = NULL;
> +	return ret;
> +}
> +
> +void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
> +{
> +	aie4_hwctx_destroy(hwctx);
> +	aie4_hwctx_umq_fini(hwctx);
> +	kfree(hwctx->priv);
> +}
> diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h
> new file mode 100644
> index 000000000000..eb6a38dfb53e
> --- /dev/null
> +++ b/drivers/accel/amdxdna/aie4_host_queue.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef _AIE4_HOST_QUEUE_H_
> +#define _AIE4_HOST_QUEUE_H_
> +
> +#include <linux/types.h>
> +
> +struct host_queue_header {
> +	__u64 read_index;
> +	struct {
> +		__u16 major;
> +		__u16 minor;
> +	} version;
> +	__u32 capacity; /* Queue capacity, must be power of two. */
> +	__u64 write_index;
> +	__u64 data_address; /* The xdna dev addr for payload. */
> +};
> +
> +#endif /* _AIE4_HOST_QUEUE_H_ */
> diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
> index cada53257921..7faa01ca3436 100644
> --- a/drivers/accel/amdxdna/aie4_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie4_msg_priv.h
> @@ -16,6 +16,8 @@ enum aie4_msg_opcode {
>   
>   	AIE4_MSG_OP_CREATE_PARTITION                 = 0x30001,
>   	AIE4_MSG_OP_DESTROY_PARTITION                = 0x30002,
> +	AIE4_MSG_OP_CREATE_HW_CONTEXT                = 0x30003,
> +	AIE4_MSG_OP_DESTROY_HW_CONTEXT               = 0x30004,
>   };
>   
>   enum aie4_msg_status {
> @@ -67,4 +69,31 @@ struct aie4_msg_destroy_partition_resp {
>   	enum aie4_msg_status status;
>   } __packed;
>   
> +struct aie4_msg_create_hw_context_req {
> +	__u32 partition_id;
> +	__u32 request_num_tiles;
> +	__u32 hsa_addr_high;
> +	__u32 hsa_addr_low;
> +#define AIE4_MSG_PASID GENMASK(19, 0)
> +#define AIE4_MSG_PASID_VLD GENMASK(31, 31)
> +	__u32 pasid;
> +	__u32 priority_band;
> +} __packed;
> +
> +struct aie4_msg_create_hw_context_resp {
> +	enum aie4_msg_status status;
> +	__u32 hw_context_id;
> +	__u32 doorbell_offset;
> +	__u32 job_complete_msix_idx;
> +} __packed;
> +
> +struct aie4_msg_destroy_hw_context_req {
> +	__u32 hw_context_id;
> +	__u32 resvd1;
> +} __packed;
> +
> +struct aie4_msg_destroy_hw_context_resp {
> +	enum aie4_msg_status status;
> +} __packed;
> +
>   #endif /* _AIE4_MSG_PRIV_H_ */
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index 13f5d45e388d..3be9066b7178 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -451,6 +451,9 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
>   	ndev->aie.xdna = xdna;
>   	xdna->dev_handle = ndev;
>   
> +	xa_init_flags(&ndev->cert_comp_xa, XA_FLAGS_ALLOC);
> +	mutex_init(&ndev->cert_comp_lock);
> +
>   	/* Enable managed PCI device */
>   	ret = pcim_enable_device(pdev);
>   	if (ret) {
> @@ -542,4 +545,6 @@ const struct amdxdna_dev_ops aie4_pf_ops = {
>   const struct amdxdna_dev_ops aie4_vf_ops = {
>   	.init			= aie4_vf_init,
>   	.fini			= aie4_vf_fini,
> +	.hwctx_init		= aie4_hwctx_init,
> +	.hwctx_fini		= aie4_hwctx_fini,
>   };
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index 620fb5bd23e4..6103007e6d2f 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -13,6 +13,23 @@
>   #include "aie.h"
>   #include "amdxdna_mailbox.h"
>   
> +struct cert_comp {
> +	struct amdxdna_dev_hdl          *ndev;
> +	u32                             msix_idx;
> +	int                             irq;
> +	struct kref                     kref;
> +	wait_queue_head_t               waitq;
> +};
> +
> +struct amdxdna_hwctx_priv {
> +	struct amdxdna_gem_obj          *umq_bo;
> +	u64                             *umq_read_index;
> +	u64                             *umq_write_index;
> +
> +	struct cert_comp                *cert_comp;
> +	u32                             hw_ctx_id;
> +};
> +
>   struct amdxdna_dev_priv {
>   	const char              *npufw_path;
>   	const char              *certfw_path;
> @@ -32,11 +49,18 @@ struct amdxdna_dev_hdl {
>   
>   	struct mailbox			*mbox;
>   	u32				partition_id;
> +
> +	struct xarray                   cert_comp_xa; /* device level indexed by msix id */
> +	struct mutex                    cert_comp_lock; /* protects cert_comp operations*/
>   };
>   
>   /* aie4_message.c */
>   int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
>   
> +/* aie4_ctx.c */
> +int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
> +void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
> +
>   /* aie4_sriov.c */
>   #if IS_ENABLED(CONFIG_PCI_IOV)
>   int aie4_sriov_configure(struct amdxdna_dev *xdna, int num_vfs);
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
> index 2c2c21992c87..b5ad60d4b734 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
> @@ -207,6 +207,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
>   	if (args->ext || args->ext_flags)
>   		return -EINVAL;
>   
> +	if (!xdna->dev_info->ops->hwctx_init)
> +		return -EOPNOTSUPP;
> +
>   	hwctx = kzalloc_obj(*hwctx);
>   	if (!hwctx)
>   		return -ENOMEM;
> @@ -220,6 +223,8 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
>   	hwctx->client = client;
>   	hwctx->fw_ctx_id = -1;
>   	hwctx->num_tiles = args->num_tiles;
> +	hwctx->umq_bo_hdl = args->umq_bo;
> +	hwctx->doorbell_offset = AMDXDNA_INVALID_DOORBELL_OFFSET;
>   	hwctx->mem_size = args->mem_size;
>   	hwctx->max_opc = args->max_opc;
>   
> @@ -252,6 +257,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
>   
>   	args->handle = hwctx->id;
>   	args->syncobj_handle = hwctx->syncobj_hdl;
> +	args->umq_doorbell = hwctx->doorbell_offset;
>   
>   	atomic64_set(&hwctx->job_submit_cnt, 0);
>   	atomic64_set(&hwctx->job_free_cnt, 0);
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index 355798687376..c5622718b4d5 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -14,6 +14,7 @@ struct amdxdna_hwctx_priv;
>   
>   enum ert_cmd_opcode {
>   	ERT_START_CU = 0,
> +	ERT_START_DPU = 18,
>   	ERT_CMD_CHAIN = 19,
>   	ERT_START_NPU = 20,
>   	ERT_START_NPU_PREEMPT = 21,
> @@ -105,6 +106,8 @@ struct amdxdna_hwctx {
>   	u32				*col_list;
>   	u32				start_col;
>   	u32				num_col;
> +	u32				umq_bo_hdl;
> +	u32				doorbell_offset;
>   	u32				num_unused_col;
>   
>   	struct amdxdna_qos_info		     qos;
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index 34212feee15c..ad9b33dd7b13 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -18,6 +18,7 @@ extern "C" {
>   #define AMDXDNA_INVALID_CTX_HANDLE	0
>   #define AMDXDNA_INVALID_BO_HANDLE	0
>   #define AMDXDNA_INVALID_FENCE_HANDLE	0
> +#define AMDXDNA_INVALID_DOORBELL_OFFSET	(~0U)
>   
>   /*
>    * Define hardware context priority


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support
  2026-05-05 16:09 ` [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support Lizhi Hou
@ 2026-05-05 20:31   ` Mario Limonciello
  2026-05-06 16:11     ` Lizhi Hou
  0 siblings, 1 reply; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 20:31 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue



On 5/5/26 11:09, Lizhi Hou wrote:
> From: David Zhang <yidong.zhang@amd.com>
> 
> Expose the command doorbell register to userspace on a per-hardware
> context basis, enabling applications to notify the firmware of pending
> commands via doorbell writes.
> 
> Introduce DRM_IOCTL_AMDXDNA_WAIT_CMD to allow userspace to wait for
> completion of individual commands.
> 
> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: David Zhang <yidong.zhang@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Minor suggestion below.

> ---
>   drivers/accel/amdxdna/aie4_ctx.c        | 75 +++++++++++++++++++++++++
>   drivers/accel/amdxdna/aie4_host_queue.h |  2 +
>   drivers/accel/amdxdna/aie4_pci.c        | 34 +++++++++++
>   drivers/accel/amdxdna/aie4_pci.h        |  3 +
>   drivers/accel/amdxdna/amdxdna_ctx.c     | 34 +++++++++++
>   drivers/accel/amdxdna/amdxdna_ctx.h     |  4 +-
>   drivers/accel/amdxdna/amdxdna_gem.c     |  5 +-
>   drivers/accel/amdxdna/amdxdna_pci_drv.c | 18 +++++-
>   drivers/accel/amdxdna/amdxdna_pci_drv.h |  3 +
>   drivers/accel/amdxdna/npu3_regs.c       |  5 ++
>   include/uapi/drm/amdxdna_accel.h        | 22 +++++++-
>   11 files changed, 198 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c
> index 84ac706d0ffb..8408b0d2696f 100644
> --- a/drivers/accel/amdxdna/aie4_ctx.c
> +++ b/drivers/accel/amdxdna/aie4_ctx.c
> @@ -256,3 +256,78 @@ void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
>   	aie4_hwctx_umq_fini(hwctx);
>   	kfree(hwctx->priv);
>   }
> +
> +static inline bool valid_queue_index(u64 read, u64 write, u32 capacity)
> +{
> +	return (write >= read) && ((write - read) <= capacity);
> +}
> +
> +static u64 get_read_index(struct amdxdna_hwctx *hwctx)
> +{
> +	u64 wi = READ_ONCE(*hwctx->priv->umq_write_index);
> +	u64 ri = READ_ONCE(*hwctx->priv->umq_read_index);
> +	struct amdxdna_dev *xdna = hwctx->client->xdna;
> +
> +	/*
> +	 * CERT cannot update read index as uint64 atomically. Driver may read
> +	 * half-updated read index when it has bits in high 32bit. In case read
> +	 * index is not valid, wait for some time and retry once. It should
> +	 * allow CERT to complete the read index update.
> +	 */
> +	if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
> +		XDNA_WARN(xdna, "Invalid index, ri %llu, wi %llu", ri, wi);
> +		usleep_range(100, 200);
> +		ri = READ_ONCE(*hwctx->priv->umq_read_index);
> +		if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
> +			XDNA_ERR(xdna, "Invalid index after retry, ri %llu, wi %llu", ri, wi);
> +			ri = 0;
> +		}
> +	}
> +
> +	return ri;
> +}
> +
> +static inline bool check_cmd_done(struct amdxdna_hwctx *hwctx, u64 seq)
> +{
> +	u64 read_idx = get_read_index(hwctx);
> +
> +	return read_idx > seq;
> +}
> +
> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
> +{
> +	unsigned long wait_jifs = MAX_SCHEDULE_TIMEOUT;
> +	struct amdxdna_hwctx_priv *priv = hwctx->priv;
> +	struct cert_comp *cert_comp = priv->cert_comp;
> +	long ret;

Not sure I see the point in making ret a long.
wait_event_interruptible_timeout() returns 0 or 1.

bool val;
val = wait_event_interruptible_timeout()
return val ? 0 : -ETIME;


> +
> +	if (timeout)
> +		wait_jifs = msecs_to_jiffies(timeout);
> +
> +	ret = wait_event_interruptible_timeout(cert_comp->waitq,
> +					       (check_cmd_done(hwctx, seq)),
> +					       wait_jifs);
> +
> +	if (!ret)
> +		ret = -ETIME;
> +
> +	return ret <= 0 ? ret : 0;
> +}
> +
> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff)
> +{
> +	struct amdxdna_hwctx *hwctx;
> +	unsigned long hwctx_id;
> +	int idx;
> +
> +	idx = srcu_read_lock(&client->hwctx_srcu);
> +	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
> +		if (vm_pgoff == (hwctx->doorbell_offset >> PAGE_SHIFT)) {
> +			srcu_read_unlock(&client->hwctx_srcu, idx);
> +			return 1;
> +		}
> +	}
> +	srcu_read_unlock(&client->hwctx_srcu, idx);
> +
> +	return 0;
> +}
> diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h
> index eb6a38dfb53e..1b33eda3f727 100644
> --- a/drivers/accel/amdxdna/aie4_host_queue.h
> +++ b/drivers/accel/amdxdna/aie4_host_queue.h
> @@ -8,6 +8,8 @@
>   
>   #include <linux/types.h>
>   
> +#define CTX_MAX_CMDS                    32
> +
>   struct host_queue_header {
>   	__u64 read_index;
>   	struct {
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index 3be9066b7178..9ff34ce57fcb 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -503,6 +503,38 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
>   	return 0;
>   }
>   
> +static int aie4_doorbell_mmap(struct amdxdna_client *client, struct vm_area_struct *vma)
> +{
> +	struct amdxdna_dev *xdna = client->xdna;
> +	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> +	const struct amdxdna_dev_priv *npriv = xdna->dev_info->dev_priv;
> +	phys_addr_t res_start;
> +	unsigned long pfn;
> +	int ret;
> +
> +	if (!aie4_hwctx_valid_doorbell(client, vma->vm_pgoff)) {
> +		XDNA_ERR(xdna, "Invalid doorbell page offset 0x%lx", vma->vm_pgoff);
> +		return -EINVAL;
> +	}
> +
> +	if (vma_pages(vma) != 1) {
> +		XDNA_ERR(xdna, "can only map one page, got %ld", vma_pages(vma));
> +		return -EINVAL;
> +	}
> +
> +	res_start = pci_resource_start(pdev, xdna->dev_info->doorbell_bar) + npriv->doorbell_off;
> +	pfn = PHYS_PFN(res_start) + vma->vm_pgoff;
> +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> +	vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
> +	ret = io_remap_pfn_range(vma, vma->vm_start,
> +				 pfn,
> +				 PAGE_SIZE,
> +				 vma->vm_page_prot);
> +
> +	XDNA_DBG(xdna, "doorbell ret %d", ret);
> +	return ret;
> +}
> +
>   static int aie4_pf_init(struct amdxdna_dev *xdna)
>   {
>   	int ret;
> @@ -547,4 +579,6 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
>   	.fini			= aie4_vf_fini,
>   	.hwctx_init		= aie4_hwctx_init,
>   	.hwctx_fini		= aie4_hwctx_fini,
> +	.mmap			= aie4_doorbell_mmap,
> +	.cmd_wait		= aie4_cmd_wait,
>   };
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index 6103007e6d2f..b69489acd53d 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -36,6 +36,7 @@ struct amdxdna_dev_priv {
>   	u32			mbox_bar;
>   	u32			mbox_rbuf_bar;
>   	u64			mbox_info_off;
> +	u32			doorbell_off;
>   
>   	struct aie_bar_off_pair	psp_regs_off[PSP_MAX_REGS];
>   	struct aie_bar_off_pair	smu_regs_off[SMU_MAX_REGS];
> @@ -60,6 +61,8 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
>   /* aie4_ctx.c */
>   int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
>   void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff);
>   
>   /* aie4_sriov.c */
>   #if IS_ENABLED(CONFIG_PCI_IOV)
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
> index b5ad60d4b734..b79229a63af3 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
> @@ -627,3 +627,37 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
>   	XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
>   	return -EINVAL;
>   }
> +
> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> +{
> +	struct amdxdna_client *client = filp->driver_priv;
> +	struct amdxdna_dev *xdna = to_xdna_dev(dev);
> +	struct amdxdna_drm_wait_cmd *args = data;
> +	struct amdxdna_hwctx *hwctx;
> +	int ret, idx;
> +
> +	XDNA_DBG(xdna, "PID %d ctx %d timeout set %d ms for cmd %llu",
> +		 client->pid, args->hwctx, args->timeout, args->seq);
> +
> +	if (!xdna->dev_info->ops->cmd_wait)
> +		return -EOPNOTSUPP;
> +
> +	idx = srcu_read_lock(&client->hwctx_srcu);
> +	hwctx = xa_load(&client->hwctx_xa, args->hwctx);
> +	if (!hwctx) {
> +		XDNA_DBG(xdna, "PID %d failed to get ctx %d", client->pid, args->hwctx);
> +		ret = -EINVAL;
> +		goto unlock_ctx_srcu;
> +	}
> +
> +	ret = xdna->dev_info->ops->cmd_wait(hwctx, args->seq, args->timeout);
> +
> +	XDNA_DBG(xdna, "PID %d ctx %d cmd %lld wait finished, ret %d",
> +		 client->pid, args->hwctx, args->seq, ret);
> +
> +	trace_amdxdna_debug_point(current->comm, args->seq, "job returned to user");
> +
> +unlock_ctx_srcu:
> +	srcu_read_unlock(&client->hwctx_srcu, idx);
> +	return ret;
> +}
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index c5622718b4d5..6e3c6371a088 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -211,12 +211,10 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
>   		       u32 *arg_bo_hdls, u32 arg_bo_cnt,
>   		       u32 hwctx_hdl, u64 *seq);
>   
> -int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
> -		     u64 seq, u32 timeout);
> -
>   int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>   int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>   int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>   int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>   
>   #endif /* _AMDXDNA_CTX_H_ */
> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
> index ebfc472aa9e7..319d2064fafa 100644
> --- a/drivers/accel/amdxdna/amdxdna_gem.c
> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
> @@ -212,7 +212,8 @@ static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
>   	mmu_interval_set_seq(&mapp->notifier, cur_seq);
>   	up_write(&xdna->notifier_lock);
>   
> -	xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
> +	if (xdna->dev_info->ops->hmm_invalidate)
> +		xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
>   
>   	if (range->event == MMU_NOTIFY_UNMAP) {
>   		down_write(&xdna->notifier_lock);
> @@ -295,7 +296,7 @@ static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo,
>   	u32 nr_pages;
>   	int ret;
>   
> -	if (!xdna->dev_info->ops->hmm_invalidate)
> +	if (!amdxdna_pasid_on(abo->client))
>   		return 0;
>   
>   	mapp = kzalloc_obj(*mapp);
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index 39ad081ac082..c0d00db25cde 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -224,6 +224,21 @@ static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struc
>   	return ret;
>   }
>   
> +static int amdxdna_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +	struct drm_file *drm_filp = filp->private_data;
> +	struct amdxdna_client *client = drm_filp->driver_priv;
> +	struct amdxdna_dev *xdna = client->xdna;
> +
> +	if (likely(vma->vm_pgoff >= DRM_FILE_PAGE_OFFSET_START))
> +		return drm_gem_mmap(filp, vma);
> +
> +	if (!xdna->dev_info->ops->mmap)
> +		return -EOPNOTSUPP;
> +
> +	return xdna->dev_info->ops->mmap(client, vma);
> +}
> +
>   static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
>   	/* Context */
>   	DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
> @@ -235,6 +250,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
>   	DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
>   	/* Execution */
>   	DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
> +	DRM_IOCTL_DEF_DRV(AMDXDNA_WAIT_CMD, amdxdna_drm_wait_cmd_ioctl, 0),
>   	/* AIE hardware */
>   	DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
>   	DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, amdxdna_drm_get_array_ioctl, 0),
> @@ -281,7 +297,7 @@ static const struct file_operations amdxdna_fops = {
>   	.poll		= drm_poll,
>   	.read		= drm_read,
>   	.llseek		= noop_llseek,
> -	.mmap		= drm_gem_mmap,
> +	.mmap		= amdxdna_drm_gem_mmap,
>   	.show_fdinfo	= drm_show_fdinfo,
>   	.fop_flags	= FOP_UNSIGNED_OFFSET,
>   };
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> index caed11c09e55..471b72299aee 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> @@ -56,12 +56,14 @@ struct amdxdna_dev_ops {
>   	int (*resume)(struct amdxdna_dev *xdna);
>   	int (*suspend)(struct amdxdna_dev *xdna);
>   	int (*sriov_configure)(struct amdxdna_dev *xdna, int num_vfs);
> +	int (*mmap)(struct amdxdna_client *client, struct vm_area_struct *vma);
>   	int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
>   	void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
>   	int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
>   	int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
>   	void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
>   	int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
> +	int (*cmd_wait)(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
>   	int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
>   	int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
>   	int (*get_array)(struct amdxdna_client *client, struct amdxdna_drm_get_array *args);
> @@ -85,6 +87,7 @@ struct amdxdna_dev_info {
>   	int				sram_bar;
>   	int				psp_bar;
>   	int				smu_bar;
> +	int				doorbell_bar;
>   	int				device_type;
>   	int				first_col;
>   	u32				dev_mem_buf_shift;
> diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
> index 6d5da779232b..d76b2e99c308 100644
> --- a/drivers/accel/amdxdna/npu3_regs.c
> +++ b/drivers/accel/amdxdna/npu3_regs.c
> @@ -14,6 +14,9 @@
>   #define NPU3_MBOX_BUFFER_BAR	2
>   #define NPU3_MBOX_INFO_OFF	0x0
>   
> +#define NPU3_DOORBELL_BAR       2
> +#define NPU3_DOORBELL_OFF       0x0
> +
>   /* PCIe BAR Index for NPU3 */
>   #define NPU3_REG_BAR_INDEX	0
>   #define NPU3_PSP_BAR_INDEX      4
> @@ -45,6 +48,7 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
>   	.mbox_bar		= NPU3_MBOX_BAR,
>   	.mbox_rbuf_bar		= NPU3_MBOX_BUFFER_BAR,
>   	.mbox_info_off		= NPU3_MBOX_INFO_OFF,
> +	.doorbell_off		= NPU3_DOORBELL_OFF,
>   	.psp_regs_off   = {
>   		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
>   		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
> @@ -87,6 +91,7 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
>   const struct amdxdna_dev_info dev_npu3_vf_info = {
>   	.mbox_bar		= NPU3_MBOX_BAR,
>   	.sram_bar		= NPU3_MBOX_BUFFER_BAR,
> +	.doorbell_bar		= NPU3_DOORBELL_BAR,
>   	.default_vbnv		= "RyzenAI-npu3-vf",
>   	.device_type		= AMDXDNA_DEV_TYPE_UMQ,
>   	.dev_priv		= &npu3_dev_vf_priv,
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index ad9b33dd7b13..51a507561df6 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -45,7 +45,8 @@ enum amdxdna_drm_ioctl_id {
>   	DRM_AMDXDNA_EXEC_CMD,
>   	DRM_AMDXDNA_GET_INFO,
>   	DRM_AMDXDNA_SET_STATE,
> -	DRM_AMDXDNA_GET_ARRAY = 10,
> +	DRM_AMDXDNA_WAIT_CMD,
> +	DRM_AMDXDNA_GET_ARRAY,
>   };
>   
>   /**
> @@ -274,6 +275,21 @@ struct amdxdna_drm_exec_cmd {
>   	__u64 seq;
>   };
>   
> +/**
> + * struct amdxdna_drm_wait_cmd - Wait for an execution command to complete.
> + *
> + * @hwctx: Context handle.
> + * @timeout: timeout in ms, 0 implies infinite wait.
> + * @seq: sequence number of the command returned by execute command.
> + *
> + * Wait for the command specified by seq to complete.
> + */
> +struct amdxdna_drm_wait_cmd {
> +	__u32 hwctx;
> +	__u32 timeout;
> +	__u64 seq;
> +};
> +
>   /**
>    * struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware
>    * @buffer: The user space buffer that will return the AIE status.
> @@ -739,6 +755,10 @@ struct amdxdna_drm_set_power_mode {
>   	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \
>   		 struct amdxdna_drm_get_array)
>   
> +#define DRM_IOCTL_AMDXDNA_WAIT_CMD \
> +	DRM_IOW(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, \
> +		struct amdxdna_drm_wait_cmd)
> +
>   #if defined(__cplusplus)
>   } /* extern c end */
>   #endif


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V1 6/6] accel/amdxdna: Add AIE4 work buffer initialization
  2026-05-05 16:09 ` [PATCH V1 6/6] accel/amdxdna: Add AIE4 work buffer initialization Lizhi Hou
@ 2026-05-05 20:36   ` Mario Limonciello
  0 siblings, 0 replies; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 20:36 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
  Cc: Nishad Saraf, linux-kernel, max.zhen, sonal.santan



On 5/5/26 11:09, Lizhi Hou wrote:
> From: Nishad Saraf <nishads@amd.com>
> 
> NPU firmware requires a host-allocated work buffer for hardware contexts.
> Allocate a 4 MB host buffer and attach it to the device during device init.
> 
> Refactor aie2_alloc_msg_buffer() and aie2_free_msg_buffer() into common
> helpers by moving them to aie.c and renaming them to
> amdxdna_alloc_msg_buffer() and amdxdna_free_msg_buffer(), allowing both
> AIE2 and AIE4 to reuse the implementation.
> 
> Signed-off-by: Nishad Saraf <nishads@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
>   drivers/accel/amdxdna/aie.c             | 34 +++++++++++++++
>   drivers/accel/amdxdna/aie.h             |  4 ++
>   drivers/accel/amdxdna/aie2_error.c      |  7 ++--
>   drivers/accel/amdxdna/aie2_message.c    | 49 +++-------------------
>   drivers/accel/amdxdna/aie2_pci.h        |  4 --
>   drivers/accel/amdxdna/aie4_message.c    | 18 ++++++++
>   drivers/accel/amdxdna/aie4_msg_priv.h   | 14 +++++++
>   drivers/accel/amdxdna/aie4_pci.c        | 55 ++++++++++++++++++++++++-
>   drivers/accel/amdxdna/aie4_pci.h        |  5 +++
>   drivers/accel/amdxdna/amdxdna_pci_drv.c |  3 +-
>   10 files changed, 141 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c
> index a31051cc1ec8..4db2fd80a032 100644
> --- a/drivers/accel/amdxdna/aie.c
> +++ b/drivers/accel/amdxdna/aie.c
> @@ -162,3 +162,37 @@ int amdxdna_get_metadata(struct aie_device *aie,
>   	kfree(meta);
>   	return ret;
>   }
> +
> +void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size,
> +			       dma_addr_t *dma_addr)
> +{
> +	void *vaddr;
> +	int order;
> +
> +	*size = max_t(u32, *size, SZ_8K);
> +	order = get_order(*size);
> +	if (order > MAX_PAGE_ORDER)
> +		return ERR_PTR(-EINVAL);
> +	*size = PAGE_SIZE << order;
> +
> +	if (amdxdna_iova_on(xdna))
> +		return amdxdna_iommu_alloc(xdna, *size, dma_addr);
> +
> +	vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr,
> +				      DMA_FROM_DEVICE, GFP_KERNEL);
> +	if (!vaddr)
> +		return ERR_PTR(-ENOMEM);
> +
> +	return vaddr;
> +}
> +
> +void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size,
> +			     void *cpu_addr, dma_addr_t dma_addr)
> +{
> +	if (amdxdna_iova_on(xdna)) {
> +		amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr);
> +		return;
> +	}
> +
> +	dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE);
> +}
> diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
> index 4bb3719ee0c0..70618204c0ab 100644
> --- a/drivers/accel/amdxdna/aie.h
> +++ b/drivers/accel/amdxdna/aie.h
> @@ -121,6 +121,10 @@ int aie_check_protocol(struct aie_device *aie, u32 fw_major, u32 fw_minor);
>   void amdxdna_vbnv_init(struct amdxdna_dev *xdna);
>   int amdxdna_get_metadata(struct aie_device *aie, struct amdxdna_client *client,
>   			 struct amdxdna_drm_get_info *args);
> +void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size,
> +			       dma_addr_t *dma_addr);
> +void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size,
> +			     void *cpu_addr, dma_addr_t dma_addr);
>   
>   /* aie_psp.c */
>   struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf);
> diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
> index 70007b4363cd..babdac0157ab 100644
> --- a/drivers/accel/amdxdna/aie2_error.c
> +++ b/drivers/accel/amdxdna/aie2_error.c
> @@ -11,6 +11,7 @@
>   #include <linux/kthread.h>
>   #include <linux/kernel.h>
>   
> +#include "aie.h"
>   #include "aie2_msg_priv.h"
>   #include "aie2_pci.h"
>   #include "amdxdna_error.h"
> @@ -338,7 +339,7 @@ void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
>   	destroy_workqueue(events->wq);
>   	mutex_lock(&xdna->dev_lock);
>   
> -	aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr);
> +	amdxdna_free_msg_buffer(xdna, events->size, events->buf, events->addr);
>   	kfree(events);
>   }
>   
> @@ -354,7 +355,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
>   	if (!events)
>   		return -ENOMEM;
>   
> -	events->buf = aie2_alloc_msg_buffer(ndev, &total_size, &events->addr);
> +	events->buf = amdxdna_alloc_msg_buffer(xdna, &total_size, &events->addr);
>   	if (IS_ERR(events->buf)) {
>   		ret = PTR_ERR(events->buf);
>   		goto free_events;
> @@ -394,7 +395,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
>   free_wq:
>   	destroy_workqueue(events->wq);
>   free_buf:
> -	aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr);
> +	amdxdna_free_msg_buffer(xdna, events->size, events->buf, events->addr);
>   free_events:
>   	kfree(events);
>   	return ret;
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index f555ffecea6f..0417c6a4c80a 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -27,43 +27,6 @@
>   
>   #define EXEC_MSG_OPS(xdna)	((xdna)->dev_handle->exec_msg_ops)
>   
> -void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
> -			    dma_addr_t *dma_addr)
> -{
> -	struct amdxdna_dev *xdna = ndev->aie.xdna;
> -	void *vaddr;
> -	int order;
> -
> -	*size = max(*size, SZ_8K);
> -	order = get_order(*size);
> -	if (order > MAX_PAGE_ORDER)
> -		return ERR_PTR(-EINVAL);
> -	*size = PAGE_SIZE << order;
> -
> -	if (amdxdna_iova_on(xdna))
> -		return amdxdna_iommu_alloc(xdna, *size, dma_addr);
> -
> -	vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr,
> -				      DMA_FROM_DEVICE, GFP_KERNEL);
> -	if (!vaddr)
> -		return ERR_PTR(-ENOMEM);
> -
> -	return vaddr;
> -}
> -
> -void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
> -			  void *cpu_addr, dma_addr_t dma_addr)
> -{
> -	struct amdxdna_dev *xdna = ndev->aie.xdna;
> -
> -	if (amdxdna_iova_on(xdna)) {
> -		amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr);
> -		return;
> -	}
> -
> -	dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE);
> -}
> -
>   int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
>   {
>   	DECLARE_AIE_MSG(suspend, MSG_OP_SUSPEND);
> @@ -376,7 +339,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
>   	int ret;
>   
>   	buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size;
> -	buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
> +	buff_addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr);
>   	if (IS_ERR(buff_addr))
>   		return PTR_ERR(buff_addr);
>   
> @@ -415,7 +378,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
>   	*cols_filled = aie_bitmap;
>   
>   fail:
> -	aie2_free_msg_buffer(ndev, buf_sz, buff_addr, dma_addr);
> +	amdxdna_free_msg_buffer(xdna, buf_sz, buff_addr, dma_addr);
>   	return ret;
>   }
>   
> @@ -434,7 +397,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
>   		return -EINVAL;
>   
>   	buf_sz = min(size, SZ_4M);
> -	addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
> +	addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr);
>   	if (IS_ERR(addr))
>   		return PTR_ERR(addr);
>   
> @@ -466,7 +429,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
>   	header->minor = resp.minor;
>   
>   free_buf:
> -	aie2_free_msg_buffer(ndev, buf_sz, addr, dma_addr);
> +	amdxdna_free_msg_buffer(xdna, buf_sz, addr, dma_addr);
>   	return ret;
>   }
>   
> @@ -1176,7 +1139,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
>   	}
>   
>   	buf_size = sizeof(*report);
> -	buf = aie2_alloc_msg_buffer(ndev, &buf_size, &dma_addr);
> +	buf = amdxdna_alloc_msg_buffer(xdna, &buf_size, &dma_addr);
>   	if (IS_ERR(buf)) {
>   		XDNA_ERR(xdna, "Failed to allocate buffer for app health");
>   		return PTR_ERR(buf);
> @@ -1197,7 +1160,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
>   	memcpy(report, buf, sizeof(*report));
>   
>   free_buf:
> -	aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
> +	amdxdna_free_msg_buffer(xdna, buf_size, buf, dma_addr);
>   	return ret;
>   }
>   
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index c884fed610f9..33b6c84e8b6e 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -290,10 +290,6 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
>   int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
>   			 int (*notify_cb)(void *, void __iomem *, size_t));
>   int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us);
> -void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
> -			    dma_addr_t *dma_addr);
> -void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
> -			  void *cpu_addr, dma_addr_t dma_addr);
>   
>   /* aie2_hwctx.c */
>   int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
> diff --git a/drivers/accel/amdxdna/aie4_message.c b/drivers/accel/amdxdna/aie4_message.c
> index ac89a9a842b2..d85df04c5f6b 100644
> --- a/drivers/accel/amdxdna/aie4_message.c
> +++ b/drivers/accel/amdxdna/aie4_message.c
> @@ -62,3 +62,21 @@ int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *m
>   
>   	return 0;
>   }
> +
> +int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev)
> +{
> +	DECLARE_AIE_MSG(aie4_msg_attach_work_buffer, AIE4_MSG_OP_ATTACH_WORK_BUFFER);
> +	struct amdxdna_dev *xdna = ndev->aie.xdna;
> +	int ret;
> +
> +	req.buff_addr = ndev->work_buf_addr;
> +	req.buff_size = AIE4_WORK_BUFFER_MIN_SIZE;
> +
> +	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> +	if (ret)
> +		XDNA_ERR(xdna, "Failed to attach work buffer, ret %d", ret);
> +	else
> +		XDNA_DBG(xdna, "Attached work buffer");
> +
> +	return ret;
> +}
> diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
> index 69e220e40900..af0866045b91 100644
> --- a/drivers/accel/amdxdna/aie4_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie4_msg_priv.h
> @@ -6,10 +6,12 @@
>   #ifndef _AIE4_MSG_PRIV_H_
>   #define _AIE4_MSG_PRIV_H_
>   
> +#include <linux/sizes.h>
>   #include <linux/types.h>
>   
>   enum aie4_msg_opcode {
>   	AIE4_MSG_OP_SUSPEND                          = 0x10003,
> +	AIE4_MSG_OP_ATTACH_WORK_BUFFER               = 0x1000D,
>   
>   	AIE4_MSG_OP_CREATE_VFS                       = 0x20001,
>   	AIE4_MSG_OP_DESTROY_VFS                      = 0x20002,
> @@ -130,4 +132,16 @@ struct aie4_msg_aie4_tile_info_resp {
>   	struct aie4_tile_info info;
>   } __packed;
>   
> +#define AIE4_WORK_BUFFER_MIN_SIZE      SZ_4M
> +
> +struct aie4_msg_attach_work_buffer_req {
> +	__u64 buff_addr;
> +	__u32 reserved;
> +	__u32 buff_size;
> +} __packed;
> +
> +struct aie4_msg_attach_work_buffer_resp {
> +	enum aie4_msg_status status;
> +} __packed;
> +
>   #endif /* _AIE4_MSG_PRIV_H_ */
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index 8b5eff0e45c1..a58a83af42a4 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -286,8 +286,14 @@ static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
>   	if (ret)
>   		goto stop_fw;
>   
> +	ret = aie4_attach_work_buffer(ndev);
> +	if (ret)
> +		goto mbox_fini;
> +
>   	return 0;
>   
> +mbox_fini:
> +	aie4_mailbox_fini(ndev);
>   stop_fw:
>   	aie4_fw_stop(ndev);
>   
> @@ -564,6 +570,40 @@ static int aie4_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
>   	return ret;
>   }
>   
> +static int aie4_alloc_work_buffer(struct amdxdna_dev_hdl *ndev)
> +{
> +	struct amdxdna_dev *xdna = ndev->aie.xdna;
> +	u32 buf_size = AIE4_WORK_BUFFER_MIN_SIZE;
> +
> +	ndev->work_buf = amdxdna_alloc_msg_buffer(xdna, &buf_size,
> +						  &ndev->work_buf_addr);
> +	if (IS_ERR(ndev->work_buf)) {
> +		int ret = PTR_ERR(ndev->work_buf);
> +
> +		XDNA_ERR(xdna, "Failed to alloc work buffer, size 0x%x",
> +			 AIE4_WORK_BUFFER_MIN_SIZE);
> +		ndev->work_buf = NULL;
> +		return ret;
> +	}
> +
> +	ndev->work_buf_size = buf_size;
> +	XDNA_DBG(xdna, "Work buffer allocated: size 0x%x", buf_size);
> +
> +	return 0;
> +}
> +
> +static void aie4_free_work_buffer(struct amdxdna_dev_hdl *ndev)
> +{
> +	struct amdxdna_dev *xdna = ndev->aie.xdna;
> +
> +	if (!ndev->work_buf)
> +		return;
> +
> +	amdxdna_free_msg_buffer(xdna, ndev->work_buf_size, ndev->work_buf,
> +				ndev->work_buf_addr);
> +	ndev->work_buf = NULL;
> +}
> +
>   static int aie4_pf_init(struct amdxdna_dev *xdna)
>   {
>   	int ret;
> @@ -572,7 +612,19 @@ static int aie4_pf_init(struct amdxdna_dev *xdna)
>   	if (ret)
>   		return ret;
>   
> -	return aie4_pf_hw_start(xdna->dev_handle);
> +	ret = aie4_alloc_work_buffer(xdna->dev_handle);
> +	if (ret)
> +		return ret;
> +
> +	ret = aie4_pf_hw_start(xdna->dev_handle);
> +	if (ret)
> +		goto free_work_buf;
> +
> +	return 0;
> +
> +free_work_buf:
> +	aie4_free_work_buffer(xdna->dev_handle);
> +	return ret;
>   }
>   
>   static int aie4_vf_init(struct amdxdna_dev *xdna)
> @@ -590,6 +642,7 @@ static void aie4_pf_fini(struct amdxdna_dev *xdna)
>   {
>   	aie4_sriov_stop(xdna->dev_handle);
>   	aie4_pf_hw_stop(xdna->dev_handle);
> +	aie4_free_work_buffer(xdna->dev_handle);
>   }
>   
>   static void aie4_vf_fini(struct amdxdna_dev *xdna)
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index 1886cffc62db..390864876ca5 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -53,11 +53,16 @@ struct amdxdna_dev_hdl {
>   
>   	struct xarray                   cert_comp_xa; /* device level indexed by msix id */
>   	struct mutex                    cert_comp_lock; /* protects cert_comp operations*/
> +
> +	void				*work_buf;
> +	dma_addr_t			work_buf_addr;
> +	u32				work_buf_size;
>   };
>   
>   /* aie4_message.c */
>   int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
>   int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
> +int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev);
>   
>   /* aie4_ctx.c */
>   int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index c0d00db25cde..a6e9be7960c2 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -40,9 +40,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
>    * 0.7: Support getting power and utilization data
>    * 0.8: Support BO usage query
>    * 0.9: Add new device type AMDXDNA_DEV_TYPE_PF
> + * 0.10: Support AIE4 UMQ
>    */
>   #define AMDXDNA_DRIVER_MAJOR		0
> -#define AMDXDNA_DRIVER_MINOR		9
> +#define AMDXDNA_DRIVER_MINOR		10
>   
>   /*
>    * Bind the driver base on (vendor_id, device_id) pair and later use the


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support
  2026-05-05 20:31   ` Mario Limonciello
@ 2026-05-06 16:11     ` Lizhi Hou
  2026-05-06 16:33       ` Mario Limonciello
  0 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-06 16:11 UTC (permalink / raw)
  To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel,
	karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue


On 5/5/26 13:31, Mario Limonciello wrote:
>
>
> On 5/5/26 11:09, Lizhi Hou wrote:
>> From: David Zhang <yidong.zhang@amd.com>
>>
>> Expose the command doorbell register to userspace on a per-hardware
>> context basis, enabling applications to notify the firmware of pending
>> commands via doorbell writes.
>>
>> Introduce DRM_IOCTL_AMDXDNA_WAIT_CMD to allow userspace to wait for
>> completion of individual commands.
>>
>> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>> Signed-off-by: David Zhang <yidong.zhang@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> Minor suggestion below.
>
>> ---
>>   drivers/accel/amdxdna/aie4_ctx.c        | 75 +++++++++++++++++++++++++
>>   drivers/accel/amdxdna/aie4_host_queue.h |  2 +
>>   drivers/accel/amdxdna/aie4_pci.c        | 34 +++++++++++
>>   drivers/accel/amdxdna/aie4_pci.h        |  3 +
>>   drivers/accel/amdxdna/amdxdna_ctx.c     | 34 +++++++++++
>>   drivers/accel/amdxdna/amdxdna_ctx.h     |  4 +-
>>   drivers/accel/amdxdna/amdxdna_gem.c     |  5 +-
>>   drivers/accel/amdxdna/amdxdna_pci_drv.c | 18 +++++-
>>   drivers/accel/amdxdna/amdxdna_pci_drv.h |  3 +
>>   drivers/accel/amdxdna/npu3_regs.c       |  5 ++
>>   include/uapi/drm/amdxdna_accel.h        | 22 +++++++-
>>   11 files changed, 198 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie4_ctx.c 
>> b/drivers/accel/amdxdna/aie4_ctx.c
>> index 84ac706d0ffb..8408b0d2696f 100644
>> --- a/drivers/accel/amdxdna/aie4_ctx.c
>> +++ b/drivers/accel/amdxdna/aie4_ctx.c
>> @@ -256,3 +256,78 @@ void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
>>       aie4_hwctx_umq_fini(hwctx);
>>       kfree(hwctx->priv);
>>   }
>> +
>> +static inline bool valid_queue_index(u64 read, u64 write, u32 capacity)
>> +{
>> +    return (write >= read) && ((write - read) <= capacity);
>> +}
>> +
>> +static u64 get_read_index(struct amdxdna_hwctx *hwctx)
>> +{
>> +    u64 wi = READ_ONCE(*hwctx->priv->umq_write_index);
>> +    u64 ri = READ_ONCE(*hwctx->priv->umq_read_index);
>> +    struct amdxdna_dev *xdna = hwctx->client->xdna;
>> +
>> +    /*
>> +     * CERT cannot update read index as uint64 atomically. Driver 
>> may read
>> +     * half-updated read index when it has bits in high 32bit. In 
>> case read
>> +     * index is not valid, wait for some time and retry once. It should
>> +     * allow CERT to complete the read index update.
>> +     */
>> +    if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
>> +        XDNA_WARN(xdna, "Invalid index, ri %llu, wi %llu", ri, wi);
>> +        usleep_range(100, 200);
>> +        ri = READ_ONCE(*hwctx->priv->umq_read_index);
>> +        if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
>> +            XDNA_ERR(xdna, "Invalid index after retry, ri %llu, wi 
>> %llu", ri, wi);
>> +            ri = 0;
>> +        }
>> +    }
>> +
>> +    return ri;
>> +}
>> +
>> +static inline bool check_cmd_done(struct amdxdna_hwctx *hwctx, u64 seq)
>> +{
>> +    u64 read_idx = get_read_index(hwctx);
>> +
>> +    return read_idx > seq;
>> +}
>> +
>> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
>> +{
>> +    unsigned long wait_jifs = MAX_SCHEDULE_TIMEOUT;
>> +    struct amdxdna_hwctx_priv *priv = hwctx->priv;
>> +    struct cert_comp *cert_comp = priv->cert_comp;
>> +    long ret;
>
> Not sure I see the point in making ret a long. 
> wait_event_interruptible_timeout() returns 0 or 1.

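Other than 0 or 1, wait_event_interruptible_timeout() can also return
the remaining jiffies (if the condition became true before the timeout
elapsed) and -ERESTARTSYS (if the wait was interrupted by a signal).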

Lizhi

>
> bool val;
> val = wait_event_interruptible_timeout()
> return val ? 0 : -ETIME;
>
>
>> +
>> +    if (timeout)
>> +        wait_jifs = msecs_to_jiffies(timeout);
>> +
>> +    ret = wait_event_interruptible_timeout(cert_comp->waitq,
>> +                           (check_cmd_done(hwctx, seq)),
>> +                           wait_jifs);
>> +
>> +    if (!ret)
>> +        ret = -ETIME;
>> +
>> +    return ret <= 0 ? ret : 0;
>> +}
>> +
>> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 
>> vm_pgoff)
>> +{
>> +    struct amdxdna_hwctx *hwctx;
>> +    unsigned long hwctx_id;
>> +    int idx;
>> +
>> +    idx = srcu_read_lock(&client->hwctx_srcu);
>> +    amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
>> +        if (vm_pgoff == (hwctx->doorbell_offset >> PAGE_SHIFT)) {
>> +            srcu_read_unlock(&client->hwctx_srcu, idx);
>> +            return 1;
>> +        }
>> +    }
>> +    srcu_read_unlock(&client->hwctx_srcu, idx);
>> +
>> +    return 0;
>> +}
>> diff --git a/drivers/accel/amdxdna/aie4_host_queue.h 
>> b/drivers/accel/amdxdna/aie4_host_queue.h
>> index eb6a38dfb53e..1b33eda3f727 100644
>> --- a/drivers/accel/amdxdna/aie4_host_queue.h
>> +++ b/drivers/accel/amdxdna/aie4_host_queue.h
>> @@ -8,6 +8,8 @@
>>     #include <linux/types.h>
>>   +#define CTX_MAX_CMDS                    32
>> +
>>   struct host_queue_header {
>>       __u64 read_index;
>>       struct {
>> diff --git a/drivers/accel/amdxdna/aie4_pci.c 
>> b/drivers/accel/amdxdna/aie4_pci.c
>> index 3be9066b7178..9ff34ce57fcb 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.c
>> +++ b/drivers/accel/amdxdna/aie4_pci.c
>> @@ -503,6 +503,38 @@ static int aie4m_pcidev_init(struct amdxdna_dev 
>> *xdna)
>>       return 0;
>>   }
>>   +static int aie4_doorbell_mmap(struct amdxdna_client *client, 
>> struct vm_area_struct *vma)
>> +{
>> +    struct amdxdna_dev *xdna = client->xdna;
>> +    struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>> +    const struct amdxdna_dev_priv *npriv = xdna->dev_info->dev_priv;
>> +    phys_addr_t res_start;
>> +    unsigned long pfn;
>> +    int ret;
>> +
>> +    if (!aie4_hwctx_valid_doorbell(client, vma->vm_pgoff)) {
>> +        XDNA_ERR(xdna, "Invalid doorbell page offset 0x%lx", 
>> vma->vm_pgoff);
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (vma_pages(vma) != 1) {
>> +        XDNA_ERR(xdna, "can only map one page, got %ld", 
>> vma_pages(vma));
>> +        return -EINVAL;
>> +    }
>> +
>> +    res_start = pci_resource_start(pdev, 
>> xdna->dev_info->doorbell_bar) + npriv->doorbell_off;
>> +    pfn = PHYS_PFN(res_start) + vma->vm_pgoff;
>> +    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
>> +    vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
>> +    ret = io_remap_pfn_range(vma, vma->vm_start,
>> +                 pfn,
>> +                 PAGE_SIZE,
>> +                 vma->vm_page_prot);
>> +
>> +    XDNA_DBG(xdna, "doorbell ret %d", ret);
>> +    return ret;
>> +}
>> +
>>   static int aie4_pf_init(struct amdxdna_dev *xdna)
>>   {
>>       int ret;
>> @@ -547,4 +579,6 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
>>       .fini            = aie4_vf_fini,
>>       .hwctx_init        = aie4_hwctx_init,
>>       .hwctx_fini        = aie4_hwctx_fini,
>> +    .mmap            = aie4_doorbell_mmap,
>> +    .cmd_wait        = aie4_cmd_wait,
>>   };
>> diff --git a/drivers/accel/amdxdna/aie4_pci.h 
>> b/drivers/accel/amdxdna/aie4_pci.h
>> index 6103007e6d2f..b69489acd53d 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.h
>> +++ b/drivers/accel/amdxdna/aie4_pci.h
>> @@ -36,6 +36,7 @@ struct amdxdna_dev_priv {
>>       u32            mbox_bar;
>>       u32            mbox_rbuf_bar;
>>       u64            mbox_info_off;
>> +    u32            doorbell_off;
>>         struct aie_bar_off_pair    psp_regs_off[PSP_MAX_REGS];
>>       struct aie_bar_off_pair    smu_regs_off[SMU_MAX_REGS];
>> @@ -60,6 +61,8 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
>>   /* aie4_ctx.c */
>>   int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
>>   void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
>> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
>> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 
>> vm_pgoff);
>>     /* aie4_sriov.c */
>>   #if IS_ENABLED(CONFIG_PCI_IOV)
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c 
>> b/drivers/accel/amdxdna/amdxdna_ctx.c
>> index b5ad60d4b734..b79229a63af3 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>> @@ -627,3 +627,37 @@ int amdxdna_drm_submit_cmd_ioctl(struct 
>> drm_device *dev, void *data, struct drm_
>>       XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
>>       return -EINVAL;
>>   }
>> +
>> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, 
>> struct drm_file *filp)
>> +{
>> +    struct amdxdna_client *client = filp->driver_priv;
>> +    struct amdxdna_dev *xdna = to_xdna_dev(dev);
>> +    struct amdxdna_drm_wait_cmd *args = data;
>> +    struct amdxdna_hwctx *hwctx;
>> +    int ret, idx;
>> +
>> +    XDNA_DBG(xdna, "PID %d ctx %d timeout set %d ms for cmd %llu",
>> +         client->pid, args->hwctx, args->timeout, args->seq);
>> +
>> +    if (!xdna->dev_info->ops->cmd_wait)
>> +        return -EOPNOTSUPP;
>> +
>> +    idx = srcu_read_lock(&client->hwctx_srcu);
>> +    hwctx = xa_load(&client->hwctx_xa, args->hwctx);
>> +    if (!hwctx) {
>> +        XDNA_DBG(xdna, "PID %d failed to get ctx %d", client->pid, 
>> args->hwctx);
>> +        ret = -EINVAL;
>> +        goto unlock_ctx_srcu;
>> +    }
>> +
>> +    ret = xdna->dev_info->ops->cmd_wait(hwctx, args->seq, 
>> args->timeout);
>> +
>> +    XDNA_DBG(xdna, "PID %d ctx %d cmd %lld wait finished, ret %d",
>> +         client->pid, args->hwctx, args->seq, ret);
>> +
>> +    trace_amdxdna_debug_point(current->comm, args->seq, "job 
>> returned to user");
>> +
>> +unlock_ctx_srcu:
>> +    srcu_read_unlock(&client->hwctx_srcu, idx);
>> +    return ret;
>> +}
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h 
>> b/drivers/accel/amdxdna/amdxdna_ctx.h
>> index c5622718b4d5..6e3c6371a088 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>> @@ -211,12 +211,10 @@ int amdxdna_cmd_submit(struct amdxdna_client 
>> *client,
>>                  u32 *arg_bo_hdls, u32 arg_bo_cnt,
>>                  u32 hwctx_hdl, u64 *seq);
>>   -int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
>> -             u64 seq, u32 timeout);
>> -
>>   int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void 
>> *data, struct drm_file *filp);
>>   int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void 
>> *data, struct drm_file *filp);
>>   int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void 
>> *data, struct drm_file *filp);
>>   int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void 
>> *data, struct drm_file *filp);
>> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, 
>> struct drm_file *filp);
>>     #endif /* _AMDXDNA_CTX_H_ */
>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c 
>> b/drivers/accel/amdxdna/amdxdna_gem.c
>> index ebfc472aa9e7..319d2064fafa 100644
>> --- a/drivers/accel/amdxdna/amdxdna_gem.c
>> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
>> @@ -212,7 +212,8 @@ static bool amdxdna_hmm_invalidate(struct 
>> mmu_interval_notifier *mni,
>>       mmu_interval_set_seq(&mapp->notifier, cur_seq);
>>       up_write(&xdna->notifier_lock);
>>   -    xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
>> +    if (xdna->dev_info->ops->hmm_invalidate)
>> +        xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
>>         if (range->event == MMU_NOTIFY_UNMAP) {
>>           down_write(&xdna->notifier_lock);
>> @@ -295,7 +296,7 @@ static int amdxdna_hmm_register(struct 
>> amdxdna_gem_obj *abo,
>>       u32 nr_pages;
>>       int ret;
>>   -    if (!xdna->dev_info->ops->hmm_invalidate)
>> +    if (!amdxdna_pasid_on(abo->client))
>>           return 0;
>>         mapp = kzalloc_obj(*mapp);
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> index 39ad081ac082..c0d00db25cde 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -224,6 +224,21 @@ static int amdxdna_drm_set_state_ioctl(struct 
>> drm_device *dev, void *data, struc
>>       return ret;
>>   }
>>   +static int amdxdna_drm_gem_mmap(struct file *filp, struct 
>> vm_area_struct *vma)
>> +{
>> +    struct drm_file *drm_filp = filp->private_data;
>> +    struct amdxdna_client *client = drm_filp->driver_priv;
>> +    struct amdxdna_dev *xdna = client->xdna;
>> +
>> +    if (likely(vma->vm_pgoff >= DRM_FILE_PAGE_OFFSET_START))
>> +        return drm_gem_mmap(filp, vma);
>> +
>> +    if (!xdna->dev_info->ops->mmap)
>> +        return -EOPNOTSUPP;
>> +
>> +    return xdna->dev_info->ops->mmap(client, vma);
>> +}
>> +
>>   static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
>>       /* Context */
>>       DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, 
>> amdxdna_drm_create_hwctx_ioctl, 0),
>> @@ -235,6 +250,7 @@ static const struct drm_ioctl_desc 
>> amdxdna_drm_ioctls[] = {
>>       DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
>>       /* Execution */
>>       DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, 
>> amdxdna_drm_submit_cmd_ioctl, 0),
>> +    DRM_IOCTL_DEF_DRV(AMDXDNA_WAIT_CMD, amdxdna_drm_wait_cmd_ioctl, 0),
>>       /* AIE hardware */
>>       DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 
>> 0),
>>       DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, 
>> amdxdna_drm_get_array_ioctl, 0),
>> @@ -281,7 +297,7 @@ static const struct file_operations amdxdna_fops = {
>>       .poll        = drm_poll,
>>       .read        = drm_read,
>>       .llseek        = noop_llseek,
>> -    .mmap        = drm_gem_mmap,
>> +    .mmap        = amdxdna_drm_gem_mmap,
>>       .show_fdinfo    = drm_show_fdinfo,
>>       .fop_flags    = FOP_UNSIGNED_OFFSET,
>>   };
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> index caed11c09e55..471b72299aee 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> @@ -56,12 +56,14 @@ struct amdxdna_dev_ops {
>>       int (*resume)(struct amdxdna_dev *xdna);
>>       int (*suspend)(struct amdxdna_dev *xdna);
>>       int (*sriov_configure)(struct amdxdna_dev *xdna, int num_vfs);
>> +    int (*mmap)(struct amdxdna_client *client, struct vm_area_struct 
>> *vma);
>>       int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
>>       void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
>>       int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 
>> value, void *buf, u32 size);
>>       int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 
>> debug_bo_hdl);
>>       void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned 
>> long cur_seq);
>>       int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct 
>> amdxdna_sched_job *job, u64 *seq);
>> +    int (*cmd_wait)(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
>>       int (*get_aie_info)(struct amdxdna_client *client, struct 
>> amdxdna_drm_get_info *args);
>>       int (*set_aie_state)(struct amdxdna_client *client, struct 
>> amdxdna_drm_set_state *args);
>>       int (*get_array)(struct amdxdna_client *client, struct 
>> amdxdna_drm_get_array *args);
>> @@ -85,6 +87,7 @@ struct amdxdna_dev_info {
>>       int                sram_bar;
>>       int                psp_bar;
>>       int                smu_bar;
>> +    int                doorbell_bar;
>>       int                device_type;
>>       int                first_col;
>>       u32                dev_mem_buf_shift;
>> diff --git a/drivers/accel/amdxdna/npu3_regs.c 
>> b/drivers/accel/amdxdna/npu3_regs.c
>> index 6d5da779232b..d76b2e99c308 100644
>> --- a/drivers/accel/amdxdna/npu3_regs.c
>> +++ b/drivers/accel/amdxdna/npu3_regs.c
>> @@ -14,6 +14,9 @@
>>   #define NPU3_MBOX_BUFFER_BAR    2
>>   #define NPU3_MBOX_INFO_OFF    0x0
>>   +#define NPU3_DOORBELL_BAR       2
>> +#define NPU3_DOORBELL_OFF       0x0
>> +
>>   /* PCIe BAR Index for NPU3 */
>>   #define NPU3_REG_BAR_INDEX    0
>>   #define NPU3_PSP_BAR_INDEX      4
>> @@ -45,6 +48,7 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
>>       .mbox_bar        = NPU3_MBOX_BAR,
>>       .mbox_rbuf_bar        = NPU3_MBOX_BUFFER_BAR,
>>       .mbox_info_off        = NPU3_MBOX_INFO_OFF,
>> +    .doorbell_off        = NPU3_DOORBELL_OFF,
>>       .psp_regs_off   = {
>>           DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU3_PSP, 
>> MPASP_C2PMSG_123_ALT_1),
>>           DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU3_PSP, 
>> MPASP_C2PMSG_156_ALT_1),
>> @@ -87,6 +91,7 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
>>   const struct amdxdna_dev_info dev_npu3_vf_info = {
>>       .mbox_bar        = NPU3_MBOX_BAR,
>>       .sram_bar        = NPU3_MBOX_BUFFER_BAR,
>> +    .doorbell_bar        = NPU3_DOORBELL_BAR,
>>       .default_vbnv        = "RyzenAI-npu3-vf",
>>       .device_type        = AMDXDNA_DEV_TYPE_UMQ,
>>       .dev_priv        = &npu3_dev_vf_priv,
>> diff --git a/include/uapi/drm/amdxdna_accel.h 
>> b/include/uapi/drm/amdxdna_accel.h
>> index ad9b33dd7b13..51a507561df6 100644
>> --- a/include/uapi/drm/amdxdna_accel.h
>> +++ b/include/uapi/drm/amdxdna_accel.h
>> @@ -45,7 +45,8 @@ enum amdxdna_drm_ioctl_id {
>>       DRM_AMDXDNA_EXEC_CMD,
>>       DRM_AMDXDNA_GET_INFO,
>>       DRM_AMDXDNA_SET_STATE,
>> -    DRM_AMDXDNA_GET_ARRAY = 10,
>> +    DRM_AMDXDNA_WAIT_CMD,
>> +    DRM_AMDXDNA_GET_ARRAY,
>>   };
>>     /**
>> @@ -274,6 +275,21 @@ struct amdxdna_drm_exec_cmd {
>>       __u64 seq;
>>   };
>>   +/**
>> + * struct amdxdna_drm_wait_cmd - Wait execution command.
>> + *
>> + * @hwctx: Context handle.
>> + * @timeout: timeout in ms, 0 implies infinite wait.
>> + * @seq: sequence number of the command returned by execute command.
>> + *
>> + * Wait a command specified by seq to be completed.
>> + */
>> +struct amdxdna_drm_wait_cmd {
>> +    __u32 hwctx;
>> +    __u32 timeout;
>> +    __u64 seq;
>> +};
>> +
>>   /**
>>    * struct amdxdna_drm_query_aie_status - Query the status of the 
>> AIE hardware
>>    * @buffer: The user space buffer that will return the AIE status.
>> @@ -739,6 +755,10 @@ struct amdxdna_drm_set_power_mode {
>>       DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \
>>            struct amdxdna_drm_get_array)
>>   +#define DRM_IOCTL_AMDXDNA_WAIT_CMD \
>> +    DRM_IOW(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, \
>> +        struct amdxdna_drm_wait_cmd)
>> +
>>   #if defined(__cplusplus)
>>   } /* extern c end */
>>   #endif
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support
  2026-05-06 16:11     ` Lizhi Hou
@ 2026-05-06 16:33       ` Mario Limonciello
  0 siblings, 0 replies; 16+ messages in thread
From: Mario Limonciello @ 2026-05-06 16:33 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
  Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
	Hayden Laccabue

On 5/6/26 11:11, Lizhi Hou wrote:
> 
> On 5/5/26 13:31, Mario Limonciello wrote:
>>
>>
>> On 5/5/26 11:09, Lizhi Hou wrote:
>>> From: David Zhang <yidong.zhang@amd.com>
>>>
>>> Expose the command doorbell register to userspace on a per-hardware
>>> context basis, enabling applications to notify the firmware of pending
>>> commands via doorbell writes.
>>>
>>> Introduce DRM_IOCTL_AMDXDNA_WAIT_CMD to allow userspace to wait for
>>> completion of individual commands.
>>>
>>> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>>> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>>> Signed-off-by: David Zhang <yidong.zhang@amd.com>
>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>> Minor suggestion below.
>>
>>> ---
>>>   drivers/accel/amdxdna/aie4_ctx.c        | 75 +++++++++++++++++++++++++
>>>   drivers/accel/amdxdna/aie4_host_queue.h |  2 +
>>>   drivers/accel/amdxdna/aie4_pci.c        | 34 +++++++++++
>>>   drivers/accel/amdxdna/aie4_pci.h        |  3 +
>>>   drivers/accel/amdxdna/amdxdna_ctx.c     | 34 +++++++++++
>>>   drivers/accel/amdxdna/amdxdna_ctx.h     |  4 +-
>>>   drivers/accel/amdxdna/amdxdna_gem.c     |  5 +-
>>>   drivers/accel/amdxdna/amdxdna_pci_drv.c | 18 +++++-
>>>   drivers/accel/amdxdna/amdxdna_pci_drv.h |  3 +
>>>   drivers/accel/amdxdna/npu3_regs.c       |  5 ++
>>>   include/uapi/drm/amdxdna_accel.h        | 22 +++++++-
>>>   11 files changed, 198 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/ 
>>> amdxdna/aie4_ctx.c
>>> index 84ac706d0ffb..8408b0d2696f 100644
>>> --- a/drivers/accel/amdxdna/aie4_ctx.c
>>> +++ b/drivers/accel/amdxdna/aie4_ctx.c
>>> @@ -256,3 +256,78 @@ void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
>>>       aie4_hwctx_umq_fini(hwctx);
>>>       kfree(hwctx->priv);
>>>   }
>>> +
>>> +static inline bool valid_queue_index(u64 read, u64 write, u32 capacity)
>>> +{
>>> +    return (write >= read) && ((write - read) <= capacity);
>>> +}
>>> +
>>> +static u64 get_read_index(struct amdxdna_hwctx *hwctx)
>>> +{
>>> +    u64 wi = READ_ONCE(*hwctx->priv->umq_write_index);
>>> +    u64 ri = READ_ONCE(*hwctx->priv->umq_read_index);
>>> +    struct amdxdna_dev *xdna = hwctx->client->xdna;
>>> +
>>> +    /*
>>> +     * CERT cannot update read index as uint64 atomically. Driver 
>>> may read
>>> +     * half-updated read index when it has bits in high 32bit. In 
>>> case read
>>> +     * index is not valid, wait for some time and retry once. It should
>>> +     * allow CERT to complete the read index update.
>>> +     */
>>> +    if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
>>> +        XDNA_WARN(xdna, "Invalid index, ri %llu, wi %llu", ri, wi);
>>> +        usleep_range(100, 200);
>>> +        ri = READ_ONCE(*hwctx->priv->umq_read_index);
>>> +        if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
>>> +            XDNA_ERR(xdna, "Invalid index after retry, ri %llu, wi 
>>> %llu", ri, wi);
>>> +            ri = 0;
>>> +        }
>>> +    }
>>> +
>>> +    return ri;
>>> +}
>>> +
>>> +static inline bool check_cmd_done(struct amdxdna_hwctx *hwctx, u64 seq)
>>> +{
>>> +    u64 read_idx = get_read_index(hwctx);
>>> +
>>> +    return read_idx > seq;
>>> +}
>>> +
>>> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
>>> +{
>>> +    unsigned long wait_jifs = MAX_SCHEDULE_TIMEOUT;
>>> +    struct amdxdna_hwctx_priv *priv = hwctx->priv;
>>> +    struct cert_comp *cert_comp = priv->cert_comp;
>>> +    long ret;
>>
>> Not sure I see the point in making ret a long. 
>> wait_event_interruptible_timeout() returns 0 or 1.
> 
> Other than 0 or 1, wait_event_interruptible_timeout() can also return 
> the remaining jiffies and -ERESTARTSYS
> 

Ah thanks.

> Lizhi
> 
>>
>> bool val;
>> val = wait_event_interruptible_timeout()
>> return val ? 0 : -ETIME;
>>
>>
>>> +
>>> +    if (timeout)
>>> +        wait_jifs = msecs_to_jiffies(timeout);
>>> +
>>> +    ret = wait_event_interruptible_timeout(cert_comp->waitq,
>>> +                           (check_cmd_done(hwctx, seq)),
>>> +                           wait_jifs);
>>> +
>>> +    if (!ret)
>>> +        ret = -ETIME;
>>> +
>>> +    return ret <= 0 ? ret : 0;
>>> +}
>>> +
>>> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 
>>> vm_pgoff)
>>> +{
>>> +    struct amdxdna_hwctx *hwctx;
>>> +    unsigned long hwctx_id;
>>> +    int idx;
>>> +
>>> +    idx = srcu_read_lock(&client->hwctx_srcu);
>>> +    amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
>>> +        if (vm_pgoff == (hwctx->doorbell_offset >> PAGE_SHIFT)) {
>>> +            srcu_read_unlock(&client->hwctx_srcu, idx);
>>> +            return 1;
>>> +        }
>>> +    }
>>> +    srcu_read_unlock(&client->hwctx_srcu, idx);
>>> +
>>> +    return 0;
>>> +}
>>> diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/ 
>>> amdxdna/aie4_host_queue.h
>>> index eb6a38dfb53e..1b33eda3f727 100644
>>> --- a/drivers/accel/amdxdna/aie4_host_queue.h
>>> +++ b/drivers/accel/amdxdna/aie4_host_queue.h
>>> @@ -8,6 +8,8 @@
>>>     #include <linux/types.h>
>>>   +#define CTX_MAX_CMDS                    32
>>> +
>>>   struct host_queue_header {
>>>       __u64 read_index;
>>>       struct {
>>> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/ 
>>> amdxdna/aie4_pci.c
>>> index 3be9066b7178..9ff34ce57fcb 100644
>>> --- a/drivers/accel/amdxdna/aie4_pci.c
>>> +++ b/drivers/accel/amdxdna/aie4_pci.c
>>> @@ -503,6 +503,38 @@ static int aie4m_pcidev_init(struct amdxdna_dev 
>>> *xdna)
>>>       return 0;
>>>   }
>>>   +static int aie4_doorbell_mmap(struct amdxdna_client *client, 
>>> struct vm_area_struct *vma)
>>> +{
>>> +    struct amdxdna_dev *xdna = client->xdna;
>>> +    struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>>> +    const struct amdxdna_dev_priv *npriv = xdna->dev_info->dev_priv;
>>> +    phys_addr_t res_start;
>>> +    unsigned long pfn;
>>> +    int ret;
>>> +
>>> +    if (!aie4_hwctx_valid_doorbell(client, vma->vm_pgoff)) {
>>> +        XDNA_ERR(xdna, "Invalid doorbell page offset 0x%lx", vma- 
>>> >vm_pgoff);
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    if (vma_pages(vma) != 1) {
>>> +        XDNA_ERR(xdna, "can only map one page, got %ld", 
>>> vma_pages(vma));
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    res_start = pci_resource_start(pdev, xdna->dev_info- 
>>> >doorbell_bar) + npriv->doorbell_off;
>>> +    pfn = PHYS_PFN(res_start) + vma->vm_pgoff;
>>> +    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
>>> +    vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
>>> +    ret = io_remap_pfn_range(vma, vma->vm_start,
>>> +                 pfn,
>>> +                 PAGE_SIZE,
>>> +                 vma->vm_page_prot);
>>> +
>>> +    XDNA_DBG(xdna, "doorbell ret %d", ret);
>>> +    return ret;
>>> +}
>>> +
>>>   static int aie4_pf_init(struct amdxdna_dev *xdna)
>>>   {
>>>       int ret;
>>> @@ -547,4 +579,6 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
>>>       .fini            = aie4_vf_fini,
>>>       .hwctx_init        = aie4_hwctx_init,
>>>       .hwctx_fini        = aie4_hwctx_fini,
>>> +    .mmap            = aie4_doorbell_mmap,
>>> +    .cmd_wait        = aie4_cmd_wait,
>>>   };
>>> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/ 
>>> amdxdna/aie4_pci.h
>>> index 6103007e6d2f..b69489acd53d 100644
>>> --- a/drivers/accel/amdxdna/aie4_pci.h
>>> +++ b/drivers/accel/amdxdna/aie4_pci.h
>>> @@ -36,6 +36,7 @@ struct amdxdna_dev_priv {
>>>       u32            mbox_bar;
>>>       u32            mbox_rbuf_bar;
>>>       u64            mbox_info_off;
>>> +    u32            doorbell_off;
>>>         struct aie_bar_off_pair    psp_regs_off[PSP_MAX_REGS];
>>>       struct aie_bar_off_pair    smu_regs_off[SMU_MAX_REGS];
>>> @@ -60,6 +61,8 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
>>>   /* aie4_ctx.c */
>>>   int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
>>>   void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
>>> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
>>> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 
>>> vm_pgoff);
>>>     /* aie4_sriov.c */
>>>   #if IS_ENABLED(CONFIG_PCI_IOV)
>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/ 
>>> amdxdna/amdxdna_ctx.c
>>> index b5ad60d4b734..b79229a63af3 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>> @@ -627,3 +627,37 @@ int amdxdna_drm_submit_cmd_ioctl(struct 
>>> drm_device *dev, void *data, struct drm_
>>>       XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
>>>       return -EINVAL;
>>>   }
>>> +
>>> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, 
>>> struct drm_file *filp)
>>> +{
>>> +    struct amdxdna_client *client = filp->driver_priv;
>>> +    struct amdxdna_dev *xdna = to_xdna_dev(dev);
>>> +    struct amdxdna_drm_wait_cmd *args = data;
>>> +    struct amdxdna_hwctx *hwctx;
>>> +    int ret, idx;
>>> +
>>> +    XDNA_DBG(xdna, "PID %d ctx %d timeout set %d ms for cmd %llu",
>>> +         client->pid, args->hwctx, args->timeout, args->seq);
>>> +
>>> +    if (!xdna->dev_info->ops->cmd_wait)
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    idx = srcu_read_lock(&client->hwctx_srcu);
>>> +    hwctx = xa_load(&client->hwctx_xa, args->hwctx);
>>> +    if (!hwctx) {
>>> +        XDNA_DBG(xdna, "PID %d failed to get ctx %d", client->pid, 
>>> args->hwctx);
>>> +        ret = -EINVAL;
>>> +        goto unlock_ctx_srcu;
>>> +    }
>>> +
>>> +    ret = xdna->dev_info->ops->cmd_wait(hwctx, args->seq, args- 
>>> >timeout);
>>> +
>>> +    XDNA_DBG(xdna, "PID %d ctx %d cmd %lld wait finished, ret %d",
>>> +         client->pid, args->hwctx, args->seq, ret);
>>> +
>>> +    trace_amdxdna_debug_point(current->comm, args->seq, "job 
>>> returned to user");
>>> +
>>> +unlock_ctx_srcu:
>>> +    srcu_read_unlock(&client->hwctx_srcu, idx);
>>> +    return ret;
>>> +}
>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/ 
>>> amdxdna/amdxdna_ctx.h
>>> index c5622718b4d5..6e3c6371a088 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>>> @@ -211,12 +211,10 @@ int amdxdna_cmd_submit(struct amdxdna_client 
>>> *client,
>>>                  u32 *arg_bo_hdls, u32 arg_bo_cnt,
>>>                  u32 hwctx_hdl, u64 *seq);
>>>   -int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
>>> -             u64 seq, u32 timeout);
>>> -
>>>   int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void 
>>> *data, struct drm_file *filp);
>>>   int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void 
>>> *data, struct drm_file *filp);
>>>   int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void 
>>> *data, struct drm_file *filp);
>>>   int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void 
>>> *data, struct drm_file *filp);
>>> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, 
>>> struct drm_file *filp);
>>>     #endif /* _AMDXDNA_CTX_H_ */
>>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/ 
>>> amdxdna/amdxdna_gem.c
>>> index ebfc472aa9e7..319d2064fafa 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_gem.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
>>> @@ -212,7 +212,8 @@ static bool amdxdna_hmm_invalidate(struct 
>>> mmu_interval_notifier *mni,
>>>       mmu_interval_set_seq(&mapp->notifier, cur_seq);
>>>       up_write(&xdna->notifier_lock);
>>>   -    xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
>>> +    if (xdna->dev_info->ops->hmm_invalidate)
>>> +        xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
>>>         if (range->event == MMU_NOTIFY_UNMAP) {
>>>           down_write(&xdna->notifier_lock);
>>> @@ -295,7 +296,7 @@ static int amdxdna_hmm_register(struct 
>>> amdxdna_gem_obj *abo,
>>>       u32 nr_pages;
>>>       int ret;
>>>   -    if (!xdna->dev_info->ops->hmm_invalidate)
>>> +    if (!amdxdna_pasid_on(abo->client))
>>>           return 0;
>>>         mapp = kzalloc_obj(*mapp);
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/ 
>>> amdxdna/amdxdna_pci_drv.c
>>> index 39ad081ac082..c0d00db25cde 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> @@ -224,6 +224,21 @@ static int amdxdna_drm_set_state_ioctl(struct 
>>> drm_device *dev, void *data, struc
>>>       return ret;
>>>   }
>>>   +static int amdxdna_drm_gem_mmap(struct file *filp, struct 
>>> vm_area_struct *vma)
>>> +{
>>> +    struct drm_file *drm_filp = filp->private_data;
>>> +    struct amdxdna_client *client = drm_filp->driver_priv;
>>> +    struct amdxdna_dev *xdna = client->xdna;
>>> +
>>> +    if (likely(vma->vm_pgoff >= DRM_FILE_PAGE_OFFSET_START))
>>> +        return drm_gem_mmap(filp, vma);
>>> +
>>> +    if (!xdna->dev_info->ops->mmap)
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    return xdna->dev_info->ops->mmap(client, vma);
>>> +}
>>> +
>>>   static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
>>>       /* Context */
>>>       DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, 
>>> amdxdna_drm_create_hwctx_ioctl, 0),
>>> @@ -235,6 +250,7 @@ static const struct drm_ioctl_desc 
>>> amdxdna_drm_ioctls[] = {
>>>       DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
>>>       /* Execution */
>>>       DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, 
>>> amdxdna_drm_submit_cmd_ioctl, 0),
>>> +    DRM_IOCTL_DEF_DRV(AMDXDNA_WAIT_CMD, amdxdna_drm_wait_cmd_ioctl, 0),
>>>       /* AIE hardware */
>>>       DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 
>>> 0),
>>>       DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, 
>>> amdxdna_drm_get_array_ioctl, 0),
>>> @@ -281,7 +297,7 @@ static const struct file_operations amdxdna_fops = {
>>>       .poll        = drm_poll,
>>>       .read        = drm_read,
>>>       .llseek        = noop_llseek,
>>> -    .mmap        = drm_gem_mmap,
>>> +    .mmap        = amdxdna_drm_gem_mmap,
>>>       .show_fdinfo    = drm_show_fdinfo,
>>>       .fop_flags    = FOP_UNSIGNED_OFFSET,
>>>   };
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/ 
>>> amdxdna/amdxdna_pci_drv.h
>>> index caed11c09e55..471b72299aee 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>> @@ -56,12 +56,14 @@ struct amdxdna_dev_ops {
>>>       int (*resume)(struct amdxdna_dev *xdna);
>>>       int (*suspend)(struct amdxdna_dev *xdna);
>>>       int (*sriov_configure)(struct amdxdna_dev *xdna, int num_vfs);
>>> +    int (*mmap)(struct amdxdna_client *client, struct vm_area_struct 
>>> *vma);
>>>       int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
>>>       void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
>>>       int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 
>>> value, void *buf, u32 size);
>>>       int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 
>>> debug_bo_hdl);
>>>       void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned 
>>> long cur_seq);
>>>       int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct 
>>> amdxdna_sched_job *job, u64 *seq);
>>> +    int (*cmd_wait)(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
>>>       int (*get_aie_info)(struct amdxdna_client *client, struct 
>>> amdxdna_drm_get_info *args);
>>>       int (*set_aie_state)(struct amdxdna_client *client, struct 
>>> amdxdna_drm_set_state *args);
>>>       int (*get_array)(struct amdxdna_client *client, struct 
>>> amdxdna_drm_get_array *args);
>>> @@ -85,6 +87,7 @@ struct amdxdna_dev_info {
>>>       int                sram_bar;
>>>       int                psp_bar;
>>>       int                smu_bar;
>>> +    int                doorbell_bar;
>>>       int                device_type;
>>>       int                first_col;
>>>       u32                dev_mem_buf_shift;
>>> diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/ 
>>> amdxdna/npu3_regs.c
>>> index 6d5da779232b..d76b2e99c308 100644
>>> --- a/drivers/accel/amdxdna/npu3_regs.c
>>> +++ b/drivers/accel/amdxdna/npu3_regs.c
>>> @@ -14,6 +14,9 @@
>>>   #define NPU3_MBOX_BUFFER_BAR    2
>>>   #define NPU3_MBOX_INFO_OFF    0x0
>>>   +#define NPU3_DOORBELL_BAR       2
>>> +#define NPU3_DOORBELL_OFF       0x0
>>> +
>>>   /* PCIe BAR Index for NPU3 */
>>>   #define NPU3_REG_BAR_INDEX    0
>>>   #define NPU3_PSP_BAR_INDEX      4
>>> @@ -45,6 +48,7 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
>>>       .mbox_bar        = NPU3_MBOX_BAR,
>>>       .mbox_rbuf_bar        = NPU3_MBOX_BUFFER_BAR,
>>>       .mbox_info_off        = NPU3_MBOX_INFO_OFF,
>>> +    .doorbell_off        = NPU3_DOORBELL_OFF,
>>>       .psp_regs_off   = {
>>>           DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU3_PSP, 
>>> MPASP_C2PMSG_123_ALT_1),
>>>           DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU3_PSP, 
>>> MPASP_C2PMSG_156_ALT_1),
>>> @@ -87,6 +91,7 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
>>>   const struct amdxdna_dev_info dev_npu3_vf_info = {
>>>       .mbox_bar        = NPU3_MBOX_BAR,
>>>       .sram_bar        = NPU3_MBOX_BUFFER_BAR,
>>> +    .doorbell_bar        = NPU3_DOORBELL_BAR,
>>>       .default_vbnv        = "RyzenAI-npu3-vf",
>>>       .device_type        = AMDXDNA_DEV_TYPE_UMQ,
>>>       .dev_priv        = &npu3_dev_vf_priv,
>>> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/ 
>>> amdxdna_accel.h
>>> index ad9b33dd7b13..51a507561df6 100644
>>> --- a/include/uapi/drm/amdxdna_accel.h
>>> +++ b/include/uapi/drm/amdxdna_accel.h
>>> @@ -45,7 +45,8 @@ enum amdxdna_drm_ioctl_id {
>>>       DRM_AMDXDNA_EXEC_CMD,
>>>       DRM_AMDXDNA_GET_INFO,
>>>       DRM_AMDXDNA_SET_STATE,
>>> -    DRM_AMDXDNA_GET_ARRAY = 10,
>>> +    DRM_AMDXDNA_WAIT_CMD,
>>> +    DRM_AMDXDNA_GET_ARRAY,
>>>   };
>>>     /**
>>> @@ -274,6 +275,21 @@ struct amdxdna_drm_exec_cmd {
>>>       __u64 seq;
>>>   };
>>>   +/**
>>> + * struct amdxdna_drm_wait_cmd - Wait execution command.
>>> + *
>>> + * @hwctx: Context handle.
>>> + * @timeout: timeout in ms, 0 implies infinite wait.
>>> + * @seq: sequence number of the command returned by execute command.
>>> + *
>>> + * Wait a command specified by seq to be completed.
>>> + */
>>> +struct amdxdna_drm_wait_cmd {
>>> +    __u32 hwctx;
>>> +    __u32 timeout;
>>> +    __u64 seq;
>>> +};
>>> +
>>>   /**
>>>    * struct amdxdna_drm_query_aie_status - Query the status of the 
>>> AIE hardware
>>>    * @buffer: The user space buffer that will return the AIE status.
>>> @@ -739,6 +755,10 @@ struct amdxdna_drm_set_power_mode {
>>>       DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \
>>>            struct amdxdna_drm_get_array)
>>>   +#define DRM_IOCTL_AMDXDNA_WAIT_CMD \
>>> +    DRM_IOW(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, \
>>> +        struct amdxdna_drm_wait_cmd)
>>> +
>>>   #if defined(__cplusplus)
>>>   } /* extern c end */
>>>   #endif
>>


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2026-05-06 16:33 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
2026-05-05 16:09 ` [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF Lizhi Hou
2026-05-05 19:37   ` Mario Limonciello
2026-05-05 16:09 ` [PATCH V1 2/6] accel/amdxdna: Init AIE4 device partition Lizhi Hou
2026-05-05 19:53   ` Mario Limonciello
2026-05-05 16:09 ` [PATCH V1 3/6] accel/amdxdna: Add AIE4 VF hardware context create and destroy Lizhi Hou
2026-05-05 20:28   ` Mario Limonciello
2026-05-05 16:09 ` [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support Lizhi Hou
2026-05-05 20:31   ` Mario Limonciello
2026-05-06 16:11     ` Lizhi Hou
2026-05-06 16:33       ` Mario Limonciello
2026-05-05 16:09 ` [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support Lizhi Hou
2026-05-05 17:14   ` Mario Limonciello
2026-05-05 18:03     ` Lizhi Hou
2026-05-05 16:09 ` [PATCH V1 6/6] accel/amdxdna: Add AIE4 work buffer initialization Lizhi Hou
2026-05-05 20:36   ` Mario Limonciello

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox