* [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF
2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
2026-05-05 19:37 ` Mario Limonciello
2026-05-05 16:09 ` [PATCH V1 2/6] accel/amdxdna: Init AIE4 device partition Lizhi Hou
` (4 subsequent siblings)
5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue, Lizhi Hou
From: David Zhang <yidong.zhang@amd.com>
Add basic device initialization support for AIE4 Virtual Functions (PCI
device IDs 0x17F3 and 0x1B0C).
Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie4_pci.c | 160 +++++++++++++-----------
drivers/accel/amdxdna/aie4_pci.h | 3 +-
drivers/accel/amdxdna/amdxdna_pci_drv.c | 4 +
drivers/accel/amdxdna/amdxdna_pci_drv.h | 1 +
drivers/accel/amdxdna/npu3_regs.c | 20 ++-
include/uapi/drm/amdxdna_accel.h | 1 +
6 files changed, 113 insertions(+), 76 deletions(-)
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 87f80f804f91..a967e2db7ebd 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -196,8 +196,9 @@ static int aie4_mailbox_start(struct amdxdna_dev *xdna,
return ret;
}
-static int aie4_mailbox_init(struct amdxdna_dev *xdna)
+static int aie4_mailbox_init(struct amdxdna_dev_hdl *ndev)
{
+ struct amdxdna_dev *xdna = ndev->aie.xdna;
struct mailbox_info mbox_info;
int ret;
@@ -208,13 +209,13 @@ static int aie4_mailbox_init(struct amdxdna_dev *xdna)
return aie4_mailbox_start(xdna, &mbox_info);
}
-static void aie4_fw_unload(struct amdxdna_dev_hdl *ndev)
+static void aie4_fw_stop(struct amdxdna_dev_hdl *ndev)
{
aie_psp_stop(ndev->aie.psp_hdl);
aie_smu_fini(ndev->aie.smu_hdl);
}
-static int aie4_fw_load(struct amdxdna_dev_hdl *ndev)
+static int aie4_fw_start(struct amdxdna_dev_hdl *ndev)
{
int ret;
@@ -233,49 +234,49 @@ static int aie4_fw_load(struct amdxdna_dev_hdl *ndev)
return ret;
}
-static int aie4_hw_start(struct amdxdna_dev *xdna)
+static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
{
- struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
int ret;
- ret = aie4_fw_load(ndev);
+ ret = aie4_fw_start(ndev);
if (ret)
return ret;
- ret = aie4_mailbox_init(xdna);
+ ret = aie4_mailbox_init(ndev);
if (ret)
- goto fw_unload;
+ goto stop_fw;
return 0;
-fw_unload:
- aie4_fw_unload(ndev);
+stop_fw:
+ aie4_fw_stop(ndev);
return ret;
}
-static void aie4_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev)
+static void aie4_pf_hw_stop(struct amdxdna_dev_hdl *ndev)
{
- int ret;
+ struct amdxdna_dev *xdna = ndev->aie.xdna;
- /* No paired resume needed, fw is stateless */
- ret = aie4_suspend_fw(ndev);
- if (ret)
- XDNA_ERR(ndev->aie.xdna, "suspend_fw failed, ret %d", ret);
- else
- XDNA_DBG(ndev->aie.xdna, "npu firmware suspended");
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ aie4_suspend_fw(ndev);
+ aie4_mailbox_fini(ndev);
+ aie4_fw_stop(ndev);
}
-static void aie4_hw_stop(struct amdxdna_dev *xdna)
+static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
{
- struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+ return aie4_mailbox_init(ndev);
+}
+
+static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->aie.xdna;
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
- aie4_mgmt_fw_fini(ndev);
aie4_mailbox_fini(ndev);
-
- aie4_fw_unload(ndev);
}
static int aie4_request_firmware(struct amdxdna_dev_hdl *ndev,
@@ -365,15 +366,41 @@ static int aie4_prepare_firmware(struct amdxdna_dev_hdl *ndev,
return 0;
}
-static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
+static int aie4_load_fw(struct amdxdna_dev_hdl *ndev,
+ void __iomem *tbl[PCI_NUM_RESOURCES])
+{
+ const struct firmware *npufw, *certfw;
+ int ret;
+
+ if (!ndev->priv->npufw_path && !ndev->priv->certfw_path)
+ return 0;
+
+ ret = aie4_request_firmware(ndev, &npufw, &certfw);
+ if (ret)
+ return ret;
+
+ ret = aie4_prepare_firmware(ndev, npufw, certfw, tbl);
+ aie4_release_firmware(ndev, npufw, certfw);
+
+ return ret;
+}
+
+static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
{
- struct amdxdna_dev *xdna = ndev->aie.xdna;
struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ struct amdxdna_dev_hdl *ndev;
void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
- const struct firmware *npufw, *certfw;
unsigned long bars = 0;
int ret, i;
+ ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
+ if (!ndev)
+ return -ENOMEM;
+
+ ndev->priv = xdna->dev_info->dev_priv;
+ ndev->aie.xdna = xdna;
+ xdna->dev_handle = ndev;
+
/* Enable managed PCI device */
ret = pcim_enable_device(pdev);
if (ret) {
@@ -409,75 +436,60 @@ static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
pci_set_master(pdev);
- ret = aie4_request_firmware(ndev, &npufw, &certfw);
- if (ret)
- goto clear_master;
-
- ret = aie4_prepare_firmware(ndev, npufw, certfw, tbl);
- aie4_release_firmware(ndev, npufw, certfw);
+ ret = aie4_load_fw(ndev, tbl);
if (ret)
- goto clear_master;
+ return ret;
ret = aie4_irq_init(xdna);
if (ret)
- goto clear_master;
+ return ret;
- ret = aie4_hw_start(xdna);
- if (ret)
- goto clear_master;
+ amdxdna_vbnv_init(xdna);
+ XDNA_DBG(xdna, "init finished");
return 0;
-
-clear_master:
- pci_clear_master(pdev);
-
- return ret;
}
-static void aie4_pcidev_fini(struct amdxdna_dev_hdl *ndev)
+static int aie4_pf_init(struct amdxdna_dev *xdna)
{
- struct amdxdna_dev *xdna = ndev->aie.xdna;
- struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
-
- aie4_hw_stop(xdna);
-
- pci_clear_master(pdev);
-}
+ int ret;
-static void aie4_fini(struct amdxdna_dev *xdna)
-{
- struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+ ret = aie4m_pcidev_init(xdna);
+ if (ret)
+ return ret;
- aie4_sriov_stop(ndev);
- aie4_pcidev_fini(ndev);
+ return aie4_pf_hw_start(xdna->dev_handle);
}
-static int aie4_init(struct amdxdna_dev *xdna)
+static int aie4_vf_init(struct amdxdna_dev *xdna)
{
- struct amdxdna_dev_hdl *ndev;
int ret;
- ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
- if (!ndev)
- return -ENOMEM;
+ ret = aie4m_pcidev_init(xdna);
+ if (ret)
+ return ret;
- ndev->priv = xdna->dev_info->dev_priv;
- ndev->aie.xdna = xdna;
- xdna->dev_handle = ndev;
+ return aie4_vf_hw_start(xdna->dev_handle);
+}
- ret = aie4_pcidev_init(ndev);
- if (ret) {
- XDNA_ERR(xdna, "Setup PCI device failed, ret %d", ret);
- return ret;
- }
+static void aie4_pf_fini(struct amdxdna_dev *xdna)
+{
+ aie4_sriov_stop(xdna->dev_handle);
+ aie4_pf_hw_stop(xdna->dev_handle);
+}
- amdxdna_vbnv_init(xdna);
- XDNA_DBG(xdna, "aie4 init finished");
- return 0;
+static void aie4_vf_fini(struct amdxdna_dev *xdna)
+{
+ aie4_vf_hw_stop(xdna->dev_handle);
}
-const struct amdxdna_dev_ops aie4_ops = {
- .init = aie4_init,
- .fini = aie4_fini,
+const struct amdxdna_dev_ops aie4_pf_ops = {
+ .init = aie4_pf_init,
+ .fini = aie4_pf_fini,
.sriov_configure = aie4_sriov_configure,
};
+
+const struct amdxdna_dev_ops aie4_vf_ops = {
+ .init = aie4_vf_init,
+ .fini = aie4_vf_fini,
+};
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index aa1495c3370b..cbf3424a4341 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -48,6 +48,7 @@ static inline int aie4_sriov_stop(struct amdxdna_dev_hdl *ndev)
}
#endif
-extern const struct amdxdna_dev_ops aie4_ops;
+extern const struct amdxdna_dev_ops aie4_pf_ops;
+extern const struct amdxdna_dev_ops aie4_vf_ops;
#endif /* _AIE4_PCI_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 1b08a08343cf..39ad081ac082 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -53,7 +53,9 @@ static const struct pci_device_id pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1502) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f0) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f2) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1B0B) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1B0C) },
{0}
};
@@ -65,7 +67,9 @@ static const struct amdxdna_device_id amdxdna_ids[] = {
{ 0x17f0, 0x11, &dev_npu5_info },
{ 0x17f0, 0x20, &dev_npu6_info },
{ 0x17f2, 0x10, &dev_npu3_pf_info },
+ { 0x17f3, 0x10, &dev_npu3_vf_info },
{ 0x1B0B, 0x10, &dev_npu3_pf_info },
+ { 0x1B0C, 0x10, &dev_npu3_vf_info },
{0}
};
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index b1548cf16f59..caed11c09e55 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -167,6 +167,7 @@ struct amdxdna_client {
/* Add device info below */
extern const struct amdxdna_dev_info dev_npu1_info;
extern const struct amdxdna_dev_info dev_npu3_pf_info;
+extern const struct amdxdna_dev_info dev_npu3_vf_info;
extern const struct amdxdna_dev_info dev_npu4_info;
extern const struct amdxdna_dev_info dev_npu5_info;
extern const struct amdxdna_dev_info dev_npu6_info;
diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
index acece0faddf2..6d5da779232b 100644
--- a/drivers/accel/amdxdna/npu3_regs.c
+++ b/drivers/accel/amdxdna/npu3_regs.c
@@ -64,6 +64,14 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
},
};
+static const struct amdxdna_dev_priv npu3_dev_vf_priv = {
+ /* vf device does not load firmware */
+ .mbox_bar = NPU3_MBOX_BAR,
+ .mbox_rbuf_bar = NPU3_MBOX_BUFFER_BAR,
+ .mbox_info_off = NPU3_MBOX_INFO_OFF,
+ /* vf device does not have smu and psp */
+};
+
const struct amdxdna_dev_info dev_npu3_pf_info = {
.mbox_bar = NPU3_MBOX_BAR,
.sram_bar = NPU3_MBOX_BUFFER_BAR,
@@ -73,5 +81,15 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
.device_type = AMDXDNA_DEV_TYPE_PF,
.dev_priv = &npu3_dev_priv,
.fw_feature_tbl = npu3_fw_feature_table,
- .ops = &aie4_ops,
+ .ops = &aie4_pf_ops,
+};
+
+const struct amdxdna_dev_info dev_npu3_vf_info = {
+ .mbox_bar = NPU3_MBOX_BAR,
+ .sram_bar = NPU3_MBOX_BUFFER_BAR,
+ .default_vbnv = "RyzenAI-npu3-vf",
+ .device_type = AMDXDNA_DEV_TYPE_UMQ,
+ .dev_priv = &npu3_dev_vf_priv,
+ .fw_feature_tbl = npu3_fw_feature_table,
+ .ops = &aie4_vf_ops,
};
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index 0b11e8e3ea5d..34212feee15c 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -30,6 +30,7 @@ extern "C" {
enum amdxdna_device_type {
AMDXDNA_DEV_TYPE_UNKNOWN = -1,
AMDXDNA_DEV_TYPE_KMQ = 0,
+ AMDXDNA_DEV_TYPE_UMQ = 1,
AMDXDNA_DEV_TYPE_PF = 2,
};
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF
2026-05-05 16:09 ` [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF Lizhi Hou
@ 2026-05-05 19:37 ` Mario Limonciello
0 siblings, 0 replies; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 19:37 UTC (permalink / raw)
To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue
On 5/5/26 11:09, Lizhi Hou wrote:
> From: David Zhang <yidong.zhang@amd.com>
>
> Add basic device initialization support for AIE4 Virtual Functions (PCI
> device IDs 0x17F3 and 0x1B0C).
>
> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: David Zhang <yidong.zhang@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
> drivers/accel/amdxdna/aie4_pci.c | 160 +++++++++++++-----------
> drivers/accel/amdxdna/aie4_pci.h | 3 +-
> drivers/accel/amdxdna/amdxdna_pci_drv.c | 4 +
> drivers/accel/amdxdna/amdxdna_pci_drv.h | 1 +
> drivers/accel/amdxdna/npu3_regs.c | 20 ++-
> include/uapi/drm/amdxdna_accel.h | 1 +
> 6 files changed, 113 insertions(+), 76 deletions(-)
>
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index 87f80f804f91..a967e2db7ebd 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -196,8 +196,9 @@ static int aie4_mailbox_start(struct amdxdna_dev *xdna,
> return ret;
> }
>
> -static int aie4_mailbox_init(struct amdxdna_dev *xdna)
> +static int aie4_mailbox_init(struct amdxdna_dev_hdl *ndev)
> {
> + struct amdxdna_dev *xdna = ndev->aie.xdna;
> struct mailbox_info mbox_info;
> int ret;
>
> @@ -208,13 +209,13 @@ static int aie4_mailbox_init(struct amdxdna_dev *xdna)
> return aie4_mailbox_start(xdna, &mbox_info);
> }
>
> -static void aie4_fw_unload(struct amdxdna_dev_hdl *ndev)
> +static void aie4_fw_stop(struct amdxdna_dev_hdl *ndev)
> {
> aie_psp_stop(ndev->aie.psp_hdl);
> aie_smu_fini(ndev->aie.smu_hdl);
> }
>
> -static int aie4_fw_load(struct amdxdna_dev_hdl *ndev)
> +static int aie4_fw_start(struct amdxdna_dev_hdl *ndev)
> {
> int ret;
>
> @@ -233,49 +234,49 @@ static int aie4_fw_load(struct amdxdna_dev_hdl *ndev)
> return ret;
> }
>
> -static int aie4_hw_start(struct amdxdna_dev *xdna)
> +static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
> {
> - struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> int ret;
>
> - ret = aie4_fw_load(ndev);
> + ret = aie4_fw_start(ndev);
> if (ret)
> return ret;
>
> - ret = aie4_mailbox_init(xdna);
> + ret = aie4_mailbox_init(ndev);
> if (ret)
> - goto fw_unload;
> + goto stop_fw;
>
> return 0;
>
> -fw_unload:
> - aie4_fw_unload(ndev);
> +stop_fw:
> + aie4_fw_stop(ndev);
>
> return ret;
> }
>
> -static void aie4_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev)
> +static void aie4_pf_hw_stop(struct amdxdna_dev_hdl *ndev)
> {
> - int ret;
> + struct amdxdna_dev *xdna = ndev->aie.xdna;
>
> - /* No paired resume needed, fw is stateless */
> - ret = aie4_suspend_fw(ndev);
> - if (ret)
> - XDNA_ERR(ndev->aie.xdna, "suspend_fw failed, ret %d", ret);
> - else
> - XDNA_DBG(ndev->aie.xdna, "npu firmware suspended");
> + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
> +
> + aie4_suspend_fw(ndev);
> + aie4_mailbox_fini(ndev);
> + aie4_fw_stop(ndev);
> }
>
> -static void aie4_hw_stop(struct amdxdna_dev *xdna)
> +static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
> {
> - struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> + return aie4_mailbox_init(ndev);
> +}
> +
> +static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
> +{
> + struct amdxdna_dev *xdna = ndev->aie.xdna;
>
> drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>
> - aie4_mgmt_fw_fini(ndev);
> aie4_mailbox_fini(ndev);
> -
> - aie4_fw_unload(ndev);
> }
>
> static int aie4_request_firmware(struct amdxdna_dev_hdl *ndev,
> @@ -365,15 +366,41 @@ static int aie4_prepare_firmware(struct amdxdna_dev_hdl *ndev,
> return 0;
> }
>
> -static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
> +static int aie4_load_fw(struct amdxdna_dev_hdl *ndev,
> + void __iomem *tbl[PCI_NUM_RESOURCES])
> +{
> + const struct firmware *npufw, *certfw;
> + int ret;
> +
> + if (!ndev->priv->npufw_path && !ndev->priv->certfw_path)
> + return 0;
> +
> + ret = aie4_request_firmware(ndev, &npufw, &certfw);
> + if (ret)
> + return ret;
> +
> + ret = aie4_prepare_firmware(ndev, npufw, certfw, tbl);
> + aie4_release_firmware(ndev, npufw, certfw);
> +
> + return ret;
> +}
> +
> +static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
> {
> - struct amdxdna_dev *xdna = ndev->aie.xdna;
> struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> + struct amdxdna_dev_hdl *ndev;
> void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
> - const struct firmware *npufw, *certfw;
> unsigned long bars = 0;
> int ret, i;
>
> + ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
> + if (!ndev)
> + return -ENOMEM;
> +
> + ndev->priv = xdna->dev_info->dev_priv;
> + ndev->aie.xdna = xdna;
> + xdna->dev_handle = ndev;
> +
> /* Enable managed PCI device */
> ret = pcim_enable_device(pdev);
> if (ret) {
> @@ -409,75 +436,60 @@ static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
>
> pci_set_master(pdev);
>
> - ret = aie4_request_firmware(ndev, &npufw, &certfw);
> - if (ret)
> - goto clear_master;
> -
> - ret = aie4_prepare_firmware(ndev, npufw, certfw, tbl);
> - aie4_release_firmware(ndev, npufw, certfw);
> + ret = aie4_load_fw(ndev, tbl);
> if (ret)
> - goto clear_master;
> + return ret;
>
> ret = aie4_irq_init(xdna);
> if (ret)
> - goto clear_master;
> + return ret;
>
> - ret = aie4_hw_start(xdna);
> - if (ret)
> - goto clear_master;
> + amdxdna_vbnv_init(xdna);
> + XDNA_DBG(xdna, "init finished");
>
> return 0;
> -
> -clear_master:
> - pci_clear_master(pdev);
> -
> - return ret;
> }
>
> -static void aie4_pcidev_fini(struct amdxdna_dev_hdl *ndev)
> +static int aie4_pf_init(struct amdxdna_dev *xdna)
> {
> - struct amdxdna_dev *xdna = ndev->aie.xdna;
> - struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> -
> - aie4_hw_stop(xdna);
> -
> - pci_clear_master(pdev);
> -}
> + int ret;
>
> -static void aie4_fini(struct amdxdna_dev *xdna)
> -{
> - struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> + ret = aie4m_pcidev_init(xdna);
> + if (ret)
> + return ret;
>
> - aie4_sriov_stop(ndev);
> - aie4_pcidev_fini(ndev);
> + return aie4_pf_hw_start(xdna->dev_handle);
> }
>
> -static int aie4_init(struct amdxdna_dev *xdna)
> +static int aie4_vf_init(struct amdxdna_dev *xdna)
> {
> - struct amdxdna_dev_hdl *ndev;
> int ret;
>
> - ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
> - if (!ndev)
> - return -ENOMEM;
> + ret = aie4m_pcidev_init(xdna);
> + if (ret)
> + return ret;
>
> - ndev->priv = xdna->dev_info->dev_priv;
> - ndev->aie.xdna = xdna;
> - xdna->dev_handle = ndev;
> + return aie4_vf_hw_start(xdna->dev_handle);
> +}
>
> - ret = aie4_pcidev_init(ndev);
> - if (ret) {
> - XDNA_ERR(xdna, "Setup PCI device failed, ret %d", ret);
> - return ret;
> - }
> +static void aie4_pf_fini(struct amdxdna_dev *xdna)
> +{
> + aie4_sriov_stop(xdna->dev_handle);
> + aie4_pf_hw_stop(xdna->dev_handle);
> +}
>
> - amdxdna_vbnv_init(xdna);
> - XDNA_DBG(xdna, "aie4 init finished");
> - return 0;
> +static void aie4_vf_fini(struct amdxdna_dev *xdna)
> +{
> + aie4_vf_hw_stop(xdna->dev_handle);
> }
>
> -const struct amdxdna_dev_ops aie4_ops = {
> - .init = aie4_init,
> - .fini = aie4_fini,
> +const struct amdxdna_dev_ops aie4_pf_ops = {
> + .init = aie4_pf_init,
> + .fini = aie4_pf_fini,
> .sriov_configure = aie4_sriov_configure,
> };
> +
> +const struct amdxdna_dev_ops aie4_vf_ops = {
> + .init = aie4_vf_init,
> + .fini = aie4_vf_fini,
> +};
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index aa1495c3370b..cbf3424a4341 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -48,6 +48,7 @@ static inline int aie4_sriov_stop(struct amdxdna_dev_hdl *ndev)
> }
> #endif
>
> -extern const struct amdxdna_dev_ops aie4_ops;
> +extern const struct amdxdna_dev_ops aie4_pf_ops;
> +extern const struct amdxdna_dev_ops aie4_vf_ops;
>
> #endif /* _AIE4_PCI_H_ */
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index 1b08a08343cf..39ad081ac082 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -53,7 +53,9 @@ static const struct pci_device_id pci_ids[] = {
> { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1502) },
> { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f0) },
> { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f2) },
> + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x17f3) },
> { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1B0B) },
> + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1B0C) },
> {0}
> };
>
> @@ -65,7 +67,9 @@ static const struct amdxdna_device_id amdxdna_ids[] = {
> { 0x17f0, 0x11, &dev_npu5_info },
> { 0x17f0, 0x20, &dev_npu6_info },
> { 0x17f2, 0x10, &dev_npu3_pf_info },
> + { 0x17f3, 0x10, &dev_npu3_vf_info },
> { 0x1B0B, 0x10, &dev_npu3_pf_info },
> + { 0x1B0C, 0x10, &dev_npu3_vf_info },
> {0}
> };
>
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> index b1548cf16f59..caed11c09e55 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> @@ -167,6 +167,7 @@ struct amdxdna_client {
> /* Add device info below */
> extern const struct amdxdna_dev_info dev_npu1_info;
> extern const struct amdxdna_dev_info dev_npu3_pf_info;
> +extern const struct amdxdna_dev_info dev_npu3_vf_info;
> extern const struct amdxdna_dev_info dev_npu4_info;
> extern const struct amdxdna_dev_info dev_npu5_info;
> extern const struct amdxdna_dev_info dev_npu6_info;
> diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
> index acece0faddf2..6d5da779232b 100644
> --- a/drivers/accel/amdxdna/npu3_regs.c
> +++ b/drivers/accel/amdxdna/npu3_regs.c
> @@ -64,6 +64,14 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
> },
> };
>
> +static const struct amdxdna_dev_priv npu3_dev_vf_priv = {
> + /* vf device does not load firmware */
> + .mbox_bar = NPU3_MBOX_BAR,
> + .mbox_rbuf_bar = NPU3_MBOX_BUFFER_BAR,
> + .mbox_info_off = NPU3_MBOX_INFO_OFF,
> + /* vf device does not have smu and psp */
> +};
> +
> const struct amdxdna_dev_info dev_npu3_pf_info = {
> .mbox_bar = NPU3_MBOX_BAR,
> .sram_bar = NPU3_MBOX_BUFFER_BAR,
> @@ -73,5 +81,15 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
> .device_type = AMDXDNA_DEV_TYPE_PF,
> .dev_priv = &npu3_dev_priv,
> .fw_feature_tbl = npu3_fw_feature_table,
> - .ops = &aie4_ops,
> + .ops = &aie4_pf_ops,
> +};
> +
> +const struct amdxdna_dev_info dev_npu3_vf_info = {
> + .mbox_bar = NPU3_MBOX_BAR,
> + .sram_bar = NPU3_MBOX_BUFFER_BAR,
> + .default_vbnv = "RyzenAI-npu3-vf",
> + .device_type = AMDXDNA_DEV_TYPE_UMQ,
> + .dev_priv = &npu3_dev_vf_priv,
> + .fw_feature_tbl = npu3_fw_feature_table,
> + .ops = &aie4_vf_ops,
> };
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index 0b11e8e3ea5d..34212feee15c 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -30,6 +30,7 @@ extern "C" {
> enum amdxdna_device_type {
> AMDXDNA_DEV_TYPE_UNKNOWN = -1,
> AMDXDNA_DEV_TYPE_KMQ = 0,
> + AMDXDNA_DEV_TYPE_UMQ = 1,
> AMDXDNA_DEV_TYPE_PF = 2,
> };
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH V1 2/6] accel/amdxdna: Init AIE4 device partition
2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
2026-05-05 16:09 ` [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
2026-05-05 19:53 ` Mario Limonciello
2026-05-05 16:09 ` [PATCH V1 3/6] accel/amdxdna: Add AIE4 VF hardware context create and destroy Lizhi Hou
` (3 subsequent siblings)
5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue, Lizhi Hou
From: David Zhang <yidong.zhang@amd.com>
Send partition creation command to firmware during VF initialization.
Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie4_msg_priv.h | 21 +++++++++++
drivers/accel/amdxdna/aie4_pci.c | 52 ++++++++++++++++++++++++++-
drivers/accel/amdxdna/aie4_pci.h | 1 +
3 files changed, 73 insertions(+), 1 deletion(-)
diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
index 88463cc3a98a..cada53257921 100644
--- a/drivers/accel/amdxdna/aie4_msg_priv.h
+++ b/drivers/accel/amdxdna/aie4_msg_priv.h
@@ -13,6 +13,9 @@ enum aie4_msg_opcode {
AIE4_MSG_OP_CREATE_VFS = 0x20001,
AIE4_MSG_OP_DESTROY_VFS = 0x20002,
+
+ AIE4_MSG_OP_CREATE_PARTITION = 0x30001,
+ AIE4_MSG_OP_DESTROY_PARTITION = 0x30002,
};
enum aie4_msg_status {
@@ -46,4 +49,22 @@ struct aie4_msg_destroy_vfs_resp {
enum aie4_msg_status status;
} __packed;
+struct aie4_msg_create_partition_req {
+ __u32 partition_col_start;
+ __u32 partition_col_count;
+} __packed;
+
+struct aie4_msg_create_partition_resp {
+ enum aie4_msg_status status;
+ __u32 partition_id;
+} __packed;
+
+struct aie4_msg_destroy_partition_req {
+ __u32 partition_id;
+} __packed;
+
+struct aie4_msg_destroy_partition_resp {
+ enum aie4_msg_status status;
+} __packed;
+
#endif /* _AIE4_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index a967e2db7ebd..13f5d45e388d 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -9,11 +9,16 @@
#include <linux/firmware.h>
#include <linux/sizes.h>
+#include "aie.h"
+#include "aie4_msg_priv.h"
#include "aie4_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
#include "amdxdna_pci_drv.h"
#define NO_IOHUB 0
#define PSP_NOTIFY_INTR 0xD007BE11
+#define AIE4_TOTAL_COLUMN 3
/*
* The management mailbox channel is allocated by firmware.
@@ -234,6 +239,36 @@ static int aie4_fw_start(struct amdxdna_dev_hdl *ndev)
return ret;
}
+static int aie4_partition_init(struct amdxdna_dev_hdl *ndev)
+{
+ DECLARE_AIE_MSG(aie4_msg_create_partition, AIE4_MSG_OP_CREATE_PARTITION);
+ struct amdxdna_dev *xdna = ndev->aie.xdna;
+ int ret;
+
+ req.partition_col_start = 0;
+ req.partition_col_count = AIE4_TOTAL_COLUMN;
+ ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+ if (ret) {
+ XDNA_ERR(xdna, "partition init failed: %d", ret);
+ return ret;
+ }
+
+ ndev->partition_id = resp.partition_id;
+ return 0;
+}
+
+static void aie4_partition_fini(struct amdxdna_dev_hdl *ndev)
+{
+ DECLARE_AIE_MSG(aie4_msg_destroy_partition, AIE4_MSG_OP_DESTROY_PARTITION);
+ struct amdxdna_dev *xdna = ndev->aie.xdna;
+ int ret;
+
+ req.partition_id = ndev->partition_id;
+ ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+ if (ret)
+ XDNA_ERR(xdna, "partition fini failed: %d", ret);
+}
+
static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
{
int ret;
@@ -267,7 +302,21 @@ static void aie4_pf_hw_stop(struct amdxdna_dev_hdl *ndev)
static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
{
- return aie4_mailbox_init(ndev);
+ int ret;
+
+ ret = aie4_mailbox_init(ndev);
+ if (ret)
+ return ret;
+
+ ret = aie4_partition_init(ndev);
+ if (ret)
+ goto mailbox_fini;
+
+ return 0;
+
+mailbox_fini:
+ aie4_mailbox_fini(ndev);
+ return ret;
}
static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
@@ -276,6 +325,7 @@ static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ aie4_partition_fini(ndev);
aie4_mailbox_fini(ndev);
}
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index cbf3424a4341..620fb5bd23e4 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -31,6 +31,7 @@ struct amdxdna_dev_hdl {
void __iomem *rbuf_base;
struct mailbox *mbox;
+ u32 partition_id;
};
/* aie4_message.c */
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH V1 2/6] accel/amdxdna: Init AIE4 device partition
2026-05-05 16:09 ` [PATCH V1 2/6] accel/amdxdna: Init AIE4 device partition Lizhi Hou
@ 2026-05-05 19:53 ` Mario Limonciello
0 siblings, 0 replies; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 19:53 UTC (permalink / raw)
To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue
On 5/5/26 11:09, Lizhi Hou wrote:
> From: David Zhang <yidong.zhang@amd.com>
>
> Send partition creation command to firmware during VF initialization.
>
> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: David Zhang <yidong.zhang@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> drivers/accel/amdxdna/aie4_msg_priv.h | 21 +++++++++++
> drivers/accel/amdxdna/aie4_pci.c | 52 ++++++++++++++++++++++++++-
> drivers/accel/amdxdna/aie4_pci.h | 1 +
> 3 files changed, 73 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
> index 88463cc3a98a..cada53257921 100644
> --- a/drivers/accel/amdxdna/aie4_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie4_msg_priv.h
> @@ -13,6 +13,9 @@ enum aie4_msg_opcode {
>
> AIE4_MSG_OP_CREATE_VFS = 0x20001,
> AIE4_MSG_OP_DESTROY_VFS = 0x20002,
> +
> + AIE4_MSG_OP_CREATE_PARTITION = 0x30001,
> + AIE4_MSG_OP_DESTROY_PARTITION = 0x30002,
> };
>
> enum aie4_msg_status {
> @@ -46,4 +49,22 @@ struct aie4_msg_destroy_vfs_resp {
> enum aie4_msg_status status;
> } __packed;
>
> +struct aie4_msg_create_partition_req {
> + __u32 partition_col_start;
> + __u32 partition_col_count;
> +} __packed;
> +
> +struct aie4_msg_create_partition_resp {
> + enum aie4_msg_status status;
> + __u32 partition_id;
> +} __packed;
> +
> +struct aie4_msg_destroy_partition_req {
> + __u32 partition_id;
> +} __packed;
> +
> +struct aie4_msg_destroy_partition_resp {
> + enum aie4_msg_status status;
> +} __packed;
> +
> #endif /* _AIE4_MSG_PRIV_H_ */
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index a967e2db7ebd..13f5d45e388d 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -9,11 +9,16 @@
> #include <linux/firmware.h>
> #include <linux/sizes.h>
>
> +#include "aie.h"
> +#include "aie4_msg_priv.h"
> #include "aie4_pci.h"
> +#include "amdxdna_mailbox.h"
> +#include "amdxdna_mailbox_helper.h"
> #include "amdxdna_pci_drv.h"
>
> #define NO_IOHUB 0
> #define PSP_NOTIFY_INTR 0xD007BE11
> +#define AIE4_TOTAL_COLUMN 3
>
> /*
> * The management mailbox channel is allocated by firmware.
> @@ -234,6 +239,36 @@ static int aie4_fw_start(struct amdxdna_dev_hdl *ndev)
> return ret;
> }
>
> +static int aie4_partition_init(struct amdxdna_dev_hdl *ndev)
> +{
> + DECLARE_AIE_MSG(aie4_msg_create_partition, AIE4_MSG_OP_CREATE_PARTITION);
> + struct amdxdna_dev *xdna = ndev->aie.xdna;
> + int ret;
> +
> + req.partition_col_start = 0;
> + req.partition_col_count = AIE4_TOTAL_COLUMN;
> + ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> + if (ret) {
> + XDNA_ERR(xdna, "partition init failed: %d", ret);
> + return ret;
> + }
> +
> + ndev->partition_id = resp.partition_id;
> + return 0;
> +}
> +
> +static void aie4_partition_fini(struct amdxdna_dev_hdl *ndev)
> +{
> + DECLARE_AIE_MSG(aie4_msg_destroy_partition, AIE4_MSG_OP_DESTROY_PARTITION);
> + struct amdxdna_dev *xdna = ndev->aie.xdna;
> + int ret;
> +
> + req.partition_id = ndev->partition_id;
> + ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> + if (ret)
> + XDNA_ERR(xdna, "partition fini failed: %d", ret);
> +}
> +
> static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
> {
> int ret;
> @@ -267,7 +302,21 @@ static void aie4_pf_hw_stop(struct amdxdna_dev_hdl *ndev)
>
> static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
> {
> - return aie4_mailbox_init(ndev);
> + int ret;
> +
> + ret = aie4_mailbox_init(ndev);
> + if (ret)
> + return ret;
> +
> + ret = aie4_partition_init(ndev);
> + if (ret)
> + goto mailbox_fini;
> +
> + return 0;
> +
> +mailbox_fini:
> + aie4_mailbox_fini(ndev);
> + return ret;
> }
>
> static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
> @@ -276,6 +325,7 @@ static void aie4_vf_hw_stop(struct amdxdna_dev_hdl *ndev)
>
> drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>
> + aie4_partition_fini(ndev);
> aie4_mailbox_fini(ndev);
> }
>
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index cbf3424a4341..620fb5bd23e4 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -31,6 +31,7 @@ struct amdxdna_dev_hdl {
> void __iomem *rbuf_base;
>
> struct mailbox *mbox;
> + u32 partition_id;
> };
>
> /* aie4_message.c */
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH V1 3/6] accel/amdxdna: Add AIE4 VF hardware context create and destroy
2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
2026-05-05 16:09 ` [PATCH V1 1/6] accel/amdxdna: Add initial support for AIE4 VF Lizhi Hou
2026-05-05 16:09 ` [PATCH V1 2/6] accel/amdxdna: Init AIE4 device partition Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
2026-05-05 20:28 ` Mario Limonciello
2026-05-05 16:09 ` [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support Lizhi Hou
` (2 subsequent siblings)
5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue, Lizhi Hou
From: David Zhang <yidong.zhang@amd.com>
Implement hardware context creation and destruction for AIE4 VF devices.
Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/Makefile | 1 +
drivers/accel/amdxdna/aie4_ctx.c | 258 ++++++++++++++++++++++++
drivers/accel/amdxdna/aie4_host_queue.h | 22 ++
drivers/accel/amdxdna/aie4_msg_priv.h | 29 +++
drivers/accel/amdxdna/aie4_pci.c | 5 +
drivers/accel/amdxdna/aie4_pci.h | 24 +++
drivers/accel/amdxdna/amdxdna_ctx.c | 6 +
drivers/accel/amdxdna/amdxdna_ctx.h | 3 +
include/uapi/drm/amdxdna_accel.h | 1 +
9 files changed, 349 insertions(+)
create mode 100644 drivers/accel/amdxdna/aie4_ctx.c
create mode 100644 drivers/accel/amdxdna/aie4_host_queue.h
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
index d7720c8c8a98..05cce0a38692 100644
--- a/drivers/accel/amdxdna/Makefile
+++ b/drivers/accel/amdxdna/Makefile
@@ -10,6 +10,7 @@ amdxdna-y := \
aie2_pci.o \
aie2_pm.o \
aie2_solver.o \
+ aie4_ctx.o \
aie4_message.o \
aie4_pci.o \
amdxdna_cbuf.o \
diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c
new file mode 100644
index 000000000000..84ac706d0ffb
--- /dev/null
+++ b/drivers/accel/amdxdna/aie4_ctx.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/types.h>
+
+#include "aie.h"
+#include "aie4_host_queue.h"
+#include "aie4_msg_priv.h"
+#include "aie4_pci.h"
+#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_mailbox_helper.h"
+#include "amdxdna_pci_drv.h"
+
+static irqreturn_t cert_comp_isr(int irq, void *p)
+{
+ struct cert_comp *cert_comp = p;
+
+ wake_up_all(&cert_comp->waitq);
+ return IRQ_HANDLED;
+}
+
+static struct cert_comp *aie4_lookup_cert_comp(struct amdxdna_dev_hdl *ndev, u32 msix_idx)
+{
+ struct amdxdna_dev *xdna = ndev->aie.xdna;
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ struct cert_comp *cert_comp;
+ int ret;
+
+ guard(mutex)(&ndev->cert_comp_lock);
+
+ cert_comp = xa_load(&ndev->cert_comp_xa, msix_idx);
+ if (cert_comp) {
+ kref_get(&cert_comp->kref);
+ return cert_comp;
+ }
+
+ cert_comp = kzalloc_obj(*cert_comp);
+ if (!cert_comp)
+ return NULL;
+
+ cert_comp->ndev = ndev;
+ cert_comp->msix_idx = msix_idx;
+ init_waitqueue_head(&cert_comp->waitq);
+ kref_init(&cert_comp->kref);
+
+ ret = pci_irq_vector(pdev, cert_comp->msix_idx);
+ if (ret < 0) {
+ XDNA_ERR(xdna, "MSI-X idx %u is invalid, ret:%d", msix_idx, ret);
+ goto free_cert_comp;
+ }
+ cert_comp->irq = ret;
+
+ ret = request_irq(cert_comp->irq, cert_comp_isr, 0, "xdna_hsa", cert_comp);
+ if (ret) {
+ XDNA_ERR(xdna, "request irq %d failed %d", cert_comp->irq, ret);
+ goto free_cert_comp;
+ }
+
+ ret = xa_err(xa_store(&ndev->cert_comp_xa, msix_idx, cert_comp, GFP_KERNEL));
+ if (ret) {
+ XDNA_ERR(xdna, "store cert_comp for msix idx %d failed %d", msix_idx, ret);
+ goto free_irq;
+ }
+
+ return cert_comp;
+
+free_irq:
+ free_irq(cert_comp->irq, cert_comp);
+free_cert_comp:
+ kfree(cert_comp);
+ return NULL;
+}
+
+static void cert_comp_release(struct kref *kref)
+{
+ struct cert_comp *cert_comp = container_of(kref, struct cert_comp, kref);
+ struct amdxdna_dev_hdl *ndev = cert_comp->ndev;
+
+ drm_WARN_ON(&ndev->aie.xdna->ddev, !mutex_is_locked(&ndev->cert_comp_lock));
+
+ xa_erase(&ndev->cert_comp_xa, cert_comp->msix_idx);
+ free_irq(cert_comp->irq, cert_comp);
+ kfree(cert_comp);
+}
+
+static void aie4_put_cert_comp(struct cert_comp *cert_comp)
+{
+ struct amdxdna_dev_hdl *ndev;
+
+ ndev = cert_comp->ndev;
+ guard(mutex)(&ndev->cert_comp_lock);
+ kref_put(&cert_comp->kref, cert_comp_release);
+}
+
+static int aie4_msg_destroy_context(struct amdxdna_dev_hdl *ndev, u32 hw_context_id)
+{
+ DECLARE_AIE_MSG(aie4_msg_destroy_hw_context, AIE4_MSG_OP_DESTROY_HW_CONTEXT);
+
+ req.hw_context_id = hw_context_id;
+ return aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+}
+
+static int aie4_hwctx_create(struct amdxdna_hwctx *hwctx)
+{
+ DECLARE_AIE_MSG(aie4_msg_create_hw_context, AIE4_MSG_OP_CREATE_HW_CONTEXT);
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_hwctx_priv *priv = hwctx->priv;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+ int ret;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ if (!ndev->partition_id || !hwctx->num_tiles) {
+ XDNA_ERR(xdna, "invalid request partition_id %d, num_tiles %d",
+ ndev->partition_id, hwctx->num_tiles);
+ return -EINVAL;
+ }
+
+ req.partition_id = ndev->partition_id;
+ req.request_num_tiles = hwctx->num_tiles;
+ req.pasid = FIELD_PREP(AIE4_MSG_PASID, client->pasid) |
+ FIELD_PREP(AIE4_MSG_PASID_VLD, 1);
+ req.priority_band = hwctx->qos.priority;
+
+ req.hsa_addr_high = upper_32_bits(amdxdna_gem_dev_addr(priv->umq_bo));
+ req.hsa_addr_low = lower_32_bits(amdxdna_gem_dev_addr(priv->umq_bo));
+
+ XDNA_DBG(xdna, "pasid 0x%x, num_tiles %d, hsa[0x%x 0x%x]",
+ req.pasid, req.request_num_tiles, req.hsa_addr_high, req.hsa_addr_low);
+
+ ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+ if (ret) {
+ XDNA_ERR(xdna, "create ctx failed: %d", ret);
+ return ret;
+ }
+
+ XDNA_DBG(xdna, "resp msix: %d, ctx id: %d, doorbell: %d",
+ resp.job_complete_msix_idx,
+ resp.hw_context_id,
+ resp.doorbell_offset);
+
+ /* setup interrupt completion per msix index */
+ priv->cert_comp = aie4_lookup_cert_comp(ndev, resp.job_complete_msix_idx);
+ if (!priv->cert_comp) {
+ aie4_msg_destroy_context(ndev, resp.hw_context_id);
+ return -EINVAL;
+ }
+
+ priv->hw_ctx_id = resp.hw_context_id;
+ hwctx->doorbell_offset = resp.doorbell_offset;
+
+ return 0;
+}
+
+static void aie4_hwctx_destroy(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_hwctx_priv *priv = hwctx->priv;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ aie4_msg_destroy_context(ndev, priv->hw_ctx_id);
+ aie4_put_cert_comp(priv->cert_comp);
+}
+
+static void aie4_hwctx_umq_fini(struct amdxdna_hwctx *hwctx)
+{
+ if (hwctx->priv && hwctx->priv->umq_bo)
+ amdxdna_gem_put_obj(hwctx->priv->umq_bo);
+}
+
+static int aie4_hwctx_umq_init(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_hwctx_priv *priv = hwctx->priv;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_gem_obj *umq_bo;
+ struct host_queue_header *qhdr;
+ int ret;
+
+ umq_bo = amdxdna_gem_get_obj(hwctx->client, hwctx->umq_bo_hdl, AMDXDNA_BO_SHARE);
+ if (!umq_bo) {
+ XDNA_ERR(xdna, "cannot find umq_bo handle %d", hwctx->umq_bo_hdl);
+ return -ENOENT;
+ }
+ if (umq_bo->mem.size < sizeof(*qhdr)) {
+ XDNA_ERR(xdna, "umq_bo size is too small");
+ ret = -EINVAL;
+ goto put_umq_bo;
+ }
+
+ /* get kva address for host queue read index and write index */
+ qhdr = amdxdna_gem_vmap(umq_bo);
+ if (!qhdr) {
+ ret = -ENOMEM;
+ goto put_umq_bo;
+ }
+
+ priv->umq_bo = umq_bo;
+ priv->umq_read_index = &qhdr->read_index;
+ priv->umq_write_index = &qhdr->write_index;
+
+ return 0;
+
+put_umq_bo:
+ amdxdna_gem_put_obj(umq_bo);
+ return ret;
+}
+
+int aie4_hwctx_init(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_hwctx_priv *priv;
+ int ret;
+
+ priv = kzalloc_obj(*priv);
+ if (!priv)
+ return -ENOMEM;
+ hwctx->priv = priv;
+
+ ret = aie4_hwctx_umq_init(hwctx);
+ if (ret)
+ goto free_priv;
+
+ ret = aie4_hwctx_create(hwctx);
+ if (ret)
+ goto umq_fini;
+
+ XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
+ return 0;
+
+umq_fini:
+ aie4_hwctx_umq_fini(hwctx);
+free_priv:
+ kfree(priv);
+ hwctx->priv = NULL;
+ return ret;
+}
+
+void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
+{
+ aie4_hwctx_destroy(hwctx);
+ aie4_hwctx_umq_fini(hwctx);
+ kfree(hwctx->priv);
+}
diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h
new file mode 100644
index 000000000000..eb6a38dfb53e
--- /dev/null
+++ b/drivers/accel/amdxdna/aie4_host_queue.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE4_HOST_QUEUE_H_
+#define _AIE4_HOST_QUEUE_H_
+
+#include <linux/types.h>
+
+struct host_queue_header {
+ __u64 read_index;
+ struct {
+ __u16 major;
+ __u16 minor;
+ } version;
+ __u32 capacity; /* Queue capacity, must be power of two. */
+ __u64 write_index;
+ __u64 data_address; /* The xdna dev addr for payload. */
+};
+
+#endif /* _AIE4_HOST_QUEUE_H_ */
diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
index cada53257921..7faa01ca3436 100644
--- a/drivers/accel/amdxdna/aie4_msg_priv.h
+++ b/drivers/accel/amdxdna/aie4_msg_priv.h
@@ -16,6 +16,8 @@ enum aie4_msg_opcode {
AIE4_MSG_OP_CREATE_PARTITION = 0x30001,
AIE4_MSG_OP_DESTROY_PARTITION = 0x30002,
+ AIE4_MSG_OP_CREATE_HW_CONTEXT = 0x30003,
+ AIE4_MSG_OP_DESTROY_HW_CONTEXT = 0x30004,
};
enum aie4_msg_status {
@@ -67,4 +69,31 @@ struct aie4_msg_destroy_partition_resp {
enum aie4_msg_status status;
} __packed;
+struct aie4_msg_create_hw_context_req {
+ __u32 partition_id;
+ __u32 request_num_tiles;
+ __u32 hsa_addr_high;
+ __u32 hsa_addr_low;
+#define AIE4_MSG_PASID GENMASK(19, 0)
+#define AIE4_MSG_PASID_VLD GENMASK(31, 31)
+ __u32 pasid;
+ __u32 priority_band;
+} __packed;
+
+struct aie4_msg_create_hw_context_resp {
+ enum aie4_msg_status status;
+ __u32 hw_context_id;
+ __u32 doorbell_offset;
+ __u32 job_complete_msix_idx;
+} __packed;
+
+struct aie4_msg_destroy_hw_context_req {
+ __u32 hw_context_id;
+ __u32 resvd1;
+} __packed;
+
+struct aie4_msg_destroy_hw_context_resp {
+ enum aie4_msg_status status;
+} __packed;
+
#endif /* _AIE4_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 13f5d45e388d..3be9066b7178 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -451,6 +451,9 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
ndev->aie.xdna = xdna;
xdna->dev_handle = ndev;
+ xa_init_flags(&ndev->cert_comp_xa, XA_FLAGS_ALLOC);
+ mutex_init(&ndev->cert_comp_lock);
+
/* Enable managed PCI device */
ret = pcim_enable_device(pdev);
if (ret) {
@@ -542,4 +545,6 @@ const struct amdxdna_dev_ops aie4_pf_ops = {
const struct amdxdna_dev_ops aie4_vf_ops = {
.init = aie4_vf_init,
.fini = aie4_vf_fini,
+ .hwctx_init = aie4_hwctx_init,
+ .hwctx_fini = aie4_hwctx_fini,
};
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index 620fb5bd23e4..6103007e6d2f 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -13,6 +13,23 @@
#include "aie.h"
#include "amdxdna_mailbox.h"
+struct cert_comp {
+ struct amdxdna_dev_hdl *ndev;
+ u32 msix_idx;
+ int irq;
+ struct kref kref;
+ wait_queue_head_t waitq;
+};
+
+struct amdxdna_hwctx_priv {
+ struct amdxdna_gem_obj *umq_bo;
+ u64 *umq_read_index;
+ u64 *umq_write_index;
+
+ struct cert_comp *cert_comp;
+ u32 hw_ctx_id;
+};
+
struct amdxdna_dev_priv {
const char *npufw_path;
const char *certfw_path;
@@ -32,11 +49,18 @@ struct amdxdna_dev_hdl {
struct mailbox *mbox;
u32 partition_id;
+
+ struct xarray cert_comp_xa; /* device level indexed by msix id */
+ struct mutex cert_comp_lock; /* protects cert_comp operations*/
};
/* aie4_message.c */
int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
+/* aie4_ctx.c */
+int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
+void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
+
/* aie4_sriov.c */
#if IS_ENABLED(CONFIG_PCI_IOV)
int aie4_sriov_configure(struct amdxdna_dev *xdna, int num_vfs);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 2c2c21992c87..b5ad60d4b734 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -207,6 +207,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
if (args->ext || args->ext_flags)
return -EINVAL;
+ if (!xdna->dev_info->ops->hwctx_init)
+ return -EOPNOTSUPP;
+
hwctx = kzalloc_obj(*hwctx);
if (!hwctx)
return -ENOMEM;
@@ -220,6 +223,8 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
hwctx->client = client;
hwctx->fw_ctx_id = -1;
hwctx->num_tiles = args->num_tiles;
+ hwctx->umq_bo_hdl = args->umq_bo;
+ hwctx->doorbell_offset = AMDXDNA_INVALID_DOORBELL_OFFSET;
hwctx->mem_size = args->mem_size;
hwctx->max_opc = args->max_opc;
@@ -252,6 +257,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
args->handle = hwctx->id;
args->syncobj_handle = hwctx->syncobj_hdl;
+ args->umq_doorbell = hwctx->doorbell_offset;
atomic64_set(&hwctx->job_submit_cnt, 0);
atomic64_set(&hwctx->job_free_cnt, 0);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index 355798687376..c5622718b4d5 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -14,6 +14,7 @@ struct amdxdna_hwctx_priv;
enum ert_cmd_opcode {
ERT_START_CU = 0,
+ ERT_START_DPU = 18,
ERT_CMD_CHAIN = 19,
ERT_START_NPU = 20,
ERT_START_NPU_PREEMPT = 21,
@@ -105,6 +106,8 @@ struct amdxdna_hwctx {
u32 *col_list;
u32 start_col;
u32 num_col;
+ u32 umq_bo_hdl;
+ u32 doorbell_offset;
u32 num_unused_col;
struct amdxdna_qos_info qos;
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index 34212feee15c..ad9b33dd7b13 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -18,6 +18,7 @@ extern "C" {
#define AMDXDNA_INVALID_CTX_HANDLE 0
#define AMDXDNA_INVALID_BO_HANDLE 0
#define AMDXDNA_INVALID_FENCE_HANDLE 0
+#define AMDXDNA_INVALID_DOORBELL_OFFSET (~0U)
/*
* Define hardware context priority
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH V1 3/6] accel/amdxdna: Add AIE4 VF hardware context create and destroy
2026-05-05 16:09 ` [PATCH V1 3/6] accel/amdxdna: Add AIE4 VF hardware context create and destroy Lizhi Hou
@ 2026-05-05 20:28 ` Mario Limonciello
0 siblings, 0 replies; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 20:28 UTC (permalink / raw)
To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue
On 5/5/26 11:09, Lizhi Hou wrote:
> From: David Zhang <yidong.zhang@amd.com>
>
> Implement hardware context creation and destruction for AIE4 VF devices.
>
> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: David Zhang <yidong.zhang@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
> drivers/accel/amdxdna/Makefile | 1 +
> drivers/accel/amdxdna/aie4_ctx.c | 258 ++++++++++++++++++++++++
> drivers/accel/amdxdna/aie4_host_queue.h | 22 ++
> drivers/accel/amdxdna/aie4_msg_priv.h | 29 +++
> drivers/accel/amdxdna/aie4_pci.c | 5 +
> drivers/accel/amdxdna/aie4_pci.h | 24 +++
> drivers/accel/amdxdna/amdxdna_ctx.c | 6 +
> drivers/accel/amdxdna/amdxdna_ctx.h | 3 +
> include/uapi/drm/amdxdna_accel.h | 1 +
> 9 files changed, 349 insertions(+)
> create mode 100644 drivers/accel/amdxdna/aie4_ctx.c
> create mode 100644 drivers/accel/amdxdna/aie4_host_queue.h
>
> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
> index d7720c8c8a98..05cce0a38692 100644
> --- a/drivers/accel/amdxdna/Makefile
> +++ b/drivers/accel/amdxdna/Makefile
> @@ -10,6 +10,7 @@ amdxdna-y := \
> aie2_pci.o \
> aie2_pm.o \
> aie2_solver.o \
> + aie4_ctx.o \
> aie4_message.o \
> aie4_pci.o \
> amdxdna_cbuf.o \
> diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c
> new file mode 100644
> index 000000000000..84ac706d0ffb
> --- /dev/null
> +++ b/drivers/accel/amdxdna/aie4_ctx.c
> @@ -0,0 +1,258 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
> + */
> +
> +#include <drm/amdxdna_accel.h>
> +#include <drm/drm_device.h>
> +#include <drm/drm_gem.h>
> +#include <drm/drm_gem_shmem_helper.h>
> +#include <drm/drm_print.h>
> +#include <drm/gpu_scheduler.h>
> +#include <linux/types.h>
> +
> +#include "aie.h"
> +#include "aie4_host_queue.h"
> +#include "aie4_msg_priv.h"
> +#include "aie4_pci.h"
> +#include "amdxdna_ctx.h"
> +#include "amdxdna_gem.h"
> +#include "amdxdna_mailbox.h"
> +#include "amdxdna_mailbox_helper.h"
> +#include "amdxdna_pci_drv.h"
> +
> +static irqreturn_t cert_comp_isr(int irq, void *p)
> +{
> + struct cert_comp *cert_comp = p;
> +
> + wake_up_all(&cert_comp->waitq);
> + return IRQ_HANDLED;
> +}
> +
> +static struct cert_comp *aie4_lookup_cert_comp(struct amdxdna_dev_hdl *ndev, u32 msix_idx)
> +{
> + struct amdxdna_dev *xdna = ndev->aie.xdna;
> + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> + struct cert_comp *cert_comp;
> + int ret;
> +
> + guard(mutex)(&ndev->cert_comp_lock);
> +
> + cert_comp = xa_load(&ndev->cert_comp_xa, msix_idx);
> + if (cert_comp) {
> + kref_get(&cert_comp->kref);
> + return cert_comp;
> + }
> +
> + cert_comp = kzalloc_obj(*cert_comp);
> + if (!cert_comp)
> + return NULL;
> +
> + cert_comp->ndev = ndev;
> + cert_comp->msix_idx = msix_idx;
> + init_waitqueue_head(&cert_comp->waitq);
> + kref_init(&cert_comp->kref);
> +
> + ret = pci_irq_vector(pdev, cert_comp->msix_idx);
> + if (ret < 0) {
> + XDNA_ERR(xdna, "MSI-X idx %u is invalid, ret:%d", msix_idx, ret);
> + goto free_cert_comp;
> + }
> + cert_comp->irq = ret;
> +
> + ret = request_irq(cert_comp->irq, cert_comp_isr, 0, "xdna_hsa", cert_comp);
> + if (ret) {
> + XDNA_ERR(xdna, "request irq %d failed %d", cert_comp->irq, ret);
> + goto free_cert_comp;
> + }
> +
> + ret = xa_err(xa_store(&ndev->cert_comp_xa, msix_idx, cert_comp, GFP_KERNEL));
> + if (ret) {
> + XDNA_ERR(xdna, "store cert_comp for msix idx %d failed %d", msix_idx, ret);
> + goto free_irq;
> + }
> +
> + return cert_comp;
> +
> +free_irq:
> + free_irq(cert_comp->irq, cert_comp);
> +free_cert_comp:
> + kfree(cert_comp);
> + return NULL;
> +}
> +
> +static void cert_comp_release(struct kref *kref)
> +{
> + struct cert_comp *cert_comp = container_of(kref, struct cert_comp, kref);
> + struct amdxdna_dev_hdl *ndev = cert_comp->ndev;
> +
> + drm_WARN_ON(&ndev->aie.xdna->ddev, !mutex_is_locked(&ndev->cert_comp_lock));
> +
> + xa_erase(&ndev->cert_comp_xa, cert_comp->msix_idx);
> + free_irq(cert_comp->irq, cert_comp);
> + kfree(cert_comp);
> +}
> +
> +static void aie4_put_cert_comp(struct cert_comp *cert_comp)
> +{
> + struct amdxdna_dev_hdl *ndev;
> +
> + ndev = cert_comp->ndev;
> + guard(mutex)(&ndev->cert_comp_lock);
> + kref_put(&cert_comp->kref, cert_comp_release);
> +}
> +
> +static int aie4_msg_destroy_context(struct amdxdna_dev_hdl *ndev, u32 hw_context_id)
> +{
> + DECLARE_AIE_MSG(aie4_msg_destroy_hw_context, AIE4_MSG_OP_DESTROY_HW_CONTEXT);
> +
> + req.hw_context_id = hw_context_id;
> + return aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> +}
> +
> +static int aie4_hwctx_create(struct amdxdna_hwctx *hwctx)
> +{
> + DECLARE_AIE_MSG(aie4_msg_create_hw_context, AIE4_MSG_OP_CREATE_HW_CONTEXT);
> + struct amdxdna_client *client = hwctx->client;
> + struct amdxdna_hwctx_priv *priv = hwctx->priv;
> + struct amdxdna_dev *xdna = hwctx->client->xdna;
> + struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> + int ret;
> +
> + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
> +
> + if (!ndev->partition_id || !hwctx->num_tiles) {
> + XDNA_ERR(xdna, "invalid request partition_id %d, num_tiles %d",
> + ndev->partition_id, hwctx->num_tiles);
> + return -EINVAL;
> + }
> +
> + req.partition_id = ndev->partition_id;
> + req.request_num_tiles = hwctx->num_tiles;
> + req.pasid = FIELD_PREP(AIE4_MSG_PASID, client->pasid) |
> + FIELD_PREP(AIE4_MSG_PASID_VLD, 1);
> + req.priority_band = hwctx->qos.priority;
> +
> + req.hsa_addr_high = upper_32_bits(amdxdna_gem_dev_addr(priv->umq_bo));
> + req.hsa_addr_low = lower_32_bits(amdxdna_gem_dev_addr(priv->umq_bo));
> +
> + XDNA_DBG(xdna, "pasid 0x%x, num_tiles %d, hsa[0x%x 0x%x]",
> + req.pasid, req.request_num_tiles, req.hsa_addr_high, req.hsa_addr_low);
> +
> + ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> + if (ret) {
> + XDNA_ERR(xdna, "create ctx failed: %d", ret);
> + return ret;
> + }
> +
> + XDNA_DBG(xdna, "resp msix: %d, ctx id: %d, doorbell: %d",
> + resp.job_complete_msix_idx,
> + resp.hw_context_id,
> + resp.doorbell_offset);
> +
> + /* setup interrupt completion per msix index */
> + priv->cert_comp = aie4_lookup_cert_comp(ndev, resp.job_complete_msix_idx);
> + if (!priv->cert_comp) {
> + aie4_msg_destroy_context(ndev, resp.hw_context_id);
> + return -EINVAL;
> + }
> +
> + priv->hw_ctx_id = resp.hw_context_id;
> + hwctx->doorbell_offset = resp.doorbell_offset;
> +
> + return 0;
> +}
> +
> +static void aie4_hwctx_destroy(struct amdxdna_hwctx *hwctx)
> +{
> + struct amdxdna_client *client = hwctx->client;
> + struct amdxdna_hwctx_priv *priv = hwctx->priv;
> + struct amdxdna_dev *xdna = client->xdna;
> + struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> +
> + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
> +
> + aie4_msg_destroy_context(ndev, priv->hw_ctx_id);
> + aie4_put_cert_comp(priv->cert_comp);
> +}
> +
> +static void aie4_hwctx_umq_fini(struct amdxdna_hwctx *hwctx)
> +{
> + if (hwctx->priv && hwctx->priv->umq_bo)
> + amdxdna_gem_put_obj(hwctx->priv->umq_bo);
> +}
> +
> +static int aie4_hwctx_umq_init(struct amdxdna_hwctx *hwctx)
> +{
> + struct amdxdna_hwctx_priv *priv = hwctx->priv;
> + struct amdxdna_dev *xdna = hwctx->client->xdna;
> + struct amdxdna_gem_obj *umq_bo;
> + struct host_queue_header *qhdr;
> + int ret;
> +
> + umq_bo = amdxdna_gem_get_obj(hwctx->client, hwctx->umq_bo_hdl, AMDXDNA_BO_SHARE);
> + if (!umq_bo) {
> + XDNA_ERR(xdna, "cannot find umq_bo handle %d", hwctx->umq_bo_hdl);
> + return -ENOENT;
> + }
> + if (umq_bo->mem.size < sizeof(*qhdr)) {
> + XDNA_ERR(xdna, "umq_bo size is too small");
> + ret = -EINVAL;
> + goto put_umq_bo;
> + }
> +
> + /* get kva address for host queue read index and write index */
> + qhdr = amdxdna_gem_vmap(umq_bo);
> + if (!qhdr) {
> + ret = -ENOMEM;
> + goto put_umq_bo;
> + }
> +
> + priv->umq_bo = umq_bo;
> + priv->umq_read_index = &qhdr->read_index;
> + priv->umq_write_index = &qhdr->write_index;
> +
> + return 0;
> +
> +put_umq_bo:
> + amdxdna_gem_put_obj(umq_bo);
> + return ret;
> +}
> +
> +int aie4_hwctx_init(struct amdxdna_hwctx *hwctx)
> +{
> + struct amdxdna_client *client = hwctx->client;
> + struct amdxdna_dev *xdna = client->xdna;
> + struct amdxdna_hwctx_priv *priv;
> + int ret;
> +
> + priv = kzalloc_obj(*priv);
> + if (!priv)
> + return -ENOMEM;
> + hwctx->priv = priv;
> +
> + ret = aie4_hwctx_umq_init(hwctx);
> + if (ret)
> + goto free_priv;
> +
> + ret = aie4_hwctx_create(hwctx);
> + if (ret)
> + goto umq_fini;
> +
> + XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
> + return 0;
> +
> +umq_fini:
> + aie4_hwctx_umq_fini(hwctx);
> +free_priv:
> + kfree(priv);
> + hwctx->priv = NULL;
> + return ret;
> +}
> +
> +void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
> +{
> + aie4_hwctx_destroy(hwctx);
> + aie4_hwctx_umq_fini(hwctx);
> + kfree(hwctx->priv);
> +}
> diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h
> new file mode 100644
> index 000000000000..eb6a38dfb53e
> --- /dev/null
> +++ b/drivers/accel/amdxdna/aie4_host_queue.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef _AIE4_HOST_QUEUE_H_
> +#define _AIE4_HOST_QUEUE_H_
> +
> +#include <linux/types.h>
> +
> +struct host_queue_header {
> + __u64 read_index;
> + struct {
> + __u16 major;
> + __u16 minor;
> + } version;
> + __u32 capacity; /* Queue capacity, must be power of two. */
> + __u64 write_index;
> + __u64 data_address; /* The xdna dev addr for payload. */
> +};
> +
> +#endif /* _AIE4_HOST_QUEUE_H_ */
> diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
> index cada53257921..7faa01ca3436 100644
> --- a/drivers/accel/amdxdna/aie4_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie4_msg_priv.h
> @@ -16,6 +16,8 @@ enum aie4_msg_opcode {
>
> AIE4_MSG_OP_CREATE_PARTITION = 0x30001,
> AIE4_MSG_OP_DESTROY_PARTITION = 0x30002,
> + AIE4_MSG_OP_CREATE_HW_CONTEXT = 0x30003,
> + AIE4_MSG_OP_DESTROY_HW_CONTEXT = 0x30004,
> };
>
> enum aie4_msg_status {
> @@ -67,4 +69,31 @@ struct aie4_msg_destroy_partition_resp {
> enum aie4_msg_status status;
> } __packed;
>
> +struct aie4_msg_create_hw_context_req {
> + __u32 partition_id;
> + __u32 request_num_tiles;
> + __u32 hsa_addr_high;
> + __u32 hsa_addr_low;
> +#define AIE4_MSG_PASID GENMASK(19, 0)
> +#define AIE4_MSG_PASID_VLD GENMASK(31, 31)
> + __u32 pasid;
> + __u32 priority_band;
> +} __packed;
> +
> +struct aie4_msg_create_hw_context_resp {
> + enum aie4_msg_status status;
> + __u32 hw_context_id;
> + __u32 doorbell_offset;
> + __u32 job_complete_msix_idx;
> +} __packed;
> +
> +struct aie4_msg_destroy_hw_context_req {
> + __u32 hw_context_id;
> + __u32 resvd1;
> +} __packed;
> +
> +struct aie4_msg_destroy_hw_context_resp {
> + enum aie4_msg_status status;
> +} __packed;
> +
> #endif /* _AIE4_MSG_PRIV_H_ */
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index 13f5d45e388d..3be9066b7178 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -451,6 +451,9 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
> ndev->aie.xdna = xdna;
> xdna->dev_handle = ndev;
>
> + xa_init_flags(&ndev->cert_comp_xa, XA_FLAGS_ALLOC);
> + mutex_init(&ndev->cert_comp_lock);
> +
> /* Enable managed PCI device */
> ret = pcim_enable_device(pdev);
> if (ret) {
> @@ -542,4 +545,6 @@ const struct amdxdna_dev_ops aie4_pf_ops = {
> const struct amdxdna_dev_ops aie4_vf_ops = {
> .init = aie4_vf_init,
> .fini = aie4_vf_fini,
> + .hwctx_init = aie4_hwctx_init,
> + .hwctx_fini = aie4_hwctx_fini,
> };
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index 620fb5bd23e4..6103007e6d2f 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -13,6 +13,23 @@
> #include "aie.h"
> #include "amdxdna_mailbox.h"
>
> +struct cert_comp {
> + struct amdxdna_dev_hdl *ndev;
> + u32 msix_idx;
> + int irq;
> + struct kref kref;
> + wait_queue_head_t waitq;
> +};
> +
> +struct amdxdna_hwctx_priv {
> + struct amdxdna_gem_obj *umq_bo;
> + u64 *umq_read_index;
> + u64 *umq_write_index;
> +
> + struct cert_comp *cert_comp;
> + u32 hw_ctx_id;
> +};
> +
> struct amdxdna_dev_priv {
> const char *npufw_path;
> const char *certfw_path;
> @@ -32,11 +49,18 @@ struct amdxdna_dev_hdl {
>
> struct mailbox *mbox;
> u32 partition_id;
> +
> + struct xarray cert_comp_xa; /* device level indexed by msix id */
> + struct mutex cert_comp_lock; /* protects cert_comp operations*/
> };
>
> /* aie4_message.c */
> int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
>
> +/* aie4_ctx.c */
> +int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
> +void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
> +
> /* aie4_sriov.c */
> #if IS_ENABLED(CONFIG_PCI_IOV)
> int aie4_sriov_configure(struct amdxdna_dev *xdna, int num_vfs);
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
> index 2c2c21992c87..b5ad60d4b734 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
> @@ -207,6 +207,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
> if (args->ext || args->ext_flags)
> return -EINVAL;
>
> + if (!xdna->dev_info->ops->hwctx_init)
> + return -EOPNOTSUPP;
> +
> hwctx = kzalloc_obj(*hwctx);
> if (!hwctx)
> return -ENOMEM;
> @@ -220,6 +223,8 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
> hwctx->client = client;
> hwctx->fw_ctx_id = -1;
> hwctx->num_tiles = args->num_tiles;
> + hwctx->umq_bo_hdl = args->umq_bo;
> + hwctx->doorbell_offset = AMDXDNA_INVALID_DOORBELL_OFFSET;
> hwctx->mem_size = args->mem_size;
> hwctx->max_opc = args->max_opc;
>
> @@ -252,6 +257,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
>
> args->handle = hwctx->id;
> args->syncobj_handle = hwctx->syncobj_hdl;
> + args->umq_doorbell = hwctx->doorbell_offset;
>
> atomic64_set(&hwctx->job_submit_cnt, 0);
> atomic64_set(&hwctx->job_free_cnt, 0);
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index 355798687376..c5622718b4d5 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -14,6 +14,7 @@ struct amdxdna_hwctx_priv;
>
> enum ert_cmd_opcode {
> ERT_START_CU = 0,
> + ERT_START_DPU = 18,
> ERT_CMD_CHAIN = 19,
> ERT_START_NPU = 20,
> ERT_START_NPU_PREEMPT = 21,
> @@ -105,6 +106,8 @@ struct amdxdna_hwctx {
> u32 *col_list;
> u32 start_col;
> u32 num_col;
> + u32 umq_bo_hdl;
> + u32 doorbell_offset;
> u32 num_unused_col;
>
> struct amdxdna_qos_info qos;
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index 34212feee15c..ad9b33dd7b13 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -18,6 +18,7 @@ extern "C" {
> #define AMDXDNA_INVALID_CTX_HANDLE 0
> #define AMDXDNA_INVALID_BO_HANDLE 0
> #define AMDXDNA_INVALID_FENCE_HANDLE 0
> +#define AMDXDNA_INVALID_DOORBELL_OFFSET (~0U)
>
> /*
> * Define hardware context priority
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support
2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
` (2 preceding siblings ...)
2026-05-05 16:09 ` [PATCH V1 3/6] accel/amdxdna: Add AIE4 VF hardware context create and destroy Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
2026-05-05 20:31 ` Mario Limonciello
2026-05-05 16:09 ` [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support Lizhi Hou
2026-05-05 16:09 ` [PATCH V1 6/6] accel/amdxdna: Add AIE4 work buffer initialization Lizhi Hou
5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue, Lizhi Hou
From: David Zhang <yidong.zhang@amd.com>
Expose the command doorbell register to userspace on a per-hardware
context basis, enabling applications to notify the firmware of pending
commands via doorbell writes.
Introduce DRM_IOCTL_AMDXDNA_WAIT_CMD to allow userspace to wait for
completion of individual commands.
Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie4_ctx.c | 75 +++++++++++++++++++++++++
drivers/accel/amdxdna/aie4_host_queue.h | 2 +
drivers/accel/amdxdna/aie4_pci.c | 34 +++++++++++
drivers/accel/amdxdna/aie4_pci.h | 3 +
drivers/accel/amdxdna/amdxdna_ctx.c | 34 +++++++++++
drivers/accel/amdxdna/amdxdna_ctx.h | 4 +-
drivers/accel/amdxdna/amdxdna_gem.c | 5 +-
drivers/accel/amdxdna/amdxdna_pci_drv.c | 18 +++++-
drivers/accel/amdxdna/amdxdna_pci_drv.h | 3 +
drivers/accel/amdxdna/npu3_regs.c | 5 ++
include/uapi/drm/amdxdna_accel.h | 22 +++++++-
11 files changed, 198 insertions(+), 7 deletions(-)
diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c
index 84ac706d0ffb..8408b0d2696f 100644
--- a/drivers/accel/amdxdna/aie4_ctx.c
+++ b/drivers/accel/amdxdna/aie4_ctx.c
@@ -256,3 +256,78 @@ void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
aie4_hwctx_umq_fini(hwctx);
kfree(hwctx->priv);
}
+
+static inline bool valid_queue_index(u64 read, u64 write, u32 capacity)
+{
+ return (write >= read) && ((write - read) <= capacity);
+}
+
+static u64 get_read_index(struct amdxdna_hwctx *hwctx)
+{
+ u64 wi = READ_ONCE(*hwctx->priv->umq_write_index);
+ u64 ri = READ_ONCE(*hwctx->priv->umq_read_index);
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ /*
+ * CERT cannot update read index as uint64 atomically. Driver may read
+ * half-updated read index when it has bits in high 32bit. In case read
+ * index is not valid, wait for some time and retry once. It should
+ * allow CERT to complete the read index update.
+ */
+ if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
+ XDNA_WARN(xdna, "Invalid index, ri %llu, wi %llu", ri, wi);
+ usleep_range(100, 200);
+ ri = READ_ONCE(*hwctx->priv->umq_read_index);
+ if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
+ XDNA_ERR(xdna, "Invalid index after retry, ri %llu, wi %llu", ri, wi);
+ ri = 0;
+ }
+ }
+
+ return ri;
+}
+
+static inline bool check_cmd_done(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+ u64 read_idx = get_read_index(hwctx);
+
+ return read_idx > seq;
+}
+
+int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
+{
+ unsigned long wait_jifs = MAX_SCHEDULE_TIMEOUT;
+ struct amdxdna_hwctx_priv *priv = hwctx->priv;
+ struct cert_comp *cert_comp = priv->cert_comp;
+ long ret;
+
+ if (timeout)
+ wait_jifs = msecs_to_jiffies(timeout);
+
+ ret = wait_event_interruptible_timeout(cert_comp->waitq,
+ (check_cmd_done(hwctx, seq)),
+ wait_jifs);
+
+ if (!ret)
+ ret = -ETIME;
+
+ return ret <= 0 ? ret : 0;
+}
+
+int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff)
+{
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+ int idx;
+
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+ if (vm_pgoff == (hwctx->doorbell_offset >> PAGE_SHIFT)) {
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ return 1;
+ }
+ }
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h
index eb6a38dfb53e..1b33eda3f727 100644
--- a/drivers/accel/amdxdna/aie4_host_queue.h
+++ b/drivers/accel/amdxdna/aie4_host_queue.h
@@ -8,6 +8,8 @@
#include <linux/types.h>
+#define CTX_MAX_CMDS 32
+
struct host_queue_header {
__u64 read_index;
struct {
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 3be9066b7178..9ff34ce57fcb 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -503,6 +503,38 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
return 0;
}
+static int aie4_doorbell_mmap(struct amdxdna_client *client, struct vm_area_struct *vma)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ const struct amdxdna_dev_priv *npriv = xdna->dev_info->dev_priv;
+ phys_addr_t res_start;
+ unsigned long pfn;
+ int ret;
+
+ if (!aie4_hwctx_valid_doorbell(client, vma->vm_pgoff)) {
+ XDNA_ERR(xdna, "Invalid doorbell page offset 0x%lx", vma->vm_pgoff);
+ return -EINVAL;
+ }
+
+ if (vma_pages(vma) != 1) {
+ XDNA_ERR(xdna, "can only map one page, got %ld", vma_pages(vma));
+ return -EINVAL;
+ }
+
+ res_start = pci_resource_start(pdev, xdna->dev_info->doorbell_bar) + npriv->doorbell_off;
+ pfn = PHYS_PFN(res_start) + vma->vm_pgoff;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ pfn,
+ PAGE_SIZE,
+ vma->vm_page_prot);
+
+ XDNA_DBG(xdna, "doorbell ret %d", ret);
+ return ret;
+}
+
static int aie4_pf_init(struct amdxdna_dev *xdna)
{
int ret;
@@ -547,4 +579,6 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
.fini = aie4_vf_fini,
.hwctx_init = aie4_hwctx_init,
.hwctx_fini = aie4_hwctx_fini,
+ .mmap = aie4_doorbell_mmap,
+ .cmd_wait = aie4_cmd_wait,
};
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index 6103007e6d2f..b69489acd53d 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -36,6 +36,7 @@ struct amdxdna_dev_priv {
u32 mbox_bar;
u32 mbox_rbuf_bar;
u64 mbox_info_off;
+ u32 doorbell_off;
struct aie_bar_off_pair psp_regs_off[PSP_MAX_REGS];
struct aie_bar_off_pair smu_regs_off[SMU_MAX_REGS];
@@ -60,6 +61,8 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
/* aie4_ctx.c */
int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
+int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
+int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff);
/* aie4_sriov.c */
#if IS_ENABLED(CONFIG_PCI_IOV)
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index b5ad60d4b734..b79229a63af3 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -627,3 +627,37 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
return -EINVAL;
}
+
+int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_wait_cmd *args = data;
+ struct amdxdna_hwctx *hwctx;
+ int ret, idx;
+
+ XDNA_DBG(xdna, "PID %d ctx %d timeout set %d ms for cmd %llu",
+ client->pid, args->hwctx, args->timeout, args->seq);
+
+ if (!xdna->dev_info->ops->cmd_wait)
+ return -EOPNOTSUPP;
+
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = xa_load(&client->hwctx_xa, args->hwctx);
+ if (!hwctx) {
+ XDNA_DBG(xdna, "PID %d failed to get ctx %d", client->pid, args->hwctx);
+ ret = -EINVAL;
+ goto unlock_ctx_srcu;
+ }
+
+ ret = xdna->dev_info->ops->cmd_wait(hwctx, args->seq, args->timeout);
+
+ XDNA_DBG(xdna, "PID %d ctx %d cmd %lld wait finished, ret %d",
+ client->pid, args->hwctx, args->seq, ret);
+
+ trace_amdxdna_debug_point(current->comm, args->seq, "job returned to user");
+
+unlock_ctx_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ return ret;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index c5622718b4d5..6e3c6371a088 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -211,12 +211,10 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
u32 *arg_bo_hdls, u32 arg_bo_cnt,
u32 hwctx_hdl, u64 *seq);
-int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
- u64 seq, u32 timeout);
-
int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
#endif /* _AMDXDNA_CTX_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index ebfc472aa9e7..319d2064fafa 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -212,7 +212,8 @@ static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
mmu_interval_set_seq(&mapp->notifier, cur_seq);
up_write(&xdna->notifier_lock);
- xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
+ if (xdna->dev_info->ops->hmm_invalidate)
+ xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
if (range->event == MMU_NOTIFY_UNMAP) {
down_write(&xdna->notifier_lock);
@@ -295,7 +296,7 @@ static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo,
u32 nr_pages;
int ret;
- if (!xdna->dev_info->ops->hmm_invalidate)
+ if (!amdxdna_pasid_on(abo->client))
return 0;
mapp = kzalloc_obj(*mapp);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 39ad081ac082..c0d00db25cde 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -224,6 +224,21 @@ static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struc
return ret;
}
+static int amdxdna_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct drm_file *drm_filp = filp->private_data;
+ struct amdxdna_client *client = drm_filp->driver_priv;
+ struct amdxdna_dev *xdna = client->xdna;
+
+ if (likely(vma->vm_pgoff >= DRM_FILE_PAGE_OFFSET_START))
+ return drm_gem_mmap(filp, vma);
+
+ if (!xdna->dev_info->ops->mmap)
+ return -EOPNOTSUPP;
+
+ return xdna->dev_info->ops->mmap(client, vma);
+}
+
static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
/* Context */
DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
@@ -235,6 +250,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
/* Execution */
DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_WAIT_CMD, amdxdna_drm_wait_cmd_ioctl, 0),
/* AIE hardware */
DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, amdxdna_drm_get_array_ioctl, 0),
@@ -281,7 +297,7 @@ static const struct file_operations amdxdna_fops = {
.poll = drm_poll,
.read = drm_read,
.llseek = noop_llseek,
- .mmap = drm_gem_mmap,
+ .mmap = amdxdna_drm_gem_mmap,
.show_fdinfo = drm_show_fdinfo,
.fop_flags = FOP_UNSIGNED_OFFSET,
};
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index caed11c09e55..471b72299aee 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -56,12 +56,14 @@ struct amdxdna_dev_ops {
int (*resume)(struct amdxdna_dev *xdna);
int (*suspend)(struct amdxdna_dev *xdna);
int (*sriov_configure)(struct amdxdna_dev *xdna, int num_vfs);
+ int (*mmap)(struct amdxdna_client *client, struct vm_area_struct *vma);
int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+ int (*cmd_wait)(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
int (*get_array)(struct amdxdna_client *client, struct amdxdna_drm_get_array *args);
@@ -85,6 +87,7 @@ struct amdxdna_dev_info {
int sram_bar;
int psp_bar;
int smu_bar;
+ int doorbell_bar;
int device_type;
int first_col;
u32 dev_mem_buf_shift;
diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
index 6d5da779232b..d76b2e99c308 100644
--- a/drivers/accel/amdxdna/npu3_regs.c
+++ b/drivers/accel/amdxdna/npu3_regs.c
@@ -14,6 +14,9 @@
#define NPU3_MBOX_BUFFER_BAR 2
#define NPU3_MBOX_INFO_OFF 0x0
+#define NPU3_DOORBELL_BAR 2
+#define NPU3_DOORBELL_OFF 0x0
+
/* PCIe BAR Index for NPU3 */
#define NPU3_REG_BAR_INDEX 0
#define NPU3_PSP_BAR_INDEX 4
@@ -45,6 +48,7 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
.mbox_bar = NPU3_MBOX_BAR,
.mbox_rbuf_bar = NPU3_MBOX_BUFFER_BAR,
.mbox_info_off = NPU3_MBOX_INFO_OFF,
+ .doorbell_off = NPU3_DOORBELL_OFF,
.psp_regs_off = {
DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
@@ -87,6 +91,7 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
const struct amdxdna_dev_info dev_npu3_vf_info = {
.mbox_bar = NPU3_MBOX_BAR,
.sram_bar = NPU3_MBOX_BUFFER_BAR,
+ .doorbell_bar = NPU3_DOORBELL_BAR,
.default_vbnv = "RyzenAI-npu3-vf",
.device_type = AMDXDNA_DEV_TYPE_UMQ,
.dev_priv = &npu3_dev_vf_priv,
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index ad9b33dd7b13..51a507561df6 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -45,7 +45,8 @@ enum amdxdna_drm_ioctl_id {
DRM_AMDXDNA_EXEC_CMD,
DRM_AMDXDNA_GET_INFO,
DRM_AMDXDNA_SET_STATE,
- DRM_AMDXDNA_GET_ARRAY = 10,
+ DRM_AMDXDNA_WAIT_CMD,
+ DRM_AMDXDNA_GET_ARRAY,
};
/**
@@ -274,6 +275,21 @@ struct amdxdna_drm_exec_cmd {
__u64 seq;
};
+/**
+ * struct amdxdna_drm_wait_cmd - Wait execution command.
+ *
+ * @hwctx: Context handle.
+ * @timeout: timeout in ms, 0 implies infinite wait.
+ * @seq: sequence number of the command returned by execute command.
+ *
+ * Wait a command specified by seq to be completed.
+ */
+struct amdxdna_drm_wait_cmd {
+ __u32 hwctx;
+ __u32 timeout;
+ __u64 seq;
+};
+
/**
* struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware
* @buffer: The user space buffer that will return the AIE status.
@@ -739,6 +755,10 @@ struct amdxdna_drm_set_power_mode {
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \
struct amdxdna_drm_get_array)
+#define DRM_IOCTL_AMDXDNA_WAIT_CMD \
+ DRM_IOW(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, \
+ struct amdxdna_drm_wait_cmd)
+
#if defined(__cplusplus)
} /* extern c end */
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support
2026-05-05 16:09 ` [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support Lizhi Hou
@ 2026-05-05 20:31 ` Mario Limonciello
2026-05-06 16:11 ` Lizhi Hou
0 siblings, 1 reply; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 20:31 UTC (permalink / raw)
To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue
On 5/5/26 11:09, Lizhi Hou wrote:
> From: David Zhang <yidong.zhang@amd.com>
>
> Expose the command doorbell register to userspace on a per-hardware
> context basis, enabling applications to notify the firmware of pending
> commands via doorbell writes.
>
> Introduce DRM_IOCTL_AMDXDNA_WAIT_CMD to allow userspace to wait for
> completion of individual commands.
>
> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: David Zhang <yidong.zhang@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Minor suggestion below.
> ---
> drivers/accel/amdxdna/aie4_ctx.c | 75 +++++++++++++++++++++++++
> drivers/accel/amdxdna/aie4_host_queue.h | 2 +
> drivers/accel/amdxdna/aie4_pci.c | 34 +++++++++++
> drivers/accel/amdxdna/aie4_pci.h | 3 +
> drivers/accel/amdxdna/amdxdna_ctx.c | 34 +++++++++++
> drivers/accel/amdxdna/amdxdna_ctx.h | 4 +-
> drivers/accel/amdxdna/amdxdna_gem.c | 5 +-
> drivers/accel/amdxdna/amdxdna_pci_drv.c | 18 +++++-
> drivers/accel/amdxdna/amdxdna_pci_drv.h | 3 +
> drivers/accel/amdxdna/npu3_regs.c | 5 ++
> include/uapi/drm/amdxdna_accel.h | 22 +++++++-
> 11 files changed, 198 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c
> index 84ac706d0ffb..8408b0d2696f 100644
> --- a/drivers/accel/amdxdna/aie4_ctx.c
> +++ b/drivers/accel/amdxdna/aie4_ctx.c
> @@ -256,3 +256,78 @@ void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
> aie4_hwctx_umq_fini(hwctx);
> kfree(hwctx->priv);
> }
> +
> +static inline bool valid_queue_index(u64 read, u64 write, u32 capacity)
> +{
> + return (write >= read) && ((write - read) <= capacity);
> +}
> +
> +static u64 get_read_index(struct amdxdna_hwctx *hwctx)
> +{
> + u64 wi = READ_ONCE(*hwctx->priv->umq_write_index);
> + u64 ri = READ_ONCE(*hwctx->priv->umq_read_index);
> + struct amdxdna_dev *xdna = hwctx->client->xdna;
> +
> + /*
> + * CERT cannot update read index as uint64 atomically. Driver may read
> + * half-updated read index when it has bits in high 32bit. In case read
> + * index is not valid, wait for some time and retry once. It should
> + * allow CERT to complete the read index update.
> + */
> + if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
> + XDNA_WARN(xdna, "Invalid index, ri %llu, wi %llu", ri, wi);
> + usleep_range(100, 200);
> + ri = READ_ONCE(*hwctx->priv->umq_read_index);
> + if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
> + XDNA_ERR(xdna, "Invalid index after retry, ri %llu, wi %llu", ri, wi);
> + ri = 0;
> + }
> + }
> +
> + return ri;
> +}
> +
> +static inline bool check_cmd_done(struct amdxdna_hwctx *hwctx, u64 seq)
> +{
> + u64 read_idx = get_read_index(hwctx);
> +
> + return read_idx > seq;
> +}
> +
> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
> +{
> + unsigned long wait_jifs = MAX_SCHEDULE_TIMEOUT;
> + struct amdxdna_hwctx_priv *priv = hwctx->priv;
> + struct cert_comp *cert_comp = priv->cert_comp;
> + long ret;
Not sure I see the point in making ret a long.
wait_event_interruptible_timeout() returns 0 or 1.
bool val;
val = wait_event_interruptible_timeout()
return val ? 0 : -ETIME;
> +
> + if (timeout)
> + wait_jifs = msecs_to_jiffies(timeout);
> +
> + ret = wait_event_interruptible_timeout(cert_comp->waitq,
> + (check_cmd_done(hwctx, seq)),
> + wait_jifs);
> +
> + if (!ret)
> + ret = -ETIME;
> +
> + return ret <= 0 ? ret : 0;
> +}
> +
> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff)
> +{
> + struct amdxdna_hwctx *hwctx;
> + unsigned long hwctx_id;
> + int idx;
> +
> + idx = srcu_read_lock(&client->hwctx_srcu);
> + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
> + if (vm_pgoff == (hwctx->doorbell_offset >> PAGE_SHIFT)) {
> + srcu_read_unlock(&client->hwctx_srcu, idx);
> + return 1;
> + }
> + }
> + srcu_read_unlock(&client->hwctx_srcu, idx);
> +
> + return 0;
> +}
> diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h
> index eb6a38dfb53e..1b33eda3f727 100644
> --- a/drivers/accel/amdxdna/aie4_host_queue.h
> +++ b/drivers/accel/amdxdna/aie4_host_queue.h
> @@ -8,6 +8,8 @@
>
> #include <linux/types.h>
>
> +#define CTX_MAX_CMDS 32
> +
> struct host_queue_header {
> __u64 read_index;
> struct {
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index 3be9066b7178..9ff34ce57fcb 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -503,6 +503,38 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
> return 0;
> }
>
> +static int aie4_doorbell_mmap(struct amdxdna_client *client, struct vm_area_struct *vma)
> +{
> + struct amdxdna_dev *xdna = client->xdna;
> + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> + const struct amdxdna_dev_priv *npriv = xdna->dev_info->dev_priv;
> + phys_addr_t res_start;
> + unsigned long pfn;
> + int ret;
> +
> + if (!aie4_hwctx_valid_doorbell(client, vma->vm_pgoff)) {
> + XDNA_ERR(xdna, "Invalid doorbell page offset 0x%lx", vma->vm_pgoff);
> + return -EINVAL;
> + }
> +
> + if (vma_pages(vma) != 1) {
> + XDNA_ERR(xdna, "can only map one page, got %ld", vma_pages(vma));
> + return -EINVAL;
> + }
> +
> + res_start = pci_resource_start(pdev, xdna->dev_info->doorbell_bar) + npriv->doorbell_off;
> + pfn = PHYS_PFN(res_start) + vma->vm_pgoff;
> + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> + vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
> + ret = io_remap_pfn_range(vma, vma->vm_start,
> + pfn,
> + PAGE_SIZE,
> + vma->vm_page_prot);
> +
> + XDNA_DBG(xdna, "doorbell ret %d", ret);
> + return ret;
> +}
> +
> static int aie4_pf_init(struct amdxdna_dev *xdna)
> {
> int ret;
> @@ -547,4 +579,6 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
> .fini = aie4_vf_fini,
> .hwctx_init = aie4_hwctx_init,
> .hwctx_fini = aie4_hwctx_fini,
> + .mmap = aie4_doorbell_mmap,
> + .cmd_wait = aie4_cmd_wait,
> };
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index 6103007e6d2f..b69489acd53d 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -36,6 +36,7 @@ struct amdxdna_dev_priv {
> u32 mbox_bar;
> u32 mbox_rbuf_bar;
> u64 mbox_info_off;
> + u32 doorbell_off;
>
> struct aie_bar_off_pair psp_regs_off[PSP_MAX_REGS];
> struct aie_bar_off_pair smu_regs_off[SMU_MAX_REGS];
> @@ -60,6 +61,8 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
> /* aie4_ctx.c */
> int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
> void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff);
>
> /* aie4_sriov.c */
> #if IS_ENABLED(CONFIG_PCI_IOV)
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
> index b5ad60d4b734..b79229a63af3 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
> @@ -627,3 +627,37 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
> XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
> return -EINVAL;
> }
> +
> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> +{
> + struct amdxdna_client *client = filp->driver_priv;
> + struct amdxdna_dev *xdna = to_xdna_dev(dev);
> + struct amdxdna_drm_wait_cmd *args = data;
> + struct amdxdna_hwctx *hwctx;
> + int ret, idx;
> +
> + XDNA_DBG(xdna, "PID %d ctx %d timeout set %d ms for cmd %llu",
> + client->pid, args->hwctx, args->timeout, args->seq);
> +
> + if (!xdna->dev_info->ops->cmd_wait)
> + return -EOPNOTSUPP;
> +
> + idx = srcu_read_lock(&client->hwctx_srcu);
> + hwctx = xa_load(&client->hwctx_xa, args->hwctx);
> + if (!hwctx) {
> + XDNA_DBG(xdna, "PID %d failed to get ctx %d", client->pid, args->hwctx);
> + ret = -EINVAL;
> + goto unlock_ctx_srcu;
> + }
> +
> + ret = xdna->dev_info->ops->cmd_wait(hwctx, args->seq, args->timeout);
> +
> + XDNA_DBG(xdna, "PID %d ctx %d cmd %lld wait finished, ret %d",
> + client->pid, args->hwctx, args->seq, ret);
> +
> + trace_amdxdna_debug_point(current->comm, args->seq, "job returned to user");
> +
> +unlock_ctx_srcu:
> + srcu_read_unlock(&client->hwctx_srcu, idx);
> + return ret;
> +}
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index c5622718b4d5..6e3c6371a088 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -211,12 +211,10 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
> u32 *arg_bo_hdls, u32 arg_bo_cnt,
> u32 hwctx_hdl, u64 *seq);
>
> -int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
> - u64 seq, u32 timeout);
> -
> int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>
> #endif /* _AMDXDNA_CTX_H_ */
> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
> index ebfc472aa9e7..319d2064fafa 100644
> --- a/drivers/accel/amdxdna/amdxdna_gem.c
> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
> @@ -212,7 +212,8 @@ static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
> mmu_interval_set_seq(&mapp->notifier, cur_seq);
> up_write(&xdna->notifier_lock);
>
> - xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
> + if (xdna->dev_info->ops->hmm_invalidate)
> + xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
>
> if (range->event == MMU_NOTIFY_UNMAP) {
> down_write(&xdna->notifier_lock);
> @@ -295,7 +296,7 @@ static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo,
> u32 nr_pages;
> int ret;
>
> - if (!xdna->dev_info->ops->hmm_invalidate)
> + if (!amdxdna_pasid_on(abo->client))
> return 0;
>
> mapp = kzalloc_obj(*mapp);
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index 39ad081ac082..c0d00db25cde 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -224,6 +224,21 @@ static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struc
> return ret;
> }
>
> +static int amdxdna_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> + struct drm_file *drm_filp = filp->private_data;
> + struct amdxdna_client *client = drm_filp->driver_priv;
> + struct amdxdna_dev *xdna = client->xdna;
> +
> + if (likely(vma->vm_pgoff >= DRM_FILE_PAGE_OFFSET_START))
> + return drm_gem_mmap(filp, vma);
> +
> + if (!xdna->dev_info->ops->mmap)
> + return -EOPNOTSUPP;
> +
> + return xdna->dev_info->ops->mmap(client, vma);
> +}
> +
> static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
> /* Context */
> DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
> @@ -235,6 +250,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
> DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
> /* Execution */
> DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
> + DRM_IOCTL_DEF_DRV(AMDXDNA_WAIT_CMD, amdxdna_drm_wait_cmd_ioctl, 0),
> /* AIE hardware */
> DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
> DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, amdxdna_drm_get_array_ioctl, 0),
> @@ -281,7 +297,7 @@ static const struct file_operations amdxdna_fops = {
> .poll = drm_poll,
> .read = drm_read,
> .llseek = noop_llseek,
> - .mmap = drm_gem_mmap,
> + .mmap = amdxdna_drm_gem_mmap,
> .show_fdinfo = drm_show_fdinfo,
> .fop_flags = FOP_UNSIGNED_OFFSET,
> };
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> index caed11c09e55..471b72299aee 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> @@ -56,12 +56,14 @@ struct amdxdna_dev_ops {
> int (*resume)(struct amdxdna_dev *xdna);
> int (*suspend)(struct amdxdna_dev *xdna);
> int (*sriov_configure)(struct amdxdna_dev *xdna, int num_vfs);
> + int (*mmap)(struct amdxdna_client *client, struct vm_area_struct *vma);
> int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
> void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
> int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
> int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
> void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
> int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
> + int (*cmd_wait)(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
> int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
> int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
> int (*get_array)(struct amdxdna_client *client, struct amdxdna_drm_get_array *args);
> @@ -85,6 +87,7 @@ struct amdxdna_dev_info {
> int sram_bar;
> int psp_bar;
> int smu_bar;
> + int doorbell_bar;
> int device_type;
> int first_col;
> u32 dev_mem_buf_shift;
> diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
> index 6d5da779232b..d76b2e99c308 100644
> --- a/drivers/accel/amdxdna/npu3_regs.c
> +++ b/drivers/accel/amdxdna/npu3_regs.c
> @@ -14,6 +14,9 @@
> #define NPU3_MBOX_BUFFER_BAR 2
> #define NPU3_MBOX_INFO_OFF 0x0
>
> +#define NPU3_DOORBELL_BAR 2
> +#define NPU3_DOORBELL_OFF 0x0
> +
> /* PCIe BAR Index for NPU3 */
> #define NPU3_REG_BAR_INDEX 0
> #define NPU3_PSP_BAR_INDEX 4
> @@ -45,6 +48,7 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
> .mbox_bar = NPU3_MBOX_BAR,
> .mbox_rbuf_bar = NPU3_MBOX_BUFFER_BAR,
> .mbox_info_off = NPU3_MBOX_INFO_OFF,
> + .doorbell_off = NPU3_DOORBELL_OFF,
> .psp_regs_off = {
> DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
> DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
> @@ -87,6 +91,7 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
> const struct amdxdna_dev_info dev_npu3_vf_info = {
> .mbox_bar = NPU3_MBOX_BAR,
> .sram_bar = NPU3_MBOX_BUFFER_BAR,
> + .doorbell_bar = NPU3_DOORBELL_BAR,
> .default_vbnv = "RyzenAI-npu3-vf",
> .device_type = AMDXDNA_DEV_TYPE_UMQ,
> .dev_priv = &npu3_dev_vf_priv,
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index ad9b33dd7b13..51a507561df6 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -45,7 +45,8 @@ enum amdxdna_drm_ioctl_id {
> DRM_AMDXDNA_EXEC_CMD,
> DRM_AMDXDNA_GET_INFO,
> DRM_AMDXDNA_SET_STATE,
> - DRM_AMDXDNA_GET_ARRAY = 10,
> + DRM_AMDXDNA_WAIT_CMD,
> + DRM_AMDXDNA_GET_ARRAY,
> };
>
> /**
> @@ -274,6 +275,21 @@ struct amdxdna_drm_exec_cmd {
> __u64 seq;
> };
>
> +/**
> + * struct amdxdna_drm_wait_cmd - Wait execution command.
> + *
> + * @hwctx: Context handle.
> + * @timeout: timeout in ms, 0 implies infinite wait.
> + * @seq: sequence number of the command returned by execute command.
> + *
> + * Wait a command specified by seq to be completed.
> + */
> +struct amdxdna_drm_wait_cmd {
> + __u32 hwctx;
> + __u32 timeout;
> + __u64 seq;
> +};
> +
> /**
> * struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware
> * @buffer: The user space buffer that will return the AIE status.
> @@ -739,6 +755,10 @@ struct amdxdna_drm_set_power_mode {
> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \
> struct amdxdna_drm_get_array)
>
> +#define DRM_IOCTL_AMDXDNA_WAIT_CMD \
> + DRM_IOW(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, \
> + struct amdxdna_drm_wait_cmd)
> +
> #if defined(__cplusplus)
> } /* extern c end */
> #endif
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support
2026-05-05 20:31 ` Mario Limonciello
@ 2026-05-06 16:11 ` Lizhi Hou
2026-05-06 16:33 ` Mario Limonciello
0 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-06 16:11 UTC (permalink / raw)
To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel,
karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue
On 5/5/26 13:31, Mario Limonciello wrote:
>
>
> On 5/5/26 11:09, Lizhi Hou wrote:
>> From: David Zhang <yidong.zhang@amd.com>
>>
>> Expose the command doorbell register to userspace on a per-hardware
>> context basis, enabling applications to notify the firmware of pending
>> commands via doorbell writes.
>>
>> Introduce DRM_IOCTL_AMDXDNA_WAIT_CMD to allow userspace to wait for
>> completion of individual commands.
>>
>> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>> Signed-off-by: David Zhang <yidong.zhang@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> Minor suggestion below.
>
>> ---
>> drivers/accel/amdxdna/aie4_ctx.c | 75 +++++++++++++++++++++++++
>> drivers/accel/amdxdna/aie4_host_queue.h | 2 +
>> drivers/accel/amdxdna/aie4_pci.c | 34 +++++++++++
>> drivers/accel/amdxdna/aie4_pci.h | 3 +
>> drivers/accel/amdxdna/amdxdna_ctx.c | 34 +++++++++++
>> drivers/accel/amdxdna/amdxdna_ctx.h | 4 +-
>> drivers/accel/amdxdna/amdxdna_gem.c | 5 +-
>> drivers/accel/amdxdna/amdxdna_pci_drv.c | 18 +++++-
>> drivers/accel/amdxdna/amdxdna_pci_drv.h | 3 +
>> drivers/accel/amdxdna/npu3_regs.c | 5 ++
>> include/uapi/drm/amdxdna_accel.h | 22 +++++++-
>> 11 files changed, 198 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie4_ctx.c
>> b/drivers/accel/amdxdna/aie4_ctx.c
>> index 84ac706d0ffb..8408b0d2696f 100644
>> --- a/drivers/accel/amdxdna/aie4_ctx.c
>> +++ b/drivers/accel/amdxdna/aie4_ctx.c
>> @@ -256,3 +256,78 @@ void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
>> aie4_hwctx_umq_fini(hwctx);
>> kfree(hwctx->priv);
>> }
>> +
>> +static inline bool valid_queue_index(u64 read, u64 write, u32 capacity)
>> +{
>> + return (write >= read) && ((write - read) <= capacity);
>> +}
>> +
>> +static u64 get_read_index(struct amdxdna_hwctx *hwctx)
>> +{
>> + u64 wi = READ_ONCE(*hwctx->priv->umq_write_index);
>> + u64 ri = READ_ONCE(*hwctx->priv->umq_read_index);
>> + struct amdxdna_dev *xdna = hwctx->client->xdna;
>> +
>> + /*
>> + * CERT cannot update read index as uint64 atomically. Driver
>> may read
>> + * half-updated read index when it has bits in high 32bit. In
>> case read
>> + * index is not valid, wait for some time and retry once. It should
>> + * allow CERT to complete the read index update.
>> + */
>> + if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
>> + XDNA_WARN(xdna, "Invalid index, ri %llu, wi %llu", ri, wi);
>> + usleep_range(100, 200);
>> + ri = READ_ONCE(*hwctx->priv->umq_read_index);
>> + if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
>> + XDNA_ERR(xdna, "Invalid index after retry, ri %llu, wi
>> %llu", ri, wi);
>> + ri = 0;
>> + }
>> + }
>> +
>> + return ri;
>> +}
>> +
>> +static inline bool check_cmd_done(struct amdxdna_hwctx *hwctx, u64 seq)
>> +{
>> + u64 read_idx = get_read_index(hwctx);
>> +
>> + return read_idx > seq;
>> +}
>> +
>> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
>> +{
>> + unsigned long wait_jifs = MAX_SCHEDULE_TIMEOUT;
>> + struct amdxdna_hwctx_priv *priv = hwctx->priv;
>> + struct cert_comp *cert_comp = priv->cert_comp;
>> + long ret;
>
> Not sure I see the point in making ret a long.
> wait_event_interruptible_timeout() returns 0 or 1.
Other than 0 or 1, wait_event_interruptible_timeout() can also return
the remaining jiffies and -ERESTARTSYS
Lizhi
>
> bool val;
> val = wait_event_interruptible_timeout()
> return val ? 0 : -ETIME;
>
>
>> +
>> + if (timeout)
>> + wait_jifs = msecs_to_jiffies(timeout);
>> +
>> + ret = wait_event_interruptible_timeout(cert_comp->waitq,
>> + (check_cmd_done(hwctx, seq)),
>> + wait_jifs);
>> +
>> + if (!ret)
>> + ret = -ETIME;
>> +
>> + return ret <= 0 ? ret : 0;
>> +}
>> +
>> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32
>> vm_pgoff)
>> +{
>> + struct amdxdna_hwctx *hwctx;
>> + unsigned long hwctx_id;
>> + int idx;
>> +
>> + idx = srcu_read_lock(&client->hwctx_srcu);
>> + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
>> + if (vm_pgoff == (hwctx->doorbell_offset >> PAGE_SHIFT)) {
>> + srcu_read_unlock(&client->hwctx_srcu, idx);
>> + return 1;
>> + }
>> + }
>> + srcu_read_unlock(&client->hwctx_srcu, idx);
>> +
>> + return 0;
>> +}
>> diff --git a/drivers/accel/amdxdna/aie4_host_queue.h
>> b/drivers/accel/amdxdna/aie4_host_queue.h
>> index eb6a38dfb53e..1b33eda3f727 100644
>> --- a/drivers/accel/amdxdna/aie4_host_queue.h
>> +++ b/drivers/accel/amdxdna/aie4_host_queue.h
>> @@ -8,6 +8,8 @@
>> #include <linux/types.h>
>> +#define CTX_MAX_CMDS 32
>> +
>> struct host_queue_header {
>> __u64 read_index;
>> struct {
>> diff --git a/drivers/accel/amdxdna/aie4_pci.c
>> b/drivers/accel/amdxdna/aie4_pci.c
>> index 3be9066b7178..9ff34ce57fcb 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.c
>> +++ b/drivers/accel/amdxdna/aie4_pci.c
>> @@ -503,6 +503,38 @@ static int aie4m_pcidev_init(struct amdxdna_dev
>> *xdna)
>> return 0;
>> }
>> +static int aie4_doorbell_mmap(struct amdxdna_client *client,
>> struct vm_area_struct *vma)
>> +{
>> + struct amdxdna_dev *xdna = client->xdna;
>> + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>> + const struct amdxdna_dev_priv *npriv = xdna->dev_info->dev_priv;
>> + phys_addr_t res_start;
>> + unsigned long pfn;
>> + int ret;
>> +
>> + if (!aie4_hwctx_valid_doorbell(client, vma->vm_pgoff)) {
>> + XDNA_ERR(xdna, "Invalid doorbell page offset 0x%lx",
>> vma->vm_pgoff);
>> + return -EINVAL;
>> + }
>> +
>> + if (vma_pages(vma) != 1) {
>> + XDNA_ERR(xdna, "can only map one page, got %ld",
>> vma_pages(vma));
>> + return -EINVAL;
>> + }
>> +
>> + res_start = pci_resource_start(pdev,
>> xdna->dev_info->doorbell_bar) + npriv->doorbell_off;
>> + pfn = PHYS_PFN(res_start) + vma->vm_pgoff;
>> + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
>> + vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
>> + ret = io_remap_pfn_range(vma, vma->vm_start,
>> + pfn,
>> + PAGE_SIZE,
>> + vma->vm_page_prot);
>> +
>> + XDNA_DBG(xdna, "doorbell ret %d", ret);
>> + return ret;
>> +}
>> +
>> static int aie4_pf_init(struct amdxdna_dev *xdna)
>> {
>> int ret;
>> @@ -547,4 +579,6 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
>> .fini = aie4_vf_fini,
>> .hwctx_init = aie4_hwctx_init,
>> .hwctx_fini = aie4_hwctx_fini,
>> + .mmap = aie4_doorbell_mmap,
>> + .cmd_wait = aie4_cmd_wait,
>> };
>> diff --git a/drivers/accel/amdxdna/aie4_pci.h
>> b/drivers/accel/amdxdna/aie4_pci.h
>> index 6103007e6d2f..b69489acd53d 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.h
>> +++ b/drivers/accel/amdxdna/aie4_pci.h
>> @@ -36,6 +36,7 @@ struct amdxdna_dev_priv {
>> u32 mbox_bar;
>> u32 mbox_rbuf_bar;
>> u64 mbox_info_off;
>> + u32 doorbell_off;
>> struct aie_bar_off_pair psp_regs_off[PSP_MAX_REGS];
>> struct aie_bar_off_pair smu_regs_off[SMU_MAX_REGS];
>> @@ -60,6 +61,8 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
>> /* aie4_ctx.c */
>> int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
>> void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
>> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
>> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32
>> vm_pgoff);
>> /* aie4_sriov.c */
>> #if IS_ENABLED(CONFIG_PCI_IOV)
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c
>> b/drivers/accel/amdxdna/amdxdna_ctx.c
>> index b5ad60d4b734..b79229a63af3 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>> @@ -627,3 +627,37 @@ int amdxdna_drm_submit_cmd_ioctl(struct
>> drm_device *dev, void *data, struct drm_
>> XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
>> return -EINVAL;
>> }
>> +
>> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *filp)
>> +{
>> + struct amdxdna_client *client = filp->driver_priv;
>> + struct amdxdna_dev *xdna = to_xdna_dev(dev);
>> + struct amdxdna_drm_wait_cmd *args = data;
>> + struct amdxdna_hwctx *hwctx;
>> + int ret, idx;
>> +
>> + XDNA_DBG(xdna, "PID %d ctx %d timeout set %d ms for cmd %llu",
>> + client->pid, args->hwctx, args->timeout, args->seq);
>> +
>> + if (!xdna->dev_info->ops->cmd_wait)
>> + return -EOPNOTSUPP;
>> +
>> + idx = srcu_read_lock(&client->hwctx_srcu);
>> + hwctx = xa_load(&client->hwctx_xa, args->hwctx);
>> + if (!hwctx) {
>> + XDNA_DBG(xdna, "PID %d failed to get ctx %d", client->pid,
>> args->hwctx);
>> + ret = -EINVAL;
>> + goto unlock_ctx_srcu;
>> + }
>> +
>> + ret = xdna->dev_info->ops->cmd_wait(hwctx, args->seq,
>> args->timeout);
>> +
>> + XDNA_DBG(xdna, "PID %d ctx %d cmd %lld wait finished, ret %d",
>> + client->pid, args->hwctx, args->seq, ret);
>> +
>> + trace_amdxdna_debug_point(current->comm, args->seq, "job
>> returned to user");
>> +
>> +unlock_ctx_srcu:
>> + srcu_read_unlock(&client->hwctx_srcu, idx);
>> + return ret;
>> +}
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h
>> b/drivers/accel/amdxdna/amdxdna_ctx.h
>> index c5622718b4d5..6e3c6371a088 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>> @@ -211,12 +211,10 @@ int amdxdna_cmd_submit(struct amdxdna_client
>> *client,
>> u32 *arg_bo_hdls, u32 arg_bo_cnt,
>> u32 hwctx_hdl, u64 *seq);
>> -int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
>> - u64 seq, u32 timeout);
>> -
>> int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void
>> *data, struct drm_file *filp);
>> int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void
>> *data, struct drm_file *filp);
>> int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void
>> *data, struct drm_file *filp);
>> int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void
>> *data, struct drm_file *filp);
>> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *filp);
>> #endif /* _AMDXDNA_CTX_H_ */
>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c
>> b/drivers/accel/amdxdna/amdxdna_gem.c
>> index ebfc472aa9e7..319d2064fafa 100644
>> --- a/drivers/accel/amdxdna/amdxdna_gem.c
>> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
>> @@ -212,7 +212,8 @@ static bool amdxdna_hmm_invalidate(struct
>> mmu_interval_notifier *mni,
>> mmu_interval_set_seq(&mapp->notifier, cur_seq);
>> up_write(&xdna->notifier_lock);
>> - xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
>> + if (xdna->dev_info->ops->hmm_invalidate)
>> + xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
>> if (range->event == MMU_NOTIFY_UNMAP) {
>> down_write(&xdna->notifier_lock);
>> @@ -295,7 +296,7 @@ static int amdxdna_hmm_register(struct
>> amdxdna_gem_obj *abo,
>> u32 nr_pages;
>> int ret;
>> - if (!xdna->dev_info->ops->hmm_invalidate)
>> + if (!amdxdna_pasid_on(abo->client))
>> return 0;
>> mapp = kzalloc_obj(*mapp);
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> index 39ad081ac082..c0d00db25cde 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -224,6 +224,21 @@ static int amdxdna_drm_set_state_ioctl(struct
>> drm_device *dev, void *data, struc
>> return ret;
>> }
>> +static int amdxdna_drm_gem_mmap(struct file *filp, struct
>> vm_area_struct *vma)
>> +{
>> + struct drm_file *drm_filp = filp->private_data;
>> + struct amdxdna_client *client = drm_filp->driver_priv;
>> + struct amdxdna_dev *xdna = client->xdna;
>> +
>> + if (likely(vma->vm_pgoff >= DRM_FILE_PAGE_OFFSET_START))
>> + return drm_gem_mmap(filp, vma);
>> +
>> + if (!xdna->dev_info->ops->mmap)
>> + return -EOPNOTSUPP;
>> +
>> + return xdna->dev_info->ops->mmap(client, vma);
>> +}
>> +
>> static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
>> /* Context */
>> DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX,
>> amdxdna_drm_create_hwctx_ioctl, 0),
>> @@ -235,6 +250,7 @@ static const struct drm_ioctl_desc
>> amdxdna_drm_ioctls[] = {
>> DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
>> /* Execution */
>> DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD,
>> amdxdna_drm_submit_cmd_ioctl, 0),
>> + DRM_IOCTL_DEF_DRV(AMDXDNA_WAIT_CMD, amdxdna_drm_wait_cmd_ioctl, 0),
>> /* AIE hardware */
>> DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl,
>> 0),
>> DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY,
>> amdxdna_drm_get_array_ioctl, 0),
>> @@ -281,7 +297,7 @@ static const struct file_operations amdxdna_fops = {
>> .poll = drm_poll,
>> .read = drm_read,
>> .llseek = noop_llseek,
>> - .mmap = drm_gem_mmap,
>> + .mmap = amdxdna_drm_gem_mmap,
>> .show_fdinfo = drm_show_fdinfo,
>> .fop_flags = FOP_UNSIGNED_OFFSET,
>> };
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> index caed11c09e55..471b72299aee 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> @@ -56,12 +56,14 @@ struct amdxdna_dev_ops {
>> int (*resume)(struct amdxdna_dev *xdna);
>> int (*suspend)(struct amdxdna_dev *xdna);
>> int (*sriov_configure)(struct amdxdna_dev *xdna, int num_vfs);
>> + int (*mmap)(struct amdxdna_client *client, struct vm_area_struct
>> *vma);
>> int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
>> void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
>> int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64
>> value, void *buf, u32 size);
>> int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32
>> debug_bo_hdl);
>> void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned
>> long cur_seq);
>> int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct
>> amdxdna_sched_job *job, u64 *seq);
>> + int (*cmd_wait)(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
>> int (*get_aie_info)(struct amdxdna_client *client, struct
>> amdxdna_drm_get_info *args);
>> int (*set_aie_state)(struct amdxdna_client *client, struct
>> amdxdna_drm_set_state *args);
>> int (*get_array)(struct amdxdna_client *client, struct
>> amdxdna_drm_get_array *args);
>> @@ -85,6 +87,7 @@ struct amdxdna_dev_info {
>> int sram_bar;
>> int psp_bar;
>> int smu_bar;
>> + int doorbell_bar;
>> int device_type;
>> int first_col;
>> u32 dev_mem_buf_shift;
>> diff --git a/drivers/accel/amdxdna/npu3_regs.c
>> b/drivers/accel/amdxdna/npu3_regs.c
>> index 6d5da779232b..d76b2e99c308 100644
>> --- a/drivers/accel/amdxdna/npu3_regs.c
>> +++ b/drivers/accel/amdxdna/npu3_regs.c
>> @@ -14,6 +14,9 @@
>> #define NPU3_MBOX_BUFFER_BAR 2
>> #define NPU3_MBOX_INFO_OFF 0x0
>> +#define NPU3_DOORBELL_BAR 2
>> +#define NPU3_DOORBELL_OFF 0x0
>> +
>> /* PCIe BAR Index for NPU3 */
>> #define NPU3_REG_BAR_INDEX 0
>> #define NPU3_PSP_BAR_INDEX 4
>> @@ -45,6 +48,7 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
>> .mbox_bar = NPU3_MBOX_BAR,
>> .mbox_rbuf_bar = NPU3_MBOX_BUFFER_BAR,
>> .mbox_info_off = NPU3_MBOX_INFO_OFF,
>> + .doorbell_off = NPU3_DOORBELL_OFF,
>> .psp_regs_off = {
>> DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU3_PSP,
>> MPASP_C2PMSG_123_ALT_1),
>> DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU3_PSP,
>> MPASP_C2PMSG_156_ALT_1),
>> @@ -87,6 +91,7 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
>> const struct amdxdna_dev_info dev_npu3_vf_info = {
>> .mbox_bar = NPU3_MBOX_BAR,
>> .sram_bar = NPU3_MBOX_BUFFER_BAR,
>> + .doorbell_bar = NPU3_DOORBELL_BAR,
>> .default_vbnv = "RyzenAI-npu3-vf",
>> .device_type = AMDXDNA_DEV_TYPE_UMQ,
>> .dev_priv = &npu3_dev_vf_priv,
>> diff --git a/include/uapi/drm/amdxdna_accel.h
>> b/include/uapi/drm/amdxdna_accel.h
>> index ad9b33dd7b13..51a507561df6 100644
>> --- a/include/uapi/drm/amdxdna_accel.h
>> +++ b/include/uapi/drm/amdxdna_accel.h
>> @@ -45,7 +45,8 @@ enum amdxdna_drm_ioctl_id {
>> DRM_AMDXDNA_EXEC_CMD,
>> DRM_AMDXDNA_GET_INFO,
>> DRM_AMDXDNA_SET_STATE,
>> - DRM_AMDXDNA_GET_ARRAY = 10,
>> + DRM_AMDXDNA_WAIT_CMD,
>> + DRM_AMDXDNA_GET_ARRAY,
>> };
>> /**
>> @@ -274,6 +275,21 @@ struct amdxdna_drm_exec_cmd {
>> __u64 seq;
>> };
>> +/**
>> + * struct amdxdna_drm_wait_cmd - Wait execution command.
>> + *
>> + * @hwctx: Context handle.
>> + * @timeout: timeout in ms, 0 implies infinite wait.
>> + * @seq: sequence number of the command returned by execute command.
>> + *
>> + * Wait a command specified by seq to be completed.
>> + */
>> +struct amdxdna_drm_wait_cmd {
>> + __u32 hwctx;
>> + __u32 timeout;
>> + __u64 seq;
>> +};
>> +
>> /**
>> * struct amdxdna_drm_query_aie_status - Query the status of the
>> AIE hardware
>> * @buffer: The user space buffer that will return the AIE status.
>> @@ -739,6 +755,10 @@ struct amdxdna_drm_set_power_mode {
>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \
>> struct amdxdna_drm_get_array)
>> +#define DRM_IOCTL_AMDXDNA_WAIT_CMD \
>> + DRM_IOW(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, \
>> + struct amdxdna_drm_wait_cmd)
>> +
>> #if defined(__cplusplus)
>> } /* extern c end */
>> #endif
>
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support
2026-05-06 16:11 ` Lizhi Hou
@ 2026-05-06 16:33 ` Mario Limonciello
0 siblings, 0 replies; 16+ messages in thread
From: Mario Limonciello @ 2026-05-06 16:33 UTC (permalink / raw)
To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue
On 5/6/26 11:11, Lizhi Hou wrote:
>
> On 5/5/26 13:31, Mario Limonciello wrote:
>>
>>
>> On 5/5/26 11:09, Lizhi Hou wrote:
>>> From: David Zhang <yidong.zhang@amd.com>
>>>
>>> Expose the command doorbell register to userspace on a per-hardware
>>> context basis, enabling applications to notify the firmware of pending
>>> commands via doorbell writes.
>>>
>>> Introduce DRM_IOCTL_AMDXDNA_WAIT_CMD to allow userspace to wait for
>>> completion of individual commands.
>>>
>>> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>>> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>>> Signed-off-by: David Zhang <yidong.zhang@amd.com>
>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>> Minor suggestion below.
>>
>>> ---
>>> drivers/accel/amdxdna/aie4_ctx.c | 75 +++++++++++++++++++++++++
>>> drivers/accel/amdxdna/aie4_host_queue.h | 2 +
>>> drivers/accel/amdxdna/aie4_pci.c | 34 +++++++++++
>>> drivers/accel/amdxdna/aie4_pci.h | 3 +
>>> drivers/accel/amdxdna/amdxdna_ctx.c | 34 +++++++++++
>>> drivers/accel/amdxdna/amdxdna_ctx.h | 4 +-
>>> drivers/accel/amdxdna/amdxdna_gem.c | 5 +-
>>> drivers/accel/amdxdna/amdxdna_pci_drv.c | 18 +++++-
>>> drivers/accel/amdxdna/amdxdna_pci_drv.h | 3 +
>>> drivers/accel/amdxdna/npu3_regs.c | 5 ++
>>> include/uapi/drm/amdxdna_accel.h | 22 +++++++-
>>> 11 files changed, 198 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/
>>> amdxdna/aie4_ctx.c
>>> index 84ac706d0ffb..8408b0d2696f 100644
>>> --- a/drivers/accel/amdxdna/aie4_ctx.c
>>> +++ b/drivers/accel/amdxdna/aie4_ctx.c
>>> @@ -256,3 +256,78 @@ void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
>>> aie4_hwctx_umq_fini(hwctx);
>>> kfree(hwctx->priv);
>>> }
>>> +
>>> +static inline bool valid_queue_index(u64 read, u64 write, u32 capacity)
>>> +{
>>> + return (write >= read) && ((write - read) <= capacity);
>>> +}
>>> +
>>> +static u64 get_read_index(struct amdxdna_hwctx *hwctx)
>>> +{
>>> + u64 wi = READ_ONCE(*hwctx->priv->umq_write_index);
>>> + u64 ri = READ_ONCE(*hwctx->priv->umq_read_index);
>>> + struct amdxdna_dev *xdna = hwctx->client->xdna;
>>> +
>>> + /*
>>> + * CERT cannot update read index as uint64 atomically. Driver
>>> may read
>>> + * half-updated read index when it has bits in high 32bit. In
>>> case read
>>> + * index is not valid, wait for some time and retry once. It should
>>> + * allow CERT to complete the read index update.
>>> + */
>>> + if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
>>> + XDNA_WARN(xdna, "Invalid index, ri %llu, wi %llu", ri, wi);
>>> + usleep_range(100, 200);
>>> + ri = READ_ONCE(*hwctx->priv->umq_read_index);
>>> + if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
>>> + XDNA_ERR(xdna, "Invalid index after retry, ri %llu, wi
>>> %llu", ri, wi);
>>> + ri = 0;
>>> + }
>>> + }
>>> +
>>> + return ri;
>>> +}
>>> +
>>> +static inline bool check_cmd_done(struct amdxdna_hwctx *hwctx, u64 seq)
>>> +{
>>> + u64 read_idx = get_read_index(hwctx);
>>> +
>>> + return read_idx > seq;
>>> +}
>>> +
>>> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
>>> +{
>>> + unsigned long wait_jifs = MAX_SCHEDULE_TIMEOUT;
>>> + struct amdxdna_hwctx_priv *priv = hwctx->priv;
>>> + struct cert_comp *cert_comp = priv->cert_comp;
>>> + long ret;
>>
>> Not sure I see the point in making ret a long.
>> wait_event_interruptible_timeout() returns 0 or 1.
>
> Other than 0 or 1, wait_event_interruptible_timeout() can also return
> the remaining jiffies and -ERESTARTSYS
>
Ah thanks.
> Lizhi
>
>>
>> bool val;
>> val = wait_event_interruptible_timeout()
>> return val ? 0 : -ETIME;
>>
>>
>>> +
>>> + if (timeout)
>>> + wait_jifs = msecs_to_jiffies(timeout);
>>> +
>>> + ret = wait_event_interruptible_timeout(cert_comp->waitq,
>>> + (check_cmd_done(hwctx, seq)),
>>> + wait_jifs);
>>> +
>>> + if (!ret)
>>> + ret = -ETIME;
>>> +
>>> + return ret <= 0 ? ret : 0;
>>> +}
>>> +
>>> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32
>>> vm_pgoff)
>>> +{
>>> + struct amdxdna_hwctx *hwctx;
>>> + unsigned long hwctx_id;
>>> + int idx;
>>> +
>>> + idx = srcu_read_lock(&client->hwctx_srcu);
>>> + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
>>> + if (vm_pgoff == (hwctx->doorbell_offset >> PAGE_SHIFT)) {
>>> + srcu_read_unlock(&client->hwctx_srcu, idx);
>>> + return 1;
>>> + }
>>> + }
>>> + srcu_read_unlock(&client->hwctx_srcu, idx);
>>> +
>>> + return 0;
>>> +}
>>> diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/
>>> amdxdna/aie4_host_queue.h
>>> index eb6a38dfb53e..1b33eda3f727 100644
>>> --- a/drivers/accel/amdxdna/aie4_host_queue.h
>>> +++ b/drivers/accel/amdxdna/aie4_host_queue.h
>>> @@ -8,6 +8,8 @@
>>> #include <linux/types.h>
>>> +#define CTX_MAX_CMDS 32
>>> +
>>> struct host_queue_header {
>>> __u64 read_index;
>>> struct {
>>> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/
>>> amdxdna/aie4_pci.c
>>> index 3be9066b7178..9ff34ce57fcb 100644
>>> --- a/drivers/accel/amdxdna/aie4_pci.c
>>> +++ b/drivers/accel/amdxdna/aie4_pci.c
>>> @@ -503,6 +503,38 @@ static int aie4m_pcidev_init(struct amdxdna_dev
>>> *xdna)
>>> return 0;
>>> }
>>> +static int aie4_doorbell_mmap(struct amdxdna_client *client,
>>> struct vm_area_struct *vma)
>>> +{
>>> + struct amdxdna_dev *xdna = client->xdna;
>>> + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>>> + const struct amdxdna_dev_priv *npriv = xdna->dev_info->dev_priv;
>>> + phys_addr_t res_start;
>>> + unsigned long pfn;
>>> + int ret;
>>> +
>>> + if (!aie4_hwctx_valid_doorbell(client, vma->vm_pgoff)) {
>>> + XDNA_ERR(xdna, "Invalid doorbell page offset 0x%lx", vma-
>>> >vm_pgoff);
>>> + return -EINVAL;
>>> + }
>>> +
>>> + if (vma_pages(vma) != 1) {
>>> + XDNA_ERR(xdna, "can only map one page, got %ld",
>>> vma_pages(vma));
>>> + return -EINVAL;
>>> + }
>>> +
>>> + res_start = pci_resource_start(pdev, xdna->dev_info-
>>> >doorbell_bar) + npriv->doorbell_off;
>>> + pfn = PHYS_PFN(res_start) + vma->vm_pgoff;
>>> + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
>>> + vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
>>> + ret = io_remap_pfn_range(vma, vma->vm_start,
>>> + pfn,
>>> + PAGE_SIZE,
>>> + vma->vm_page_prot);
>>> +
>>> + XDNA_DBG(xdna, "doorbell ret %d", ret);
>>> + return ret;
>>> +}
>>> +
>>> static int aie4_pf_init(struct amdxdna_dev *xdna)
>>> {
>>> int ret;
>>> @@ -547,4 +579,6 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
>>> .fini = aie4_vf_fini,
>>> .hwctx_init = aie4_hwctx_init,
>>> .hwctx_fini = aie4_hwctx_fini,
>>> + .mmap = aie4_doorbell_mmap,
>>> + .cmd_wait = aie4_cmd_wait,
>>> };
>>> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/
>>> amdxdna/aie4_pci.h
>>> index 6103007e6d2f..b69489acd53d 100644
>>> --- a/drivers/accel/amdxdna/aie4_pci.h
>>> +++ b/drivers/accel/amdxdna/aie4_pci.h
>>> @@ -36,6 +36,7 @@ struct amdxdna_dev_priv {
>>> u32 mbox_bar;
>>> u32 mbox_rbuf_bar;
>>> u64 mbox_info_off;
>>> + u32 doorbell_off;
>>> struct aie_bar_off_pair psp_regs_off[PSP_MAX_REGS];
>>> struct aie_bar_off_pair smu_regs_off[SMU_MAX_REGS];
>>> @@ -60,6 +61,8 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
>>> /* aie4_ctx.c */
>>> int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
>>> void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
>>> +int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
>>> +int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32
>>> vm_pgoff);
>>> /* aie4_sriov.c */
>>> #if IS_ENABLED(CONFIG_PCI_IOV)
>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/
>>> amdxdna/amdxdna_ctx.c
>>> index b5ad60d4b734..b79229a63af3 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>> @@ -627,3 +627,37 @@ int amdxdna_drm_submit_cmd_ioctl(struct
>>> drm_device *dev, void *data, struct drm_
>>> XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
>>> return -EINVAL;
>>> }
>>> +
>>> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data,
>>> struct drm_file *filp)
>>> +{
>>> + struct amdxdna_client *client = filp->driver_priv;
>>> + struct amdxdna_dev *xdna = to_xdna_dev(dev);
>>> + struct amdxdna_drm_wait_cmd *args = data;
>>> + struct amdxdna_hwctx *hwctx;
>>> + int ret, idx;
>>> +
>>> + XDNA_DBG(xdna, "PID %d ctx %d timeout set %d ms for cmd %llu",
>>> + client->pid, args->hwctx, args->timeout, args->seq);
>>> +
>>> + if (!xdna->dev_info->ops->cmd_wait)
>>> + return -EOPNOTSUPP;
>>> +
>>> + idx = srcu_read_lock(&client->hwctx_srcu);
>>> + hwctx = xa_load(&client->hwctx_xa, args->hwctx);
>>> + if (!hwctx) {
>>> + XDNA_DBG(xdna, "PID %d failed to get ctx %d", client->pid,
>>> args->hwctx);
>>> + ret = -EINVAL;
>>> + goto unlock_ctx_srcu;
>>> + }
>>> +
>>> + ret = xdna->dev_info->ops->cmd_wait(hwctx, args->seq, args-
>>> >timeout);
>>> +
>>> + XDNA_DBG(xdna, "PID %d ctx %d cmd %lld wait finished, ret %d",
>>> + client->pid, args->hwctx, args->seq, ret);
>>> +
>>> + trace_amdxdna_debug_point(current->comm, args->seq, "job
>>> returned to user");
>>> +
>>> +unlock_ctx_srcu:
>>> + srcu_read_unlock(&client->hwctx_srcu, idx);
>>> + return ret;
>>> +}
>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/
>>> amdxdna/amdxdna_ctx.h
>>> index c5622718b4d5..6e3c6371a088 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>>> @@ -211,12 +211,10 @@ int amdxdna_cmd_submit(struct amdxdna_client
>>> *client,
>>> u32 *arg_bo_hdls, u32 arg_bo_cnt,
>>> u32 hwctx_hdl, u64 *seq);
>>> -int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
>>> - u64 seq, u32 timeout);
>>> -
>>> int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void
>>> *data, struct drm_file *filp);
>>> int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void
>>> *data, struct drm_file *filp);
>>> int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void
>>> *data, struct drm_file *filp);
>>> int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void
>>> *data, struct drm_file *filp);
>>> +int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data,
>>> struct drm_file *filp);
>>> #endif /* _AMDXDNA_CTX_H_ */
>>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/
>>> amdxdna/amdxdna_gem.c
>>> index ebfc472aa9e7..319d2064fafa 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_gem.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
>>> @@ -212,7 +212,8 @@ static bool amdxdna_hmm_invalidate(struct
>>> mmu_interval_notifier *mni,
>>> mmu_interval_set_seq(&mapp->notifier, cur_seq);
>>> up_write(&xdna->notifier_lock);
>>> - xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
>>> + if (xdna->dev_info->ops->hmm_invalidate)
>>> + xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
>>> if (range->event == MMU_NOTIFY_UNMAP) {
>>> down_write(&xdna->notifier_lock);
>>> @@ -295,7 +296,7 @@ static int amdxdna_hmm_register(struct
>>> amdxdna_gem_obj *abo,
>>> u32 nr_pages;
>>> int ret;
>>> - if (!xdna->dev_info->ops->hmm_invalidate)
>>> + if (!amdxdna_pasid_on(abo->client))
>>> return 0;
>>> mapp = kzalloc_obj(*mapp);
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/
>>> amdxdna/amdxdna_pci_drv.c
>>> index 39ad081ac082..c0d00db25cde 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> @@ -224,6 +224,21 @@ static int amdxdna_drm_set_state_ioctl(struct
>>> drm_device *dev, void *data, struc
>>> return ret;
>>> }
>>> +static int amdxdna_drm_gem_mmap(struct file *filp, struct
>>> vm_area_struct *vma)
>>> +{
>>> + struct drm_file *drm_filp = filp->private_data;
>>> + struct amdxdna_client *client = drm_filp->driver_priv;
>>> + struct amdxdna_dev *xdna = client->xdna;
>>> +
>>> + if (likely(vma->vm_pgoff >= DRM_FILE_PAGE_OFFSET_START))
>>> + return drm_gem_mmap(filp, vma);
>>> +
>>> + if (!xdna->dev_info->ops->mmap)
>>> + return -EOPNOTSUPP;
>>> +
>>> + return xdna->dev_info->ops->mmap(client, vma);
>>> +}
>>> +
>>> static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
>>> /* Context */
>>> DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX,
>>> amdxdna_drm_create_hwctx_ioctl, 0),
>>> @@ -235,6 +250,7 @@ static const struct drm_ioctl_desc
>>> amdxdna_drm_ioctls[] = {
>>> DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
>>> /* Execution */
>>> DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD,
>>> amdxdna_drm_submit_cmd_ioctl, 0),
>>> + DRM_IOCTL_DEF_DRV(AMDXDNA_WAIT_CMD, amdxdna_drm_wait_cmd_ioctl, 0),
>>> /* AIE hardware */
>>> DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl,
>>> 0),
>>> DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY,
>>> amdxdna_drm_get_array_ioctl, 0),
>>> @@ -281,7 +297,7 @@ static const struct file_operations amdxdna_fops = {
>>> .poll = drm_poll,
>>> .read = drm_read,
>>> .llseek = noop_llseek,
>>> - .mmap = drm_gem_mmap,
>>> + .mmap = amdxdna_drm_gem_mmap,
>>> .show_fdinfo = drm_show_fdinfo,
>>> .fop_flags = FOP_UNSIGNED_OFFSET,
>>> };
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/
>>> amdxdna/amdxdna_pci_drv.h
>>> index caed11c09e55..471b72299aee 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>> @@ -56,12 +56,14 @@ struct amdxdna_dev_ops {
>>> int (*resume)(struct amdxdna_dev *xdna);
>>> int (*suspend)(struct amdxdna_dev *xdna);
>>> int (*sriov_configure)(struct amdxdna_dev *xdna, int num_vfs);
>>> + int (*mmap)(struct amdxdna_client *client, struct vm_area_struct
>>> *vma);
>>> int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
>>> void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
>>> int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64
>>> value, void *buf, u32 size);
>>> int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32
>>> debug_bo_hdl);
>>> void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned
>>> long cur_seq);
>>> int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct
>>> amdxdna_sched_job *job, u64 *seq);
>>> + int (*cmd_wait)(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
>>> int (*get_aie_info)(struct amdxdna_client *client, struct
>>> amdxdna_drm_get_info *args);
>>> int (*set_aie_state)(struct amdxdna_client *client, struct
>>> amdxdna_drm_set_state *args);
>>> int (*get_array)(struct amdxdna_client *client, struct
>>> amdxdna_drm_get_array *args);
>>> @@ -85,6 +87,7 @@ struct amdxdna_dev_info {
>>> int sram_bar;
>>> int psp_bar;
>>> int smu_bar;
>>> + int doorbell_bar;
>>> int device_type;
>>> int first_col;
>>> u32 dev_mem_buf_shift;
>>> diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/
>>> amdxdna/npu3_regs.c
>>> index 6d5da779232b..d76b2e99c308 100644
>>> --- a/drivers/accel/amdxdna/npu3_regs.c
>>> +++ b/drivers/accel/amdxdna/npu3_regs.c
>>> @@ -14,6 +14,9 @@
>>> #define NPU3_MBOX_BUFFER_BAR 2
>>> #define NPU3_MBOX_INFO_OFF 0x0
>>> +#define NPU3_DOORBELL_BAR 2
>>> +#define NPU3_DOORBELL_OFF 0x0
>>> +
>>> /* PCIe BAR Index for NPU3 */
>>> #define NPU3_REG_BAR_INDEX 0
>>> #define NPU3_PSP_BAR_INDEX 4
>>> @@ -45,6 +48,7 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
>>> .mbox_bar = NPU3_MBOX_BAR,
>>> .mbox_rbuf_bar = NPU3_MBOX_BUFFER_BAR,
>>> .mbox_info_off = NPU3_MBOX_INFO_OFF,
>>> + .doorbell_off = NPU3_DOORBELL_OFF,
>>> .psp_regs_off = {
>>> DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU3_PSP,
>>> MPASP_C2PMSG_123_ALT_1),
>>> DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU3_PSP,
>>> MPASP_C2PMSG_156_ALT_1),
>>> @@ -87,6 +91,7 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
>>> const struct amdxdna_dev_info dev_npu3_vf_info = {
>>> .mbox_bar = NPU3_MBOX_BAR,
>>> .sram_bar = NPU3_MBOX_BUFFER_BAR,
>>> + .doorbell_bar = NPU3_DOORBELL_BAR,
>>> .default_vbnv = "RyzenAI-npu3-vf",
>>> .device_type = AMDXDNA_DEV_TYPE_UMQ,
>>> .dev_priv = &npu3_dev_vf_priv,
>>> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/
>>> amdxdna_accel.h
>>> index ad9b33dd7b13..51a507561df6 100644
>>> --- a/include/uapi/drm/amdxdna_accel.h
>>> +++ b/include/uapi/drm/amdxdna_accel.h
>>> @@ -45,7 +45,8 @@ enum amdxdna_drm_ioctl_id {
>>> DRM_AMDXDNA_EXEC_CMD,
>>> DRM_AMDXDNA_GET_INFO,
>>> DRM_AMDXDNA_SET_STATE,
>>> - DRM_AMDXDNA_GET_ARRAY = 10,
>>> + DRM_AMDXDNA_WAIT_CMD,
>>> + DRM_AMDXDNA_GET_ARRAY,
>>> };
>>> /**
>>> @@ -274,6 +275,21 @@ struct amdxdna_drm_exec_cmd {
>>> __u64 seq;
>>> };
>>> +/**
>>> + * struct amdxdna_drm_wait_cmd - Wait execution command.
>>> + *
>>> + * @hwctx: Context handle.
>>> + * @timeout: timeout in ms, 0 implies infinite wait.
>>> + * @seq: sequence number of the command returned by execute command.
>>> + *
>>> + * Wait a command specified by seq to be completed.
>>> + */
>>> +struct amdxdna_drm_wait_cmd {
>>> + __u32 hwctx;
>>> + __u32 timeout;
>>> + __u64 seq;
>>> +};
>>> +
>>> /**
>>> * struct amdxdna_drm_query_aie_status - Query the status of the
>>> AIE hardware
>>> * @buffer: The user space buffer that will return the AIE status.
>>> @@ -739,6 +755,10 @@ struct amdxdna_drm_set_power_mode {
>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \
>>> struct amdxdna_drm_get_array)
>>> +#define DRM_IOCTL_AMDXDNA_WAIT_CMD \
>>> + DRM_IOW(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, \
>>> + struct amdxdna_drm_wait_cmd)
>>> +
>>> #if defined(__cplusplus)
>>> } /* extern c end */
>>> #endif
>>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support
2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
` (3 preceding siblings ...)
2026-05-05 16:09 ` [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
2026-05-05 17:14 ` Mario Limonciello
2026-05-05 16:09 ` [PATCH V1 6/6] accel/amdxdna: Add AIE4 work buffer initialization Lizhi Hou
5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue, Lizhi Hou
From: David Zhang <yidong.zhang@amd.com>
Add support for querying device metadata on AIE4 via a mailbox message.
Refactor aie2_get_aie_metadata() into a common helper by moving it to
aie.c and renaming it to amdxdna_get_metadata(), allowing both AIE2
and AIE4 to reuse the implementation.
Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
Signed-off-by: David Zhang <yidong.zhang@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie.c | 45 ++++++++++++++++++++++
drivers/accel/amdxdna/aie.h | 27 ++++++++++++++
drivers/accel/amdxdna/aie2_ctx.c | 4 +-
drivers/accel/amdxdna/aie2_message.c | 2 +-
drivers/accel/amdxdna/aie2_pci.c | 54 ++-------------------------
drivers/accel/amdxdna/aie2_pci.h | 24 ------------
drivers/accel/amdxdna/aie4_message.c | 37 ++++++++++++++++++
drivers/accel/amdxdna/aie4_msg_priv.h | 34 +++++++++++++++++
drivers/accel/amdxdna/aie4_pci.c | 30 +++++++++++++++
drivers/accel/amdxdna/aie4_pci.h | 1 +
10 files changed, 181 insertions(+), 77 deletions(-)
diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c
index 66849ba9026a..a31051cc1ec8 100644
--- a/drivers/accel/amdxdna/aie.c
+++ b/drivers/accel/amdxdna/aie.c
@@ -117,3 +117,48 @@ void amdxdna_vbnv_init(struct amdxdna_dev *xdna)
amdxdna_update_vbnv(xdna, info->rev_vbnv_tbl, rev);
}
+
+int amdxdna_get_metadata(struct aie_device *aie,
+ struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_query_aie_metadata *meta;
+ int ret = 0;
+ u32 buf_sz;
+
+ meta = kzalloc_obj(*meta);
+ if (!meta)
+ return -ENOMEM;
+
+ meta->col_size = aie->metadata.size;
+ meta->cols = aie->metadata.cols;
+ meta->rows = aie->metadata.rows;
+
+ meta->version.major = aie->metadata.version.major;
+ meta->version.minor = aie->metadata.version.minor;
+
+ meta->core.row_count = aie->metadata.core.row_count;
+ meta->core.row_start = aie->metadata.core.row_start;
+ meta->core.dma_channel_count = aie->metadata.core.dma_channel_count;
+ meta->core.lock_count = aie->metadata.core.lock_count;
+ meta->core.event_reg_count = aie->metadata.core.event_reg_count;
+
+ meta->mem.row_count = aie->metadata.mem.row_count;
+ meta->mem.row_start = aie->metadata.mem.row_start;
+ meta->mem.dma_channel_count = aie->metadata.mem.dma_channel_count;
+ meta->mem.lock_count = aie->metadata.mem.lock_count;
+ meta->mem.event_reg_count = aie->metadata.mem.event_reg_count;
+
+ meta->shim.row_count = aie->metadata.shim.row_count;
+ meta->shim.row_start = aie->metadata.shim.row_start;
+ meta->shim.dma_channel_count = aie->metadata.shim.dma_channel_count;
+ meta->shim.lock_count = aie->metadata.shim.lock_count;
+ meta->shim.event_reg_count = aie->metadata.shim.event_reg_count;
+
+ buf_sz = min(args->buffer_size, sizeof(*meta));
+ if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
+ ret = -EFAULT;
+
+ kfree(meta);
+ return ret;
+}
diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
index 7a68b114f235..4bb3719ee0c0 100644
--- a/drivers/accel/amdxdna/aie.h
+++ b/drivers/accel/amdxdna/aie.h
@@ -14,6 +14,29 @@
struct psp_device;
struct smu_device;
+struct aie_version {
+ u16 major;
+ u16 minor;
+};
+
+struct aie_tile_metadata {
+ u16 row_count;
+ u16 row_start;
+ u16 dma_channel_count;
+ u16 lock_count;
+ u16 event_reg_count;
+};
+
+struct aie_metadata {
+ u32 size;
+ u16 cols;
+ u16 rows;
+ struct aie_version version;
+ struct aie_tile_metadata core;
+ struct aie_tile_metadata mem;
+ struct aie_tile_metadata shim;
+};
+
struct aie_device {
struct amdxdna_dev *xdna;
struct mailbox_channel *mgmt_chann;
@@ -26,6 +49,8 @@ struct aie_device {
struct psp_device *psp_hdl;
struct smu_device *smu_hdl;
+
+ struct aie_metadata metadata;
};
#define DECLARE_AIE_MSG(name, op) \
@@ -94,6 +119,8 @@ void aie_destroy_chann(struct aie_device *aie, struct mailbox_channel **chann);
int aie_send_mgmt_msg_wait(struct aie_device *aie, struct xdna_mailbox_msg *msg);
int aie_check_protocol(struct aie_device *aie, u32 fw_major, u32 fw_minor);
void amdxdna_vbnv_init(struct amdxdna_dev *xdna);
+int amdxdna_get_metadata(struct aie_device *aie, struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args);
/* aie_psp.c */
struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf);
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 139825ac8515..7d6094aefb6f 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -489,12 +489,12 @@ static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
}
ndev = xdna->dev_handle;
- if (unlikely(!ndev->metadata.core.row_count)) {
+ if (unlikely(!ndev->aie.metadata.core.row_count)) {
XDNA_WARN(xdna, "Core tile row count is zero");
return -EINVAL;
}
- hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
+ hwctx->num_col = hwctx->num_tiles / ndev->aie.metadata.core.row_count;
if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
return -EINVAL;
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 6e98af7b74db..f555ffecea6f 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -375,7 +375,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
u8 *buff_addr;
int ret;
- buf_sz = ndev->metadata.cols * ndev->metadata.size;
+ buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size;
buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
if (IS_ERR(buff_addr))
return PTR_ERR(buff_addr);
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index f0ddb843eb21..6c8a0f70b73d 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -219,13 +219,13 @@ static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
return ret;
}
- ret = aie2_query_aie_metadata(ndev, &ndev->metadata);
+ ret = aie2_query_aie_metadata(ndev, &ndev->aie.metadata);
if (ret) {
XDNA_ERR(ndev->aie.xdna, "Query AIE metadata failed");
return ret;
}
- ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
+ ndev->total_col = min(aie2_max_col, ndev->aie.metadata.cols);
return 0;
}
@@ -658,53 +658,6 @@ static int aie2_get_aie_status(struct amdxdna_client *client,
return 0;
}
-static int aie2_get_aie_metadata(struct amdxdna_client *client,
- struct amdxdna_drm_get_info *args)
-{
- struct amdxdna_drm_query_aie_metadata *meta;
- struct amdxdna_dev *xdna = client->xdna;
- struct amdxdna_dev_hdl *ndev;
- int ret = 0;
- u32 buf_sz;
-
- ndev = xdna->dev_handle;
- meta = kzalloc_obj(*meta);
- if (!meta)
- return -ENOMEM;
-
- meta->col_size = ndev->metadata.size;
- meta->cols = ndev->metadata.cols;
- meta->rows = ndev->metadata.rows;
-
- meta->version.major = ndev->metadata.version.major;
- meta->version.minor = ndev->metadata.version.minor;
-
- meta->core.row_count = ndev->metadata.core.row_count;
- meta->core.row_start = ndev->metadata.core.row_start;
- meta->core.dma_channel_count = ndev->metadata.core.dma_channel_count;
- meta->core.lock_count = ndev->metadata.core.lock_count;
- meta->core.event_reg_count = ndev->metadata.core.event_reg_count;
-
- meta->mem.row_count = ndev->metadata.mem.row_count;
- meta->mem.row_start = ndev->metadata.mem.row_start;
- meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count;
- meta->mem.lock_count = ndev->metadata.mem.lock_count;
- meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count;
-
- meta->shim.row_count = ndev->metadata.shim.row_count;
- meta->shim.row_start = ndev->metadata.shim.row_start;
- meta->shim.dma_channel_count = ndev->metadata.shim.dma_channel_count;
- meta->shim.lock_count = ndev->metadata.shim.lock_count;
- meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count;
-
- buf_sz = min(args->buffer_size, sizeof(*meta));
- if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
- ret = -EFAULT;
-
- kfree(meta);
- return ret;
-}
-
static int aie2_get_aie_version(struct amdxdna_client *client,
struct amdxdna_drm_get_info *args)
{
@@ -1039,6 +992,7 @@ static int aie2_get_preempt_state(struct amdxdna_client *client,
static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
{
struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
int ret, idx;
if (!drm_dev_enter(&xdna->ddev, &idx))
@@ -1053,7 +1007,7 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
ret = aie2_get_aie_status(client, args);
break;
case DRM_AMDXDNA_QUERY_AIE_METADATA:
- ret = aie2_get_aie_metadata(client, args);
+ ret = amdxdna_get_metadata(&ndev->aie, client, args);
break;
case DRM_AMDXDNA_QUERY_AIE_VERSION:
ret = aie2_get_aie_version(client, args);
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index f12073175676..c884fed610f9 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -77,29 +77,6 @@ struct amdxdna_fw_ver;
struct amdxdna_hwctx;
struct amdxdna_sched_job;
-struct aie_version {
- u16 major;
- u16 minor;
-};
-
-struct aie_tile_metadata {
- u16 row_count;
- u16 row_start;
- u16 dma_channel_count;
- u16 lock_count;
- u16 event_reg_count;
-};
-
-struct aie_metadata {
- u32 size;
- u16 cols;
- u16 rows;
- struct aie_version version;
- struct aie_tile_metadata core;
- struct aie_tile_metadata mem;
- struct aie_tile_metadata shim;
-};
-
enum rt_config_category {
AIE2_RT_CFG_INIT,
AIE2_RT_CFG_CLK_GATING,
@@ -178,7 +155,6 @@ struct amdxdna_dev_hdl {
u32 total_col;
struct aie_version version;
- struct aie_metadata metadata;
struct aie2_exec_msg_ops *exec_msg_ops;
/* power management and clock*/
diff --git a/drivers/accel/amdxdna/aie4_message.c b/drivers/accel/amdxdna/aie4_message.c
index d621dd32ac40..ac89a9a842b2 100644
--- a/drivers/accel/amdxdna/aie4_message.c
+++ b/drivers/accel/amdxdna/aie4_message.c
@@ -25,3 +25,40 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev)
return ret;
}
+
+int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata)
+{
+ DECLARE_AIE_MSG(aie4_msg_aie4_tile_info, AIE4_MSG_OP_AIE_TILE_INFO);
+ int ret;
+
+ ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+ if (ret)
+ return ret;
+
+ metadata->size = resp.info.size;
+ metadata->cols = resp.info.cols;
+ metadata->rows = resp.info.rows;
+
+ metadata->version.major = resp.info.major;
+ metadata->version.minor = resp.info.minor;
+
+ metadata->core.row_count = resp.info.core_rows;
+ metadata->core.row_start = resp.info.core_row_start;
+ metadata->core.dma_channel_count = resp.info.core_dma_channels;
+ metadata->core.lock_count = resp.info.core_locks;
+ metadata->core.event_reg_count = resp.info.core_events;
+
+ metadata->mem.row_count = resp.info.mem_rows;
+ metadata->mem.row_start = resp.info.mem_row_start;
+ metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
+ metadata->mem.lock_count = resp.info.mem_locks;
+ metadata->mem.event_reg_count = resp.info.mem_events;
+
+ metadata->shim.row_count = resp.info.shim_rows;
+ metadata->shim.row_start = resp.info.shim_row_start;
+ metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
+ metadata->shim.lock_count = resp.info.shim_locks;
+ metadata->shim.event_reg_count = resp.info.shim_events;
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
index 7faa01ca3436..69e220e40900 100644
--- a/drivers/accel/amdxdna/aie4_msg_priv.h
+++ b/drivers/accel/amdxdna/aie4_msg_priv.h
@@ -18,6 +18,7 @@ enum aie4_msg_opcode {
AIE4_MSG_OP_DESTROY_PARTITION = 0x30002,
AIE4_MSG_OP_CREATE_HW_CONTEXT = 0x30003,
AIE4_MSG_OP_DESTROY_HW_CONTEXT = 0x30004,
+ AIE4_MSG_OP_AIE_TILE_INFO = 0x30006,
};
enum aie4_msg_status {
@@ -96,4 +97,37 @@ struct aie4_msg_destroy_hw_context_resp {
enum aie4_msg_status status;
} __packed;
+struct aie4_tile_info {
+ __u32 size;
+ __u16 major;
+ __u16 minor;
+ __u16 cols;
+ __u16 rows;
+ __u16 core_rows;
+ __u16 mem_rows;
+ __u16 shim_rows;
+ __u16 core_row_start;
+ __u16 mem_row_start;
+ __u16 shim_row_start;
+ __u16 core_dma_channels;
+ __u16 mem_dma_channels;
+ __u16 shim_dma_channels;
+ __u16 core_locks;
+ __u16 mem_locks;
+ __u16 shim_locks;
+ __u16 core_events;
+ __u16 mem_events;
+ __u16 shim_events;
+ __u16 resvd;
+} __packed;
+
+struct aie4_msg_aie4_tile_info_req {
+ __u32 resvd;
+} __packed;
+
+struct aie4_msg_aie4_tile_info_resp {
+ enum aie4_msg_status status;
+ struct aie4_tile_info info;
+} __packed;
+
#endif /* _AIE4_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 9ff34ce57fcb..8b5eff0e45c1 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -269,6 +269,11 @@ static void aie4_partition_fini(struct amdxdna_dev_hdl *ndev)
XDNA_ERR(xdna, "partition fini failed: %d", ret);
}
+static int aie4_query(struct amdxdna_dev_hdl *ndev)
+{
+ return aie4_query_aie_metadata(ndev, &ndev->aie.metadata);
+}
+
static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
{
int ret;
@@ -308,6 +313,10 @@ static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
if (ret)
return ret;
+ ret = aie4_query(ndev);
+ if (ret)
+ goto mailbox_fini;
+
ret = aie4_partition_init(ndev);
if (ret)
goto mailbox_fini;
@@ -535,6 +544,26 @@ static int aie4_doorbell_mmap(struct amdxdna_client *client, struct vm_area_stru
return ret;
}
+static int aie4_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+ int ret;
+
+ switch (args->param) {
+ case DRM_AMDXDNA_QUERY_AIE_METADATA:
+ ret = amdxdna_get_metadata(&ndev->aie, client, args);
+ break;
+ default:
+ XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+ ret = -EOPNOTSUPP;
+ }
+
+ XDNA_DBG(xdna, "Got param %d", args->param);
+
+ return ret;
+}
+
static int aie4_pf_init(struct amdxdna_dev *xdna)
{
int ret;
@@ -581,4 +610,5 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
.hwctx_fini = aie4_hwctx_fini,
.mmap = aie4_doorbell_mmap,
.cmd_wait = aie4_cmd_wait,
+ .get_aie_info = aie4_get_info,
};
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index b69489acd53d..1886cffc62db 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -56,6 +56,7 @@ struct amdxdna_dev_hdl {
};
/* aie4_message.c */
+int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
/* aie4_ctx.c */
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support
2026-05-05 16:09 ` [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support Lizhi Hou
@ 2026-05-05 17:14 ` Mario Limonciello
2026-05-05 18:03 ` Lizhi Hou
0 siblings, 1 reply; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 17:14 UTC (permalink / raw)
To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue
On 5/5/26 11:09, Lizhi Hou wrote:
> From: David Zhang <yidong.zhang@amd.com>
>
> Add support for querying device metadata on AIE4 via a mailbox message.
> Refactor aie2_get_aie_metadata() into a common helper by moving it to
> aie.c and renaming it to amdxdna_get_metadata(), allowing both AIE2
> and AIE4 to reuse the implementation.
>
> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
> Signed-off-by: David Zhang <yidong.zhang@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
> drivers/accel/amdxdna/aie.c | 45 ++++++++++++++++++++++
> drivers/accel/amdxdna/aie.h | 27 ++++++++++++++
> drivers/accel/amdxdna/aie2_ctx.c | 4 +-
> drivers/accel/amdxdna/aie2_message.c | 2 +-
> drivers/accel/amdxdna/aie2_pci.c | 54 ++-------------------------
> drivers/accel/amdxdna/aie2_pci.h | 24 ------------
> drivers/accel/amdxdna/aie4_message.c | 37 ++++++++++++++++++
> drivers/accel/amdxdna/aie4_msg_priv.h | 34 +++++++++++++++++
> drivers/accel/amdxdna/aie4_pci.c | 30 +++++++++++++++
> drivers/accel/amdxdna/aie4_pci.h | 1 +
> 10 files changed, 181 insertions(+), 77 deletions(-)
>
> diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c
> index 66849ba9026a..a31051cc1ec8 100644
> --- a/drivers/accel/amdxdna/aie.c
> +++ b/drivers/accel/amdxdna/aie.c
> @@ -117,3 +117,48 @@ void amdxdna_vbnv_init(struct amdxdna_dev *xdna)
>
> amdxdna_update_vbnv(xdna, info->rev_vbnv_tbl, rev);
> }
> +
> +int amdxdna_get_metadata(struct aie_device *aie,
> + struct amdxdna_client *client,
> + struct amdxdna_drm_get_info *args)
> +{
> + struct amdxdna_drm_query_aie_metadata *meta;
> + int ret = 0;
> + u32 buf_sz;
> +
> + meta = kzalloc_obj(*meta);
> + if (!meta)
> + return -ENOMEM;
> +
> + meta->col_size = aie->metadata.size;
> + meta->cols = aie->metadata.cols;
> + meta->rows = aie->metadata.rows;
> +
> + meta->version.major = aie->metadata.version.major;
> + meta->version.minor = aie->metadata.version.minor;
> +
> + meta->core.row_count = aie->metadata.core.row_count;
> + meta->core.row_start = aie->metadata.core.row_start;
> + meta->core.dma_channel_count = aie->metadata.core.dma_channel_count;
> + meta->core.lock_count = aie->metadata.core.lock_count;
> + meta->core.event_reg_count = aie->metadata.core.event_reg_count;
> +
> + meta->mem.row_count = aie->metadata.mem.row_count;
> + meta->mem.row_start = aie->metadata.mem.row_start;
> + meta->mem.dma_channel_count = aie->metadata.mem.dma_channel_count;
> + meta->mem.lock_count = aie->metadata.mem.lock_count;
> + meta->mem.event_reg_count = aie->metadata.mem.event_reg_count;
> +
> + meta->shim.row_count = aie->metadata.shim.row_count;
> + meta->shim.row_start = aie->metadata.shim.row_start;
> + meta->shim.dma_channel_count = aie->metadata.shim.dma_channel_count;
> + meta->shim.lock_count = aie->metadata.shim.lock_count;
> + meta->shim.event_reg_count = aie->metadata.shim.event_reg_count;
Looking at the code the structures for
struct amdxdna_drm_query_aie_metadata
and
struct aie_metadata
Look identical. Rather than copying every member, can you just copy
everything from aie->metadata to args->buffer directly?
That could let you save the kzalloc/kfree call.
> +
> + buf_sz = min(args->buffer_size, sizeof(*meta));
> + if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
> + ret = -EFAULT;
> +
> + kfree(meta);
> + return ret;
> +}
> diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
> index 7a68b114f235..4bb3719ee0c0 100644
> --- a/drivers/accel/amdxdna/aie.h
> +++ b/drivers/accel/amdxdna/aie.h
> @@ -14,6 +14,29 @@
> struct psp_device;
> struct smu_device;
>
> +struct aie_version {
> + u16 major;
> + u16 minor;
> +};
> +
> +struct aie_tile_metadata {
> + u16 row_count;
> + u16 row_start;
> + u16 dma_channel_count;
> + u16 lock_count;
> + u16 event_reg_count;
> +};
> +
> +struct aie_metadata {
> + u32 size;
> + u16 cols;
> + u16 rows;
> + struct aie_version version;
> + struct aie_tile_metadata core;
> + struct aie_tile_metadata mem;
> + struct aie_tile_metadata shim;
> +};
> +
> struct aie_device {
> struct amdxdna_dev *xdna;
> struct mailbox_channel *mgmt_chann;
> @@ -26,6 +49,8 @@ struct aie_device {
>
> struct psp_device *psp_hdl;
> struct smu_device *smu_hdl;
> +
> + struct aie_metadata metadata;
> };
>
> #define DECLARE_AIE_MSG(name, op) \
> @@ -94,6 +119,8 @@ void aie_destroy_chann(struct aie_device *aie, struct mailbox_channel **chann);
> int aie_send_mgmt_msg_wait(struct aie_device *aie, struct xdna_mailbox_msg *msg);
> int aie_check_protocol(struct aie_device *aie, u32 fw_major, u32 fw_minor);
> void amdxdna_vbnv_init(struct amdxdna_dev *xdna);
> +int amdxdna_get_metadata(struct aie_device *aie, struct amdxdna_client *client,
> + struct amdxdna_drm_get_info *args);
>
> /* aie_psp.c */
> struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf);
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> index 139825ac8515..7d6094aefb6f 100644
> --- a/drivers/accel/amdxdna/aie2_ctx.c
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -489,12 +489,12 @@ static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
> }
>
> ndev = xdna->dev_handle;
> - if (unlikely(!ndev->metadata.core.row_count)) {
> + if (unlikely(!ndev->aie.metadata.core.row_count)) {
> XDNA_WARN(xdna, "Core tile row count is zero");
> return -EINVAL;
> }
>
> - hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
> + hwctx->num_col = hwctx->num_tiles / ndev->aie.metadata.core.row_count;
> if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
> XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
> return -EINVAL;
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 6e98af7b74db..f555ffecea6f 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -375,7 +375,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
> u8 *buff_addr;
> int ret;
>
> - buf_sz = ndev->metadata.cols * ndev->metadata.size;
> + buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size;
> buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
> if (IS_ERR(buff_addr))
> return PTR_ERR(buff_addr);
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index f0ddb843eb21..6c8a0f70b73d 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -219,13 +219,13 @@ static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
> return ret;
> }
>
> - ret = aie2_query_aie_metadata(ndev, &ndev->metadata);
> + ret = aie2_query_aie_metadata(ndev, &ndev->aie.metadata);
> if (ret) {
> XDNA_ERR(ndev->aie.xdna, "Query AIE metadata failed");
> return ret;
> }
>
> - ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
> + ndev->total_col = min(aie2_max_col, ndev->aie.metadata.cols);
>
> return 0;
> }
> @@ -658,53 +658,6 @@ static int aie2_get_aie_status(struct amdxdna_client *client,
> return 0;
> }
>
> -static int aie2_get_aie_metadata(struct amdxdna_client *client,
> - struct amdxdna_drm_get_info *args)
> -{
> - struct amdxdna_drm_query_aie_metadata *meta;
> - struct amdxdna_dev *xdna = client->xdna;
> - struct amdxdna_dev_hdl *ndev;
> - int ret = 0;
> - u32 buf_sz;
> -
> - ndev = xdna->dev_handle;
> - meta = kzalloc_obj(*meta);
> - if (!meta)
> - return -ENOMEM;
> -
> - meta->col_size = ndev->metadata.size;
> - meta->cols = ndev->metadata.cols;
> - meta->rows = ndev->metadata.rows;
> -
> - meta->version.major = ndev->metadata.version.major;
> - meta->version.minor = ndev->metadata.version.minor;
> -
> - meta->core.row_count = ndev->metadata.core.row_count;
> - meta->core.row_start = ndev->metadata.core.row_start;
> - meta->core.dma_channel_count = ndev->metadata.core.dma_channel_count;
> - meta->core.lock_count = ndev->metadata.core.lock_count;
> - meta->core.event_reg_count = ndev->metadata.core.event_reg_count;
> -
> - meta->mem.row_count = ndev->metadata.mem.row_count;
> - meta->mem.row_start = ndev->metadata.mem.row_start;
> - meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count;
> - meta->mem.lock_count = ndev->metadata.mem.lock_count;
> - meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count;
> -
> - meta->shim.row_count = ndev->metadata.shim.row_count;
> - meta->shim.row_start = ndev->metadata.shim.row_start;
> - meta->shim.dma_channel_count = ndev->metadata.shim.dma_channel_count;
> - meta->shim.lock_count = ndev->metadata.shim.lock_count;
> - meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count;
> -
> - buf_sz = min(args->buffer_size, sizeof(*meta));
> - if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
> - ret = -EFAULT;
> -
> - kfree(meta);
> - return ret;
> -}
> -
> static int aie2_get_aie_version(struct amdxdna_client *client,
> struct amdxdna_drm_get_info *args)
> {
> @@ -1039,6 +992,7 @@ static int aie2_get_preempt_state(struct amdxdna_client *client,
> static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
> {
> struct amdxdna_dev *xdna = client->xdna;
> + struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> int ret, idx;
>
> if (!drm_dev_enter(&xdna->ddev, &idx))
> @@ -1053,7 +1007,7 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
> ret = aie2_get_aie_status(client, args);
> break;
> case DRM_AMDXDNA_QUERY_AIE_METADATA:
> - ret = aie2_get_aie_metadata(client, args);
> + ret = amdxdna_get_metadata(&ndev->aie, client, args);
> break;
> case DRM_AMDXDNA_QUERY_AIE_VERSION:
> ret = aie2_get_aie_version(client, args);
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index f12073175676..c884fed610f9 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -77,29 +77,6 @@ struct amdxdna_fw_ver;
> struct amdxdna_hwctx;
> struct amdxdna_sched_job;
>
> -struct aie_version {
> - u16 major;
> - u16 minor;
> -};
> -
> -struct aie_tile_metadata {
> - u16 row_count;
> - u16 row_start;
> - u16 dma_channel_count;
> - u16 lock_count;
> - u16 event_reg_count;
> -};
> -
> -struct aie_metadata {
> - u32 size;
> - u16 cols;
> - u16 rows;
> - struct aie_version version;
> - struct aie_tile_metadata core;
> - struct aie_tile_metadata mem;
> - struct aie_tile_metadata shim;
> -};
> -
> enum rt_config_category {
> AIE2_RT_CFG_INIT,
> AIE2_RT_CFG_CLK_GATING,
> @@ -178,7 +155,6 @@ struct amdxdna_dev_hdl {
>
> u32 total_col;
> struct aie_version version;
> - struct aie_metadata metadata;
> struct aie2_exec_msg_ops *exec_msg_ops;
>
> /* power management and clock*/
> diff --git a/drivers/accel/amdxdna/aie4_message.c b/drivers/accel/amdxdna/aie4_message.c
> index d621dd32ac40..ac89a9a842b2 100644
> --- a/drivers/accel/amdxdna/aie4_message.c
> +++ b/drivers/accel/amdxdna/aie4_message.c
> @@ -25,3 +25,40 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev)
>
> return ret;
> }
> +
> +int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata)
> +{
> + DECLARE_AIE_MSG(aie4_msg_aie4_tile_info, AIE4_MSG_OP_AIE_TILE_INFO);
> + int ret;
> +
> + ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> + if (ret)
> + return ret;
> +
> + metadata->size = resp.info.size;
> + metadata->cols = resp.info.cols;
> + metadata->rows = resp.info.rows;
> +
> + metadata->version.major = resp.info.major;
> + metadata->version.minor = resp.info.minor;
> +
> + metadata->core.row_count = resp.info.core_rows;
> + metadata->core.row_start = resp.info.core_row_start;
> + metadata->core.dma_channel_count = resp.info.core_dma_channels;
> + metadata->core.lock_count = resp.info.core_locks;
> + metadata->core.event_reg_count = resp.info.core_events;
> +
> + metadata->mem.row_count = resp.info.mem_rows;
> + metadata->mem.row_start = resp.info.mem_row_start;
> + metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
> + metadata->mem.lock_count = resp.info.mem_locks;
> + metadata->mem.event_reg_count = resp.info.mem_events;
> +
> + metadata->shim.row_count = resp.info.shim_rows;
> + metadata->shim.row_start = resp.info.shim_row_start;
> + metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
> + metadata->shim.lock_count = resp.info.shim_locks;
> + metadata->shim.event_reg_count = resp.info.shim_events;
> +
> + return 0;
> +}
> diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
> index 7faa01ca3436..69e220e40900 100644
> --- a/drivers/accel/amdxdna/aie4_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie4_msg_priv.h
> @@ -18,6 +18,7 @@ enum aie4_msg_opcode {
> AIE4_MSG_OP_DESTROY_PARTITION = 0x30002,
> AIE4_MSG_OP_CREATE_HW_CONTEXT = 0x30003,
> AIE4_MSG_OP_DESTROY_HW_CONTEXT = 0x30004,
> + AIE4_MSG_OP_AIE_TILE_INFO = 0x30006,
> };
>
> enum aie4_msg_status {
> @@ -96,4 +97,37 @@ struct aie4_msg_destroy_hw_context_resp {
> enum aie4_msg_status status;
> } __packed;
>
> +struct aie4_tile_info {
> + __u32 size;
> + __u16 major;
> + __u16 minor;
> + __u16 cols;
> + __u16 rows;
> + __u16 core_rows;
> + __u16 mem_rows;
> + __u16 shim_rows;
> + __u16 core_row_start;
> + __u16 mem_row_start;
> + __u16 shim_row_start;
> + __u16 core_dma_channels;
> + __u16 mem_dma_channels;
> + __u16 shim_dma_channels;
> + __u16 core_locks;
> + __u16 mem_locks;
> + __u16 shim_locks;
> + __u16 core_events;
> + __u16 mem_events;
> + __u16 shim_events;
> + __u16 resvd;
> +} __packed;
> +
> +struct aie4_msg_aie4_tile_info_req {
> + __u32 resvd;
> +} __packed;
> +
> +struct aie4_msg_aie4_tile_info_resp {
> + enum aie4_msg_status status;
> + struct aie4_tile_info info;
> +} __packed;
> +
> #endif /* _AIE4_MSG_PRIV_H_ */
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index 9ff34ce57fcb..8b5eff0e45c1 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -269,6 +269,11 @@ static void aie4_partition_fini(struct amdxdna_dev_hdl *ndev)
> XDNA_ERR(xdna, "partition fini failed: %d", ret);
> }
>
> +static int aie4_query(struct amdxdna_dev_hdl *ndev)
> +{
> + return aie4_query_aie_metadata(ndev, &ndev->aie.metadata);
> +}
> +
> static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
> {
> int ret;
> @@ -308,6 +313,10 @@ static int aie4_vf_hw_start(struct amdxdna_dev_hdl *ndev)
> if (ret)
> return ret;
>
> + ret = aie4_query(ndev);
> + if (ret)
> + goto mailbox_fini;
> +
> ret = aie4_partition_init(ndev);
> if (ret)
> goto mailbox_fini;
> @@ -535,6 +544,26 @@ static int aie4_doorbell_mmap(struct amdxdna_client *client, struct vm_area_stru
> return ret;
> }
>
> +static int aie4_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
> +{
> + struct amdxdna_dev *xdna = client->xdna;
> + struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> + int ret;
> +
> + switch (args->param) {
> + case DRM_AMDXDNA_QUERY_AIE_METADATA:
> + ret = amdxdna_get_metadata(&ndev->aie, client, args);
> + break;
> + default:
> + XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
> + ret = -EOPNOTSUPP;
> + }
> +
> + XDNA_DBG(xdna, "Got param %d", args->param);
> +
> + return ret;
> +}
> +
> static int aie4_pf_init(struct amdxdna_dev *xdna)
> {
> int ret;
> @@ -581,4 +610,5 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
> .hwctx_fini = aie4_hwctx_fini,
> .mmap = aie4_doorbell_mmap,
> .cmd_wait = aie4_cmd_wait,
> + .get_aie_info = aie4_get_info,
> };
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index b69489acd53d..1886cffc62db 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -56,6 +56,7 @@ struct amdxdna_dev_hdl {
> };
>
> /* aie4_message.c */
> +int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
> int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
>
> /* aie4_ctx.c */
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support
2026-05-05 17:14 ` Mario Limonciello
@ 2026-05-05 18:03 ` Lizhi Hou
0 siblings, 0 replies; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 18:03 UTC (permalink / raw)
To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel,
karol.wachowski
Cc: David Zhang, linux-kernel, max.zhen, sonal.santan,
Hayden Laccabue
On 5/5/26 10:14, Mario Limonciello wrote:
>
>
> On 5/5/26 11:09, Lizhi Hou wrote:
>> From: David Zhang <yidong.zhang@amd.com>
>>
>> Add support for querying device metadata on AIE4 via a mailbox message.
>> Refactor aie2_get_aie_metadata() into a common helper by moving it to
>> aie.c and renaming it to amdxdna_get_metadata(), allowing both AIE2
>> and AIE4 to reuse the implementation.
>>
>> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@amd.com>
>> Signed-off-by: David Zhang <yidong.zhang@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>> drivers/accel/amdxdna/aie.c | 45 ++++++++++++++++++++++
>> drivers/accel/amdxdna/aie.h | 27 ++++++++++++++
>> drivers/accel/amdxdna/aie2_ctx.c | 4 +-
>> drivers/accel/amdxdna/aie2_message.c | 2 +-
>> drivers/accel/amdxdna/aie2_pci.c | 54 ++-------------------------
>> drivers/accel/amdxdna/aie2_pci.h | 24 ------------
>> drivers/accel/amdxdna/aie4_message.c | 37 ++++++++++++++++++
>> drivers/accel/amdxdna/aie4_msg_priv.h | 34 +++++++++++++++++
>> drivers/accel/amdxdna/aie4_pci.c | 30 +++++++++++++++
>> drivers/accel/amdxdna/aie4_pci.h | 1 +
>> 10 files changed, 181 insertions(+), 77 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c
>> index 66849ba9026a..a31051cc1ec8 100644
>> --- a/drivers/accel/amdxdna/aie.c
>> +++ b/drivers/accel/amdxdna/aie.c
>> @@ -117,3 +117,48 @@ void amdxdna_vbnv_init(struct amdxdna_dev *xdna)
>> amdxdna_update_vbnv(xdna, info->rev_vbnv_tbl, rev);
>> }
>> +
>> +int amdxdna_get_metadata(struct aie_device *aie,
>> + struct amdxdna_client *client,
>> + struct amdxdna_drm_get_info *args)
>> +{
>> + struct amdxdna_drm_query_aie_metadata *meta;
>> + int ret = 0;
>> + u32 buf_sz;
>> +
>> + meta = kzalloc_obj(*meta);
>> + if (!meta)
>> + return -ENOMEM;
>> +
>> + meta->col_size = aie->metadata.size;
>> + meta->cols = aie->metadata.cols;
>> + meta->rows = aie->metadata.rows;
>> +
>> + meta->version.major = aie->metadata.version.major;
>> + meta->version.minor = aie->metadata.version.minor;
>> +
>> + meta->core.row_count = aie->metadata.core.row_count;
>> + meta->core.row_start = aie->metadata.core.row_start;
>> + meta->core.dma_channel_count =
>> aie->metadata.core.dma_channel_count;
>> + meta->core.lock_count = aie->metadata.core.lock_count;
>> + meta->core.event_reg_count = aie->metadata.core.event_reg_count;
>> +
>> + meta->mem.row_count = aie->metadata.mem.row_count;
>> + meta->mem.row_start = aie->metadata.mem.row_start;
>> + meta->mem.dma_channel_count = aie->metadata.mem.dma_channel_count;
>> + meta->mem.lock_count = aie->metadata.mem.lock_count;
>> + meta->mem.event_reg_count = aie->metadata.mem.event_reg_count;
>> +
>> + meta->shim.row_count = aie->metadata.shim.row_count;
>> + meta->shim.row_start = aie->metadata.shim.row_start;
>> + meta->shim.dma_channel_count =
>> aie->metadata.shim.dma_channel_count;
>> + meta->shim.lock_count = aie->metadata.shim.lock_count;
>> + meta->shim.event_reg_count = aie->metadata.shim.event_reg_count;
>
> Looking at the code the structures for
>
> struct amdxdna_drm_query_aie_metadata
> and
> struct aie_metadata
>
> Look identical. Rather than copying every member, can you just
> copy everything from aie->metadata to args->buffer directly?
>
> That could let you save the kzalloc/kfree call.
Agree. I will just remove the redundant structures in V2.
Thanks,
Lizhi
>
>
>> +
>> + buf_sz = min(args->buffer_size, sizeof(*meta));
>> + if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
>> + ret = -EFAULT;
>> +
>> + kfree(meta);
>> + return ret;
>> +}
>> diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
>> index 7a68b114f235..4bb3719ee0c0 100644
>> --- a/drivers/accel/amdxdna/aie.h
>> +++ b/drivers/accel/amdxdna/aie.h
>> @@ -14,6 +14,29 @@
>> struct psp_device;
>> struct smu_device;
>> +struct aie_version {
>> + u16 major;
>> + u16 minor;
>> +};
>> +
>> +struct aie_tile_metadata {
>> + u16 row_count;
>> + u16 row_start;
>> + u16 dma_channel_count;
>> + u16 lock_count;
>> + u16 event_reg_count;
>> +};
>> +
>> +struct aie_metadata {
>> + u32 size;
>> + u16 cols;
>> + u16 rows;
>> + struct aie_version version;
>> + struct aie_tile_metadata core;
>> + struct aie_tile_metadata mem;
>> + struct aie_tile_metadata shim;
>> +};
>> +
>> struct aie_device {
>> struct amdxdna_dev *xdna;
>> struct mailbox_channel *mgmt_chann;
>> @@ -26,6 +49,8 @@ struct aie_device {
>> struct psp_device *psp_hdl;
>> struct smu_device *smu_hdl;
>> +
>> + struct aie_metadata metadata;
>> };
>> #define DECLARE_AIE_MSG(name, op) \
>> @@ -94,6 +119,8 @@ void aie_destroy_chann(struct aie_device *aie,
>> struct mailbox_channel **chann);
>> int aie_send_mgmt_msg_wait(struct aie_device *aie, struct
>> xdna_mailbox_msg *msg);
>> int aie_check_protocol(struct aie_device *aie, u32 fw_major, u32
>> fw_minor);
>> void amdxdna_vbnv_init(struct amdxdna_dev *xdna);
>> +int amdxdna_get_metadata(struct aie_device *aie, struct
>> amdxdna_client *client,
>> + struct amdxdna_drm_get_info *args);
>> /* aie_psp.c */
>> struct psp_device *aiem_psp_create(struct drm_device *ddev, struct
>> psp_config *conf);
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c
>> b/drivers/accel/amdxdna/aie2_ctx.c
>> index 139825ac8515..7d6094aefb6f 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -489,12 +489,12 @@ static int aie2_hwctx_col_list(struct
>> amdxdna_hwctx *hwctx)
>> }
>> ndev = xdna->dev_handle;
>> - if (unlikely(!ndev->metadata.core.row_count)) {
>> + if (unlikely(!ndev->aie.metadata.core.row_count)) {
>> XDNA_WARN(xdna, "Core tile row count is zero");
>> return -EINVAL;
>> }
>> - hwctx->num_col = hwctx->num_tiles /
>> ndev->metadata.core.row_count;
>> + hwctx->num_col = hwctx->num_tiles /
>> ndev->aie.metadata.core.row_count;
>> if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
>> XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
>> return -EINVAL;
>> diff --git a/drivers/accel/amdxdna/aie2_message.c
>> b/drivers/accel/amdxdna/aie2_message.c
>> index 6e98af7b74db..f555ffecea6f 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -375,7 +375,7 @@ int aie2_query_status(struct amdxdna_dev_hdl
>> *ndev, char __user *buf,
>> u8 *buff_addr;
>> int ret;
>> - buf_sz = ndev->metadata.cols * ndev->metadata.size;
>> + buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size;
>> buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
>> if (IS_ERR(buff_addr))
>> return PTR_ERR(buff_addr);
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index f0ddb843eb21..6c8a0f70b73d 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -219,13 +219,13 @@ static int aie2_mgmt_fw_query(struct
>> amdxdna_dev_hdl *ndev)
>> return ret;
>> }
>> - ret = aie2_query_aie_metadata(ndev, &ndev->metadata);
>> + ret = aie2_query_aie_metadata(ndev, &ndev->aie.metadata);
>> if (ret) {
>> XDNA_ERR(ndev->aie.xdna, "Query AIE metadata failed");
>> return ret;
>> }
>> - ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>> + ndev->total_col = min(aie2_max_col, ndev->aie.metadata.cols);
>> return 0;
>> }
>> @@ -658,53 +658,6 @@ static int aie2_get_aie_status(struct
>> amdxdna_client *client,
>> return 0;
>> }
>> -static int aie2_get_aie_metadata(struct amdxdna_client *client,
>> - struct amdxdna_drm_get_info *args)
>> -{
>> - struct amdxdna_drm_query_aie_metadata *meta;
>> - struct amdxdna_dev *xdna = client->xdna;
>> - struct amdxdna_dev_hdl *ndev;
>> - int ret = 0;
>> - u32 buf_sz;
>> -
>> - ndev = xdna->dev_handle;
>> - meta = kzalloc_obj(*meta);
>> - if (!meta)
>> - return -ENOMEM;
>> -
>> - meta->col_size = ndev->metadata.size;
>> - meta->cols = ndev->metadata.cols;
>> - meta->rows = ndev->metadata.rows;
>> -
>> - meta->version.major = ndev->metadata.version.major;
>> - meta->version.minor = ndev->metadata.version.minor;
>> -
>> - meta->core.row_count = ndev->metadata.core.row_count;
>> - meta->core.row_start = ndev->metadata.core.row_start;
>> - meta->core.dma_channel_count =
>> ndev->metadata.core.dma_channel_count;
>> - meta->core.lock_count = ndev->metadata.core.lock_count;
>> - meta->core.event_reg_count = ndev->metadata.core.event_reg_count;
>> -
>> - meta->mem.row_count = ndev->metadata.mem.row_count;
>> - meta->mem.row_start = ndev->metadata.mem.row_start;
>> - meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count;
>> - meta->mem.lock_count = ndev->metadata.mem.lock_count;
>> - meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count;
>> -
>> - meta->shim.row_count = ndev->metadata.shim.row_count;
>> - meta->shim.row_start = ndev->metadata.shim.row_start;
>> - meta->shim.dma_channel_count =
>> ndev->metadata.shim.dma_channel_count;
>> - meta->shim.lock_count = ndev->metadata.shim.lock_count;
>> - meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count;
>> -
>> - buf_sz = min(args->buffer_size, sizeof(*meta));
>> - if (copy_to_user(u64_to_user_ptr(args->buffer), meta, buf_sz))
>> - ret = -EFAULT;
>> -
>> - kfree(meta);
>> - return ret;
>> -}
>> -
>> static int aie2_get_aie_version(struct amdxdna_client *client,
>> struct amdxdna_drm_get_info *args)
>> {
>> @@ -1039,6 +992,7 @@ static int aie2_get_preempt_state(struct
>> amdxdna_client *client,
>> static int aie2_get_info(struct amdxdna_client *client, struct
>> amdxdna_drm_get_info *args)
>> {
>> struct amdxdna_dev *xdna = client->xdna;
>> + struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>> int ret, idx;
>> if (!drm_dev_enter(&xdna->ddev, &idx))
>> @@ -1053,7 +1007,7 @@ static int aie2_get_info(struct amdxdna_client
>> *client, struct amdxdna_drm_get_i
>> ret = aie2_get_aie_status(client, args);
>> break;
>> case DRM_AMDXDNA_QUERY_AIE_METADATA:
>> - ret = aie2_get_aie_metadata(client, args);
>> + ret = amdxdna_get_metadata(&ndev->aie, client, args);
>> break;
>> case DRM_AMDXDNA_QUERY_AIE_VERSION:
>> ret = aie2_get_aie_version(client, args);
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index f12073175676..c884fed610f9 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -77,29 +77,6 @@ struct amdxdna_fw_ver;
>> struct amdxdna_hwctx;
>> struct amdxdna_sched_job;
>> -struct aie_version {
>> - u16 major;
>> - u16 minor;
>> -};
>> -
>> -struct aie_tile_metadata {
>> - u16 row_count;
>> - u16 row_start;
>> - u16 dma_channel_count;
>> - u16 lock_count;
>> - u16 event_reg_count;
>> -};
>> -
>> -struct aie_metadata {
>> - u32 size;
>> - u16 cols;
>> - u16 rows;
>> - struct aie_version version;
>> - struct aie_tile_metadata core;
>> - struct aie_tile_metadata mem;
>> - struct aie_tile_metadata shim;
>> -};
>> -
>> enum rt_config_category {
>> AIE2_RT_CFG_INIT,
>> AIE2_RT_CFG_CLK_GATING,
>> @@ -178,7 +155,6 @@ struct amdxdna_dev_hdl {
>> u32 total_col;
>> struct aie_version version;
>> - struct aie_metadata metadata;
>> struct aie2_exec_msg_ops *exec_msg_ops;
>> /* power management and clock*/
>> diff --git a/drivers/accel/amdxdna/aie4_message.c
>> b/drivers/accel/amdxdna/aie4_message.c
>> index d621dd32ac40..ac89a9a842b2 100644
>> --- a/drivers/accel/amdxdna/aie4_message.c
>> +++ b/drivers/accel/amdxdna/aie4_message.c
>> @@ -25,3 +25,40 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev)
>> return ret;
>> }
>> +
>> +int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct
>> aie_metadata *metadata)
>> +{
>> + DECLARE_AIE_MSG(aie4_msg_aie4_tile_info,
>> AIE4_MSG_OP_AIE_TILE_INFO);
>> + int ret;
>> +
>> + ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>> + if (ret)
>> + return ret;
>> +
>> + metadata->size = resp.info.size;
>> + metadata->cols = resp.info.cols;
>> + metadata->rows = resp.info.rows;
>> +
>> + metadata->version.major = resp.info.major;
>> + metadata->version.minor = resp.info.minor;
>> +
>> + metadata->core.row_count = resp.info.core_rows;
>> + metadata->core.row_start = resp.info.core_row_start;
>> + metadata->core.dma_channel_count = resp.info.core_dma_channels;
>> + metadata->core.lock_count = resp.info.core_locks;
>> + metadata->core.event_reg_count = resp.info.core_events;
>> +
>> + metadata->mem.row_count = resp.info.mem_rows;
>> + metadata->mem.row_start = resp.info.mem_row_start;
>> + metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
>> + metadata->mem.lock_count = resp.info.mem_locks;
>> + metadata->mem.event_reg_count = resp.info.mem_events;
>> +
>> + metadata->shim.row_count = resp.info.shim_rows;
>> + metadata->shim.row_start = resp.info.shim_row_start;
>> + metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
>> + metadata->shim.lock_count = resp.info.shim_locks;
>> + metadata->shim.event_reg_count = resp.info.shim_events;
>> +
>> + return 0;
>> +}
>> diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h
>> b/drivers/accel/amdxdna/aie4_msg_priv.h
>> index 7faa01ca3436..69e220e40900 100644
>> --- a/drivers/accel/amdxdna/aie4_msg_priv.h
>> +++ b/drivers/accel/amdxdna/aie4_msg_priv.h
>> @@ -18,6 +18,7 @@ enum aie4_msg_opcode {
>> AIE4_MSG_OP_DESTROY_PARTITION = 0x30002,
>> AIE4_MSG_OP_CREATE_HW_CONTEXT = 0x30003,
>> AIE4_MSG_OP_DESTROY_HW_CONTEXT = 0x30004,
>> + AIE4_MSG_OP_AIE_TILE_INFO = 0x30006,
>> };
>> enum aie4_msg_status {
>> @@ -96,4 +97,37 @@ struct aie4_msg_destroy_hw_context_resp {
>> enum aie4_msg_status status;
>> } __packed;
>> +struct aie4_tile_info {
>> + __u32 size;
>> + __u16 major;
>> + __u16 minor;
>> + __u16 cols;
>> + __u16 rows;
>> + __u16 core_rows;
>> + __u16 mem_rows;
>> + __u16 shim_rows;
>> + __u16 core_row_start;
>> + __u16 mem_row_start;
>> + __u16 shim_row_start;
>> + __u16 core_dma_channels;
>> + __u16 mem_dma_channels;
>> + __u16 shim_dma_channels;
>> + __u16 core_locks;
>> + __u16 mem_locks;
>> + __u16 shim_locks;
>> + __u16 core_events;
>> + __u16 mem_events;
>> + __u16 shim_events;
>> + __u16 resvd;
>> +} __packed;
>> +
>> +struct aie4_msg_aie4_tile_info_req {
>> + __u32 resvd;
>> +} __packed;
>> +
>> +struct aie4_msg_aie4_tile_info_resp {
>> + enum aie4_msg_status status;
>> + struct aie4_tile_info info;
>> +} __packed;
>> +
>> #endif /* _AIE4_MSG_PRIV_H_ */
>> diff --git a/drivers/accel/amdxdna/aie4_pci.c
>> b/drivers/accel/amdxdna/aie4_pci.c
>> index 9ff34ce57fcb..8b5eff0e45c1 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.c
>> +++ b/drivers/accel/amdxdna/aie4_pci.c
>> @@ -269,6 +269,11 @@ static void aie4_partition_fini(struct
>> amdxdna_dev_hdl *ndev)
>> XDNA_ERR(xdna, "partition fini failed: %d", ret);
>> }
>> +static int aie4_query(struct amdxdna_dev_hdl *ndev)
>> +{
>> + return aie4_query_aie_metadata(ndev, &ndev->aie.metadata);
>> +}
>> +
>> static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
>> {
>> int ret;
>> @@ -308,6 +313,10 @@ static int aie4_vf_hw_start(struct
>> amdxdna_dev_hdl *ndev)
>> if (ret)
>> return ret;
>> + ret = aie4_query(ndev);
>> + if (ret)
>> + goto mailbox_fini;
>> +
>> ret = aie4_partition_init(ndev);
>> if (ret)
>> goto mailbox_fini;
>> @@ -535,6 +544,26 @@ static int aie4_doorbell_mmap(struct
>> amdxdna_client *client, struct vm_area_stru
>> return ret;
>> }
>> +static int aie4_get_info(struct amdxdna_client *client, struct
>> amdxdna_drm_get_info *args)
>> +{
>> + struct amdxdna_dev *xdna = client->xdna;
>> + struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>> + int ret;
>> +
>> + switch (args->param) {
>> + case DRM_AMDXDNA_QUERY_AIE_METADATA:
>> + ret = amdxdna_get_metadata(&ndev->aie, client, args);
>> + break;
>> + default:
>> + XDNA_ERR(xdna, "Not supported request parameter %u",
>> args->param);
>> + ret = -EOPNOTSUPP;
>> + }
>> +
>> + XDNA_DBG(xdna, "Got param %d", args->param);
>> +
>> + return ret;
>> +}
>> +
>> static int aie4_pf_init(struct amdxdna_dev *xdna)
>> {
>> int ret;
>> @@ -581,4 +610,5 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
>> .hwctx_fini = aie4_hwctx_fini,
>> .mmap = aie4_doorbell_mmap,
>> .cmd_wait = aie4_cmd_wait,
>> + .get_aie_info = aie4_get_info,
>> };
>> diff --git a/drivers/accel/amdxdna/aie4_pci.h
>> b/drivers/accel/amdxdna/aie4_pci.h
>> index b69489acd53d..1886cffc62db 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.h
>> +++ b/drivers/accel/amdxdna/aie4_pci.h
>> @@ -56,6 +56,7 @@ struct amdxdna_dev_hdl {
>> };
>> /* aie4_message.c */
>> +int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct
>> aie_metadata *metadata);
>> int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
>> /* aie4_ctx.c */
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH V1 6/6] accel/amdxdna: Add AIE4 work buffer initialization
2026-05-05 16:09 [PATCH V1 0/6] SR-IOV Virtual Function support for AIE4 platform Lizhi Hou
` (4 preceding siblings ...)
2026-05-05 16:09 ` [PATCH V1 5/6] accel/amdxdna: Add AIE4 metadata query support Lizhi Hou
@ 2026-05-05 16:09 ` Lizhi Hou
2026-05-05 20:36 ` Mario Limonciello
5 siblings, 1 reply; 16+ messages in thread
From: Lizhi Hou @ 2026-05-05 16:09 UTC (permalink / raw)
To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
karol.wachowski
Cc: Nishad Saraf, linux-kernel, max.zhen, sonal.santan, Lizhi Hou
From: Nishad Saraf <nishads@amd.com>
NPU firmware requires a host-allocated work buffer for hardware contexts.
Allocate a 4 MB host buffer and attach it to the device during device init.
Refactor aie2_alloc_msg_buffer() and aie2_free_msg_buffer() into common
helpers by moving them to aie.c and renaming them to
amdxdna_alloc_msg_buffer() and amdxdna_free_msg_buffer(), allowing both
AIE2 and AIE4 to reuse the implementation.
Signed-off-by: Nishad Saraf <nishads@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie.c | 34 +++++++++++++++
drivers/accel/amdxdna/aie.h | 4 ++
drivers/accel/amdxdna/aie2_error.c | 7 ++--
drivers/accel/amdxdna/aie2_message.c | 49 +++-------------------
drivers/accel/amdxdna/aie2_pci.h | 4 --
drivers/accel/amdxdna/aie4_message.c | 18 ++++++++
drivers/accel/amdxdna/aie4_msg_priv.h | 14 +++++++
drivers/accel/amdxdna/aie4_pci.c | 55 ++++++++++++++++++++++++-
drivers/accel/amdxdna/aie4_pci.h | 5 +++
drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 +-
10 files changed, 141 insertions(+), 52 deletions(-)
diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c
index a31051cc1ec8..4db2fd80a032 100644
--- a/drivers/accel/amdxdna/aie.c
+++ b/drivers/accel/amdxdna/aie.c
@@ -162,3 +162,37 @@ int amdxdna_get_metadata(struct aie_device *aie,
kfree(meta);
return ret;
}
+
+void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size,
+ dma_addr_t *dma_addr)
+{
+ void *vaddr;
+ int order;
+
+ *size = max_t(u32, *size, SZ_8K);
+ order = get_order(*size);
+ if (order > MAX_PAGE_ORDER)
+ return ERR_PTR(-EINVAL);
+ *size = PAGE_SIZE << order;
+
+ if (amdxdna_iova_on(xdna))
+ return amdxdna_iommu_alloc(xdna, *size, dma_addr);
+
+ vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr,
+ DMA_FROM_DEVICE, GFP_KERNEL);
+ if (!vaddr)
+ return ERR_PTR(-ENOMEM);
+
+ return vaddr;
+}
+
+void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size,
+ void *cpu_addr, dma_addr_t dma_addr)
+{
+ if (amdxdna_iova_on(xdna)) {
+ amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr);
+ return;
+ }
+
+ dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE);
+}
diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
index 4bb3719ee0c0..70618204c0ab 100644
--- a/drivers/accel/amdxdna/aie.h
+++ b/drivers/accel/amdxdna/aie.h
@@ -121,6 +121,10 @@ int aie_check_protocol(struct aie_device *aie, u32 fw_major, u32 fw_minor);
void amdxdna_vbnv_init(struct amdxdna_dev *xdna);
int amdxdna_get_metadata(struct aie_device *aie, struct amdxdna_client *client,
struct amdxdna_drm_get_info *args);
+void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size,
+ dma_addr_t *dma_addr);
+void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size,
+ void *cpu_addr, dma_addr_t dma_addr);
/* aie_psp.c */
struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf);
diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
index 70007b4363cd..babdac0157ab 100644
--- a/drivers/accel/amdxdna/aie2_error.c
+++ b/drivers/accel/amdxdna/aie2_error.c
@@ -11,6 +11,7 @@
#include <linux/kthread.h>
#include <linux/kernel.h>
+#include "aie.h"
#include "aie2_msg_priv.h"
#include "aie2_pci.h"
#include "amdxdna_error.h"
@@ -338,7 +339,7 @@ void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
destroy_workqueue(events->wq);
mutex_lock(&xdna->dev_lock);
- aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr);
+ amdxdna_free_msg_buffer(xdna, events->size, events->buf, events->addr);
kfree(events);
}
@@ -354,7 +355,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
if (!events)
return -ENOMEM;
- events->buf = aie2_alloc_msg_buffer(ndev, &total_size, &events->addr);
+ events->buf = amdxdna_alloc_msg_buffer(xdna, &total_size, &events->addr);
if (IS_ERR(events->buf)) {
ret = PTR_ERR(events->buf);
goto free_events;
@@ -394,7 +395,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
free_wq:
destroy_workqueue(events->wq);
free_buf:
- aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr);
+ amdxdna_free_msg_buffer(xdna, events->size, events->buf, events->addr);
free_events:
kfree(events);
return ret;
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index f555ffecea6f..0417c6a4c80a 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -27,43 +27,6 @@
#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops)
-void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
- dma_addr_t *dma_addr)
-{
- struct amdxdna_dev *xdna = ndev->aie.xdna;
- void *vaddr;
- int order;
-
- *size = max(*size, SZ_8K);
- order = get_order(*size);
- if (order > MAX_PAGE_ORDER)
- return ERR_PTR(-EINVAL);
- *size = PAGE_SIZE << order;
-
- if (amdxdna_iova_on(xdna))
- return amdxdna_iommu_alloc(xdna, *size, dma_addr);
-
- vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr,
- DMA_FROM_DEVICE, GFP_KERNEL);
- if (!vaddr)
- return ERR_PTR(-ENOMEM);
-
- return vaddr;
-}
-
-void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
- void *cpu_addr, dma_addr_t dma_addr)
-{
- struct amdxdna_dev *xdna = ndev->aie.xdna;
-
- if (amdxdna_iova_on(xdna)) {
- amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr);
- return;
- }
-
- dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE);
-}
-
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
{
DECLARE_AIE_MSG(suspend, MSG_OP_SUSPEND);
@@ -376,7 +339,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
int ret;
buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size;
- buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
+ buff_addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr);
if (IS_ERR(buff_addr))
return PTR_ERR(buff_addr);
@@ -415,7 +378,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
*cols_filled = aie_bitmap;
fail:
- aie2_free_msg_buffer(ndev, buf_sz, buff_addr, dma_addr);
+ amdxdna_free_msg_buffer(xdna, buf_sz, buff_addr, dma_addr);
return ret;
}
@@ -434,7 +397,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
return -EINVAL;
buf_sz = min(size, SZ_4M);
- addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
+ addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr);
if (IS_ERR(addr))
return PTR_ERR(addr);
@@ -466,7 +429,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
header->minor = resp.minor;
free_buf:
- aie2_free_msg_buffer(ndev, buf_sz, addr, dma_addr);
+ amdxdna_free_msg_buffer(xdna, buf_sz, addr, dma_addr);
return ret;
}
@@ -1176,7 +1139,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
}
buf_size = sizeof(*report);
- buf = aie2_alloc_msg_buffer(ndev, &buf_size, &dma_addr);
+ buf = amdxdna_alloc_msg_buffer(xdna, &buf_size, &dma_addr);
if (IS_ERR(buf)) {
XDNA_ERR(xdna, "Failed to allocate buffer for app health");
return PTR_ERR(buf);
@@ -1197,7 +1160,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
memcpy(report, buf, sizeof(*report));
free_buf:
- aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
+ amdxdna_free_msg_buffer(xdna, buf_size, buf, dma_addr);
return ret;
}
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index c884fed610f9..33b6c84e8b6e 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -290,10 +290,6 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us);
-void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
- dma_addr_t *dma_addr);
-void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
- void *cpu_addr, dma_addr_t dma_addr);
/* aie2_hwctx.c */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
diff --git a/drivers/accel/amdxdna/aie4_message.c b/drivers/accel/amdxdna/aie4_message.c
index ac89a9a842b2..d85df04c5f6b 100644
--- a/drivers/accel/amdxdna/aie4_message.c
+++ b/drivers/accel/amdxdna/aie4_message.c
@@ -62,3 +62,21 @@ int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *m
return 0;
}
+
+int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev)
+{
+ DECLARE_AIE_MSG(aie4_msg_attach_work_buffer, AIE4_MSG_OP_ATTACH_WORK_BUFFER);
+ struct amdxdna_dev *xdna = ndev->aie.xdna;
+ int ret;
+
+ req.buff_addr = ndev->work_buf_addr;
+ req.buff_size = AIE4_WORK_BUFFER_MIN_SIZE;
+
+ ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+ if (ret)
+ XDNA_ERR(xdna, "Failed to attach work buffer, ret %d", ret);
+ else
+ XDNA_DBG(xdna, "Attached work buffer");
+
+ return ret;
+}
diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
index 69e220e40900..af0866045b91 100644
--- a/drivers/accel/amdxdna/aie4_msg_priv.h
+++ b/drivers/accel/amdxdna/aie4_msg_priv.h
@@ -6,10 +6,12 @@
#ifndef _AIE4_MSG_PRIV_H_
#define _AIE4_MSG_PRIV_H_
+#include <linux/sizes.h>
#include <linux/types.h>
enum aie4_msg_opcode {
AIE4_MSG_OP_SUSPEND = 0x10003,
+ AIE4_MSG_OP_ATTACH_WORK_BUFFER = 0x1000D,
AIE4_MSG_OP_CREATE_VFS = 0x20001,
AIE4_MSG_OP_DESTROY_VFS = 0x20002,
@@ -130,4 +132,16 @@ struct aie4_msg_aie4_tile_info_resp {
struct aie4_tile_info info;
} __packed;
+#define AIE4_WORK_BUFFER_MIN_SIZE SZ_4M
+
+struct aie4_msg_attach_work_buffer_req {
+ __u64 buff_addr;
+ __u32 reserved;
+ __u32 buff_size;
+} __packed;
+
+struct aie4_msg_attach_work_buffer_resp {
+ enum aie4_msg_status status;
+} __packed;
+
#endif /* _AIE4_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 8b5eff0e45c1..a58a83af42a4 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -286,8 +286,14 @@ static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
if (ret)
goto stop_fw;
+ ret = aie4_attach_work_buffer(ndev);
+ if (ret)
+ goto mbox_fini;
+
return 0;
+mbox_fini:
+ aie4_mailbox_fini(ndev);
stop_fw:
aie4_fw_stop(ndev);
@@ -564,6 +570,40 @@ static int aie4_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
return ret;
}
+static int aie4_alloc_work_buffer(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->aie.xdna;
+ u32 buf_size = AIE4_WORK_BUFFER_MIN_SIZE;
+
+ ndev->work_buf = amdxdna_alloc_msg_buffer(xdna, &buf_size,
+ &ndev->work_buf_addr);
+ if (IS_ERR(ndev->work_buf)) {
+ int ret = PTR_ERR(ndev->work_buf);
+
+ XDNA_ERR(xdna, "Failed to alloc work buffer, size 0x%x",
+ AIE4_WORK_BUFFER_MIN_SIZE);
+ ndev->work_buf = NULL;
+ return ret;
+ }
+
+ ndev->work_buf_size = buf_size;
+ XDNA_DBG(xdna, "Work buffer allocated: size 0x%x", buf_size);
+
+ return 0;
+}
+
+static void aie4_free_work_buffer(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->aie.xdna;
+
+ if (!ndev->work_buf)
+ return;
+
+ amdxdna_free_msg_buffer(xdna, ndev->work_buf_size, ndev->work_buf,
+ ndev->work_buf_addr);
+ ndev->work_buf = NULL;
+}
+
static int aie4_pf_init(struct amdxdna_dev *xdna)
{
int ret;
@@ -572,7 +612,19 @@ static int aie4_pf_init(struct amdxdna_dev *xdna)
if (ret)
return ret;
- return aie4_pf_hw_start(xdna->dev_handle);
+ ret = aie4_alloc_work_buffer(xdna->dev_handle);
+ if (ret)
+ return ret;
+
+ ret = aie4_pf_hw_start(xdna->dev_handle);
+ if (ret)
+ goto free_work_buf;
+
+ return 0;
+
+free_work_buf:
+ aie4_free_work_buffer(xdna->dev_handle);
+ return ret;
}
static int aie4_vf_init(struct amdxdna_dev *xdna)
@@ -590,6 +642,7 @@ static void aie4_pf_fini(struct amdxdna_dev *xdna)
{
aie4_sriov_stop(xdna->dev_handle);
aie4_pf_hw_stop(xdna->dev_handle);
+ aie4_free_work_buffer(xdna->dev_handle);
}
static void aie4_vf_fini(struct amdxdna_dev *xdna)
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index 1886cffc62db..390864876ca5 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -53,11 +53,16 @@ struct amdxdna_dev_hdl {
struct xarray cert_comp_xa; /* device level indexed by msix id */
struct mutex cert_comp_lock; /* protects cert_comp operations*/
+
+ void *work_buf;
+ dma_addr_t work_buf_addr;
+ u32 work_buf_size;
};
/* aie4_message.c */
int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
+int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev);
/* aie4_ctx.c */
int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index c0d00db25cde..a6e9be7960c2 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -40,9 +40,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
* 0.7: Support getting power and utilization data
* 0.8: Support BO usage query
* 0.9: Add new device type AMDXDNA_DEV_TYPE_PF
+ * 0.10: Support AIE4 UMQ
*/
#define AMDXDNA_DRIVER_MAJOR 0
-#define AMDXDNA_DRIVER_MINOR 9
+#define AMDXDNA_DRIVER_MINOR 10
/*
* Bind the driver base on (vendor_id, device_id) pair and later use the
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH V1 6/6] accel/amdxdna: Add AIE4 work buffer initialization
2026-05-05 16:09 ` [PATCH V1 6/6] accel/amdxdna: Add AIE4 work buffer initialization Lizhi Hou
@ 2026-05-05 20:36 ` Mario Limonciello
0 siblings, 0 replies; 16+ messages in thread
From: Mario Limonciello @ 2026-05-05 20:36 UTC (permalink / raw)
To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, karol.wachowski
Cc: Nishad Saraf, linux-kernel, max.zhen, sonal.santan
On 5/5/26 11:09, Lizhi Hou wrote:
> From: Nishad Saraf <nishads@amd.com>
>
> NPU firmware requires a host-allocated work buffer for hardware contexts.
> Allocate a 4 MB host buffer and attach it to the device during device init.
>
> Refactor aie2_alloc_msg_buffer() and aie2_free_msg_buffer() into common
> helpers by moving them to aie.c and renaming them to
> amdxdna_alloc_msg_buffer() and amdxdna_free_msg_buffer(), allowing both
> AIE2 and AIE4 to reuse the implementation.
>
> Signed-off-by: Nishad Saraf <nishads@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
> drivers/accel/amdxdna/aie.c | 34 +++++++++++++++
> drivers/accel/amdxdna/aie.h | 4 ++
> drivers/accel/amdxdna/aie2_error.c | 7 ++--
> drivers/accel/amdxdna/aie2_message.c | 49 +++-------------------
> drivers/accel/amdxdna/aie2_pci.h | 4 --
> drivers/accel/amdxdna/aie4_message.c | 18 ++++++++
> drivers/accel/amdxdna/aie4_msg_priv.h | 14 +++++++
> drivers/accel/amdxdna/aie4_pci.c | 55 ++++++++++++++++++++++++-
> drivers/accel/amdxdna/aie4_pci.h | 5 +++
> drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 +-
> 10 files changed, 141 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c
> index a31051cc1ec8..4db2fd80a032 100644
> --- a/drivers/accel/amdxdna/aie.c
> +++ b/drivers/accel/amdxdna/aie.c
> @@ -162,3 +162,37 @@ int amdxdna_get_metadata(struct aie_device *aie,
> kfree(meta);
> return ret;
> }
> +
> +void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size,
> + dma_addr_t *dma_addr)
> +{
> + void *vaddr;
> + int order;
> +
> + *size = max_t(u32, *size, SZ_8K);
> + order = get_order(*size);
> + if (order > MAX_PAGE_ORDER)
> + return ERR_PTR(-EINVAL);
> + *size = PAGE_SIZE << order;
> +
> + if (amdxdna_iova_on(xdna))
> + return amdxdna_iommu_alloc(xdna, *size, dma_addr);
> +
> + vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr,
> + DMA_FROM_DEVICE, GFP_KERNEL);
> + if (!vaddr)
> + return ERR_PTR(-ENOMEM);
> +
> + return vaddr;
> +}
> +
> +void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size,
> + void *cpu_addr, dma_addr_t dma_addr)
> +{
> + if (amdxdna_iova_on(xdna)) {
> + amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr);
> + return;
> + }
> +
> + dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE);
> +}
> diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
> index 4bb3719ee0c0..70618204c0ab 100644
> --- a/drivers/accel/amdxdna/aie.h
> +++ b/drivers/accel/amdxdna/aie.h
> @@ -121,6 +121,10 @@ int aie_check_protocol(struct aie_device *aie, u32 fw_major, u32 fw_minor);
> void amdxdna_vbnv_init(struct amdxdna_dev *xdna);
> int amdxdna_get_metadata(struct aie_device *aie, struct amdxdna_client *client,
> struct amdxdna_drm_get_info *args);
> +void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size,
> + dma_addr_t *dma_addr);
> +void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size,
> + void *cpu_addr, dma_addr_t dma_addr);
>
> /* aie_psp.c */
> struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf);
> diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
> index 70007b4363cd..babdac0157ab 100644
> --- a/drivers/accel/amdxdna/aie2_error.c
> +++ b/drivers/accel/amdxdna/aie2_error.c
> @@ -11,6 +11,7 @@
> #include <linux/kthread.h>
> #include <linux/kernel.h>
>
> +#include "aie.h"
> #include "aie2_msg_priv.h"
> #include "aie2_pci.h"
> #include "amdxdna_error.h"
> @@ -338,7 +339,7 @@ void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
> destroy_workqueue(events->wq);
> mutex_lock(&xdna->dev_lock);
>
> - aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr);
> + amdxdna_free_msg_buffer(xdna, events->size, events->buf, events->addr);
> kfree(events);
> }
>
> @@ -354,7 +355,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
> if (!events)
> return -ENOMEM;
>
> - events->buf = aie2_alloc_msg_buffer(ndev, &total_size, &events->addr);
> + events->buf = amdxdna_alloc_msg_buffer(xdna, &total_size, &events->addr);
> if (IS_ERR(events->buf)) {
> ret = PTR_ERR(events->buf);
> goto free_events;
> @@ -394,7 +395,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
> free_wq:
> destroy_workqueue(events->wq);
> free_buf:
> - aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr);
> + amdxdna_free_msg_buffer(xdna, events->size, events->buf, events->addr);
> free_events:
> kfree(events);
> return ret;
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index f555ffecea6f..0417c6a4c80a 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -27,43 +27,6 @@
>
> #define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops)
>
> -void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
> - dma_addr_t *dma_addr)
> -{
> - struct amdxdna_dev *xdna = ndev->aie.xdna;
> - void *vaddr;
> - int order;
> -
> - *size = max(*size, SZ_8K);
> - order = get_order(*size);
> - if (order > MAX_PAGE_ORDER)
> - return ERR_PTR(-EINVAL);
> - *size = PAGE_SIZE << order;
> -
> - if (amdxdna_iova_on(xdna))
> - return amdxdna_iommu_alloc(xdna, *size, dma_addr);
> -
> - vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr,
> - DMA_FROM_DEVICE, GFP_KERNEL);
> - if (!vaddr)
> - return ERR_PTR(-ENOMEM);
> -
> - return vaddr;
> -}
> -
> -void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
> - void *cpu_addr, dma_addr_t dma_addr)
> -{
> - struct amdxdna_dev *xdna = ndev->aie.xdna;
> -
> - if (amdxdna_iova_on(xdna)) {
> - amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr);
> - return;
> - }
> -
> - dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE);
> -}
> -
> int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
> {
> DECLARE_AIE_MSG(suspend, MSG_OP_SUSPEND);
> @@ -376,7 +339,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
> int ret;
>
> buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size;
> - buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
> + buff_addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr);
> if (IS_ERR(buff_addr))
> return PTR_ERR(buff_addr);
>
> @@ -415,7 +378,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
> *cols_filled = aie_bitmap;
>
> fail:
> - aie2_free_msg_buffer(ndev, buf_sz, buff_addr, dma_addr);
> + amdxdna_free_msg_buffer(xdna, buf_sz, buff_addr, dma_addr);
> return ret;
> }
>
> @@ -434,7 +397,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
> return -EINVAL;
>
> buf_sz = min(size, SZ_4M);
> - addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
> + addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr);
> if (IS_ERR(addr))
> return PTR_ERR(addr);
>
> @@ -466,7 +429,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
> header->minor = resp.minor;
>
> free_buf:
> - aie2_free_msg_buffer(ndev, buf_sz, addr, dma_addr);
> + amdxdna_free_msg_buffer(xdna, buf_sz, addr, dma_addr);
> return ret;
> }
>
> @@ -1176,7 +1139,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
> }
>
> buf_size = sizeof(*report);
> - buf = aie2_alloc_msg_buffer(ndev, &buf_size, &dma_addr);
> + buf = amdxdna_alloc_msg_buffer(xdna, &buf_size, &dma_addr);
> if (IS_ERR(buf)) {
> XDNA_ERR(xdna, "Failed to allocate buffer for app health");
> return PTR_ERR(buf);
> @@ -1197,7 +1160,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
> memcpy(report, buf, sizeof(*report));
>
> free_buf:
> - aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
> + amdxdna_free_msg_buffer(xdna, buf_size, buf, dma_addr);
> return ret;
> }
>
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index c884fed610f9..33b6c84e8b6e 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -290,10 +290,6 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
> int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
> int (*notify_cb)(void *, void __iomem *, size_t));
> int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us);
> -void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
> - dma_addr_t *dma_addr);
> -void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
> - void *cpu_addr, dma_addr_t dma_addr);
>
> /* aie2_hwctx.c */
> int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
> diff --git a/drivers/accel/amdxdna/aie4_message.c b/drivers/accel/amdxdna/aie4_message.c
> index ac89a9a842b2..d85df04c5f6b 100644
> --- a/drivers/accel/amdxdna/aie4_message.c
> +++ b/drivers/accel/amdxdna/aie4_message.c
> @@ -62,3 +62,21 @@ int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *m
>
> return 0;
> }
> +
> +int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev)
> +{
> + DECLARE_AIE_MSG(aie4_msg_attach_work_buffer, AIE4_MSG_OP_ATTACH_WORK_BUFFER);
> + struct amdxdna_dev *xdna = ndev->aie.xdna;
> + int ret;
> +
> + req.buff_addr = ndev->work_buf_addr;
> + req.buff_size = AIE4_WORK_BUFFER_MIN_SIZE;
> +
> + ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
> + if (ret)
> + XDNA_ERR(xdna, "Failed to attach work buffer, ret %d", ret);
> + else
> + XDNA_DBG(xdna, "Attached work buffer");
> +
> + return ret;
> +}
> diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
> index 69e220e40900..af0866045b91 100644
> --- a/drivers/accel/amdxdna/aie4_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie4_msg_priv.h
> @@ -6,10 +6,12 @@
> #ifndef _AIE4_MSG_PRIV_H_
> #define _AIE4_MSG_PRIV_H_
>
> +#include <linux/sizes.h>
> #include <linux/types.h>
>
> enum aie4_msg_opcode {
> AIE4_MSG_OP_SUSPEND = 0x10003,
> + AIE4_MSG_OP_ATTACH_WORK_BUFFER = 0x1000D,
>
> AIE4_MSG_OP_CREATE_VFS = 0x20001,
> AIE4_MSG_OP_DESTROY_VFS = 0x20002,
> @@ -130,4 +132,16 @@ struct aie4_msg_aie4_tile_info_resp {
> struct aie4_tile_info info;
> } __packed;
>
> +#define AIE4_WORK_BUFFER_MIN_SIZE SZ_4M
> +
> +struct aie4_msg_attach_work_buffer_req {
> + __u64 buff_addr;
> + __u32 reserved;
> + __u32 buff_size;
> +} __packed;
> +
> +struct aie4_msg_attach_work_buffer_resp {
> + enum aie4_msg_status status;
> +} __packed;
> +
> #endif /* _AIE4_MSG_PRIV_H_ */
> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
> index 8b5eff0e45c1..a58a83af42a4 100644
> --- a/drivers/accel/amdxdna/aie4_pci.c
> +++ b/drivers/accel/amdxdna/aie4_pci.c
> @@ -286,8 +286,14 @@ static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
> if (ret)
> goto stop_fw;
>
> + ret = aie4_attach_work_buffer(ndev);
> + if (ret)
> + goto mbox_fini;
> +
> return 0;
>
> +mbox_fini:
> + aie4_mailbox_fini(ndev);
> stop_fw:
> aie4_fw_stop(ndev);
>
> @@ -564,6 +570,40 @@ static int aie4_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
> return ret;
> }
>
> +static int aie4_alloc_work_buffer(struct amdxdna_dev_hdl *ndev)
> +{
> + struct amdxdna_dev *xdna = ndev->aie.xdna;
> + u32 buf_size = AIE4_WORK_BUFFER_MIN_SIZE;
> +
> + ndev->work_buf = amdxdna_alloc_msg_buffer(xdna, &buf_size,
> + &ndev->work_buf_addr);
> + if (IS_ERR(ndev->work_buf)) {
> + int ret = PTR_ERR(ndev->work_buf);
> +
> + XDNA_ERR(xdna, "Failed to alloc work buffer, size 0x%x",
> + AIE4_WORK_BUFFER_MIN_SIZE);
> + ndev->work_buf = NULL;
> + return ret;
> + }
> +
> + ndev->work_buf_size = buf_size;
> + XDNA_DBG(xdna, "Work buffer allocated: size 0x%x", buf_size);
> +
> + return 0;
> +}
> +
> +static void aie4_free_work_buffer(struct amdxdna_dev_hdl *ndev)
> +{
> + struct amdxdna_dev *xdna = ndev->aie.xdna;
> +
> + if (!ndev->work_buf)
> + return;
> +
> + amdxdna_free_msg_buffer(xdna, ndev->work_buf_size, ndev->work_buf,
> + ndev->work_buf_addr);
> + ndev->work_buf = NULL;
> +}
> +
> static int aie4_pf_init(struct amdxdna_dev *xdna)
> {
> int ret;
> @@ -572,7 +612,19 @@ static int aie4_pf_init(struct amdxdna_dev *xdna)
> if (ret)
> return ret;
>
> - return aie4_pf_hw_start(xdna->dev_handle);
> + ret = aie4_alloc_work_buffer(xdna->dev_handle);
> + if (ret)
> + return ret;
> +
> + ret = aie4_pf_hw_start(xdna->dev_handle);
> + if (ret)
> + goto free_work_buf;
> +
> + return 0;
> +
> +free_work_buf:
> + aie4_free_work_buffer(xdna->dev_handle);
> + return ret;
> }
>
> static int aie4_vf_init(struct amdxdna_dev *xdna)
> @@ -590,6 +642,7 @@ static void aie4_pf_fini(struct amdxdna_dev *xdna)
> {
> aie4_sriov_stop(xdna->dev_handle);
> aie4_pf_hw_stop(xdna->dev_handle);
> + aie4_free_work_buffer(xdna->dev_handle);
> }
>
> static void aie4_vf_fini(struct amdxdna_dev *xdna)
> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
> index 1886cffc62db..390864876ca5 100644
> --- a/drivers/accel/amdxdna/aie4_pci.h
> +++ b/drivers/accel/amdxdna/aie4_pci.h
> @@ -53,11 +53,16 @@ struct amdxdna_dev_hdl {
>
> struct xarray cert_comp_xa; /* device level indexed by msix id */
> struct mutex cert_comp_lock; /* protects cert_comp operations*/
> +
> + void *work_buf;
> + dma_addr_t work_buf_addr;
> + u32 work_buf_size;
> };
>
> /* aie4_message.c */
> int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
> int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
> +int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev);
>
> /* aie4_ctx.c */
> int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index c0d00db25cde..a6e9be7960c2 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -40,9 +40,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
> * 0.7: Support getting power and utilization data
> * 0.8: Support BO usage query
> * 0.9: Add new device type AMDXDNA_DEV_TYPE_PF
> + * 0.10: Support AIE4 UMQ
> */
> #define AMDXDNA_DRIVER_MAJOR 0
> -#define AMDXDNA_DRIVER_MINOR 9
> +#define AMDXDNA_DRIVER_MINOR 10
>
> /*
> * Bind the driver base on (vendor_id, device_id) pair and later use the
^ permalink raw reply [flat|nested] 16+ messages in thread