public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH V2 0/8] AMD NPU driver improvements
@ 2024-12-06 21:59 Lizhi Hou
  2024-12-06 21:59 ` [PATCH V2 1/8] accel/amdxdna: Add device status for aie2 devices Lizhi Hou
                   ` (8 more replies)
  0 siblings, 9 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-06 21:59 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel
  Cc: Lizhi Hou, linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

Add recent improvements and bug fixes for amdxdna driver (depends on [1])
1. Support recent hardware and firmware.
2. Replace idr APIs with xarray.
3. Fix the bugs that have been found.

[1]: https://lore.kernel.org/all/20241118172942.2014541-1-lizhi.hou@amd.com/

Changes since v1:
- Add one patch to fix possible compiling failure
- Minor fixes for code review comments

Lizhi Hou (8):
  accel/amdxdna: Add device status for aie2 devices
  accel/amdxdna: Replace mmput with mmput_async to avoid dead lock
  accel/amdxdna: Add RyzenAI-npu6 support
  accel/amdxdna: Replace idr api with xarray
  accel/amdxdna: Add query firmware version
  accel/amdxdna: Enhance power management settings
  accel/amdxdna: Read firmware interface version from registers
  accel/amdxdna: Add include interrupt.h to amdxdna_mailbox.c

 drivers/accel/amdxdna/Makefile          |   4 +-
 drivers/accel/amdxdna/TODO              |   2 -
 drivers/accel/amdxdna/aie2_ctx.c        |  12 +-
 drivers/accel/amdxdna/aie2_message.c    |  37 +---
 drivers/accel/amdxdna/aie2_pci.c        | 252 +++++++++++++++++++-----
 drivers/accel/amdxdna/aie2_pci.h        |  68 +++++--
 drivers/accel/amdxdna/aie2_pm.c         | 108 ++++++++++
 drivers/accel/amdxdna/aie2_smu.c        |  85 ++++----
 drivers/accel/amdxdna/aie2_solver.c     |  59 +++++-
 drivers/accel/amdxdna/aie2_solver.h     |   1 +
 drivers/accel/amdxdna/amdxdna_ctx.c     |  46 ++---
 drivers/accel/amdxdna/amdxdna_mailbox.c |  60 +++---
 drivers/accel/amdxdna/amdxdna_pci_drv.c |  23 ++-
 drivers/accel/amdxdna/amdxdna_pci_drv.h |   8 +-
 drivers/accel/amdxdna/npu1_regs.c       |  31 ++-
 drivers/accel/amdxdna/npu2_regs.c       |  17 +-
 drivers/accel/amdxdna/npu4_regs.c       |  34 +++-
 drivers/accel/amdxdna/npu5_regs.c       |  17 +-
 drivers/accel/amdxdna/npu6_regs.c       | 114 +++++++++++
 include/uapi/drm/amdxdna_accel.h        |  68 ++++++-
 20 files changed, 802 insertions(+), 244 deletions(-)
 create mode 100644 drivers/accel/amdxdna/aie2_pm.c
 create mode 100644 drivers/accel/amdxdna/npu6_regs.c

-- 
2.34.1


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH V2 1/8] accel/amdxdna: Add device status for aie2 devices
  2024-12-06 21:59 [PATCH V2 0/8] AMD NPU driver improvements Lizhi Hou
@ 2024-12-06 21:59 ` Lizhi Hou
  2024-12-11  0:31   ` Mario Limonciello
  2024-12-13 16:31   ` Jeffrey Hugo
  2024-12-06 21:59 ` [PATCH V2 2/8] accel/amdxdna: Replace mmput with mmput_async to avoid dead lock Lizhi Hou
                   ` (7 subsequent siblings)
  8 siblings, 2 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-06 21:59 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel
  Cc: Lizhi Hou, linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

Add a device status to track whether aie2_hw_start() or aie2_hw_stop() is
re-entered. In aie2_hw_stop(), call drmm_kfree to free mbox.

Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_pci.c        | 17 +++++++++++++++++
 drivers/accel/amdxdna/aie2_pci.h        |  7 +++++++
 drivers/accel/amdxdna/amdxdna_mailbox.c |  6 ------
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 349ada697e48..19c76b2b204b 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -267,12 +267,22 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
 	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
 	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
 
+	if (ndev->dev_status <= AIE2_DEV_INIT) {
+		XDNA_ERR(xdna, "device is already stopped");
+		return;
+	}
+
 	aie2_mgmt_fw_fini(ndev);
 	xdna_mailbox_stop_channel(ndev->mgmt_chann);
 	xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+	ndev->mgmt_chann = NULL;
+	drmm_kfree(&xdna->ddev, ndev->mbox);
+	ndev->mbox = NULL;
 	aie2_psp_stop(ndev->psp_hdl);
 	aie2_smu_fini(ndev);
 	pci_disable_device(pdev);
+
+	ndev->dev_status = AIE2_DEV_INIT;
 }
 
 static int aie2_hw_start(struct amdxdna_dev *xdna)
@@ -283,6 +293,11 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
 	u32 xdna_mailbox_intr_reg;
 	int mgmt_mb_irq, ret;
 
+	if (ndev->dev_status >= AIE2_DEV_START) {
+		XDNA_INFO(xdna, "device is already started");
+		return 0;
+	}
+
 	ret = pci_enable_device(pdev);
 	if (ret) {
 		XDNA_ERR(xdna, "failed to enable device, ret %d", ret);
@@ -345,6 +360,8 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
 		goto destroy_mgmt_chann;
 	}
 
+	ndev->dev_status = AIE2_DEV_START;
+
 	return 0;
 
 destroy_mgmt_chann:
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 6a2686255c9c..1c6f07d9b805 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -149,6 +149,11 @@ struct amdxdna_hwctx_priv {
 	struct drm_syncobj		*syncobj;
 };
 
+enum aie2_dev_status {
+	AIE2_DEV_INIT,
+	AIE2_DEV_START,
+};
+
 struct amdxdna_dev_hdl {
 	struct amdxdna_dev		*xdna;
 	const struct amdxdna_dev_priv	*priv;
@@ -171,6 +176,8 @@ struct amdxdna_dev_hdl {
 	struct mailbox			*mbox;
 	struct mailbox_channel		*mgmt_chann;
 	struct async_events		*async_events;
+
+	enum aie2_dev_status		dev_status;
 };
 
 #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index 415d99abaaa3..eab79dbb8b25 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -530,9 +530,6 @@ xdna_mailbox_create_channel(struct mailbox *mb,
 
 int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
 {
-	if (!mb_chann)
-		return 0;
-
 	MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
 	free_irq(mb_chann->msix_irq, mb_chann);
 	destroy_workqueue(mb_chann->work_q);
@@ -548,9 +545,6 @@ int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
 
 void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann)
 {
-	if (!mb_chann)
-		return;
-
 	/* Disable an irq and wait. This might sleep. */
 	disable_irq(mb_chann->msix_irq);
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH V2 2/8] accel/amdxdna: Replace mmput with mmput_async to avoid dead lock
  2024-12-06 21:59 [PATCH V2 0/8] AMD NPU driver improvements Lizhi Hou
  2024-12-06 21:59 ` [PATCH V2 1/8] accel/amdxdna: Add device status for aie2 devices Lizhi Hou
@ 2024-12-06 21:59 ` Lizhi Hou
  2024-12-13 16:33   ` Jeffrey Hugo
  2024-12-06 21:59 ` [PATCH V2 3/8] accel/amdxdna: Add RyzenAI-npu6 support Lizhi Hou
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 33+ messages in thread
From: Lizhi Hou @ 2024-12-06 21:59 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel
  Cc: Lizhi Hou, linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

The hardware mailbox message receiving handler calls mmput to release the
process mm. If the process has already exited, the mmput here may invoke the
MMU notifier handler, amdxdna_hmm_invalidate, which will cause a deadlock.
Using mmput_async instead prevents this deadlock.

Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_ctx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 90e8d87666a9..b5282555bbf2 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -179,7 +179,7 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
 	up(&job->hwctx->priv->job_sem);
 	job->job_done = true;
 	dma_fence_put(fence);
-	mmput(job->mm);
+	mmput_async(job->mm);
 	aie2_job_put(job);
 }
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH V2 3/8] accel/amdxdna: Add RyzenAI-npu6 support
  2024-12-06 21:59 [PATCH V2 0/8] AMD NPU driver improvements Lizhi Hou
  2024-12-06 21:59 ` [PATCH V2 1/8] accel/amdxdna: Add device status for aie2 devices Lizhi Hou
  2024-12-06 21:59 ` [PATCH V2 2/8] accel/amdxdna: Replace mmput with mmput_async to avoid dead lock Lizhi Hou
@ 2024-12-06 21:59 ` Lizhi Hou
  2024-12-11  0:30   ` Mario Limonciello
  2024-12-13 16:37   ` Jeffrey Hugo
  2024-12-06 21:59 ` [PATCH V2 4/8] accel/amdxdna: Replace idr api with xarray Lizhi Hou
                   ` (5 subsequent siblings)
  8 siblings, 2 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-06 21:59 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel
  Cc: Lizhi Hou, linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello, Xiaoming Ren

Add NPU6 registers and other private configurations.

Co-developed-by: Xiaoming Ren <xiaoming.ren@amd.com>
Signed-off-by: Xiaoming Ren <xiaoming.ren@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/Makefile    |   3 +-
 drivers/accel/amdxdna/npu6_regs.c | 121 ++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+), 1 deletion(-)
 create mode 100644 drivers/accel/amdxdna/npu6_regs.c

diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
index ed6f87910880..6baf181298de 100644
--- a/drivers/accel/amdxdna/Makefile
+++ b/drivers/accel/amdxdna/Makefile
@@ -17,5 +17,6 @@ amdxdna-y := \
 	npu1_regs.o \
 	npu2_regs.o \
 	npu4_regs.o \
-	npu5_regs.o
+	npu5_regs.o \
+	npu6_regs.o
 obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
new file mode 100644
index 000000000000..d1168fc55533
--- /dev/null
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR             0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR         0x3010064
+#define MPNPU_PUB_SCRATCH0             0x301006C
+#define MPNPU_PUB_SCRATCH1             0x3010070
+#define MPNPU_PUB_SCRATCH2             0x3010074
+#define MPNPU_PUB_SCRATCH3             0x3010078
+#define MPNPU_PUB_SCRATCH4             0x301007C
+#define MPNPU_PUB_SCRATCH5             0x3010080
+#define MPNPU_PUB_SCRATCH6             0x3010084
+#define MPNPU_PUB_SCRATCH7             0x3010088
+#define MPNPU_PUB_SCRATCH8             0x301008C
+#define MPNPU_PUB_SCRATCH9             0x3010090
+#define MPNPU_PUB_SCRATCH10            0x3010094
+#define MPNPU_PUB_SCRATCH11            0x3010098
+#define MPNPU_PUB_SCRATCH12            0x301009C
+#define MPNPU_PUB_SCRATCH13            0x30100A0
+#define MPNPU_PUB_SCRATCH14            0x30100A4
+#define MPNPU_PUB_SCRATCH15            0x30100A8
+#define MP0_C2PMSG_73                  0x3810A24
+#define MP0_C2PMSG_123                 0x3810AEC
+
+#define MP1_C2PMSG_0                   0x3B10900
+#define MP1_C2PMSG_60                  0x3B109F0
+#define MP1_C2PMSG_61                  0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0       0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15      0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31      0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31      0x363F000
+
+#define MMNPU_APERTURE0_BASE           0x3000000
+#define MMNPU_APERTURE1_BASE           0x3600000
+#define MMNPU_APERTURE3_BASE           0x3810000
+#define MMNPU_APERTURE4_BASE           0x3B10000
+
+/* PCIe BAR Index for NPU6 */
+#define NPU6_REG_BAR_INDEX	0
+#define NPU6_MBOX_BAR_INDEX	0
+#define NPU6_PSP_BAR_INDEX	4
+#define NPU6_SMU_BAR_INDEX	5
+#define NPU6_SRAM_BAR_INDEX	2
+/* Associated BARs and Apertures */
+#define NPU6_REG_BAR_BASE	MMNPU_APERTURE0_BASE
+#define NPU6_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE
+#define NPU6_PSP_BAR_BASE	MMNPU_APERTURE3_BASE
+#define NPU6_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
+#define NPU6_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
+
+#define NPU6_RT_CFG_TYPE_PDI_LOAD 5
+#define NPU6_RT_CFG_TYPE_DEBUG_BO 10
+
+#define NPU6_RT_CFG_VAL_PDI_LOAD_MGMT 0
+#define NPU6_RT_CFG_VAL_PDI_LOAD_APP 1
+
+#define NPU6_RT_CFG_VAL_DEBUG_BO_DEFAULT 0
+#define NPU6_RT_CFG_VAL_DEBUG_BO_LARGE   1
+
+#define NPU6_MPNPUCLK_FREQ_MAX  1267
+#define NPU6_HCLK_FREQ_MAX      1800
+
+const struct amdxdna_dev_priv npu6_dev_priv = {
+	.fw_path        = "amdnpu/17f0_10/npu.sbin",
+	.protocol_major = 0x6,
+	.protocol_minor = 12,
+	.rt_config	= {NPU6_RT_CFG_TYPE_PDI_LOAD, NPU6_RT_CFG_VAL_PDI_LOAD_APP},
+	.col_align	= COL_ALIGN_NATURE,
+	.mbox_dev_addr  = NPU6_MBOX_BAR_BASE,
+	.mbox_size      = 0, /* Use BAR size */
+	.sram_dev_addr  = NPU6_SRAM_BAR_BASE,
+	.sram_offs      = {
+		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+	},
+	.psp_regs_off   = {
+		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU6_PSP, MP0_C2PMSG_123),
+		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU6_REG, MPNPU_PUB_SCRATCH3),
+		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU6_REG, MPNPU_PUB_SCRATCH4),
+		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU6_REG, MPNPU_PUB_SCRATCH9),
+		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU6_PSP, MP0_C2PMSG_73),
+		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123),
+		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU6_REG, MPNPU_PUB_SCRATCH3),
+	},
+	.smu_regs_off   = {
+		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU6_SMU, MP1_C2PMSG_0),
+		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU6_SMU, MP1_C2PMSG_60),
+		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU6_SMU, MMNPU_APERTURE4_BASE),
+		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
+		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
+	},
+};
+
+const struct amdxdna_dev_info dev_npu6_info = {
+	.reg_bar           = NPU6_REG_BAR_INDEX,
+	.mbox_bar          = NPU6_MBOX_BAR_INDEX,
+	.sram_bar          = NPU6_SRAM_BAR_INDEX,
+	.psp_bar           = NPU6_PSP_BAR_INDEX,
+	.smu_bar           = NPU6_SMU_BAR_INDEX,
+	.first_col         = 0,
+	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
+	.dev_mem_base      = AIE2_DEVM_BASE,
+	.dev_mem_size      = AIE2_DEVM_SIZE,
+	.vbnv              = "RyzenAI-npu6",
+	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
+	.dev_priv          = &npu6_dev_priv,
+	.ops               = &aie2_ops,
+};
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH V2 4/8] accel/amdxdna: Replace idr api with xarray
  2024-12-06 21:59 [PATCH V2 0/8] AMD NPU driver improvements Lizhi Hou
                   ` (2 preceding siblings ...)
  2024-12-06 21:59 ` [PATCH V2 3/8] accel/amdxdna: Add RyzenAI-npu6 support Lizhi Hou
@ 2024-12-06 21:59 ` Lizhi Hou
  2024-12-13 16:42   ` Jeffrey Hugo
  2024-12-06 21:59 ` [PATCH V2 5/8] accel/amdxdna: Add query firmware version Lizhi Hou
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 33+ messages in thread
From: Lizhi Hou @ 2024-12-06 21:59 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel
  Cc: Lizhi Hou, linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

Switch mailbox message id and hardware context id management over from
the idr api to the xarray api.

Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/TODO              |  1 -
 drivers/accel/amdxdna/aie2_ctx.c        |  4 +-
 drivers/accel/amdxdna/aie2_message.c    |  4 +-
 drivers/accel/amdxdna/aie2_pci.c        |  5 +--
 drivers/accel/amdxdna/amdxdna_ctx.c     | 46 ++++++++-------------
 drivers/accel/amdxdna/amdxdna_mailbox.c | 55 ++++++++++---------------
 drivers/accel/amdxdna/amdxdna_pci_drv.c |  4 +-
 drivers/accel/amdxdna/amdxdna_pci_drv.h |  6 ++-
 8 files changed, 53 insertions(+), 72 deletions(-)

diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
index a130259f5f70..de4e1dbc8868 100644
--- a/drivers/accel/amdxdna/TODO
+++ b/drivers/accel/amdxdna/TODO
@@ -1,4 +1,3 @@
-- Replace idr with xa
 - Add import and export BO support
 - Add debugfs support
 - Add debug BO support
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index b5282555bbf2..07eecb40767f 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -90,11 +90,11 @@ void aie2_restart_ctx(struct amdxdna_client *client)
 {
 	struct amdxdna_dev *xdna = client->xdna;
 	struct amdxdna_hwctx *hwctx;
-	int next = 0;
+	unsigned long hwctx_id;
 
 	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
 	mutex_lock(&client->hwctx_lock);
-	idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
+	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
 		if (hwctx->status != HWCTX_STAT_STOP)
 			continue;
 
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index c01a1d957b56..fc33a158d223 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -315,10 +315,10 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
 	struct amdxdna_dev *xdna = ndev->xdna;
 	struct amdxdna_client *client;
 	struct amdxdna_hwctx *hwctx;
+	unsigned long hwctx_id;
 	dma_addr_t dma_addr;
 	u32 aie_bitmap = 0;
 	u8 *buff_addr;
-	int next = 0;
 	int ret, idx;
 
 	buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
@@ -329,7 +329,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
 	/* Go through each hardware context and mark the AIE columns that are active */
 	list_for_each_entry(client, &xdna->client_list, node) {
 		idx = srcu_read_lock(&client->hwctx_srcu);
-		idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next)
+		amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
 			aie_bitmap |= amdxdna_hwctx_col_map(hwctx);
 		srcu_read_unlock(&client->hwctx_srcu, idx);
 	}
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 19c76b2b204b..1c8170325837 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -674,11 +674,11 @@ static int aie2_get_hwctx_status(struct amdxdna_client *client,
 	struct amdxdna_drm_query_hwctx *tmp;
 	struct amdxdna_client *tmp_client;
 	struct amdxdna_hwctx *hwctx;
+	unsigned long hwctx_id;
 	bool overflow = false;
 	u32 req_bytes = 0;
 	u32 hw_i = 0;
 	int ret = 0;
-	int next;
 	int idx;
 
 	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
@@ -690,8 +690,7 @@ static int aie2_get_hwctx_status(struct amdxdna_client *client,
 	buf = u64_to_user_ptr(args->buffer);
 	list_for_each_entry(tmp_client, &xdna->client_list, node) {
 		idx = srcu_read_lock(&tmp_client->hwctx_srcu);
-		next = 0;
-		idr_for_each_entry_continue(&tmp_client->hwctx_idr, hwctx, next) {
+		amdxdna_for_each_hwctx(tmp_client, hwctx_id, hwctx) {
 			req_bytes += sizeof(*tmp);
 			if (args->buffer_size < req_bytes) {
 				/* Continue iterating to get the required size */
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 5478b631b73f..64fc59b20a24 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -63,11 +63,11 @@ void amdxdna_hwctx_suspend(struct amdxdna_client *client)
 {
 	struct amdxdna_dev *xdna = client->xdna;
 	struct amdxdna_hwctx *hwctx;
-	int next = 0;
+	unsigned long hwctx_id;
 
 	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
 	mutex_lock(&client->hwctx_lock);
-	idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next)
+	amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
 		xdna->dev_info->ops->hwctx_suspend(hwctx);
 	mutex_unlock(&client->hwctx_lock);
 }
@@ -76,11 +76,11 @@ void amdxdna_hwctx_resume(struct amdxdna_client *client)
 {
 	struct amdxdna_dev *xdna = client->xdna;
 	struct amdxdna_hwctx *hwctx;
-	int next = 0;
+	unsigned long hwctx_id;
 
 	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
 	mutex_lock(&client->hwctx_lock);
-	idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next)
+	amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
 		xdna->dev_info->ops->hwctx_resume(hwctx);
 	mutex_unlock(&client->hwctx_lock);
 }
@@ -149,13 +149,13 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
 void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
 {
 	struct amdxdna_hwctx *hwctx;
-	int next = 0;
+	unsigned long hwctx_id;
 
 	mutex_lock(&client->hwctx_lock);
-	idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
+	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
 		XDNA_DBG(client->xdna, "PID %d close HW context %d",
 			 client->pid, hwctx->id);
-		idr_remove(&client->hwctx_idr, hwctx->id);
+		xa_erase(&client->hwctx_xa, hwctx->id);
 		mutex_unlock(&client->hwctx_lock);
 		amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
 		mutex_lock(&client->hwctx_lock);
@@ -194,15 +194,13 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
 	hwctx->num_tiles = args->num_tiles;
 	hwctx->mem_size = args->mem_size;
 	hwctx->max_opc = args->max_opc;
-	mutex_lock(&client->hwctx_lock);
-	ret = idr_alloc_cyclic(&client->hwctx_idr, hwctx, 0, MAX_HWCTX_ID, GFP_KERNEL);
+	ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
+			      XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
+			      &client->next_hwctxid, GFP_KERNEL);
 	if (ret < 0) {
-		mutex_unlock(&client->hwctx_lock);
 		XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
 		goto free_hwctx;
 	}
-	hwctx->id = ret;
-	mutex_unlock(&client->hwctx_lock);
 
 	hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
 	if (!hwctx->name) {
@@ -228,9 +226,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
 free_name:
 	kfree(hwctx->name);
 rm_id:
-	mutex_lock(&client->hwctx_lock);
-	idr_remove(&client->hwctx_idr, hwctx->id);
-	mutex_unlock(&client->hwctx_lock);
+	xa_erase(&client->hwctx_xa, hwctx->id);
 free_hwctx:
 	kfree(hwctx);
 exit:
@@ -249,24 +245,18 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d
 	if (!drm_dev_enter(dev, &idx))
 		return -ENODEV;
 
-	/*
-	 * Use hwctx_lock to achieve exclusion with other hwctx writers,
-	 * SRCU to synchronize with exec/wait command ioctls.
-	 *
-	 * The pushed jobs are handled by DRM scheduler during destroy.
-	 */
-	mutex_lock(&client->hwctx_lock);
-	hwctx = idr_find(&client->hwctx_idr, args->handle);
+	hwctx = xa_erase(&client->hwctx_xa, args->handle);
 	if (!hwctx) {
-		mutex_unlock(&client->hwctx_lock);
 		ret = -EINVAL;
 		XDNA_DBG(xdna, "PID %d HW context %d not exist",
 			 client->pid, args->handle);
 		goto out;
 	}
-	idr_remove(&client->hwctx_idr, hwctx->id);
-	mutex_unlock(&client->hwctx_lock);
 
+	/*
+	 * The pushed jobs are handled by DRM scheduler during destroy.
+	 * SRCU to synchronize with exec command ioctls.
+	 */
 	amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
 
 	XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle);
@@ -324,7 +314,7 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
 
 	mutex_lock(&xdna->dev_lock);
 	idx = srcu_read_lock(&client->hwctx_srcu);
-	hwctx = idr_find(&client->hwctx_idr, args->handle);
+	hwctx = xa_load(&client->hwctx_xa, args->handle);
 	if (!hwctx) {
 		XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
 		ret = -EINVAL;
@@ -436,7 +426,7 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
 	}
 
 	idx = srcu_read_lock(&client->hwctx_srcu);
-	hwctx = idr_find(&client->hwctx_idr, hwctx_hdl);
+	hwctx = xa_load(&client->hwctx_xa, hwctx_hdl);
 	if (!hwctx) {
 		XDNA_DBG(xdna, "PID %d failed to get hwctx %d",
 			 client->pid, hwctx_hdl);
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index eab79dbb8b25..fe684f463b94 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -54,8 +54,8 @@ struct mailbox_channel {
 	struct xdna_mailbox_chann_res	res[CHAN_RES_NUM];
 	int				msix_irq;
 	u32				iohub_int_addr;
-	struct idr			chan_idr;
-	spinlock_t			chan_idr_lock; /* protect chan_idr */
+	struct xarray			chan_xa;
+	u32				next_msgid;
 	u32				x2i_tail;
 
 	/* Received msg related fields */
@@ -164,19 +164,17 @@ static inline int mailbox_validate_msgid(int msg_id)
 
 static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
 {
-	unsigned long flags;
-	int msg_id;
+	u32 msg_id;
+	int ret;
 
-	spin_lock_irqsave(&mb_chann->chan_idr_lock, flags);
-	msg_id = idr_alloc_cyclic(&mb_chann->chan_idr, mb_msg, 0,
-				  MAX_MSG_ID_ENTRIES, GFP_NOWAIT);
-	spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags);
-	if (msg_id < 0)
-		return msg_id;
+	ret = xa_alloc_cyclic_irq(&mb_chann->chan_xa, &msg_id, mb_msg,
+				  XA_LIMIT(0, MAX_MSG_ID_ENTRIES - 1),
+				  &mb_chann->next_msgid, GFP_NOWAIT);
+	if (ret < 0)
+		return ret;
 
 	/*
-	 * The IDR becomes less efficient when dealing with larger IDs.
-	 * Thus, add MAGIC_VAL to the higher bits.
+	 * Add MAGIC_VAL to the higher bits.
 	 */
 	msg_id |= MAGIC_VAL;
 	return msg_id;
@@ -184,25 +182,17 @@ static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbo
 
 static void mailbox_release_msgid(struct mailbox_channel *mb_chann, int msg_id)
 {
-	unsigned long flags;
-
 	msg_id &= ~MAGIC_VAL_MASK;
-	spin_lock_irqsave(&mb_chann->chan_idr_lock, flags);
-	idr_remove(&mb_chann->chan_idr, msg_id);
-	spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags);
+	xa_erase_irq(&mb_chann->chan_xa, msg_id);
 }
 
-static int mailbox_release_msg(int id, void *p, void *data)
+static void mailbox_release_msg(struct mailbox_channel *mb_chann,
+				struct mailbox_msg *mb_msg)
 {
-	struct mailbox_channel *mb_chann = data;
-	struct mailbox_msg *mb_msg = p;
-
 	MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
 	       mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
 	mb_msg->notify_cb(mb_msg->handle, NULL, 0);
 	kfree(mb_msg);
-
-	return 0;
 }
 
 static int
@@ -254,7 +244,6 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
 		 void *data)
 {
 	struct mailbox_msg *mb_msg;
-	unsigned long flags;
 	int msg_id;
 	int ret;
 
@@ -265,15 +254,11 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
 	}
 
 	msg_id &= ~MAGIC_VAL_MASK;
-	spin_lock_irqsave(&mb_chann->chan_idr_lock, flags);
-	mb_msg = idr_find(&mb_chann->chan_idr, msg_id);
+	mb_msg = xa_erase_irq(&mb_chann->chan_xa, msg_id);
 	if (!mb_msg) {
 		MB_ERR(mb_chann, "Cannot find msg 0x%x", msg_id);
-		spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags);
 		return -EINVAL;
 	}
-	idr_remove(&mb_chann->chan_idr, msg_id);
-	spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags);
 
 	MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
 	       header->opcode, header->total_size, header->id);
@@ -497,8 +482,7 @@ xdna_mailbox_create_channel(struct mailbox *mb,
 	memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i));
 	memcpy(&mb_chann->res[CHAN_RES_I2X], i2x, sizeof(*i2x));
 
-	spin_lock_init(&mb_chann->chan_idr_lock);
-	idr_init(&mb_chann->chan_idr);
+	xa_init_flags(&mb_chann->chan_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
 	mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I);
 	mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X);
 
@@ -530,13 +514,18 @@ xdna_mailbox_create_channel(struct mailbox *mb,
 
 int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
 {
+	struct mailbox_msg *mb_msg;
+	unsigned long msg_id;
+
 	MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
 	free_irq(mb_chann->msix_irq, mb_chann);
 	destroy_workqueue(mb_chann->work_q);
 	/* We can clean up and release resources */
 
-	idr_for_each(&mb_chann->chan_idr, mailbox_release_msg, mb_chann);
-	idr_destroy(&mb_chann->chan_idr);
+	xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg)
+		mailbox_release_msg(mb_chann, mb_msg);
+
+	xa_destroy(&mb_chann->chan_xa);
 
 	MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq);
 	kfree(mb_chann);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 02533732d4ca..c3541796d189 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -77,7 +77,7 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
 	}
 	mutex_init(&client->hwctx_lock);
 	init_srcu_struct(&client->hwctx_srcu);
-	idr_init_base(&client->hwctx_idr, AMDXDNA_INVALID_CTX_HANDLE + 1);
+	xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
 	mutex_init(&client->mm_lock);
 
 	mutex_lock(&xdna->dev_lock);
@@ -108,7 +108,7 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
 
 	XDNA_DBG(xdna, "closing pid %d", client->pid);
 
-	idr_destroy(&client->hwctx_idr);
+	xa_destroy(&client->hwctx_xa);
 	cleanup_srcu_struct(&client->hwctx_srcu);
 	mutex_destroy(&client->hwctx_lock);
 	mutex_destroy(&client->mm_lock);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index c50d65a050ad..f5b830fb14bb 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -100,7 +100,8 @@ struct amdxdna_client {
 	struct mutex			hwctx_lock; /* protect hwctx */
 	/* do NOT wait this srcu when hwctx_lock is held */
 	struct srcu_struct		hwctx_srcu;
-	struct idr			hwctx_idr;
+	struct xarray			hwctx_xa;
+	u32				next_hwctxid;
 	struct amdxdna_dev		*xdna;
 	struct drm_file			*filp;
 
@@ -111,6 +112,9 @@ struct amdxdna_client {
 	int				pasid;
 };
 
+#define amdxdna_for_each_hwctx(client, hwctx_id, entry)		\
+	xa_for_each(&(client)->hwctx_xa, hwctx_id, entry)
+
 /* Add device info below */
 extern const struct amdxdna_dev_info dev_npu1_info;
 extern const struct amdxdna_dev_info dev_npu2_info;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH V2 5/8] accel/amdxdna: Add query firmware version
  2024-12-06 21:59 [PATCH V2 0/8] AMD NPU driver improvements Lizhi Hou
                   ` (3 preceding siblings ...)
  2024-12-06 21:59 ` [PATCH V2 4/8] accel/amdxdna: Replace idr api with xarray Lizhi Hou
@ 2024-12-06 21:59 ` Lizhi Hou
  2024-12-11  0:28   ` Mario Limonciello
  2024-12-13 16:48   ` Jeffrey Hugo
  2024-12-06 21:59 ` [PATCH V2 6/8] accel/amdxdna: Enhance power management settings Lizhi Hou
                   ` (3 subsequent siblings)
  8 siblings, 2 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-06 21:59 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel
  Cc: Lizhi Hou, linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

Enhance GET_INFO ioctl to support retrieving firmware version.

Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_pci.c | 20 ++++++++++++++++++++
 include/uapi/drm/amdxdna_accel.h | 16 +++++++++++++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 1c8170325837..83abd16ade11 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -640,6 +640,23 @@ static int aie2_get_aie_version(struct amdxdna_client *client,
 	return 0;
 }
 
+static int aie2_get_firmware_version(struct amdxdna_client *client,
+				     struct amdxdna_drm_get_info *args)
+{
+	struct amdxdna_drm_query_firmware_version version;
+	struct amdxdna_dev *xdna = client->xdna;
+
+	version.major = xdna->fw_ver.major;
+	version.minor = xdna->fw_ver.minor;
+	version.patch = xdna->fw_ver.sub;
+	version.build = xdna->fw_ver.build;
+
+	if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version)))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int aie2_get_clock_metadata(struct amdxdna_client *client,
 				   struct amdxdna_drm_get_info *args)
 {
@@ -752,6 +769,9 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
 	case DRM_AMDXDNA_QUERY_HW_CONTEXTS:
 		ret = aie2_get_hwctx_status(client, args);
 		break;
+	case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
+		ret = aie2_get_firmware_version(client, args);
+		break;
 	default:
 		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
 		ret = -EOPNOTSUPP;
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index af12af8bd699..4f15e53a548d 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -375,6 +375,20 @@ struct amdxdna_drm_query_hwctx {
 	__u64 errors;
 };
 
+/**
+ * struct amdxdna_drm_query_firmware_version - Query the firmware version
+ * @major: The major version number
+ * @minor: The minor version number
+ * @patch: The patch level version number
+ * @build: The build ID
+ */
+struct amdxdna_drm_query_firmware_version {
+	__u32 major; /* out */
+	__u32 minor; /* out */
+	__u32 patch; /* out */
+	__u32 build; /* out */
+};
+
 enum amdxdna_drm_get_param {
 	DRM_AMDXDNA_QUERY_AIE_STATUS,
 	DRM_AMDXDNA_QUERY_AIE_METADATA,
@@ -382,7 +396,7 @@ enum amdxdna_drm_get_param {
 	DRM_AMDXDNA_QUERY_CLOCK_METADATA,
 	DRM_AMDXDNA_QUERY_SENSORS,
 	DRM_AMDXDNA_QUERY_HW_CONTEXTS,
-	DRM_AMDXDNA_NUM_GET_PARAM,
+	DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
 };
 
 /**
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH V2 6/8] accel/amdxdna: Enhance power management settings
  2024-12-06 21:59 [PATCH V2 0/8] AMD NPU driver improvements Lizhi Hou
                   ` (4 preceding siblings ...)
  2024-12-06 21:59 ` [PATCH V2 5/8] accel/amdxdna: Add query firmware version Lizhi Hou
@ 2024-12-06 21:59 ` Lizhi Hou
  2024-12-11  0:28   ` Mario Limonciello
  2024-12-13 16:55   ` Jeffrey Hugo
  2024-12-06 22:00 ` [PATCH V2 7/8] accel/amdxdna: Read firmware interface version from registers Lizhi Hou
                   ` (2 subsequent siblings)
  8 siblings, 2 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-06 21:59 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel
  Cc: Lizhi Hou, linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello, Narendra Gutta, George Yang

Add SET_STATE ioctl to configure device power mode for aie2 device.
Three modes are supported initially.

POWER_MODE_DEFAULT: Enable clock gating and set the DPM (Dynamic Power
Management) level to the value chosen by the resource solver, or to the
maximum DPM level the device supports.

POWER_MODE_HIGH: Enable clock gating and set DPM level to maximum DPM
level the device supports.

POWER_MODE_TURBO: Disable clock gating and set DPM level to maximum DPM
level the device supports.

Disabling clock gating means all clocks always run at full speed, and
different clock frequencies are used based on the DPM level that has
been set. Initially, the driver sets the power mode to the default mode.

Co-developed-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
Signed-off-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
Co-developed-by: George Yang <George.Yang@amd.com>
Signed-off-by: George Yang <George.Yang@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/Makefile          |   1 +
 drivers/accel/amdxdna/TODO              |   1 -
 drivers/accel/amdxdna/aie2_ctx.c        |   6 ++
 drivers/accel/amdxdna/aie2_message.c    |   9 +-
 drivers/accel/amdxdna/aie2_pci.c        | 136 +++++++++++++++++++-----
 drivers/accel/amdxdna/aie2_pci.h        |  55 ++++++++--
 drivers/accel/amdxdna/aie2_pm.c         | 108 +++++++++++++++++++
 drivers/accel/amdxdna/aie2_smu.c        |  85 +++++++++------
 drivers/accel/amdxdna/aie2_solver.c     |  59 +++++++++-
 drivers/accel/amdxdna/aie2_solver.h     |   1 +
 drivers/accel/amdxdna/amdxdna_pci_drv.c |  19 ++++
 drivers/accel/amdxdna/amdxdna_pci_drv.h |   2 +
 drivers/accel/amdxdna/npu1_regs.c       |  29 +++--
 drivers/accel/amdxdna/npu2_regs.c       |  15 +--
 drivers/accel/amdxdna/npu4_regs.c       |  32 ++++--
 drivers/accel/amdxdna/npu5_regs.c       |  15 +--
 drivers/accel/amdxdna/npu6_regs.c       |  19 ++--
 include/uapi/drm/amdxdna_accel.h        |  52 +++++++++
 18 files changed, 516 insertions(+), 128 deletions(-)
 create mode 100644 drivers/accel/amdxdna/aie2_pm.c

diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
index 6baf181298de..0e9adf6890a0 100644
--- a/drivers/accel/amdxdna/Makefile
+++ b/drivers/accel/amdxdna/Makefile
@@ -5,6 +5,7 @@ amdxdna-y := \
 	aie2_error.o \
 	aie2_message.o \
 	aie2_pci.o \
+	aie2_pm.o \
 	aie2_psp.o \
 	aie2_smu.o \
 	aie2_solver.o \
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
index de4e1dbc8868..5119bccd1917 100644
--- a/drivers/accel/amdxdna/TODO
+++ b/drivers/accel/amdxdna/TODO
@@ -1,4 +1,3 @@
 - Add import and export BO support
 - Add debugfs support
 - Add debug BO support
-- Improve power management
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 07eecb40767f..6b4e6fcb7794 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -518,6 +518,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 	struct drm_gpu_scheduler *sched;
 	struct amdxdna_hwctx_priv *priv;
 	struct amdxdna_gem_obj *heap;
+	struct amdxdna_dev_hdl *ndev;
 	int i, ret;
 
 	priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
@@ -612,6 +613,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 	}
 
 	hwctx->status = HWCTX_STAT_INIT;
+	ndev = xdna->dev_handle;
+	ndev->hwctx_num++;
 
 	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
 
@@ -641,10 +644,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 
 void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
 {
+	struct amdxdna_dev_hdl *ndev;
 	struct amdxdna_dev *xdna;
 	int idx;
 
 	xdna = hwctx->client->xdna;
+	ndev = xdna->dev_handle;
+	ndev->hwctx_num--;
 	drm_sched_wqueue_stop(&hwctx->priv->sched);
 
 	/* Now, scheduler will not send command to device. */
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index fc33a158d223..13b5a96f8d25 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -70,11 +70,18 @@ int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
 int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value)
 {
 	DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
+	int ret;
 
 	req.type = type;
 	req.value = value;
 
-	return aie2_send_mgmt_msg_wait(ndev, &msg);
+	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+	if (ret) {
+		XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret);
+		return ret;
+	}
+
+	return 0;
 }
 
 int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 83abd16ade11..489744a2e226 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -109,28 +109,26 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
 	return 0;
 }
 
-static int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev)
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+		     enum rt_config_category category, u32 *val)
 {
-	const struct rt_config *cfg = &ndev->priv->rt_config;
-	u64 value;
+	const struct rt_config *cfg;
+	u32 value;
 	int ret;
 
-	ret = aie2_set_runtime_cfg(ndev, cfg->type, cfg->value);
-	if (ret) {
-		XDNA_ERR(ndev->xdna, "Set runtime type %d value %d failed",
-			 cfg->type, cfg->value);
-		return ret;
-	}
+	for (cfg = ndev->priv->rt_config; cfg->type; cfg++) {
+		if (cfg->category != category)
+			continue;
 
-	ret = aie2_get_runtime_cfg(ndev, cfg->type, &value);
-	if (ret) {
-		XDNA_ERR(ndev->xdna, "Get runtime cfg failed");
-		return ret;
+		value = val ? *val : cfg->value;
+		ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
+		if (ret) {
+			XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
+				 cfg->type, value);
+			return ret;
+		}
 	}
 
-	if (value != cfg->value)
-		return -EINVAL;
-
 	return 0;
 }
 
@@ -163,7 +161,7 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
 		return ret;
 	}
 
-	ret = aie2_runtime_cfg(ndev);
+	ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
 	if (ret) {
 		XDNA_ERR(ndev->xdna, "Runtime config failed");
 		return ret;
@@ -257,9 +255,25 @@ static int aie2_xrs_unload(void *cb_arg)
 	return ret;
 }
 
+static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+	struct amdxdna_dev_hdl *ndev;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+	ndev = xdna->dev_handle;
+	ndev->dft_dpm_level = dpm_level;
+	if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level)
+		return 0;
+
+	return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+}
+
 static struct xrs_action_ops aie2_xrs_actions = {
 	.load = aie2_xrs_load,
 	.unload = aie2_xrs_unload,
+	.set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
 };
 
 static void aie2_hw_stop(struct amdxdna_dev *xdna)
@@ -354,6 +368,12 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
 		goto stop_psp;
 	}
 
+	ret = aie2_pm_init(ndev);
+	if (ret) {
+		XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
+		goto destroy_mgmt_chann;
+	}
+
 	ret = aie2_mgmt_fw_init(ndev);
 	if (ret) {
 		XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
@@ -480,10 +500,9 @@ static int aie2_init(struct amdxdna_dev *xdna)
 	}
 	ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
 
-	xrs_cfg.clk_list.num_levels = 3;
-	xrs_cfg.clk_list.cu_clk_list[0] = 0;
-	xrs_cfg.clk_list.cu_clk_list[1] = 800;
-	xrs_cfg.clk_list.cu_clk_list[2] = 1000;
+	xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
+	for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
+		xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
 	xrs_cfg.sys_eff_factor = 1;
 	xrs_cfg.ddev = &xdna->ddev;
 	xrs_cfg.actions = &aie2_xrs_actions;
@@ -657,6 +676,22 @@ static int aie2_get_firmware_version(struct amdxdna_client *client,
 	return 0;
 }
 
+static int aie2_get_power_mode(struct amdxdna_client *client,
+			       struct amdxdna_drm_get_info *args)
+{
+	struct amdxdna_drm_get_power_mode mode = {};
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_dev_hdl *ndev;
+
+	ndev = xdna->dev_handle;
+	mode.power_mode = ndev->pw_mode;
+
+	if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode)))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int aie2_get_clock_metadata(struct amdxdna_client *client,
 				   struct amdxdna_drm_get_info *args)
 {
@@ -670,11 +705,11 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
 	if (!clock)
 		return -ENOMEM;
 
-	memcpy(clock->mp_npu_clock.name, ndev->mp_npu_clock.name,
-	       sizeof(clock->mp_npu_clock.name));
-	clock->mp_npu_clock.freq_mhz = ndev->mp_npu_clock.freq_mhz;
-	memcpy(clock->h_clock.name, ndev->h_clock.name, sizeof(clock->h_clock.name));
-	clock->h_clock.freq_mhz = ndev->h_clock.freq_mhz;
+	snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
+		 "MP-NPU Clock");
+	clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
+	snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock");
+	clock->h_clock.freq_mhz = ndev->hclk_freq;
 
 	if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock)))
 		ret = -EFAULT;
@@ -772,6 +807,9 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
 	case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
 		ret = aie2_get_firmware_version(client, args);
 		break;
+	case DRM_AMDXDNA_GET_POWER_MODE:
+		ret = aie2_get_power_mode(client, args);
+		break;
 	default:
 		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
 		ret = -EOPNOTSUPP;
@@ -782,12 +820,58 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
 	return ret;
 }
 
+static int aie2_set_power_mode(struct amdxdna_client *client,
+			       struct amdxdna_drm_set_state *args)
+{
+	struct amdxdna_drm_set_power_mode power_state;
+	enum amdxdna_power_mode_type power_mode;
+	struct amdxdna_dev *xdna = client->xdna;
+
+	if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
+			   sizeof(power_state))) {
+		XDNA_ERR(xdna, "Failed to copy power mode request into kernel");
+		return -EFAULT;
+	}
+
+	power_mode = power_state.power_mode;
+	if (power_mode > POWER_MODE_TURBO) {
+		XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
+		return -EINVAL;
+	}
+
+	return aie2_pm_set_mode(xdna->dev_handle, power_mode);
+}
+
+static int aie2_set_state(struct amdxdna_client *client,
+			  struct amdxdna_drm_set_state *args)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	int ret, idx;
+
+	if (!drm_dev_enter(&xdna->ddev, &idx))
+		return -ENODEV;
+
+	switch (args->param) {
+	case DRM_AMDXDNA_SET_POWER_MODE:
+		ret = aie2_set_power_mode(client, args);
+		break;
+	default:
+		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	drm_dev_exit(idx);
+	return ret;
+}
+
 const struct amdxdna_dev_ops aie2_ops = {
 	.init           = aie2_init,
 	.fini           = aie2_fini,
 	.resume         = aie2_hw_start,
 	.suspend        = aie2_hw_stop,
 	.get_aie_info   = aie2_get_info,
+	.set_aie_state	= aie2_set_state,
 	.hwctx_init     = aie2_hwctx_init,
 	.hwctx_fini     = aie2_hwctx_fini,
 	.hwctx_config   = aie2_hwctx_config,
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 1c6f07d9b805..8c17b74654ce 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -6,6 +6,7 @@
 #ifndef _AIE2_PCI_H_
 #define _AIE2_PCI_H_
 
+#include <drm/amdxdna_accel.h>
 #include <linux/semaphore.h>
 
 #include "amdxdna_mailbox.h"
@@ -48,9 +49,6 @@
 	pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
 })
 
-#define SMU_MPNPUCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_mpnpuclk_freq_max)
-#define SMU_HCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_hclk_freq_max)
-
 enum aie2_smu_reg_idx {
 	SMU_CMD_REG = 0,
 	SMU_ARG_REG,
@@ -112,14 +110,20 @@ struct aie_metadata {
 	struct aie_tile_metadata shim;
 };
 
-struct clock_entry {
-	char name[16];
-	u32 freq_mhz;
+enum rt_config_category {
+	AIE2_RT_CFG_INIT,
+	AIE2_RT_CFG_CLK_GATING,
 };
 
 struct rt_config {
 	u32	type;
 	u32	value;
+	u32	category;
+};
+
+struct dpm_clk_freq {
+	u32	npuclk;
+	u32	hclk;
 };
 
 /*
@@ -150,6 +154,7 @@ struct amdxdna_hwctx_priv {
 };
 
 enum aie2_dev_status {
+	AIE2_DEV_UNINIT,
 	AIE2_DEV_INIT,
 	AIE2_DEV_START,
 };
@@ -169,8 +174,15 @@ struct amdxdna_dev_hdl {
 	u32				total_col;
 	struct aie_version		version;
 	struct aie_metadata		metadata;
-	struct clock_entry		mp_npu_clock;
-	struct clock_entry		h_clock;
+
+	/* power management and clock*/
+	enum amdxdna_power_mode_type	pw_mode;
+	u32				dpm_level;
+	u32				dft_dpm_level;
+	u32				max_dpm_level;
+	u32				clk_gating;
+	u32				npuclk_freq;
+	u32				hclk_freq;
 
 	/* Mailbox and the management channel */
 	struct mailbox			*mbox;
@@ -178,6 +190,7 @@ struct amdxdna_dev_hdl {
 	struct async_events		*async_events;
 
 	enum aie2_dev_status		dev_status;
+	u32				hwctx_num;
 };
 
 #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
@@ -188,11 +201,17 @@ struct aie2_bar_off_pair {
 	u32	offset;
 };
 
+struct aie2_hw_ops {
+	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+};
+
 struct amdxdna_dev_priv {
 	const char			*fw_path;
 	u64				protocol_major;
 	u64				protocol_minor;
-	struct rt_config		rt_config;
+	const struct rt_config		*rt_config;
+	const struct dpm_clk_freq	*dpm_clk_tbl;
+
 #define COL_ALIGN_NONE   0
 #define COL_ALIGN_NATURE 1
 	u32				col_align;
@@ -203,15 +222,29 @@ struct amdxdna_dev_priv {
 	struct aie2_bar_off_pair	sram_offs[SRAM_MAX_INDEX];
 	struct aie2_bar_off_pair	psp_regs_off[PSP_MAX_REGS];
 	struct aie2_bar_off_pair	smu_regs_off[SMU_MAX_REGS];
-	u32				smu_mpnpuclk_freq_max;
-	u32				smu_hclk_freq_max;
+	struct aie2_hw_ops		hw_ops;
 };
 
 extern const struct amdxdna_dev_ops aie2_ops;
 
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+		     enum rt_config_category category, u32 *val);
+
+/* aie2 npu hw config */
+extern const struct dpm_clk_freq npu1_dpm_clk_table[];
+extern const struct dpm_clk_freq npu4_dpm_clk_table[];
+extern const struct rt_config npu1_default_rt_cfg[];
+extern const struct rt_config npu4_default_rt_cfg[];
+
 /* aie2_smu.c */
 int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
 void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+
+/* aie2_pm.c */
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
 
 /* aie2_psp.c */
 struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
new file mode 100644
index 000000000000..426c38fce848
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pm.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_pci_drv.h"
+
+#define AIE2_CLK_GATING_ENABLE	1
+#define AIE2_CLK_GATING_DISABLE	0
+
+static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val)
+{
+	int ret;
+
+	ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
+	if (ret)
+		return ret;
+
+	ndev->clk_gating = val;
+	return 0;
+}
+
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
+{
+	int ret;
+
+	if (ndev->dev_status != AIE2_DEV_UNINIT) {
+		/* Resume device */
+		ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
+		if (ret)
+			return ret;
+
+		ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
+		if (ret)
+			return ret;
+
+		return 0;
+	}
+
+	while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
+		ndev->max_dpm_level++;
+	ndev->max_dpm_level--;
+
+	ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
+	if (ret)
+		return ret;
+
+	ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
+	if (ret)
+		return ret;
+
+	ndev->pw_mode = POWER_MODE_DEFAULT;
+	ndev->dft_dpm_level = ndev->max_dpm_level;
+
+	return 0;
+}
+
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target)
+{
+	struct amdxdna_dev *xdna = ndev->xdna;
+	u32 clk_gating, dpm_level;
+	int ret;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+	if (ndev->pw_mode == target)
+		return 0;
+
+	switch (target) {
+	case POWER_MODE_TURBO:
+		if (ndev->hwctx_num) {
+			XDNA_ERR(xdna, "Can not set turbo when there is active hwctx");
+			return -EINVAL;
+		}
+
+		clk_gating = AIE2_CLK_GATING_DISABLE;
+		dpm_level = ndev->max_dpm_level;
+		break;
+	case POWER_MODE_HIGH:
+		clk_gating = AIE2_CLK_GATING_ENABLE;
+		dpm_level = ndev->max_dpm_level;
+		break;
+	case POWER_MODE_DEFAULT:
+		clk_gating = AIE2_CLK_GATING_ENABLE;
+		dpm_level = ndev->dft_dpm_level;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+	if (ret)
+		return ret;
+
+	ret = aie2_pm_set_clk_gating(ndev, clk_gating);
+	if (ret)
+		return ret;
+
+	ndev->pw_mode = target;
+
+	return 0;
+}
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
index 91893d438da7..73388443c676 100644
--- a/drivers/accel/amdxdna/aie2_smu.c
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -19,8 +19,11 @@
 #define AIE2_SMU_POWER_OFF		0x4
 #define AIE2_SMU_SET_MPNPUCLK_FREQ	0x5
 #define AIE2_SMU_SET_HCLK_FREQ		0x6
+#define AIE2_SMU_SET_SOFT_DPMLEVEL	0x7
+#define AIE2_SMU_SET_HARD_DPMLEVEL	0x8
 
-static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
+static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
+			 u32 reg_arg, u32 *out)
 {
 	u32 resp;
 	int ret;
@@ -40,6 +43,9 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
 		return ret;
 	}
 
+	if (out)
+		*out = readl(SMU_REG(ndev, SMU_OUT_REG));
+
 	if (resp != SMU_RESULT_OK) {
 		XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp);
 		return -EINVAL;
@@ -48,63 +54,71 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
 	return 0;
 }
 
-static int aie2_smu_set_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 {
+	u32 freq;
 	int ret;
 
-	if (!freq_mhz || freq_mhz > SMU_MPNPUCLK_FREQ_MAX(ndev)) {
-		XDNA_ERR(ndev->xdna, "invalid mpnpu clock freq %d", freq_mhz);
-		return -EINVAL;
+	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
+			    ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
+	if (ret) {
+		XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
+			 ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
 	}
+	ndev->npuclk_freq = freq;
 
-	ndev->mp_npu_clock.freq_mhz = freq_mhz;
-	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, freq_mhz);
-	if (!ret)
-		XDNA_INFO_ONCE(ndev->xdna, "set mpnpu_clock = %d mhz", freq_mhz);
-
-	return ret;
-}
-
-static int aie2_smu_set_hclock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
-{
-	int ret;
-
-	if (!freq_mhz || freq_mhz > SMU_HCLK_FREQ_MAX(ndev)) {
-		XDNA_ERR(ndev->xdna, "invalid hclock freq %d", freq_mhz);
-		return -EINVAL;
+	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ,
+			    ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq);
+	if (ret) {
+		XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
+			 ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
 	}
+	ndev->hclk_freq = freq;
+	ndev->dpm_level = dpm_level;
 
-	ndev->h_clock.freq_mhz = freq_mhz;
-	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, freq_mhz);
-	if (!ret)
-		XDNA_INFO_ONCE(ndev->xdna, "set npu_hclock = %d mhz", freq_mhz);
+	XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+		 ndev->npuclk_freq, ndev->hclk_freq);
 
-	return ret;
+	return 0;
 }
 
-int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 {
 	int ret;
 
-	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0);
+	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
 	if (ret) {
-		XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
+		XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
+			 dpm_level, ret);
 		return ret;
 	}
 
-	ret = aie2_smu_set_mpnpu_clock_freq(ndev, SMU_MPNPUCLK_FREQ_MAX(ndev));
+	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
 	if (ret) {
-		XDNA_ERR(ndev->xdna, "Set mpnpu clk freq failed, ret %d", ret);
+		XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
+			 dpm_level, ret);
 		return ret;
 	}
-	snprintf(ndev->mp_npu_clock.name, sizeof(ndev->mp_npu_clock.name), "MP-NPU Clock");
 
-	ret = aie2_smu_set_hclock_freq(ndev, SMU_HCLK_FREQ_MAX(ndev));
+	ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
+	ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
+	ndev->dpm_level = dpm_level;
+
+	XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+		 ndev->npuclk_freq, ndev->hclk_freq);
+
+	return 0;
+}
+
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+{
+	int ret;
+
+	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
 	if (ret) {
-		XDNA_ERR(ndev->xdna, "Set hclk freq failed, ret %d", ret);
+		XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
 		return ret;
 	}
-	snprintf(ndev->h_clock.name, sizeof(ndev->h_clock.name), "H Clock");
 
 	return 0;
 }
@@ -113,7 +127,8 @@ void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
 {
 	int ret;
 
-	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0);
+	ndev->priv->hw_ops.set_dpm(ndev, 0);
+	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
 	if (ret)
 		XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
 }
diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/amdxdna/aie2_solver.c
index a537c66589a4..1939625d6027 100644
--- a/drivers/accel/amdxdna/aie2_solver.c
+++ b/drivers/accel/amdxdna/aie2_solver.c
@@ -25,6 +25,7 @@ struct solver_node {
 
 	struct partition_node	*pt_node;
 	void			*cb_arg;
+	u32			dpm_level;
 	u32			cols_len;
 	u32			start_cols[] __counted_by(cols_len);
 };
@@ -95,6 +96,51 @@ static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
 	return 0;
 }
 
+static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
+{
+	/*
+	 * gops is retrieved from the xmodel, so it's always set
+	 * fps and latency are the configurable params from the application
+	 */
+	if (rqos->gops > 0 && (rqos->fps > 0 ||  rqos->latency > 0))
+		return true;
+
+	return false;
+}
+
+static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level)
+{
+	struct solver_rgroup *rgp = &xrs->rgp;
+	struct cdo_parts *cdop = &req->cdo;
+	struct aie_qos *rqos = &req->rqos;
+	u32 freq, max_dpm_level, level;
+	struct solver_node *node;
+
+	max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
+	/* If no QoS parameters are passed, set it to the max DPM level */
+	if (!is_valid_qos_dpm_params(rqos)) {
+		level = max_dpm_level;
+		goto set_dpm;
+	}
+
+	/* Find one CDO group that meet the GOPs requirement. */
+	for (level = 0; level < max_dpm_level; level++) {
+		freq = xrs->cfg.clk_list.cu_clk_list[level];
+		if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
+			break;
+	}
+
+	/* set the dpm level which fits all the sessions */
+	list_for_each_entry(node, &rgp->node_list, list) {
+		if (node->dpm_level > level)
+			level = node->dpm_level;
+	}
+
+set_dpm:
+	*dpm_level = level;
+	return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
+}
+
 static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid)
 {
 	struct solver_node *node;
@@ -159,12 +205,9 @@ static int get_free_partition(struct solver_state *xrs,
 	pt_node->ncols = ncols;
 
 	/*
-	 * Before fully support latency in QoS, if a request
-	 * specifies a non-zero latency value, it will not share
-	 * the partition with other requests.
+	 * Always set exclusive to false for now.
 	 */
-	if (req->rqos.latency)
-		pt_node->exclusive = true;
+	pt_node->exclusive = false;
 
 	list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
 	xrs->rgp.npartition_node++;
@@ -257,6 +300,7 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
 	struct xrs_action_load load_act;
 	struct solver_node *snode;
 	struct solver_state *xrs;
+	u32 dpm_level;
 	int ret;
 
 	xrs = (struct solver_state *)hdl;
@@ -281,6 +325,11 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
 	if (ret)
 		goto free_node;
 
+	ret = set_dpm_level(xrs, req, &dpm_level);
+	if (ret)
+		goto free_node;
+
+	snode->dpm_level = dpm_level;
 	snode->cb_arg = cb_arg;
 
 	drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/amdxdna/aie2_solver.h
index 9b1847bb46a6..a2e3c52229e9 100644
--- a/drivers/accel/amdxdna/aie2_solver.h
+++ b/drivers/accel/amdxdna/aie2_solver.h
@@ -99,6 +99,7 @@ struct clk_list_info {
 struct xrs_action_ops {
 	int (*load)(void *cb_arg, struct xrs_action_load *action);
 	int (*unload)(void *cb_arg);
+	int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
 };
 
 /*
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index c3541796d189..6bbd437d48d8 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -160,6 +160,24 @@ static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct
 	return ret;
 }
 
+static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct amdxdna_client *client = filp->driver_priv;
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	struct amdxdna_drm_set_state *args = data;
+	int ret;
+
+	if (!xdna->dev_info->ops->set_aie_state)
+		return -EOPNOTSUPP;
+
+	XDNA_DBG(xdna, "Request parameter %u", args->param);
+	mutex_lock(&xdna->dev_lock);
+	ret = xdna->dev_info->ops->set_aie_state(client, args);
+	mutex_unlock(&xdna->dev_lock);
+
+	return ret;
+}
+
 static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
 	/* Context */
 	DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
@@ -173,6 +191,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
 	/* AIE hardware */
 	DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
 };
 
 static const struct file_operations amdxdna_fops = {
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index f5b830fb14bb..e2071e31d949 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -20,6 +20,7 @@ extern const struct drm_driver amdxdna_drm_drv;
 struct amdxdna_client;
 struct amdxdna_dev;
 struct amdxdna_drm_get_info;
+struct amdxdna_drm_set_state;
 struct amdxdna_gem_obj;
 struct amdxdna_hwctx;
 struct amdxdna_sched_job;
@@ -40,6 +41,7 @@ struct amdxdna_dev_ops {
 	void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
 	int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
 	int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
+	int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
 };
 
 /*
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index f00c50461b09..c8f4d1cac65d 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -44,18 +44,30 @@
 #define NPU1_SMU_BAR_BASE  MPNPU_APERTURE0_BASE
 #define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
 
-#define NPU1_RT_CFG_TYPE_PDI_LOAD 2
-#define NPU1_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU1_RT_CFG_VAL_PDI_LOAD_APP 1
+const struct rt_config npu1_default_rt_cfg[] = {
+	{ 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+	{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+	{ 0 },
+};
 
-#define NPU1_MPNPUCLK_FREQ_MAX  600
-#define NPU1_HCLK_FREQ_MAX      1024
+const struct dpm_clk_freq npu1_dpm_clk_table[] = {
+	{400, 800},
+	{600, 1024},
+	{600, 1024},
+	{600, 1024},
+	{600, 1024},
+	{720, 1309},
+	{720, 1309},
+	{847, 1600},
+	{ 0 }
+};
 
 const struct amdxdna_dev_priv npu1_dev_priv = {
 	.fw_path        = "amdnpu/1502_00/npu.sbin",
 	.protocol_major = 0x5,
 	.protocol_minor = 0x1,
-	.rt_config	= {NPU1_RT_CFG_TYPE_PDI_LOAD, NPU1_RT_CFG_VAL_PDI_LOAD_APP},
+	.rt_config	= npu1_default_rt_cfg,
+	.dpm_clk_tbl	= npu1_dpm_clk_table,
 	.col_align	= COL_ALIGN_NONE,
 	.mbox_dev_addr  = NPU1_MBOX_BAR_BASE,
 	.mbox_size      = 0, /* Use BAR size */
@@ -80,8 +92,9 @@ const struct amdxdna_dev_priv npu1_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
 	},
-	.smu_mpnpuclk_freq_max = NPU1_MPNPUCLK_FREQ_MAX,
-	.smu_hclk_freq_max     = NPU1_HCLK_FREQ_MAX,
+	.hw_ops		= {
+		.set_dpm = npu1_set_dpm,
+	},
 };
 
 const struct amdxdna_dev_info dev_npu1_info = {
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
index 00cb381031d2..ac63131f9c7c 100644
--- a/drivers/accel/amdxdna/npu2_regs.c
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -61,18 +61,12 @@
 #define NPU2_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
 #define NPU2_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
 
-#define NPU2_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU2_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU2_RT_CFG_VAL_PDI_LOAD_APP 1
-
-#define NPU2_MPNPUCLK_FREQ_MAX  1267
-#define NPU2_HCLK_FREQ_MAX      1800
-
 const struct amdxdna_dev_priv npu2_dev_priv = {
 	.fw_path        = "amdnpu/17f0_00/npu.sbin",
 	.protocol_major = 0x6,
 	.protocol_minor = 0x1,
-	.rt_config	= {NPU2_RT_CFG_TYPE_PDI_LOAD, NPU2_RT_CFG_VAL_PDI_LOAD_APP},
+	.rt_config	= npu4_default_rt_cfg,
+	.dpm_clk_tbl	= npu4_dpm_clk_table,
 	.col_align	= COL_ALIGN_NATURE,
 	.mbox_dev_addr  = NPU2_MBOX_BAR_BASE,
 	.mbox_size      = 0, /* Use BAR size */
@@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu2_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU2_SMU, MP1_C2PMSG_60),
 	},
-	.smu_mpnpuclk_freq_max = NPU2_MPNPUCLK_FREQ_MAX,
-	.smu_hclk_freq_max     = NPU2_HCLK_FREQ_MAX,
+	.hw_ops	=	 {
+		.set_dpm = npu4_set_dpm,
+	},
 };
 
 const struct amdxdna_dev_info dev_npu2_info = {
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index b6dae9667cca..a713ac18adfc 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -61,18 +61,33 @@
 #define NPU4_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
 #define NPU4_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
 
-#define NPU4_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU4_RT_CFG_VAL_PDI_LOAD_APP 1
+const struct rt_config npu4_default_rt_cfg[] = {
+	{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+	{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+	{ 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+	{ 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+	{ 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+	{ 0 },
+};
 
-#define NPU4_MPNPUCLK_FREQ_MAX  1267
-#define NPU4_HCLK_FREQ_MAX      1800
+const struct dpm_clk_freq npu4_dpm_clk_table[] = {
+	{396, 792},
+	{600, 1056},
+	{792, 1152},
+	{975, 1267},
+	{975, 1267},
+	{1056, 1408},
+	{1152, 1584},
+	{1267, 1800},
+	{ 0 }
+};
 
 const struct amdxdna_dev_priv npu4_dev_priv = {
 	.fw_path        = "amdnpu/17f0_10/npu.sbin",
 	.protocol_major = 0x6,
 	.protocol_minor = 0x1,
-	.rt_config	= {NPU4_RT_CFG_TYPE_PDI_LOAD, NPU4_RT_CFG_VAL_PDI_LOAD_APP},
+	.rt_config	= npu4_default_rt_cfg,
+	.dpm_clk_tbl	= npu4_dpm_clk_table,
 	.col_align	= COL_ALIGN_NATURE,
 	.mbox_dev_addr  = NPU4_MBOX_BAR_BASE,
 	.mbox_size      = 0, /* Use BAR size */
@@ -97,8 +112,9 @@ const struct amdxdna_dev_priv npu4_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
 	},
-	.smu_mpnpuclk_freq_max = NPU4_MPNPUCLK_FREQ_MAX,
-	.smu_hclk_freq_max     = NPU4_HCLK_FREQ_MAX,
+	.hw_ops		= {
+		.set_dpm = npu4_set_dpm,
+	},
 };
 
 const struct amdxdna_dev_info dev_npu4_info = {
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index bed1baf8e160..67a5d5bc8a49 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -61,18 +61,12 @@
 #define NPU5_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
 #define NPU5_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
 
-#define NPU5_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU5_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU5_RT_CFG_VAL_PDI_LOAD_APP 1
-
-#define NPU5_MPNPUCLK_FREQ_MAX  1267
-#define NPU5_HCLK_FREQ_MAX      1800
-
 const struct amdxdna_dev_priv npu5_dev_priv = {
 	.fw_path        = "amdnpu/17f0_11/npu.sbin",
 	.protocol_major = 0x6,
 	.protocol_minor = 0x1,
-	.rt_config	= {NPU5_RT_CFG_TYPE_PDI_LOAD, NPU5_RT_CFG_VAL_PDI_LOAD_APP},
+	.rt_config	= npu4_default_rt_cfg,
+	.dpm_clk_tbl	= npu4_dpm_clk_table,
 	.col_align	= COL_ALIGN_NATURE,
 	.mbox_dev_addr  = NPU5_MBOX_BAR_BASE,
 	.mbox_size      = 0, /* Use BAR size */
@@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu5_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
 	},
-	.smu_mpnpuclk_freq_max = NPU5_MPNPUCLK_FREQ_MAX,
-	.smu_hclk_freq_max     = NPU5_HCLK_FREQ_MAX,
+	.hw_ops		= {
+		.set_dpm = npu4_set_dpm,
+	},
 };
 
 const struct amdxdna_dev_info dev_npu5_info = {
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
index d1168fc55533..f46c760cefc7 100644
--- a/drivers/accel/amdxdna/npu6_regs.c
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -61,23 +61,12 @@
 #define NPU6_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
 #define NPU6_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
 
-#define NPU6_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU6_RT_CFG_TYPE_DEBUG_BO 10
-
-#define NPU6_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU6_RT_CFG_VAL_PDI_LOAD_APP 1
-
-#define NPU6_RT_CFG_VAL_DEBUG_BO_DEFAULT 0
-#define NPU6_RT_CFG_VAL_DEBUG_BO_LARGE   1
-
-#define NPU6_MPNPUCLK_FREQ_MAX  1267
-#define NPU6_HCLK_FREQ_MAX      1800
-
 const struct amdxdna_dev_priv npu6_dev_priv = {
 	.fw_path        = "amdnpu/17f0_10/npu.sbin",
 	.protocol_major = 0x6,
 	.protocol_minor = 12,
-	.rt_config	= {NPU6_RT_CFG_TYPE_PDI_LOAD, NPU6_RT_CFG_VAL_PDI_LOAD_APP},
+	.rt_config	= npu4_default_rt_cfg,
+	.dpm_clk_tbl	= npu4_dpm_clk_table,
 	.col_align	= COL_ALIGN_NATURE,
 	.mbox_dev_addr  = NPU6_MBOX_BAR_BASE,
 	.mbox_size      = 0, /* Use BAR size */
@@ -102,6 +91,10 @@ const struct amdxdna_dev_priv npu6_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
 	},
+	.hw_ops         = {
+		.set_dpm = npu4_set_dpm,
+	},
+
 };
 
 const struct amdxdna_dev_info dev_npu6_info = {
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index 4f15e53a548d..9af9302baf90 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -33,6 +33,7 @@ enum amdxdna_drm_ioctl_id {
 	DRM_AMDXDNA_SYNC_BO,
 	DRM_AMDXDNA_EXEC_CMD,
 	DRM_AMDXDNA_GET_INFO,
+	DRM_AMDXDNA_SET_STATE,
 };
 
 /**
@@ -375,6 +376,24 @@ struct amdxdna_drm_query_hwctx {
 	__u64 errors;
 };
 
+enum amdxdna_power_mode_type {
+	POWER_MODE_DEFAULT, /* Fallback to calculated DPM */
+	POWER_MODE_LOW,     /* Set frequency to lowest DPM */
+	POWER_MODE_MEDIUM,  /* Set frequency to medium DPM */
+	POWER_MODE_HIGH,    /* Set frequency to highest DPM */
+	POWER_MODE_TURBO,   /* Maximum power */
+};
+
+/**
+ * struct amdxdna_drm_get_power_mode - Get the configured power mode
+ * @power_mode: The mode type from enum amdxdna_power_mode_type
+ * @pad: MBZ.
+ */
+struct amdxdna_drm_get_power_mode {
+	__u8 power_mode;
+	__u8 pad[7];
+};
+
 /**
  * struct amdxdna_drm_query_firmware_version - Query the firmware version
  * @major: The major version number
@@ -397,6 +416,7 @@ enum amdxdna_drm_get_param {
 	DRM_AMDXDNA_QUERY_SENSORS,
 	DRM_AMDXDNA_QUERY_HW_CONTEXTS,
 	DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
+	DRM_AMDXDNA_GET_POWER_MODE,
 };
 
 /**
@@ -411,6 +431,34 @@ struct amdxdna_drm_get_info {
 	__u64 buffer; /* in/out */
 };
 
+enum amdxdna_drm_set_param {
+	DRM_AMDXDNA_SET_POWER_MODE,
+	DRM_AMDXDNA_WRITE_AIE_MEM,
+	DRM_AMDXDNA_WRITE_AIE_REG,
+};
+
+/**
+ * struct amdxdna_drm_set_state - Set the state of the AIE hardware.
+ * @param: Value in enum amdxdna_drm_set_param.
+ * @buffer_size: Size of the input param.
+ * @buffer: Input param.
+ */
+struct amdxdna_drm_set_state {
+	__u32 param; /* in */
+	__u32 buffer_size; /* in */
+	__u64 buffer; /* in */
+};
+
+/**
+ * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE hardware
+ * @power_mode: The sensor type from enum amdxdna_power_mode_type
+ * @pad: MBZ.
+ */
+struct amdxdna_drm_set_power_mode {
+	__u8 power_mode;
+	__u8 pad[7];
+};
+
 #define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \
 		 struct amdxdna_drm_create_hwctx)
@@ -443,6 +491,10 @@ struct amdxdna_drm_get_info {
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, \
 		 struct amdxdna_drm_get_info)
 
+#define DRM_IOCTL_AMDXDNA_SET_STATE \
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \
+		 struct amdxdna_drm_set_state)
+
 #if defined(__cplusplus)
 } /* extern c end */
 #endif
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH V2 7/8] accel/amdxdna: Read firmware interface version from registers
  2024-12-06 21:59 [PATCH V2 0/8] AMD NPU driver improvements Lizhi Hou
                   ` (5 preceding siblings ...)
  2024-12-06 21:59 ` [PATCH V2 6/8] accel/amdxdna: Enhance power management settings Lizhi Hou
@ 2024-12-06 22:00 ` Lizhi Hou
  2024-12-11  0:20   ` Mario Limonciello
  2024-12-13 16:58   ` Jeffrey Hugo
  2024-12-06 22:00 ` [PATCH V2 8/8] accel/amdxdna: Add include interrupt.h to amdxdna_mailbox.c Lizhi Hou
  2024-12-13 17:00 ` [PATCH V2 0/8] AMD NPU driver improvements Jeffrey Hugo
  8 siblings, 2 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-06 22:00 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel
  Cc: Lizhi Hou, linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

The latest released firmware supports reading firmware interface version
from registers directly. The driver's probe routine reads the major and
minor version numbers. If the firmware interface is not compatible with
the driver, the driver's probe routine returns failure.

Co-developed-by: Min Ma <min.ma@amd.com>
Signed-off-by: Min Ma <min.ma@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_message.c | 26 ----------
 drivers/accel/amdxdna/aie2_pci.c     | 74 ++++++++++++++++++++++------
 drivers/accel/amdxdna/aie2_pci.h     |  6 +--
 drivers/accel/amdxdna/npu1_regs.c    |  2 +-
 drivers/accel/amdxdna/npu2_regs.c    |  2 +-
 drivers/accel/amdxdna/npu4_regs.c    |  2 +-
 drivers/accel/amdxdna/npu5_regs.c    |  2 +-
 7 files changed, 64 insertions(+), 50 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 13b5a96f8d25..f6d46e1e5086 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -100,32 +100,6 @@ int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
 	return 0;
 }
 
-int aie2_check_protocol_version(struct amdxdna_dev_hdl *ndev)
-{
-	DECLARE_AIE2_MSG(protocol_version, MSG_OP_GET_PROTOCOL_VERSION);
-	struct amdxdna_dev *xdna = ndev->xdna;
-	int ret;
-
-	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
-	if (ret) {
-		XDNA_ERR(xdna, "Failed to get protocol version, ret %d", ret);
-		return ret;
-	}
-
-	if (resp.major != ndev->priv->protocol_major) {
-		XDNA_ERR(xdna, "Incompatible firmware protocol version major %d minor %d",
-			 resp.major, resp.minor);
-		return -EINVAL;
-	}
-
-	if (resp.minor < ndev->priv->protocol_minor) {
-		XDNA_ERR(xdna, "Firmware minor version smaller than supported");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid)
 {
 	DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID);
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 489744a2e226..2d2b6b66617a 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -33,17 +33,51 @@ MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
  * The related register and ring buffer information is on SRAM BAR.
  * This struct is the register layout.
  */
+#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */
 struct mgmt_mbox_chann_info {
-	u32	x2i_tail;
-	u32	x2i_head;
-	u32	x2i_buf;
-	u32	x2i_buf_sz;
-	u32	i2x_tail;
-	u32	i2x_head;
-	u32	i2x_buf;
-	u32	i2x_buf_sz;
+	__u32	x2i_tail;
+	__u32	x2i_head;
+	__u32	x2i_buf;
+	__u32	x2i_buf_sz;
+	__u32	i2x_tail;
+	__u32	i2x_head;
+	__u32	i2x_buf;
+	__u32	i2x_buf_sz;
+	__u32	magic;
+	__u32	msi_id;
+	__u32	prot_major;
+	__u32	prot_minor;
+	__u32	rsvd[4];
 };
 
+static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
+{
+	struct amdxdna_dev *xdna = ndev->xdna;
+
+	/*
+	 * The driver supported mailbox behavior is defined by
+	 * ndev->priv->protocol_major and protocol_minor.
+	 *
+	 * When protocol_major and fw_major are different, it means driver
+	 * and firmware are incompatible.
+	 */
+	if (ndev->priv->protocol_major != fw_major) {
+		XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d",
+			 fw_major, fw_minor);
+		return -EINVAL;
+	}
+
+	/*
+	 * When protocol_minor is greater than fw_minor, that means the driver
+	 * relies on operations the installed firmware does not support.
+	 */
+	if (ndev->priv->protocol_minor > fw_minor) {
+		XDNA_ERR(xdna, "Firmware minor version smaller than supported");
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
 {
 	struct amdxdna_dev *xdna = ndev->xdna;
@@ -57,6 +91,8 @@ static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
 	XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr);
 	XDNA_DBG(xdna, "x2i rsize   0x%x", ndev->mgmt_x2i.rb_size);
 	XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx);
+	XDNA_DBG(xdna, "mailbox protocol major 0x%x", ndev->mgmt_prot_major);
+	XDNA_DBG(xdna, "mailbox protocol minor 0x%x", ndev->mgmt_prot_minor);
 }
 
 static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
@@ -87,6 +123,12 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
 	for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++)
 		reg[i] = readl(ndev->sram_base + off + i * sizeof(u32));
 
+	if (info_regs.magic != MGMT_MBOX_MAGIC) {
+		XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", info_regs.magic);
+		ret = -EINVAL;
+		goto done;
+	}
+
 	i2x = &ndev->mgmt_i2x;
 	x2i = &ndev->mgmt_x2i;
 
@@ -99,14 +141,20 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
 	x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail);
 	x2i->rb_start_addr   = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf);
 	x2i->rb_size         = info_regs.x2i_buf_sz;
-	ndev->mgmt_chan_idx  = CHANN_INDEX(ndev, x2i->rb_start_addr);
 
+	ndev->mgmt_chan_idx  = info_regs.msi_id;
+	ndev->mgmt_prot_major = info_regs.prot_major;
+	ndev->mgmt_prot_minor = info_regs.prot_minor;
+
+	ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor);
+
+done:
 	aie2_dump_chann_info_debug(ndev);
 
 	/* Must clear address at FW_ALIVE_OFF */
 	writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF));
 
-	return 0;
+	return ret;
 }
 
 int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
@@ -155,12 +203,6 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
 {
 	int ret;
 
-	ret = aie2_check_protocol_version(ndev);
-	if (ret) {
-		XDNA_ERR(ndev->xdna, "Check header hash failed");
-		return ret;
-	}
-
 	ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
 	if (ret) {
 		XDNA_ERR(ndev->xdna, "Runtime config failed");
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 8c17b74654ce..cc159cadff9f 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -39,9 +39,6 @@
 })
 
 #define CHAN_SLOT_SZ SZ_8K
-#define CHANN_INDEX(ndev, rbuf_off) \
-	(((rbuf_off) - SRAM_REG_OFF((ndev), MBOX_CHANN_OFF)) / CHAN_SLOT_SZ)
-
 #define MBOX_SIZE(ndev) \
 ({ \
 	typeof(ndev) _ndev = (ndev); \
@@ -170,6 +167,8 @@ struct amdxdna_dev_hdl {
 	struct xdna_mailbox_chann_res	mgmt_x2i;
 	struct xdna_mailbox_chann_res	mgmt_i2x;
 	u32				mgmt_chan_idx;
+	u32				mgmt_prot_major;
+	u32				mgmt_prot_minor;
 
 	u32				total_col;
 	struct aie_version		version;
@@ -262,7 +261,6 @@ int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
 int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
 int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
 int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
-int aie2_check_protocol_version(struct amdxdna_dev_hdl *ndev);
 int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
 int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
 int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index c8f4d1cac65d..e408af57e378 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -65,7 +65,7 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = {
 const struct amdxdna_dev_priv npu1_dev_priv = {
 	.fw_path        = "amdnpu/1502_00/npu.sbin",
 	.protocol_major = 0x5,
-	.protocol_minor = 0x1,
+	.protocol_minor = 0x7,
 	.rt_config	= npu1_default_rt_cfg,
 	.dpm_clk_tbl	= npu1_dpm_clk_table,
 	.col_align	= COL_ALIGN_NONE,
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
index ac63131f9c7c..286bd0d475e2 100644
--- a/drivers/accel/amdxdna/npu2_regs.c
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -64,7 +64,7 @@
 const struct amdxdna_dev_priv npu2_dev_priv = {
 	.fw_path        = "amdnpu/17f0_00/npu.sbin",
 	.protocol_major = 0x6,
-	.protocol_minor = 0x1,
+	.protocol_minor = 0x6,
 	.rt_config	= npu4_default_rt_cfg,
 	.dpm_clk_tbl	= npu4_dpm_clk_table,
 	.col_align	= COL_ALIGN_NATURE,
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index a713ac18adfc..00c52833ce89 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -85,7 +85,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
 const struct amdxdna_dev_priv npu4_dev_priv = {
 	.fw_path        = "amdnpu/17f0_10/npu.sbin",
 	.protocol_major = 0x6,
-	.protocol_minor = 0x1,
+	.protocol_minor = 12,
 	.rt_config	= npu4_default_rt_cfg,
 	.dpm_clk_tbl	= npu4_dpm_clk_table,
 	.col_align	= COL_ALIGN_NATURE,
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index 67a5d5bc8a49..118849272f27 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -64,7 +64,7 @@
 const struct amdxdna_dev_priv npu5_dev_priv = {
 	.fw_path        = "amdnpu/17f0_11/npu.sbin",
 	.protocol_major = 0x6,
-	.protocol_minor = 0x1,
+	.protocol_minor = 12,
 	.rt_config	= npu4_default_rt_cfg,
 	.dpm_clk_tbl	= npu4_dpm_clk_table,
 	.col_align	= COL_ALIGN_NATURE,
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH V2 8/8] accel/amdxdna: Add include interrupt.h to amdxdna_mailbox.c
  2024-12-06 21:59 [PATCH V2 0/8] AMD NPU driver improvements Lizhi Hou
                   ` (6 preceding siblings ...)
  2024-12-06 22:00 ` [PATCH V2 7/8] accel/amdxdna: Read firmware interface version from registers Lizhi Hou
@ 2024-12-06 22:00 ` Lizhi Hou
  2024-12-10 20:54   ` Mario Limonciello
  2024-12-13 16:20   ` Jeffrey Hugo
  2024-12-13 17:00 ` [PATCH V2 0/8] AMD NPU driver improvements Jeffrey Hugo
  8 siblings, 2 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-06 22:00 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel
  Cc: Lizhi Hou, linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello, Mike Lothian

For amdxdna_mailbox.c, linux/interrupt.h is indirectly included by
trace/events/amdxdna.h. So if TRACING is disabled, compiling the driver
will fail.

Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
Reported-by: Mike Lothian <mike@fireburn.co.uk>
Closes: https://lore.kernel.org/dri-devel/CAHbf0-E+Z2O7rW-x+-EKNQ-nLbf=_ohaNzXxE7WD2cj9kFJERQ@mail.gmail.com/
Signed-off-by: Mike Lothian <mike@fireburn.co.uk>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/amdxdna_mailbox.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index fe684f463b94..79b9801935e7 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -6,6 +6,7 @@
 #include <drm/drm_device.h>
 #include <drm/drm_managed.h>
 #include <linux/bitfield.h>
+#include <linux/interrupt.h>
 #include <linux/iopoll.h>
 
 #define CREATE_TRACE_POINTS
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 8/8] accel/amdxdna: Add include interrupt.h to amdxdna_mailbox.c
  2024-12-06 22:00 ` [PATCH V2 8/8] accel/amdxdna: Add include interrupt.h to amdxdna_mailbox.c Lizhi Hou
@ 2024-12-10 20:54   ` Mario Limonciello
  2024-12-13 16:20   ` Jeffrey Hugo
  1 sibling, 0 replies; 33+ messages in thread
From: Mario Limonciello @ 2024-12-10 20:54 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	Mike Lothian

On 12/6/2024 16:00, Lizhi Hou wrote:
> For amdxdna_mailbox.c, linux/interrupt.h is indirectly included by
> trace/events/amdxdna.h. So if TRACING is disabled, driver compiling will
> fail.
> 
> Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
> Reported-by: Mike Lothian <mike@fireburn.co.uk>
> Closes: https://lore.kernel.org/dri-devel/CAHbf0-E+Z2O7rW-x+-EKNQ-nLbf=_ohaNzXxE7WD2cj9kFJERQ@mail.gmail.com/
> Signed-off-by: Mike Lothian <mike@fireburn.co.uk>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
>   drivers/accel/amdxdna/amdxdna_mailbox.c | 1 +
>   1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
> index fe684f463b94..79b9801935e7 100644
> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
> @@ -6,6 +6,7 @@
>   #include <drm/drm_device.h>
>   #include <drm/drm_managed.h>
>   #include <linux/bitfield.h>
> +#include <linux/interrupt.h>
>   #include <linux/iopoll.h>
>   
>   #define CREATE_TRACE_POINTS


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 7/8] accel/amdxdna: Read firmware interface version from registers
  2024-12-06 22:00 ` [PATCH V2 7/8] accel/amdxdna: Read firmware interface version from registers Lizhi Hou
@ 2024-12-11  0:20   ` Mario Limonciello
  2024-12-11  5:32     ` Lizhi Hou
  2024-12-13 16:58   ` Jeffrey Hugo
  1 sibling, 1 reply; 33+ messages in thread
From: Mario Limonciello @ 2024-12-11  0:20 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam

On 12/6/2024 16:00, Lizhi Hou wrote:
> The latest released firmware supports reading firmware interface version
> from registers directly. The driver's probe routine reads the major and
> minor version numbers. If the firmware interface does not compatible with
s/does/is/
> the driver, the driver's probe routine returns failure.
> 
> Co-developed-by: Min Ma <min.ma@amd.com>
> Signed-off-by: Min Ma <min.ma@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>

Just to confirm you're not backing yourself into a corner the plan is 
not to bump this major version any time soon for anything already 
supported by the driver; right?

Because once you do that this is going to get messy quickly.

Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
>   drivers/accel/amdxdna/aie2_message.c | 26 ----------
>   drivers/accel/amdxdna/aie2_pci.c     | 74 ++++++++++++++++++++++------
>   drivers/accel/amdxdna/aie2_pci.h     |  6 +--
>   drivers/accel/amdxdna/npu1_regs.c    |  2 +-
>   drivers/accel/amdxdna/npu2_regs.c    |  2 +-
>   drivers/accel/amdxdna/npu4_regs.c    |  2 +-
>   drivers/accel/amdxdna/npu5_regs.c    |  2 +-
>   7 files changed, 64 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 13b5a96f8d25..f6d46e1e5086 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -100,32 +100,6 @@ int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
>   	return 0;
>   }
>   
> -int aie2_check_protocol_version(struct amdxdna_dev_hdl *ndev)
> -{
> -	DECLARE_AIE2_MSG(protocol_version, MSG_OP_GET_PROTOCOL_VERSION);
> -	struct amdxdna_dev *xdna = ndev->xdna;
> -	int ret;
> -
> -	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
> -	if (ret) {
> -		XDNA_ERR(xdna, "Failed to get protocol version, ret %d", ret);
> -		return ret;
> -	}
> -
> -	if (resp.major != ndev->priv->protocol_major) {
> -		XDNA_ERR(xdna, "Incompatible firmware protocol version major %d minor %d",
> -			 resp.major, resp.minor);
> -		return -EINVAL;
> -	}
> -
> -	if (resp.minor < ndev->priv->protocol_minor) {
> -		XDNA_ERR(xdna, "Firmware minor version smaller than supported");
> -		return -EINVAL;
> -	}
> -
> -	return 0;
> -}
> -
>   int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid)
>   {
>   	DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID);
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 489744a2e226..2d2b6b66617a 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -33,17 +33,51 @@ MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
>    * The related register and ring buffer information is on SRAM BAR.
>    * This struct is the register layout.
>    */
> +#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */
>   struct mgmt_mbox_chann_info {
> -	u32	x2i_tail;
> -	u32	x2i_head;
> -	u32	x2i_buf;
> -	u32	x2i_buf_sz;
> -	u32	i2x_tail;
> -	u32	i2x_head;
> -	u32	i2x_buf;
> -	u32	i2x_buf_sz;
> +	__u32	x2i_tail;
> +	__u32	x2i_head;
> +	__u32	x2i_buf;
> +	__u32	x2i_buf_sz;
> +	__u32	i2x_tail;
> +	__u32	i2x_head;
> +	__u32	i2x_buf;
> +	__u32	i2x_buf_sz;
> +	__u32	magic;
> +	__u32	msi_id;
> +	__u32	prot_major;
> +	__u32	prot_minor;
> +	__u32	rsvd[4];
>   };
>   
> +static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
> +{
> +	struct amdxdna_dev *xdna = ndev->xdna;
> +
> +	/*
> +	 * The driver supported mailbox behavior is defined by
> +	 * ndev->priv->protocol_major and protocol_minor.
> +	 *
> +	 * When protocol_major and fw_major are different, it means driver
> +	 * and firmware are incompatible.
> +	 */
> +	if (ndev->priv->protocol_major != fw_major) {
> +		XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d",
> +			 fw_major, fw_minor);
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * When protocol_minor is greater then fw_minor, that means driver
> +	 * relies on operation the installed firmware does not support.
> +	 */
> +	if (ndev->priv->protocol_minor > fw_minor) {
> +		XDNA_ERR(xdna, "Firmware minor version smaller than supported");
> +		return -EINVAL;
> +	}
> +	return 0;
> +}
> +
>   static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
>   {
>   	struct amdxdna_dev *xdna = ndev->xdna;
> @@ -57,6 +91,8 @@ static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
>   	XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr);
>   	XDNA_DBG(xdna, "x2i rsize   0x%x", ndev->mgmt_x2i.rb_size);
>   	XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx);
> +	XDNA_DBG(xdna, "mailbox protocol major 0x%x", ndev->mgmt_prot_major);
> +	XDNA_DBG(xdna, "mailbox protocol minor 0x%x", ndev->mgmt_prot_minor);
>   }
>   
>   static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
> @@ -87,6 +123,12 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
>   	for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++)
>   		reg[i] = readl(ndev->sram_base + off + i * sizeof(u32));
>   
> +	if (info_regs.magic != MGMT_MBOX_MAGIC) {
> +		XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", info_regs.magic);
> +		ret = -EINVAL;
> +		goto done;
> +	}
> +
>   	i2x = &ndev->mgmt_i2x;
>   	x2i = &ndev->mgmt_x2i;
>   
> @@ -99,14 +141,20 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
>   	x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail);
>   	x2i->rb_start_addr   = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf);
>   	x2i->rb_size         = info_regs.x2i_buf_sz;
> -	ndev->mgmt_chan_idx  = CHANN_INDEX(ndev, x2i->rb_start_addr);
>   
> +	ndev->mgmt_chan_idx  = info_regs.msi_id;
> +	ndev->mgmt_prot_major = info_regs.prot_major;
> +	ndev->mgmt_prot_minor = info_regs.prot_minor;
> +
> +	ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor);
> +
> +done:
>   	aie2_dump_chann_info_debug(ndev);
>   
>   	/* Must clear address at FW_ALIVE_OFF */
>   	writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF));
>   
> -	return 0;
> +	return ret;
>   }
>   
>   int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
> @@ -155,12 +203,6 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
>   {
>   	int ret;
>   
> -	ret = aie2_check_protocol_version(ndev);
> -	if (ret) {
> -		XDNA_ERR(ndev->xdna, "Check header hash failed");
> -		return ret;
> -	}
> -
>   	ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
>   	if (ret) {
>   		XDNA_ERR(ndev->xdna, "Runtime config failed");
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index 8c17b74654ce..cc159cadff9f 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -39,9 +39,6 @@
>   })
>   
>   #define CHAN_SLOT_SZ SZ_8K
> -#define CHANN_INDEX(ndev, rbuf_off) \
> -	(((rbuf_off) - SRAM_REG_OFF((ndev), MBOX_CHANN_OFF)) / CHAN_SLOT_SZ)
> -
>   #define MBOX_SIZE(ndev) \
>   ({ \
>   	typeof(ndev) _ndev = (ndev); \
> @@ -170,6 +167,8 @@ struct amdxdna_dev_hdl {
>   	struct xdna_mailbox_chann_res	mgmt_x2i;
>   	struct xdna_mailbox_chann_res	mgmt_i2x;
>   	u32				mgmt_chan_idx;
> +	u32				mgmt_prot_major;
> +	u32				mgmt_prot_minor;
>   
>   	u32				total_col;
>   	struct aie_version		version;
> @@ -262,7 +261,6 @@ int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
>   int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
>   int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
>   int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
> -int aie2_check_protocol_version(struct amdxdna_dev_hdl *ndev);
>   int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
>   int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
>   int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
> diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
> index c8f4d1cac65d..e408af57e378 100644
> --- a/drivers/accel/amdxdna/npu1_regs.c
> +++ b/drivers/accel/amdxdna/npu1_regs.c
> @@ -65,7 +65,7 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = {
>   const struct amdxdna_dev_priv npu1_dev_priv = {
>   	.fw_path        = "amdnpu/1502_00/npu.sbin",
>   	.protocol_major = 0x5,
> -	.protocol_minor = 0x1,
> +	.protocol_minor = 0x7,
>   	.rt_config	= npu1_default_rt_cfg,
>   	.dpm_clk_tbl	= npu1_dpm_clk_table,
>   	.col_align	= COL_ALIGN_NONE,
> diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
> index ac63131f9c7c..286bd0d475e2 100644
> --- a/drivers/accel/amdxdna/npu2_regs.c
> +++ b/drivers/accel/amdxdna/npu2_regs.c
> @@ -64,7 +64,7 @@
>   const struct amdxdna_dev_priv npu2_dev_priv = {
>   	.fw_path        = "amdnpu/17f0_00/npu.sbin",
>   	.protocol_major = 0x6,
> -	.protocol_minor = 0x1,
> +	.protocol_minor = 0x6,
>   	.rt_config	= npu4_default_rt_cfg,
>   	.dpm_clk_tbl	= npu4_dpm_clk_table,
>   	.col_align	= COL_ALIGN_NATURE,
> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
> index a713ac18adfc..00c52833ce89 100644
> --- a/drivers/accel/amdxdna/npu4_regs.c
> +++ b/drivers/accel/amdxdna/npu4_regs.c
> @@ -85,7 +85,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>   const struct amdxdna_dev_priv npu4_dev_priv = {
>   	.fw_path        = "amdnpu/17f0_10/npu.sbin",
>   	.protocol_major = 0x6,
> -	.protocol_minor = 0x1,
> +	.protocol_minor = 12,
>   	.rt_config	= npu4_default_rt_cfg,
>   	.dpm_clk_tbl	= npu4_dpm_clk_table,
>   	.col_align	= COL_ALIGN_NATURE,
> diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
> index 67a5d5bc8a49..118849272f27 100644
> --- a/drivers/accel/amdxdna/npu5_regs.c
> +++ b/drivers/accel/amdxdna/npu5_regs.c
> @@ -64,7 +64,7 @@
>   const struct amdxdna_dev_priv npu5_dev_priv = {
>   	.fw_path        = "amdnpu/17f0_11/npu.sbin",
>   	.protocol_major = 0x6,
> -	.protocol_minor = 0x1,
> +	.protocol_minor = 12,
>   	.rt_config	= npu4_default_rt_cfg,
>   	.dpm_clk_tbl	= npu4_dpm_clk_table,
>   	.col_align	= COL_ALIGN_NATURE,


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 6/8] accel/amdxdna: Enhance power management settings
  2024-12-06 21:59 ` [PATCH V2 6/8] accel/amdxdna: Enhance power management settings Lizhi Hou
@ 2024-12-11  0:28   ` Mario Limonciello
  2024-12-11  5:28     ` Lizhi Hou
  2024-12-13 16:55   ` Jeffrey Hugo
  1 sibling, 1 reply; 33+ messages in thread
From: Mario Limonciello @ 2024-12-11  0:28 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	Narendra Gutta, George Yang

On 12/6/2024 15:59, Lizhi Hou wrote:
> Add SET_STATE ioctl to configure device power mode for aie2 device.
> Three modes are supported initially.
> 
> POWER_MODE_DEFAULT: Enable clock gating and set DPM (Dynamic Power
> Management) level to value which has been set by resource solver or
> maximum DPM level the device supports.
> 
> POWER_MODE_HIGH: Enable clock gating and set DPM level to maximum DPM
> level the device supports.
> 
> POWER_MODE_TURBO: Disable clock gating and set DPM level to maximum DPM
> level the device supports.
> 
> Disabling clock gating means all clocks always run at full speed, and
> different clock frequencies are used based on the DPM level that has
> been set. Initially, the driver sets the power mode to default mode.
> 
> Co-developed-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
> Signed-off-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
> Co-developed-by: George Yang <George.Yang@amd.com>
> Signed-off-by: George Yang <George.Yang@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
>   drivers/accel/amdxdna/Makefile          |   1 +
>   drivers/accel/amdxdna/TODO              |   1 -
>   drivers/accel/amdxdna/aie2_ctx.c        |   6 ++
>   drivers/accel/amdxdna/aie2_message.c    |   9 +-
>   drivers/accel/amdxdna/aie2_pci.c        | 136 +++++++++++++++++++-----
>   drivers/accel/amdxdna/aie2_pci.h        |  55 ++++++++--
>   drivers/accel/amdxdna/aie2_pm.c         | 108 +++++++++++++++++++
>   drivers/accel/amdxdna/aie2_smu.c        |  85 +++++++++------
>   drivers/accel/amdxdna/aie2_solver.c     |  59 +++++++++-
>   drivers/accel/amdxdna/aie2_solver.h     |   1 +
>   drivers/accel/amdxdna/amdxdna_pci_drv.c |  19 ++++
>   drivers/accel/amdxdna/amdxdna_pci_drv.h |   2 +
>   drivers/accel/amdxdna/npu1_regs.c       |  29 +++--
>   drivers/accel/amdxdna/npu2_regs.c       |  15 +--
>   drivers/accel/amdxdna/npu4_regs.c       |  32 ++++--
>   drivers/accel/amdxdna/npu5_regs.c       |  15 +--
>   drivers/accel/amdxdna/npu6_regs.c       |  19 ++--
>   include/uapi/drm/amdxdna_accel.h        |  52 +++++++++
>   18 files changed, 516 insertions(+), 128 deletions(-)
>   create mode 100644 drivers/accel/amdxdna/aie2_pm.c
> 
> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
> index 6baf181298de..0e9adf6890a0 100644
> --- a/drivers/accel/amdxdna/Makefile
> +++ b/drivers/accel/amdxdna/Makefile
> @@ -5,6 +5,7 @@ amdxdna-y := \
>   	aie2_error.o \
>   	aie2_message.o \
>   	aie2_pci.o \
> +	aie2_pm.o \
>   	aie2_psp.o \
>   	aie2_smu.o \
>   	aie2_solver.o \
> diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
> index de4e1dbc8868..5119bccd1917 100644
> --- a/drivers/accel/amdxdna/TODO
> +++ b/drivers/accel/amdxdna/TODO
> @@ -1,4 +1,3 @@
>   - Add import and export BO support
>   - Add debugfs support
>   - Add debug BO support
> -- Improve power management
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> index 07eecb40767f..6b4e6fcb7794 100644
> --- a/drivers/accel/amdxdna/aie2_ctx.c
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -518,6 +518,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>   	struct drm_gpu_scheduler *sched;
>   	struct amdxdna_hwctx_priv *priv;
>   	struct amdxdna_gem_obj *heap;
> +	struct amdxdna_dev_hdl *ndev;
>   	int i, ret;
>   
>   	priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
> @@ -612,6 +613,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>   	}
>   
>   	hwctx->status = HWCTX_STAT_INIT;
> +	ndev = xdna->dev_handle;
> +	ndev->hwctx_num++;
>   
>   	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
>   
> @@ -641,10 +644,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>   
>   void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
>   {
> +	struct amdxdna_dev_hdl *ndev;
>   	struct amdxdna_dev *xdna;
>   	int idx;
>   
>   	xdna = hwctx->client->xdna;
> +	ndev = xdna->dev_handle;
> +	ndev->hwctx_num--;
>   	drm_sched_wqueue_stop(&hwctx->priv->sched);
>   
>   	/* Now, scheduler will not send command to device. */
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index fc33a158d223..13b5a96f8d25 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -70,11 +70,18 @@ int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
>   int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value)
>   {
>   	DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
> +	int ret;
>   
>   	req.type = type;
>   	req.value = value;
>   
> -	return aie2_send_mgmt_msg_wait(ndev, &msg);
> +	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
> +	if (ret) {
> +		XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret);
> +		return ret;
> +	}
> +
> +	return 0;
>   }
>   
>   int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 83abd16ade11..489744a2e226 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -109,28 +109,26 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
>   	return 0;
>   }
>   
> -static int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev)
> +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
> +		     enum rt_config_category category, u32 *val)
>   {
> -	const struct rt_config *cfg = &ndev->priv->rt_config;
> -	u64 value;
> +	const struct rt_config *cfg;
> +	u32 value;
>   	int ret;
>   
> -	ret = aie2_set_runtime_cfg(ndev, cfg->type, cfg->value);
> -	if (ret) {
> -		XDNA_ERR(ndev->xdna, "Set runtime type %d value %d failed",
> -			 cfg->type, cfg->value);
> -		return ret;
> -	}
> +	for (cfg = ndev->priv->rt_config; cfg->type; cfg++) {
> +		if (cfg->category != category)
> +			continue;
>   
> -	ret = aie2_get_runtime_cfg(ndev, cfg->type, &value);
> -	if (ret) {
> -		XDNA_ERR(ndev->xdna, "Get runtime cfg failed");
> -		return ret;
> +		value = val ? *val : cfg->value;
> +		ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
> +		if (ret) {
> +			XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
> +				 cfg->type, value);
> +			return ret;
> +		}
>   	}
>   
> -	if (value != cfg->value)
> -		return -EINVAL;
> -
>   	return 0;
>   }
>   
> @@ -163,7 +161,7 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
>   		return ret;
>   	}
>   
> -	ret = aie2_runtime_cfg(ndev);
> +	ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
>   	if (ret) {
>   		XDNA_ERR(ndev->xdna, "Runtime config failed");
>   		return ret;
> @@ -257,9 +255,25 @@ static int aie2_xrs_unload(void *cb_arg)
>   	return ret;
>   }
>   
> +static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level)
> +{
> +	struct amdxdna_dev *xdna = to_xdna_dev(ddev);
> +	struct amdxdna_dev_hdl *ndev;
> +
> +	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));

This is a reinvented lockdep_assert_held() no?
Or is there some nuance I'm missing?

I would suggest switching to lockdep_assert_held().

> +
> +	ndev = xdna->dev_handle;
> +	ndev->dft_dpm_level = dpm_level;
> +	if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level)
> +		return 0;
> +
> +	return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
> +}
> +
>   static struct xrs_action_ops aie2_xrs_actions = {
>   	.load = aie2_xrs_load,
>   	.unload = aie2_xrs_unload,
> +	.set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
>   };
>   
>   static void aie2_hw_stop(struct amdxdna_dev *xdna)
> @@ -354,6 +368,12 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>   		goto stop_psp;
>   	}
>   
> +	ret = aie2_pm_init(ndev);
> +	if (ret) {
> +		XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
> +		goto destroy_mgmt_chann;
> +	}
> +
>   	ret = aie2_mgmt_fw_init(ndev);
>   	if (ret) {
>   		XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
> @@ -480,10 +500,9 @@ static int aie2_init(struct amdxdna_dev *xdna)
>   	}
>   	ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>   
> -	xrs_cfg.clk_list.num_levels = 3;
> -	xrs_cfg.clk_list.cu_clk_list[0] = 0;
> -	xrs_cfg.clk_list.cu_clk_list[1] = 800;
> -	xrs_cfg.clk_list.cu_clk_list[2] = 1000;
> +	xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
> +	for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
> +		xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
>   	xrs_cfg.sys_eff_factor = 1;
>   	xrs_cfg.ddev = &xdna->ddev;
>   	xrs_cfg.actions = &aie2_xrs_actions;
> @@ -657,6 +676,22 @@ static int aie2_get_firmware_version(struct amdxdna_client *client,
>   	return 0;
>   }
>   
> +static int aie2_get_power_mode(struct amdxdna_client *client,
> +			       struct amdxdna_drm_get_info *args)
> +{
> +	struct amdxdna_drm_get_power_mode mode = {};
> +	struct amdxdna_dev *xdna = client->xdna;
> +	struct amdxdna_dev_hdl *ndev;
> +
> +	ndev = xdna->dev_handle;
> +	mode.power_mode = ndev->pw_mode;
> +
> +	if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>   static int aie2_get_clock_metadata(struct amdxdna_client *client,
>   				   struct amdxdna_drm_get_info *args)
>   {
> @@ -670,11 +705,11 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
>   	if (!clock)
>   		return -ENOMEM;
>   
> -	memcpy(clock->mp_npu_clock.name, ndev->mp_npu_clock.name,
> -	       sizeof(clock->mp_npu_clock.name));
> -	clock->mp_npu_clock.freq_mhz = ndev->mp_npu_clock.freq_mhz;
> -	memcpy(clock->h_clock.name, ndev->h_clock.name, sizeof(clock->h_clock.name));
> -	clock->h_clock.freq_mhz = ndev->h_clock.freq_mhz;
> +	snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
> +		 "MP-NPU Clock");
> +	clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
> +	snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock");
> +	clock->h_clock.freq_mhz = ndev->hclk_freq;
>   
>   	if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock)))
>   		ret = -EFAULT;
> @@ -772,6 +807,9 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
>   	case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
>   		ret = aie2_get_firmware_version(client, args);
>   		break;
> +	case DRM_AMDXDNA_GET_POWER_MODE:
> +		ret = aie2_get_power_mode(client, args);
> +		break;
>   	default:
>   		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
>   		ret = -EOPNOTSUPP;
> @@ -782,12 +820,58 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
>   	return ret;
>   }
>   
> +static int aie2_set_power_mode(struct amdxdna_client *client,
> +			       struct amdxdna_drm_set_state *args)
> +{
> +	struct amdxdna_drm_set_power_mode power_state;
> +	enum amdxdna_power_mode_type power_mode;
> +	struct amdxdna_dev *xdna = client->xdna;
> +
> +	if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
> +			   sizeof(power_state))) {
> +		XDNA_ERR(xdna, "Failed to copy power mode request into kernel");
> +		return -EFAULT;
> +	}
> +
> +	power_mode = power_state.power_mode;
> +	if (power_mode > POWER_MODE_TURBO) {
> +		XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
> +		return -EINVAL;
> +	}
> +
> +	return aie2_pm_set_mode(xdna->dev_handle, power_mode);
> +}
> +
> +static int aie2_set_state(struct amdxdna_client *client,
> +			  struct amdxdna_drm_set_state *args)
> +{
> +	struct amdxdna_dev *xdna = client->xdna;
> +	int ret, idx;
> +
> +	if (!drm_dev_enter(&xdna->ddev, &idx))
> +		return -ENODEV;
> +
> +	switch (args->param) {
> +	case DRM_AMDXDNA_SET_POWER_MODE:
> +		ret = aie2_set_power_mode(client, args);
> +		break;
> +	default:
> +		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
> +		ret = -EOPNOTSUPP;
> +		break;
> +	}
> +
> +	drm_dev_exit(idx);
> +	return ret;
> +}
> +
>   const struct amdxdna_dev_ops aie2_ops = {
>   	.init           = aie2_init,
>   	.fini           = aie2_fini,
>   	.resume         = aie2_hw_start,
>   	.suspend        = aie2_hw_stop,
>   	.get_aie_info   = aie2_get_info,
> +	.set_aie_state	= aie2_set_state,
>   	.hwctx_init     = aie2_hwctx_init,
>   	.hwctx_fini     = aie2_hwctx_fini,
>   	.hwctx_config   = aie2_hwctx_config,
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index 1c6f07d9b805..8c17b74654ce 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -6,6 +6,7 @@
>   #ifndef _AIE2_PCI_H_
>   #define _AIE2_PCI_H_
>   
> +#include <drm/amdxdna_accel.h>
>   #include <linux/semaphore.h>
>   
>   #include "amdxdna_mailbox.h"
> @@ -48,9 +49,6 @@
>   	pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
>   })
>   
> -#define SMU_MPNPUCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_mpnpuclk_freq_max)
> -#define SMU_HCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_hclk_freq_max)
> -
>   enum aie2_smu_reg_idx {
>   	SMU_CMD_REG = 0,
>   	SMU_ARG_REG,
> @@ -112,14 +110,20 @@ struct aie_metadata {
>   	struct aie_tile_metadata shim;
>   };
>   
> -struct clock_entry {
> -	char name[16];
> -	u32 freq_mhz;
> +enum rt_config_category {
> +	AIE2_RT_CFG_INIT,
> +	AIE2_RT_CFG_CLK_GATING,
>   };
>   
>   struct rt_config {
>   	u32	type;
>   	u32	value;
> +	u32	category;
> +};
> +
> +struct dpm_clk_freq {
> +	u32	npuclk;
> +	u32	hclk;
>   };
>   
>   /*
> @@ -150,6 +154,7 @@ struct amdxdna_hwctx_priv {
>   };
>   
>   enum aie2_dev_status {
> +	AIE2_DEV_UNINIT,
>   	AIE2_DEV_INIT,
>   	AIE2_DEV_START,
>   };
> @@ -169,8 +174,15 @@ struct amdxdna_dev_hdl {
>   	u32				total_col;
>   	struct aie_version		version;
>   	struct aie_metadata		metadata;
> -	struct clock_entry		mp_npu_clock;
> -	struct clock_entry		h_clock;
> +
> +	/* power management and clock*/
> +	enum amdxdna_power_mode_type	pw_mode;
> +	u32				dpm_level;
> +	u32				dft_dpm_level;
> +	u32				max_dpm_level;
> +	u32				clk_gating;
> +	u32				npuclk_freq;
> +	u32				hclk_freq;
>   
>   	/* Mailbox and the management channel */
>   	struct mailbox			*mbox;
> @@ -178,6 +190,7 @@ struct amdxdna_dev_hdl {
>   	struct async_events		*async_events;
>   
>   	enum aie2_dev_status		dev_status;
> +	u32				hwctx_num;
>   };
>   
>   #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
> @@ -188,11 +201,17 @@ struct aie2_bar_off_pair {
>   	u32	offset;
>   };
>   
> +struct aie2_hw_ops {
> +	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
> +};
> +
>   struct amdxdna_dev_priv {
>   	const char			*fw_path;
>   	u64				protocol_major;
>   	u64				protocol_minor;
> -	struct rt_config		rt_config;
> +	const struct rt_config		*rt_config;
> +	const struct dpm_clk_freq	*dpm_clk_tbl;
> +
>   #define COL_ALIGN_NONE   0
>   #define COL_ALIGN_NATURE 1
>   	u32				col_align;
> @@ -203,15 +222,29 @@ struct amdxdna_dev_priv {
>   	struct aie2_bar_off_pair	sram_offs[SRAM_MAX_INDEX];
>   	struct aie2_bar_off_pair	psp_regs_off[PSP_MAX_REGS];
>   	struct aie2_bar_off_pair	smu_regs_off[SMU_MAX_REGS];
> -	u32				smu_mpnpuclk_freq_max;
> -	u32				smu_hclk_freq_max;
> +	struct aie2_hw_ops		hw_ops;
>   };
>   
>   extern const struct amdxdna_dev_ops aie2_ops;
>   
> +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
> +		     enum rt_config_category category, u32 *val);
> +
> +/* aie2 npu hw config */
> +extern const struct dpm_clk_freq npu1_dpm_clk_table[];
> +extern const struct dpm_clk_freq npu4_dpm_clk_table[];
> +extern const struct rt_config npu1_default_rt_cfg[];
> +extern const struct rt_config npu4_default_rt_cfg[];
> +
>   /* aie2_smu.c */
>   int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
>   void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
> +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
> +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
> +
> +/* aie2_pm.c */
> +int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
> +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
>   
>   /* aie2_psp.c */
>   struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
> diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
> new file mode 100644
> index 000000000000..426c38fce848
> --- /dev/null
> +++ b/drivers/accel/amdxdna/aie2_pm.c
> @@ -0,0 +1,108 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024, Advanced Micro Devices, Inc.
> + */
> +
> +#include <drm/amdxdna_accel.h>
> +#include <drm/drm_device.h>
> +#include <drm/drm_print.h>
> +#include <drm/gpu_scheduler.h>
> +
> +#include "aie2_pci.h"
> +#include "amdxdna_pci_drv.h"
> +
> +#define AIE2_CLK_GATING_ENABLE	1
> +#define AIE2_CLK_GATING_DISABLE	0
> +
> +static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val)
> +{
> +	int ret;
> +
> +	ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
> +	if (ret)
> +		return ret;
> +
> +	ndev->clk_gating = val;
> +	return 0;
> +}
> +
> +int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
> +{
> +	int ret;
> +
> +	if (ndev->dev_status != AIE2_DEV_UNINIT) {
> +		/* Resume device */
> +		ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
> +		if (ret)
> +			return ret;
> +
> +		ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
> +		if (ret)
> +			return ret;
> +
> +		return 0;
> +	}
> +
> +	while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
> +		ndev->max_dpm_level++;
> +	ndev->max_dpm_level--;
> +
> +	ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
> +	if (ret)
> +		return ret;
> +
> +	ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
> +	if (ret)
> +		return ret;

In the event of a failure do you want to try to restore dpm where it was?

> +
> +	ndev->pw_mode = POWER_MODE_DEFAULT;
> +	ndev->dft_dpm_level = ndev->max_dpm_level;
> +
> +	return 0;
> +}
> +
> +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target)
> +{
> +	struct amdxdna_dev *xdna = ndev->xdna;
> +	u32 clk_gating, dpm_level;
> +	int ret;
> +
> +	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));

lockdep_assert_held()

> +
> +	if (ndev->pw_mode == target)
> +		return 0;
> +
> +	switch (target) {
> +	case POWER_MODE_TURBO:
> +		if (ndev->hwctx_num) {
> +			XDNA_ERR(xdna, "Can not set turbo when there is active hwctx");
> +			return -EINVAL;
> +		}
> +
> +		clk_gating = AIE2_CLK_GATING_DISABLE;
> +		dpm_level = ndev->max_dpm_level;
> +		break;
> +	case POWER_MODE_HIGH:
> +		clk_gating = AIE2_CLK_GATING_ENABLE;
> +		dpm_level = ndev->max_dpm_level;
> +		break;
> +	case POWER_MODE_DEFAULT:
> +		clk_gating = AIE2_CLK_GATING_ENABLE;
> +		dpm_level = ndev->dft_dpm_level;
> +		break;
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +
> +	ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
> +	if (ret)
> +		return ret;
> +
> +	ret = aie2_pm_set_clk_gating(ndev, clk_gating);
> +	if (ret)
> +		return ret;
> +
> +	ndev->pw_mode = target;
> +
> +	return 0;
> +}
> diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
> index 91893d438da7..73388443c676 100644
> --- a/drivers/accel/amdxdna/aie2_smu.c
> +++ b/drivers/accel/amdxdna/aie2_smu.c
> @@ -19,8 +19,11 @@
>   #define AIE2_SMU_POWER_OFF		0x4
>   #define AIE2_SMU_SET_MPNPUCLK_FREQ	0x5
>   #define AIE2_SMU_SET_HCLK_FREQ		0x6
> +#define AIE2_SMU_SET_SOFT_DPMLEVEL	0x7
> +#define AIE2_SMU_SET_HARD_DPMLEVEL	0x8
>   
> -static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
> +static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
> +			 u32 reg_arg, u32 *out)
>   {
>   	u32 resp;
>   	int ret;
> @@ -40,6 +43,9 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
>   		return ret;
>   	}
>   
> +	if (out)
> +		*out = readl(SMU_REG(ndev, SMU_OUT_REG));
> +
>   	if (resp != SMU_RESULT_OK) {
>   		XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp);
>   		return -EINVAL;
> @@ -48,63 +54,71 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
>   	return 0;
>   }
>   
> -static int aie2_smu_set_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
> +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>   {
> +	u32 freq;
>   	int ret;
>   
> -	if (!freq_mhz || freq_mhz > SMU_MPNPUCLK_FREQ_MAX(ndev)) {
> -		XDNA_ERR(ndev->xdna, "invalid mpnpu clock freq %d", freq_mhz);
> -		return -EINVAL;
> +	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
> +			    ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
> +	if (ret) {
> +		XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
> +			 ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
>   	}
> +	ndev->npuclk_freq = freq;
>   
> -	ndev->mp_npu_clock.freq_mhz = freq_mhz;
> -	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, freq_mhz);
> -	if (!ret)
> -		XDNA_INFO_ONCE(ndev->xdna, "set mpnpu_clock = %d mhz", freq_mhz);
> -
> -	return ret;
> -}
> -
> -static int aie2_smu_set_hclock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
> -{
> -	int ret;
> -
> -	if (!freq_mhz || freq_mhz > SMU_HCLK_FREQ_MAX(ndev)) {
> -		XDNA_ERR(ndev->xdna, "invalid hclock freq %d", freq_mhz);
> -		return -EINVAL;
> +	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ,
> +			    ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq);
> +	if (ret) {
> +		XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
> +			 ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
>   	}
> +	ndev->hclk_freq = freq;
> +	ndev->dpm_level = dpm_level;
>   
> -	ndev->h_clock.freq_mhz = freq_mhz;
> -	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, freq_mhz);
> -	if (!ret)
> -		XDNA_INFO_ONCE(ndev->xdna, "set npu_hclock = %d mhz", freq_mhz);
> +	XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
> +		 ndev->npuclk_freq, ndev->hclk_freq);
>   
> -	return ret;
> +	return 0;
>   }
>   
> -int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
> +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>   {
>   	int ret;
>   
> -	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0);
> +	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
>   	if (ret) {
> -		XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
> +		XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
> +			 dpm_level, ret);
>   		return ret;
>   	}
>   
> -	ret = aie2_smu_set_mpnpu_clock_freq(ndev, SMU_MPNPUCLK_FREQ_MAX(ndev));
> +	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
>   	if (ret) {
> -		XDNA_ERR(ndev->xdna, "Set mpnpu clk freq failed, ret %d", ret);
> +		XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
> +			 dpm_level, ret);
>   		return ret;
>   	}
> -	snprintf(ndev->mp_npu_clock.name, sizeof(ndev->mp_npu_clock.name), "MP-NPU Clock");
>   
> -	ret = aie2_smu_set_hclock_freq(ndev, SMU_HCLK_FREQ_MAX(ndev));
> +	ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
> +	ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
> +	ndev->dpm_level = dpm_level;
> +
> +	XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
> +		 ndev->npuclk_freq, ndev->hclk_freq);
> +
> +	return 0;
> +}
> +
> +int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
> +{
> +	int ret;
> +
> +	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
>   	if (ret) {
> -		XDNA_ERR(ndev->xdna, "Set hclk freq failed, ret %d", ret);
> +		XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
>   		return ret;
>   	}
> -	snprintf(ndev->h_clock.name, sizeof(ndev->h_clock.name), "H Clock");
>   
>   	return 0;
>   }
> @@ -113,7 +127,8 @@ void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
>   {
>   	int ret;
>   
> -	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0);
> +	ndev->priv->hw_ops.set_dpm(ndev, 0);
> +	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
>   	if (ret)
>   		XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
>   }
> diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/amdxdna/aie2_solver.c
> index a537c66589a4..1939625d6027 100644
> --- a/drivers/accel/amdxdna/aie2_solver.c
> +++ b/drivers/accel/amdxdna/aie2_solver.c
> @@ -25,6 +25,7 @@ struct solver_node {
>   
>   	struct partition_node	*pt_node;
>   	void			*cb_arg;
> +	u32			dpm_level;
>   	u32			cols_len;
>   	u32			start_cols[] __counted_by(cols_len);
>   };
> @@ -95,6 +96,51 @@ static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
>   	return 0;
>   }
>   
> +static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
> +{
> +	/*
> +	 * gops is retrieved from the xmodel, so it's always set
> +	 * fps and latency are the configurable params from the application
> +	 */
> +	if (rqos->gops > 0 && (rqos->fps > 0 ||  rqos->latency > 0))
> +		return true;
> +
> +	return false;
> +}
> +
> +static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level)
> +{
> +	struct solver_rgroup *rgp = &xrs->rgp;
> +	struct cdo_parts *cdop = &req->cdo;
> +	struct aie_qos *rqos = &req->rqos;
> +	u32 freq, max_dpm_level, level;
> +	struct solver_node *node;
> +
> +	max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
> +	/* If no QoS parameters are passed, set it to the max DPM level */
> +	if (!is_valid_qos_dpm_params(rqos)) {
> +		level = max_dpm_level;
> +		goto set_dpm;
> +	}
> +
> +	/* Find one CDO group that meet the GOPs requirement. */
> +	for (level = 0; level < max_dpm_level; level++) {
> +		freq = xrs->cfg.clk_list.cu_clk_list[level];
> +		if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
> +			break;
> +	}
> +
> +	/* set the dpm level which fits all the sessions */
> +	list_for_each_entry(node, &rgp->node_list, list) {
> +		if (node->dpm_level > level)
> +			level = node->dpm_level;
> +	}
> +
> +set_dpm:
> +	*dpm_level = level;
> +	return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
> +}
> +
>   static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid)
>   {
>   	struct solver_node *node;
> @@ -159,12 +205,9 @@ static int get_free_partition(struct solver_state *xrs,
>   	pt_node->ncols = ncols;
>   
>   	/*
> -	 * Before fully support latency in QoS, if a request
> -	 * specifies a non-zero latency value, it will not share
> -	 * the partition with other requests.
> +	 * Always set exclusive to false for now.
>   	 */
> -	if (req->rqos.latency)
> -		pt_node->exclusive = true;
> +	pt_node->exclusive = false;
>   
>   	list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
>   	xrs->rgp.npartition_node++;
> @@ -257,6 +300,7 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
>   	struct xrs_action_load load_act;
>   	struct solver_node *snode;
>   	struct solver_state *xrs;
> +	u32 dpm_level;
>   	int ret;
>   
>   	xrs = (struct solver_state *)hdl;
> @@ -281,6 +325,11 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
>   	if (ret)
>   		goto free_node;
>   
> +	ret = set_dpm_level(xrs, req, &dpm_level);
> +	if (ret)
> +		goto free_node;
> +
> +	snode->dpm_level = dpm_level;
>   	snode->cb_arg = cb_arg;
>   
>   	drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
> diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/amdxdna/aie2_solver.h
> index 9b1847bb46a6..a2e3c52229e9 100644
> --- a/drivers/accel/amdxdna/aie2_solver.h
> +++ b/drivers/accel/amdxdna/aie2_solver.h
> @@ -99,6 +99,7 @@ struct clk_list_info {
>   struct xrs_action_ops {
>   	int (*load)(void *cb_arg, struct xrs_action_load *action);
>   	int (*unload)(void *cb_arg);
> +	int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
>   };
>   
>   /*
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index c3541796d189..6bbd437d48d8 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -160,6 +160,24 @@ static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct
>   	return ret;
>   }
>   
> +static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> +{
> +	struct amdxdna_client *client = filp->driver_priv;
> +	struct amdxdna_dev *xdna = to_xdna_dev(dev);
> +	struct amdxdna_drm_set_state *args = data;
> +	int ret;
> +
> +	if (!xdna->dev_info->ops->set_aie_state)
> +		return -EOPNOTSUPP;
> +
> +	XDNA_DBG(xdna, "Request parameter %u", args->param);
> +	mutex_lock(&xdna->dev_lock);
> +	ret = xdna->dev_info->ops->set_aie_state(client, args);
> +	mutex_unlock(&xdna->dev_lock);
> +
> +	return ret;
> +}
> +
>   static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
>   	/* Context */
>   	DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
> @@ -173,6 +191,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
>   	DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
>   	/* AIE hardware */
>   	DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
> +	DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
>   };
>   
>   static const struct file_operations amdxdna_fops = {
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> index f5b830fb14bb..e2071e31d949 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> @@ -20,6 +20,7 @@ extern const struct drm_driver amdxdna_drm_drv;
>   struct amdxdna_client;
>   struct amdxdna_dev;
>   struct amdxdna_drm_get_info;
> +struct amdxdna_drm_set_state;
>   struct amdxdna_gem_obj;
>   struct amdxdna_hwctx;
>   struct amdxdna_sched_job;
> @@ -40,6 +41,7 @@ struct amdxdna_dev_ops {
>   	void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
>   	int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
>   	int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
> +	int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
>   };
>   
>   /*
> diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
> index f00c50461b09..c8f4d1cac65d 100644
> --- a/drivers/accel/amdxdna/npu1_regs.c
> +++ b/drivers/accel/amdxdna/npu1_regs.c
> @@ -44,18 +44,30 @@
>   #define NPU1_SMU_BAR_BASE  MPNPU_APERTURE0_BASE
>   #define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
>   
> -#define NPU1_RT_CFG_TYPE_PDI_LOAD 2
> -#define NPU1_RT_CFG_VAL_PDI_LOAD_MGMT 0
> -#define NPU1_RT_CFG_VAL_PDI_LOAD_APP 1
> +const struct rt_config npu1_default_rt_cfg[] = {
> +	{ 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
> +	{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
> +	{ 0 },
> +};
>   
> -#define NPU1_MPNPUCLK_FREQ_MAX  600
> -#define NPU1_HCLK_FREQ_MAX      1024
> +const struct dpm_clk_freq npu1_dpm_clk_table[] = {
> +	{400, 800},
> +	{600, 1024},
> +	{600, 1024},
> +	{600, 1024},
> +	{600, 1024},
> +	{720, 1309},
> +	{720, 1309},
> +	{847, 1600},
> +	{ 0 }
> +};
>   
>   const struct amdxdna_dev_priv npu1_dev_priv = {
>   	.fw_path        = "amdnpu/1502_00/npu.sbin",
>   	.protocol_major = 0x5,
>   	.protocol_minor = 0x1,
> -	.rt_config	= {NPU1_RT_CFG_TYPE_PDI_LOAD, NPU1_RT_CFG_VAL_PDI_LOAD_APP},
> +	.rt_config	= npu1_default_rt_cfg,
> +	.dpm_clk_tbl	= npu1_dpm_clk_table,
>   	.col_align	= COL_ALIGN_NONE,
>   	.mbox_dev_addr  = NPU1_MBOX_BAR_BASE,
>   	.mbox_size      = 0, /* Use BAR size */
> @@ -80,8 +92,9 @@ const struct amdxdna_dev_priv npu1_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
>   	},
> -	.smu_mpnpuclk_freq_max = NPU1_MPNPUCLK_FREQ_MAX,
> -	.smu_hclk_freq_max     = NPU1_HCLK_FREQ_MAX,
> +	.hw_ops		= {
> +		.set_dpm = npu1_set_dpm,
> +	},
>   };
>   
>   const struct amdxdna_dev_info dev_npu1_info = {
> diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
> index 00cb381031d2..ac63131f9c7c 100644
> --- a/drivers/accel/amdxdna/npu2_regs.c
> +++ b/drivers/accel/amdxdna/npu2_regs.c
> @@ -61,18 +61,12 @@
>   #define NPU2_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
>   #define NPU2_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
>   
> -#define NPU2_RT_CFG_TYPE_PDI_LOAD 5
> -#define NPU2_RT_CFG_VAL_PDI_LOAD_MGMT 0
> -#define NPU2_RT_CFG_VAL_PDI_LOAD_APP 1
> -
> -#define NPU2_MPNPUCLK_FREQ_MAX  1267
> -#define NPU2_HCLK_FREQ_MAX      1800
> -
>   const struct amdxdna_dev_priv npu2_dev_priv = {
>   	.fw_path        = "amdnpu/17f0_00/npu.sbin",
>   	.protocol_major = 0x6,
>   	.protocol_minor = 0x1,
> -	.rt_config	= {NPU2_RT_CFG_TYPE_PDI_LOAD, NPU2_RT_CFG_VAL_PDI_LOAD_APP},
> +	.rt_config	= npu4_default_rt_cfg,
> +	.dpm_clk_tbl	= npu4_dpm_clk_table,
>   	.col_align	= COL_ALIGN_NATURE,
>   	.mbox_dev_addr  = NPU2_MBOX_BAR_BASE,
>   	.mbox_size      = 0, /* Use BAR size */
> @@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu2_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU2_SMU, MP1_C2PMSG_60),
>   	},
> -	.smu_mpnpuclk_freq_max = NPU2_MPNPUCLK_FREQ_MAX,
> -	.smu_hclk_freq_max     = NPU2_HCLK_FREQ_MAX,
> +	.hw_ops	=	 {
> +		.set_dpm = npu4_set_dpm,
> +	},
>   };
>   
>   const struct amdxdna_dev_info dev_npu2_info = {
> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
> index b6dae9667cca..a713ac18adfc 100644
> --- a/drivers/accel/amdxdna/npu4_regs.c
> +++ b/drivers/accel/amdxdna/npu4_regs.c
> @@ -61,18 +61,33 @@
>   #define NPU4_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
>   #define NPU4_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
>   
> -#define NPU4_RT_CFG_TYPE_PDI_LOAD 5
> -#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT 0
> -#define NPU4_RT_CFG_VAL_PDI_LOAD_APP 1
> +const struct rt_config npu4_default_rt_cfg[] = {
> +	{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
> +	{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
> +	{ 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
> +	{ 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
> +	{ 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
> +	{ 0 },
> +};
>   
> -#define NPU4_MPNPUCLK_FREQ_MAX  1267
> -#define NPU4_HCLK_FREQ_MAX      1800
> +const struct dpm_clk_freq npu4_dpm_clk_table[] = {
> +	{396, 792},
> +	{600, 1056},
> +	{792, 1152},
> +	{975, 1267},
> +	{975, 1267},
> +	{1056, 1408},
> +	{1152, 1584},
> +	{1267, 1800},
> +	{ 0 }
> +};
>   
>   const struct amdxdna_dev_priv npu4_dev_priv = {
>   	.fw_path        = "amdnpu/17f0_10/npu.sbin",
>   	.protocol_major = 0x6,
>   	.protocol_minor = 0x1,
> -	.rt_config	= {NPU4_RT_CFG_TYPE_PDI_LOAD, NPU4_RT_CFG_VAL_PDI_LOAD_APP},
> +	.rt_config	= npu4_default_rt_cfg,
> +	.dpm_clk_tbl	= npu4_dpm_clk_table,
>   	.col_align	= COL_ALIGN_NATURE,
>   	.mbox_dev_addr  = NPU4_MBOX_BAR_BASE,
>   	.mbox_size      = 0, /* Use BAR size */
> @@ -97,8 +112,9 @@ const struct amdxdna_dev_priv npu4_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
>   	},
> -	.smu_mpnpuclk_freq_max = NPU4_MPNPUCLK_FREQ_MAX,
> -	.smu_hclk_freq_max     = NPU4_HCLK_FREQ_MAX,
> +	.hw_ops		= {
> +		.set_dpm = npu4_set_dpm,
> +	},
>   };
>   
>   const struct amdxdna_dev_info dev_npu4_info = {
> diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
> index bed1baf8e160..67a5d5bc8a49 100644
> --- a/drivers/accel/amdxdna/npu5_regs.c
> +++ b/drivers/accel/amdxdna/npu5_regs.c
> @@ -61,18 +61,12 @@
>   #define NPU5_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
>   #define NPU5_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
>   
> -#define NPU5_RT_CFG_TYPE_PDI_LOAD 5
> -#define NPU5_RT_CFG_VAL_PDI_LOAD_MGMT 0
> -#define NPU5_RT_CFG_VAL_PDI_LOAD_APP 1
> -
> -#define NPU5_MPNPUCLK_FREQ_MAX  1267
> -#define NPU5_HCLK_FREQ_MAX      1800
> -
>   const struct amdxdna_dev_priv npu5_dev_priv = {
>   	.fw_path        = "amdnpu/17f0_11/npu.sbin",
>   	.protocol_major = 0x6,
>   	.protocol_minor = 0x1,
> -	.rt_config	= {NPU5_RT_CFG_TYPE_PDI_LOAD, NPU5_RT_CFG_VAL_PDI_LOAD_APP},
> +	.rt_config	= npu4_default_rt_cfg,
> +	.dpm_clk_tbl	= npu4_dpm_clk_table,
>   	.col_align	= COL_ALIGN_NATURE,
>   	.mbox_dev_addr  = NPU5_MBOX_BAR_BASE,
>   	.mbox_size      = 0, /* Use BAR size */
> @@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu5_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
>   	},
> -	.smu_mpnpuclk_freq_max = NPU5_MPNPUCLK_FREQ_MAX,
> -	.smu_hclk_freq_max     = NPU5_HCLK_FREQ_MAX,
> +	.hw_ops		= {
> +		.set_dpm = npu4_set_dpm,
> +	},
>   };
>   
>   const struct amdxdna_dev_info dev_npu5_info = {
> diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
> index d1168fc55533..f46c760cefc7 100644
> --- a/drivers/accel/amdxdna/npu6_regs.c
> +++ b/drivers/accel/amdxdna/npu6_regs.c
> @@ -61,23 +61,12 @@
>   #define NPU6_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
>   #define NPU6_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
>   
> -#define NPU6_RT_CFG_TYPE_PDI_LOAD 5
> -#define NPU6_RT_CFG_TYPE_DEBUG_BO 10
> -
> -#define NPU6_RT_CFG_VAL_PDI_LOAD_MGMT 0
> -#define NPU6_RT_CFG_VAL_PDI_LOAD_APP 1
> -
> -#define NPU6_RT_CFG_VAL_DEBUG_BO_DEFAULT 0
> -#define NPU6_RT_CFG_VAL_DEBUG_BO_LARGE   1
> -
> -#define NPU6_MPNPUCLK_FREQ_MAX  1267
> -#define NPU6_HCLK_FREQ_MAX      1800
> -
>   const struct amdxdna_dev_priv npu6_dev_priv = {
>   	.fw_path        = "amdnpu/17f0_10/npu.sbin",
>   	.protocol_major = 0x6,
>   	.protocol_minor = 12,
> -	.rt_config	= {NPU6_RT_CFG_TYPE_PDI_LOAD, NPU6_RT_CFG_VAL_PDI_LOAD_APP},
> +	.rt_config	= npu4_default_rt_cfg,
> +	.dpm_clk_tbl	= npu4_dpm_clk_table,
>   	.col_align	= COL_ALIGN_NATURE,
>   	.mbox_dev_addr  = NPU6_MBOX_BAR_BASE,
>   	.mbox_size      = 0, /* Use BAR size */
> @@ -102,6 +91,10 @@ const struct amdxdna_dev_priv npu6_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
>   	},
> +	.hw_ops         = {
> +		.set_dpm = npu4_set_dpm,
> +	},
> +
>   };
>   
>   const struct amdxdna_dev_info dev_npu6_info = {
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index 4f15e53a548d..9af9302baf90 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -33,6 +33,7 @@ enum amdxdna_drm_ioctl_id {
>   	DRM_AMDXDNA_SYNC_BO,
>   	DRM_AMDXDNA_EXEC_CMD,
>   	DRM_AMDXDNA_GET_INFO,
> +	DRM_AMDXDNA_SET_STATE,
>   };
>   
>   /**
> @@ -375,6 +376,24 @@ struct amdxdna_drm_query_hwctx {
>   	__u64 errors;
>   };
>   
> +enum amdxdna_power_mode_type {
> +	POWER_MODE_DEFAULT, /* Fallback to calculated DPM */
> +	POWER_MODE_LOW,     /* Set frequency to lowest DPM */
> +	POWER_MODE_MEDIUM,  /* Set frequency to medium DPM */
> +	POWER_MODE_HIGH,    /* Set frequency to highest DPM */
> +	POWER_MODE_TURBO,   /* Maximum power */
> +};
> +
> +/**
> + * struct amdxdna_drm_get_power_mode - Get the configured power mode
> + * @power_mode: The mode type from enum amdxdna_power_mode_type
> + * @pad: MBZ.
> + */
> +struct amdxdna_drm_get_power_mode {
> +	__u8 power_mode;
> +	__u8 pad[7];
> +};
> +
>   /**
>    * struct amdxdna_drm_query_firmware_version - Query the firmware version
>    * @major: The major version number
> @@ -397,6 +416,7 @@ enum amdxdna_drm_get_param {
>   	DRM_AMDXDNA_QUERY_SENSORS,
>   	DRM_AMDXDNA_QUERY_HW_CONTEXTS,
>   	DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
> +	DRM_AMDXDNA_GET_POWER_MODE,
>   };
>   
>   /**
> @@ -411,6 +431,34 @@ struct amdxdna_drm_get_info {
>   	__u64 buffer; /* in/out */
>   };
>   
> +enum amdxdna_drm_set_param {
> +	DRM_AMDXDNA_SET_POWER_MODE,
> +	DRM_AMDXDNA_WRITE_AIE_MEM,
> +	DRM_AMDXDNA_WRITE_AIE_REG,
> +};
> +
> +/**
> + * struct amdxdna_drm_set_state - Set the state of the AIE hardware.
> + * @param: Value in enum amdxdna_drm_set_param.
> + * @buffer_size: Size of the input param.
> + * @buffer: Input param.
> + */
> +struct amdxdna_drm_set_state {
> +	__u32 param; /* in */
> +	__u32 buffer_size; /* in */
> +	__u64 buffer; /* in */
> +};
> +
> +/**
> + * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE hardware
> + * @power_mode: The power mode type from enum amdxdna_power_mode_type
> + */
> +struct amdxdna_drm_set_power_mode {
> +	__u8 power_mode;
> +	__u8 pad[7];
> +};
> +
>   #define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \
>   	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \
>   		 struct amdxdna_drm_create_hwctx)
> @@ -443,6 +491,10 @@ struct amdxdna_drm_get_info {
>   	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, \
>   		 struct amdxdna_drm_get_info)
>   
> +#define DRM_IOCTL_AMDXDNA_SET_STATE \
> +	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \
> +		 struct amdxdna_drm_set_state)
> +
>   #if defined(__cplusplus)
>   } /* extern c end */
>   #endif


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 5/8] accel/amdxdna: Add query firmware version
  2024-12-06 21:59 ` [PATCH V2 5/8] accel/amdxdna: Add query firmware version Lizhi Hou
@ 2024-12-11  0:28   ` Mario Limonciello
  2024-12-13 16:48   ` Jeffrey Hugo
  1 sibling, 0 replies; 33+ messages in thread
From: Mario Limonciello @ 2024-12-11  0:28 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam

On 12/6/2024 15:59, Lizhi Hou wrote:
> Enhance GET_INFO ioctl to support retrieving firmware version.
> 
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
>   drivers/accel/amdxdna/aie2_pci.c | 20 ++++++++++++++++++++
>   include/uapi/drm/amdxdna_accel.h | 16 +++++++++++++++-
>   2 files changed, 35 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 1c8170325837..83abd16ade11 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -640,6 +640,23 @@ static int aie2_get_aie_version(struct amdxdna_client *client,
>   	return 0;
>   }
>   
> +static int aie2_get_firmware_version(struct amdxdna_client *client,
> +				     struct amdxdna_drm_get_info *args)
> +{
> +	struct amdxdna_drm_query_firmware_version version;
> +	struct amdxdna_dev *xdna = client->xdna;
> +
> +	version.major = xdna->fw_ver.major;
> +	version.minor = xdna->fw_ver.minor;
> +	version.patch = xdna->fw_ver.sub;
> +	version.build = xdna->fw_ver.build;
> +
> +	if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>   static int aie2_get_clock_metadata(struct amdxdna_client *client,
>   				   struct amdxdna_drm_get_info *args)
>   {
> @@ -752,6 +769,9 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
>   	case DRM_AMDXDNA_QUERY_HW_CONTEXTS:
>   		ret = aie2_get_hwctx_status(client, args);
>   		break;
> +	case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
> +		ret = aie2_get_firmware_version(client, args);
> +		break;
>   	default:
>   		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
>   		ret = -EOPNOTSUPP;
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index af12af8bd699..4f15e53a548d 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -375,6 +375,20 @@ struct amdxdna_drm_query_hwctx {
>   	__u64 errors;
>   };
>   
> +/**
> + * struct amdxdna_drm_query_firmware_version - Query the firmware version
> + * @major: The major version number
> + * @minor: The minor version number
> + * @patch: The patch level version number
> + * @build: The build ID
> + */
> +struct amdxdna_drm_query_firmware_version {
> +	__u32 major; /* out */
> +	__u32 minor; /* out */
> +	__u32 patch; /* out */
> +	__u32 build; /* out */
> +};
> +
>   enum amdxdna_drm_get_param {
>   	DRM_AMDXDNA_QUERY_AIE_STATUS,
>   	DRM_AMDXDNA_QUERY_AIE_METADATA,
> @@ -382,7 +396,7 @@ enum amdxdna_drm_get_param {
>   	DRM_AMDXDNA_QUERY_CLOCK_METADATA,
>   	DRM_AMDXDNA_QUERY_SENSORS,
>   	DRM_AMDXDNA_QUERY_HW_CONTEXTS,
> -	DRM_AMDXDNA_NUM_GET_PARAM,
> +	DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
>   };
>   
>   /**


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 3/8] accel/amdxdna: Add RyzenAI-npu6 support
  2024-12-06 21:59 ` [PATCH V2 3/8] accel/amdxdna: Add RyzenAI-npu6 support Lizhi Hou
@ 2024-12-11  0:30   ` Mario Limonciello
  2024-12-13 16:37   ` Jeffrey Hugo
  1 sibling, 0 replies; 33+ messages in thread
From: Mario Limonciello @ 2024-12-11  0:30 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	Xiaoming Ren

On 12/6/2024 15:59, Lizhi Hou wrote:
> Add NPU6 registers and other private configurations.
> 
> Co-developed-by: Xiaoming Ren <xiaoming.ren@amd.com>
> Signed-off-by: Xiaoming Ren <xiaoming.ren@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
>   drivers/accel/amdxdna/Makefile    |   3 +-
>   drivers/accel/amdxdna/npu6_regs.c | 121 ++++++++++++++++++++++++++++++
>   2 files changed, 123 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/accel/amdxdna/npu6_regs.c
> 
> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
> index ed6f87910880..6baf181298de 100644
> --- a/drivers/accel/amdxdna/Makefile
> +++ b/drivers/accel/amdxdna/Makefile
> @@ -17,5 +17,6 @@ amdxdna-y := \
>   	npu1_regs.o \
>   	npu2_regs.o \
>   	npu4_regs.o \
> -	npu5_regs.o
> +	npu5_regs.o \
> +	npu6_regs.o
>   obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o
> diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
> new file mode 100644
> index 000000000000..d1168fc55533
> --- /dev/null
> +++ b/drivers/accel/amdxdna/npu6_regs.c
> @@ -0,0 +1,121 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024, Advanced Micro Devices, Inc.
> + */
> +
> +#include <drm/amdxdna_accel.h>
> +#include <drm/drm_device.h>
> +#include <drm/gpu_scheduler.h>
> +#include <linux/sizes.h>
> +
> +#include "aie2_pci.h"
> +#include "amdxdna_mailbox.h"
> +#include "amdxdna_pci_drv.h"
> +
> +/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
> +#define MPNPU_PUB_SEC_INTR             0x3010060
> +#define MPNPU_PUB_PWRMGMT_INTR         0x3010064
> +#define MPNPU_PUB_SCRATCH0             0x301006C
> +#define MPNPU_PUB_SCRATCH1             0x3010070
> +#define MPNPU_PUB_SCRATCH2             0x3010074
> +#define MPNPU_PUB_SCRATCH3             0x3010078
> +#define MPNPU_PUB_SCRATCH4             0x301007C
> +#define MPNPU_PUB_SCRATCH5             0x3010080
> +#define MPNPU_PUB_SCRATCH6             0x3010084
> +#define MPNPU_PUB_SCRATCH7             0x3010088
> +#define MPNPU_PUB_SCRATCH8             0x301008C
> +#define MPNPU_PUB_SCRATCH9             0x3010090
> +#define MPNPU_PUB_SCRATCH10            0x3010094
> +#define MPNPU_PUB_SCRATCH11            0x3010098
> +#define MPNPU_PUB_SCRATCH12            0x301009C
> +#define MPNPU_PUB_SCRATCH13            0x30100A0
> +#define MPNPU_PUB_SCRATCH14            0x30100A4
> +#define MPNPU_PUB_SCRATCH15            0x30100A8
> +#define MP0_C2PMSG_73                  0x3810A24
> +#define MP0_C2PMSG_123                 0x3810AEC
> +
> +#define MP1_C2PMSG_0                   0x3B10900
> +#define MP1_C2PMSG_60                  0x3B109F0
> +#define MP1_C2PMSG_61                  0x3B109F4
> +
> +#define MPNPU_SRAM_X2I_MAILBOX_0       0x3600000
> +#define MPNPU_SRAM_X2I_MAILBOX_15      0x361E000
> +#define MPNPU_SRAM_X2I_MAILBOX_31      0x363E000
> +#define MPNPU_SRAM_I2X_MAILBOX_31      0x363F000
> +
> +#define MMNPU_APERTURE0_BASE           0x3000000
> +#define MMNPU_APERTURE1_BASE           0x3600000
> +#define MMNPU_APERTURE3_BASE           0x3810000
> +#define MMNPU_APERTURE4_BASE           0x3B10000
> +
> +/* PCIe BAR Index for NPU6 */
> +#define NPU6_REG_BAR_INDEX	0
> +#define NPU6_MBOX_BAR_INDEX	0
> +#define NPU6_PSP_BAR_INDEX	4
> +#define NPU6_SMU_BAR_INDEX	5
> +#define NPU6_SRAM_BAR_INDEX	2
> +/* Associated BARs and Apertures */
> +#define NPU6_REG_BAR_BASE	MMNPU_APERTURE0_BASE
> +#define NPU6_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE
> +#define NPU6_PSP_BAR_BASE	MMNPU_APERTURE3_BASE
> +#define NPU6_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
> +#define NPU6_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
> +
> +#define NPU6_RT_CFG_TYPE_PDI_LOAD 5
> +#define NPU6_RT_CFG_TYPE_DEBUG_BO 10
> +
> +#define NPU6_RT_CFG_VAL_PDI_LOAD_MGMT 0
> +#define NPU6_RT_CFG_VAL_PDI_LOAD_APP 1
> +
> +#define NPU6_RT_CFG_VAL_DEBUG_BO_DEFAULT 0
> +#define NPU6_RT_CFG_VAL_DEBUG_BO_LARGE   1
> +
> +#define NPU6_MPNPUCLK_FREQ_MAX  1267
> +#define NPU6_HCLK_FREQ_MAX      1800
> +
> +const struct amdxdna_dev_priv npu6_dev_priv = {
> +	.fw_path        = "amdnpu/17f0_10/npu.sbin",
> +	.protocol_major = 0x6,
> +	.protocol_minor = 12,
> +	.rt_config	= {NPU6_RT_CFG_TYPE_PDI_LOAD, NPU6_RT_CFG_VAL_PDI_LOAD_APP},
> +	.col_align	= COL_ALIGN_NATURE,
> +	.mbox_dev_addr  = NPU6_MBOX_BAR_BASE,
> +	.mbox_size      = 0, /* Use BAR size */
> +	.sram_dev_addr  = NPU6_SRAM_BAR_BASE,
> +	.sram_offs      = {
> +		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
> +		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
> +	},
> +	.psp_regs_off   = {
> +		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU6_PSP, MP0_C2PMSG_123),
> +		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU6_REG, MPNPU_PUB_SCRATCH3),
> +		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU6_REG, MPNPU_PUB_SCRATCH4),
> +		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU6_REG, MPNPU_PUB_SCRATCH9),
> +		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU6_PSP, MP0_C2PMSG_73),
> +		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123),
> +		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU6_REG, MPNPU_PUB_SCRATCH3),
> +	},
> +	.smu_regs_off   = {
> +		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU6_SMU, MP1_C2PMSG_0),
> +		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU6_SMU, MP1_C2PMSG_60),
> +		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU6_SMU, MMNPU_APERTURE4_BASE),
> +		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
> +		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
> +	},
> +};
> +
> +const struct amdxdna_dev_info dev_npu6_info = {
> +	.reg_bar           = NPU6_REG_BAR_INDEX,
> +	.mbox_bar          = NPU6_MBOX_BAR_INDEX,
> +	.sram_bar          = NPU6_SRAM_BAR_INDEX,
> +	.psp_bar           = NPU6_PSP_BAR_INDEX,
> +	.smu_bar           = NPU6_SMU_BAR_INDEX,
> +	.first_col         = 0,
> +	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
> +	.dev_mem_base      = AIE2_DEVM_BASE,
> +	.dev_mem_size      = AIE2_DEVM_SIZE,
> +	.vbnv              = "RyzenAI-npu6",
> +	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
> +	.dev_priv          = &npu6_dev_priv,
> +	.ops               = &aie2_ops,
> +};


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 1/8] accel/amdxdna: Add device status for aie2 devices
  2024-12-06 21:59 ` [PATCH V2 1/8] accel/amdxdna: Add device status for aie2 devices Lizhi Hou
@ 2024-12-11  0:31   ` Mario Limonciello
  2024-12-13 16:31   ` Jeffrey Hugo
  1 sibling, 0 replies; 33+ messages in thread
From: Mario Limonciello @ 2024-12-11  0:31 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam

On 12/6/2024 15:59, Lizhi Hou wrote:
> Add device status to track if aie2_hw_start() or aie2_hw_stop() is
> re-entered. In aie2_hw_stop(), call drmm_kfree to free mbox.
> 
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
>   drivers/accel/amdxdna/aie2_pci.c        | 17 +++++++++++++++++
>   drivers/accel/amdxdna/aie2_pci.h        |  7 +++++++
>   drivers/accel/amdxdna/amdxdna_mailbox.c |  6 ------
>   3 files changed, 24 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 349ada697e48..19c76b2b204b 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -267,12 +267,22 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
>   	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>   	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>   
> +	if (ndev->dev_status <= AIE2_DEV_INIT) {
> +		XDNA_ERR(xdna, "device is already stopped");
> +		return;
> +	}
> +
>   	aie2_mgmt_fw_fini(ndev);
>   	xdna_mailbox_stop_channel(ndev->mgmt_chann);
>   	xdna_mailbox_destroy_channel(ndev->mgmt_chann);
> +	ndev->mgmt_chann = NULL;
> +	drmm_kfree(&xdna->ddev, ndev->mbox);
> +	ndev->mbox = NULL;
>   	aie2_psp_stop(ndev->psp_hdl);
>   	aie2_smu_fini(ndev);
>   	pci_disable_device(pdev);
> +
> +	ndev->dev_status = AIE2_DEV_INIT;
>   }
>   
>   static int aie2_hw_start(struct amdxdna_dev *xdna)
> @@ -283,6 +293,11 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>   	u32 xdna_mailbox_intr_reg;
>   	int mgmt_mb_irq, ret;
>   
> +	if (ndev->dev_status >= AIE2_DEV_START) {
> +		XDNA_INFO(xdna, "device is already started");
> +		return 0;
> +	}
> +
>   	ret = pci_enable_device(pdev);
>   	if (ret) {
>   		XDNA_ERR(xdna, "failed to enable device, ret %d", ret);
> @@ -345,6 +360,8 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>   		goto destroy_mgmt_chann;
>   	}
>   
> +	ndev->dev_status = AIE2_DEV_START;
> +
>   	return 0;
>   
>   destroy_mgmt_chann:
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index 6a2686255c9c..1c6f07d9b805 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -149,6 +149,11 @@ struct amdxdna_hwctx_priv {
>   	struct drm_syncobj		*syncobj;
>   };
>   
> +enum aie2_dev_status {
> +	AIE2_DEV_INIT,
> +	AIE2_DEV_START,
> +};
> +
>   struct amdxdna_dev_hdl {
>   	struct amdxdna_dev		*xdna;
>   	const struct amdxdna_dev_priv	*priv;
> @@ -171,6 +176,8 @@ struct amdxdna_dev_hdl {
>   	struct mailbox			*mbox;
>   	struct mailbox_channel		*mgmt_chann;
>   	struct async_events		*async_events;
> +
> +	enum aie2_dev_status		dev_status;
>   };
>   
>   #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
> index 415d99abaaa3..eab79dbb8b25 100644
> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
> @@ -530,9 +530,6 @@ xdna_mailbox_create_channel(struct mailbox *mb,
>   
>   int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
>   {
> -	if (!mb_chann)
> -		return 0;
> -
>   	MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
>   	free_irq(mb_chann->msix_irq, mb_chann);
>   	destroy_workqueue(mb_chann->work_q);
> @@ -548,9 +545,6 @@ int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
>   
>   void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann)
>   {
> -	if (!mb_chann)
> -		return;
> -
>   	/* Disable an irq and wait. This might sleep. */
>   	disable_irq(mb_chann->msix_irq);
>   


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 6/8] accel/amdxdna: Enhance power management settings
  2024-12-11  0:28   ` Mario Limonciello
@ 2024-12-11  5:28     ` Lizhi Hou
  2024-12-11 20:55       ` Mario Limonciello
  0 siblings, 1 reply; 33+ messages in thread
From: Lizhi Hou @ 2024-12-11  5:28 UTC (permalink / raw)
  To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	Narendra Gutta, George Yang


On 12/10/24 16:28, Mario Limonciello wrote:
> On 12/6/2024 15:59, Lizhi Hou wrote:
>> Add SET_STATE ioctl to configure device power mode for aie2 device.
>> Three modes are supported initially.
>>
>> POWER_MODE_DEFAULT: Enable clock gating and set DPM (Dynamic Power
>> Management) level to value which has been set by resource solver or
>> maximum DPM level the device supports.
>>
>> POWER_MODE_HIGH: Enable clock gating and set DPM level to maximum DPM
>> level the device supports.
>>
>> POWER_MODE_TURBO: Disable clock gating and set DPM level to maximum DPM
>> level the device supports.
>>
>> Disabling clock gating means all clocks always run on full speed. And
>> the different clock frequency are used based on DPM level been set.
>> Initially, the driver set the power mode to default mode.
>>
>> Co-developed-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
>> Signed-off-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
>> Co-developed-by: George Yang <George.Yang@amd.com>
>> Signed-off-by: George Yang <George.Yang@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>>   drivers/accel/amdxdna/Makefile          |   1 +
>>   drivers/accel/amdxdna/TODO              |   1 -
>>   drivers/accel/amdxdna/aie2_ctx.c        |   6 ++
>>   drivers/accel/amdxdna/aie2_message.c    |   9 +-
>>   drivers/accel/amdxdna/aie2_pci.c        | 136 +++++++++++++++++++-----
>>   drivers/accel/amdxdna/aie2_pci.h        |  55 ++++++++--
>>   drivers/accel/amdxdna/aie2_pm.c         | 108 +++++++++++++++++++
>>   drivers/accel/amdxdna/aie2_smu.c        |  85 +++++++++------
>>   drivers/accel/amdxdna/aie2_solver.c     |  59 +++++++++-
>>   drivers/accel/amdxdna/aie2_solver.h     |   1 +
>>   drivers/accel/amdxdna/amdxdna_pci_drv.c |  19 ++++
>>   drivers/accel/amdxdna/amdxdna_pci_drv.h |   2 +
>>   drivers/accel/amdxdna/npu1_regs.c       |  29 +++--
>>   drivers/accel/amdxdna/npu2_regs.c       |  15 +--
>>   drivers/accel/amdxdna/npu4_regs.c       |  32 ++++--
>>   drivers/accel/amdxdna/npu5_regs.c       |  15 +--
>>   drivers/accel/amdxdna/npu6_regs.c       |  19 ++--
>>   include/uapi/drm/amdxdna_accel.h        |  52 +++++++++
>>   18 files changed, 516 insertions(+), 128 deletions(-)
>>   create mode 100644 drivers/accel/amdxdna/aie2_pm.c
>>
>> diff --git a/drivers/accel/amdxdna/Makefile 
>> b/drivers/accel/amdxdna/Makefile
>> index 6baf181298de..0e9adf6890a0 100644
>> --- a/drivers/accel/amdxdna/Makefile
>> +++ b/drivers/accel/amdxdna/Makefile
>> @@ -5,6 +5,7 @@ amdxdna-y := \
>>       aie2_error.o \
>>       aie2_message.o \
>>       aie2_pci.o \
>> +    aie2_pm.o \
>>       aie2_psp.o \
>>       aie2_smu.o \
>>       aie2_solver.o \
>> diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
>> index de4e1dbc8868..5119bccd1917 100644
>> --- a/drivers/accel/amdxdna/TODO
>> +++ b/drivers/accel/amdxdna/TODO
>> @@ -1,4 +1,3 @@
>>   - Add import and export BO support
>>   - Add debugfs support
>>   - Add debug BO support
>> -- Improve power management
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c 
>> b/drivers/accel/amdxdna/aie2_ctx.c
>> index 07eecb40767f..6b4e6fcb7794 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -518,6 +518,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>       struct drm_gpu_scheduler *sched;
>>       struct amdxdna_hwctx_priv *priv;
>>       struct amdxdna_gem_obj *heap;
>> +    struct amdxdna_dev_hdl *ndev;
>>       int i, ret;
>>         priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
>> @@ -612,6 +613,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>       }
>>         hwctx->status = HWCTX_STAT_INIT;
>> +    ndev = xdna->dev_handle;
>> +    ndev->hwctx_num++;
>>         XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
>>   @@ -641,10 +644,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>     void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
>>   {
>> +    struct amdxdna_dev_hdl *ndev;
>>       struct amdxdna_dev *xdna;
>>       int idx;
>>         xdna = hwctx->client->xdna;
>> +    ndev = xdna->dev_handle;
>> +    ndev->hwctx_num--;
>>       drm_sched_wqueue_stop(&hwctx->priv->sched);
>>         /* Now, scheduler will not send command to device. */
>> diff --git a/drivers/accel/amdxdna/aie2_message.c 
>> b/drivers/accel/amdxdna/aie2_message.c
>> index fc33a158d223..13b5a96f8d25 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -70,11 +70,18 @@ int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
>>   int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, 
>> u64 value)
>>   {
>>       DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
>> +    int ret;
>>         req.type = type;
>>       req.value = value;
>>   -    return aie2_send_mgmt_msg_wait(ndev, &msg);
>> +    ret = aie2_send_mgmt_msg_wait(ndev, &msg);
>> +    if (ret) {
>> +        XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", 
>> ret);
>> +        return ret;
>> +    }
>> +
>> +    return 0;
>>   }
>>     int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, 
>> u64 *value)
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index 83abd16ade11..489744a2e226 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -109,28 +109,26 @@ static int aie2_get_mgmt_chann_info(struct 
>> amdxdna_dev_hdl *ndev)
>>       return 0;
>>   }
>>   -static int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev)
>> +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
>> +             enum rt_config_category category, u32 *val)
>>   {
>> -    const struct rt_config *cfg = &ndev->priv->rt_config;
>> -    u64 value;
>> +    const struct rt_config *cfg;
>> +    u32 value;
>>       int ret;
>>   -    ret = aie2_set_runtime_cfg(ndev, cfg->type, cfg->value);
>> -    if (ret) {
>> -        XDNA_ERR(ndev->xdna, "Set runtime type %d value %d failed",
>> -             cfg->type, cfg->value);
>> -        return ret;
>> -    }
>> +    for (cfg = ndev->priv->rt_config; cfg->type; cfg++) {
>> +        if (cfg->category != category)
>> +            continue;
>>   -    ret = aie2_get_runtime_cfg(ndev, cfg->type, &value);
>> -    if (ret) {
>> -        XDNA_ERR(ndev->xdna, "Get runtime cfg failed");
>> -        return ret;
>> +        value = val ? *val : cfg->value;
>> +        ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
>> +        if (ret) {
>> +            XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
>> +                 cfg->type, value);
>> +            return ret;
>> +        }
>>       }
>>   -    if (value != cfg->value)
>> -        return -EINVAL;
>> -
>>       return 0;
>>   }
>>   @@ -163,7 +161,7 @@ static int aie2_mgmt_fw_init(struct 
>> amdxdna_dev_hdl *ndev)
>>           return ret;
>>       }
>>   -    ret = aie2_runtime_cfg(ndev);
>> +    ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
>>       if (ret) {
>>           XDNA_ERR(ndev->xdna, "Runtime config failed");
>>           return ret;
>> @@ -257,9 +255,25 @@ static int aie2_xrs_unload(void *cb_arg)
>>       return ret;
>>   }
>>   +static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 
>> dpm_level)
>> +{
>> +    struct amdxdna_dev *xdna = to_xdna_dev(ddev);
>> +    struct amdxdna_dev_hdl *ndev;
>> +
>> +    drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>
> This is a reinvented lockdep_assert_held() no?
> Or is there some nuance I'm missing?
>
> I would suggest switching to lockdep_assert_held().

lockdep_assert_held() relies on CONFIG_LOCKDEP which might be off.

And there are similar use cases in drm, e.g.

In drm_probe_helper.c:

    drm_WARN_ON(dev, !mutex_is_locked(&dev->mode_config.mutex));

>
>> +
>> +    ndev = xdna->dev_handle;
>> +    ndev->dft_dpm_level = dpm_level;
>> +    if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == 
>> dpm_level)
>> +        return 0;
>> +
>> +    return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
>> +}
>> +
>>   static struct xrs_action_ops aie2_xrs_actions = {
>>       .load = aie2_xrs_load,
>>       .unload = aie2_xrs_unload,
>> +    .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
>>   };
>>     static void aie2_hw_stop(struct amdxdna_dev *xdna)
>> @@ -354,6 +368,12 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>>           goto stop_psp;
>>       }
>>   +    ret = aie2_pm_init(ndev);
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
>> +        goto destroy_mgmt_chann;
>> +    }
>> +
>>       ret = aie2_mgmt_fw_init(ndev);
>>       if (ret) {
>>           XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
>> @@ -480,10 +500,9 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>       }
>>       ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>>   -    xrs_cfg.clk_list.num_levels = 3;
>> -    xrs_cfg.clk_list.cu_clk_list[0] = 0;
>> -    xrs_cfg.clk_list.cu_clk_list[1] = 800;
>> -    xrs_cfg.clk_list.cu_clk_list[2] = 1000;
>> +    xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
>> +    for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
>> +        xrs_cfg.clk_list.cu_clk_list[i] = 
>> ndev->priv->dpm_clk_tbl[i].hclk;
>>       xrs_cfg.sys_eff_factor = 1;
>>       xrs_cfg.ddev = &xdna->ddev;
>>       xrs_cfg.actions = &aie2_xrs_actions;
>> @@ -657,6 +676,22 @@ static int aie2_get_firmware_version(struct 
>> amdxdna_client *client,
>>       return 0;
>>   }
>>   +static int aie2_get_power_mode(struct amdxdna_client *client,
>> +                   struct amdxdna_drm_get_info *args)
>> +{
>> +    struct amdxdna_drm_get_power_mode mode = {};
>> +    struct amdxdna_dev *xdna = client->xdna;
>> +    struct amdxdna_dev_hdl *ndev;
>> +
>> +    ndev = xdna->dev_handle;
>> +    mode.power_mode = ndev->pw_mode;
>> +
>> +    if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, 
>> sizeof(mode)))
>> +        return -EFAULT;
>> +
>> +    return 0;
>> +}
>> +
>>   static int aie2_get_clock_metadata(struct amdxdna_client *client,
>>                      struct amdxdna_drm_get_info *args)
>>   {
>> @@ -670,11 +705,11 @@ static int aie2_get_clock_metadata(struct 
>> amdxdna_client *client,
>>       if (!clock)
>>           return -ENOMEM;
>>   -    memcpy(clock->mp_npu_clock.name, ndev->mp_npu_clock.name,
>> -           sizeof(clock->mp_npu_clock.name));
>> -    clock->mp_npu_clock.freq_mhz = ndev->mp_npu_clock.freq_mhz;
>> -    memcpy(clock->h_clock.name, ndev->h_clock.name, 
>> sizeof(clock->h_clock.name));
>> -    clock->h_clock.freq_mhz = ndev->h_clock.freq_mhz;
>> +    snprintf(clock->mp_npu_clock.name, 
>> sizeof(clock->mp_npu_clock.name),
>> +         "MP-NPU Clock");
>> +    clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
>> +    snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H 
>> Clock");
>> +    clock->h_clock.freq_mhz = ndev->hclk_freq;
>>         if (copy_to_user(u64_to_user_ptr(args->buffer), clock, 
>> sizeof(*clock)))
>>           ret = -EFAULT;
>> @@ -772,6 +807,9 @@ static int aie2_get_info(struct amdxdna_client 
>> *client, struct amdxdna_drm_get_i
>>       case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
>>           ret = aie2_get_firmware_version(client, args);
>>           break;
>> +    case DRM_AMDXDNA_GET_POWER_MODE:
>> +        ret = aie2_get_power_mode(client, args);
>> +        break;
>>       default:
>>           XDNA_ERR(xdna, "Not supported request parameter %u", 
>> args->param);
>>           ret = -EOPNOTSUPP;
>> @@ -782,12 +820,58 @@ static int aie2_get_info(struct amdxdna_client 
>> *client, struct amdxdna_drm_get_i
>>       return ret;
>>   }
>>   +static int aie2_set_power_mode(struct amdxdna_client *client,
>> +                   struct amdxdna_drm_set_state *args)
>> +{
>> +    struct amdxdna_drm_set_power_mode power_state;
>> +    enum amdxdna_power_mode_type power_mode;
>> +    struct amdxdna_dev *xdna = client->xdna;
>> +
>> +    if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
>> +               sizeof(power_state))) {
>> +        XDNA_ERR(xdna, "Failed to copy power mode request into 
>> kernel");
>> +        return -EFAULT;
>> +    }
>> +
>> +    power_mode = power_state.power_mode;
>> +    if (power_mode > POWER_MODE_TURBO) {
>> +        XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
>> +        return -EINVAL;
>> +    }
>> +
>> +    return aie2_pm_set_mode(xdna->dev_handle, power_mode);
>> +}
>> +
>> +static int aie2_set_state(struct amdxdna_client *client,
>> +              struct amdxdna_drm_set_state *args)
>> +{
>> +    struct amdxdna_dev *xdna = client->xdna;
>> +    int ret, idx;
>> +
>> +    if (!drm_dev_enter(&xdna->ddev, &idx))
>> +        return -ENODEV;
>> +
>> +    switch (args->param) {
>> +    case DRM_AMDXDNA_SET_POWER_MODE:
>> +        ret = aie2_set_power_mode(client, args);
>> +        break;
>> +    default:
>> +        XDNA_ERR(xdna, "Not supported request parameter %u", 
>> args->param);
>> +        ret = -EOPNOTSUPP;
>> +        break;
>> +    }
>> +
>> +    drm_dev_exit(idx);
>> +    return ret;
>> +}
>> +
>>   const struct amdxdna_dev_ops aie2_ops = {
>>       .init           = aie2_init,
>>       .fini           = aie2_fini,
>>       .resume         = aie2_hw_start,
>>       .suspend        = aie2_hw_stop,
>>       .get_aie_info   = aie2_get_info,
>> +    .set_aie_state    = aie2_set_state,
>>       .hwctx_init     = aie2_hwctx_init,
>>       .hwctx_fini     = aie2_hwctx_fini,
>>       .hwctx_config   = aie2_hwctx_config,
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h 
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index 1c6f07d9b805..8c17b74654ce 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -6,6 +6,7 @@
>>   #ifndef _AIE2_PCI_H_
>>   #define _AIE2_PCI_H_
>>   +#include <drm/amdxdna_accel.h>
>>   #include <linux/semaphore.h>
>>     #include "amdxdna_mailbox.h"
>> @@ -48,9 +49,6 @@
>>       pci_resource_len(NDEV2PDEV(_ndev), 
>> (_ndev)->xdna->dev_info->mbox_bar); \
>>   })
>>   -#define SMU_MPNPUCLK_FREQ_MAX(ndev) 
>> ((ndev)->priv->smu_mpnpuclk_freq_max)
>> -#define SMU_HCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_hclk_freq_max)
>> -
>>   enum aie2_smu_reg_idx {
>>       SMU_CMD_REG = 0,
>>       SMU_ARG_REG,
>> @@ -112,14 +110,20 @@ struct aie_metadata {
>>       struct aie_tile_metadata shim;
>>   };
>>   -struct clock_entry {
>> -    char name[16];
>> -    u32 freq_mhz;
>> +enum rt_config_category {
>> +    AIE2_RT_CFG_INIT,
>> +    AIE2_RT_CFG_CLK_GATING,
>>   };
>>     struct rt_config {
>>       u32    type;
>>       u32    value;
>> +    u32    category;
>> +};
>> +
>> +struct dpm_clk_freq {
>> +    u32    npuclk;
>> +    u32    hclk;
>>   };
>>     /*
>> @@ -150,6 +154,7 @@ struct amdxdna_hwctx_priv {
>>   };
>>     enum aie2_dev_status {
>> +    AIE2_DEV_UNINIT,
>>       AIE2_DEV_INIT,
>>       AIE2_DEV_START,
>>   };
>> @@ -169,8 +174,15 @@ struct amdxdna_dev_hdl {
>>       u32                total_col;
>>       struct aie_version        version;
>>       struct aie_metadata        metadata;
>> -    struct clock_entry        mp_npu_clock;
>> -    struct clock_entry        h_clock;
>> +
>> +    /* power management and clock*/
>> +    enum amdxdna_power_mode_type    pw_mode;
>> +    u32                dpm_level;
>> +    u32                dft_dpm_level;
>> +    u32                max_dpm_level;
>> +    u32                clk_gating;
>> +    u32                npuclk_freq;
>> +    u32                hclk_freq;
>>         /* Mailbox and the management channel */
>>       struct mailbox            *mbox;
>> @@ -178,6 +190,7 @@ struct amdxdna_dev_hdl {
>>       struct async_events        *async_events;
>>         enum aie2_dev_status        dev_status;
>> +    u32                hwctx_num;
>>   };
>>     #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
>> @@ -188,11 +201,17 @@ struct aie2_bar_off_pair {
>>       u32    offset;
>>   };
>>   +struct aie2_hw_ops {
>> +    int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>> +};
>> +
>>   struct amdxdna_dev_priv {
>>       const char            *fw_path;
>>       u64                protocol_major;
>>       u64                protocol_minor;
>> -    struct rt_config        rt_config;
>> +    const struct rt_config        *rt_config;
>> +    const struct dpm_clk_freq    *dpm_clk_tbl;
>> +
>>   #define COL_ALIGN_NONE   0
>>   #define COL_ALIGN_NATURE 1
>>       u32                col_align;
>> @@ -203,15 +222,29 @@ struct amdxdna_dev_priv {
>>       struct aie2_bar_off_pair    sram_offs[SRAM_MAX_INDEX];
>>       struct aie2_bar_off_pair    psp_regs_off[PSP_MAX_REGS];
>>       struct aie2_bar_off_pair    smu_regs_off[SMU_MAX_REGS];
>> -    u32                smu_mpnpuclk_freq_max;
>> -    u32                smu_hclk_freq_max;
>> +    struct aie2_hw_ops        hw_ops;
>>   };
>>     extern const struct amdxdna_dev_ops aie2_ops;
>>   +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
>> +             enum rt_config_category category, u32 *val);
>> +
>> +/* aie2 npu hw config */
>> +extern const struct dpm_clk_freq npu1_dpm_clk_table[];
>> +extern const struct dpm_clk_freq npu4_dpm_clk_table[];
>> +extern const struct rt_config npu1_default_rt_cfg[];
>> +extern const struct rt_config npu4_default_rt_cfg[];
>> +
>>   /* aie2_smu.c */
>>   int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
>>   void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
>> +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>> +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>> +
>> +/* aie2_pm.c */
>> +int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
>> +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum 
>> amdxdna_power_mode_type target);
>>     /* aie2_psp.c */
>>   struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct 
>> psp_config *conf);
>> diff --git a/drivers/accel/amdxdna/aie2_pm.c 
>> b/drivers/accel/amdxdna/aie2_pm.c
>> new file mode 100644
>> index 000000000000..426c38fce848
>> --- /dev/null
>> +++ b/drivers/accel/amdxdna/aie2_pm.c
>> @@ -0,0 +1,108 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2024, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#include <drm/amdxdna_accel.h>
>> +#include <drm/drm_device.h>
>> +#include <drm/drm_print.h>
>> +#include <drm/gpu_scheduler.h>
>> +
>> +#include "aie2_pci.h"
>> +#include "amdxdna_pci_drv.h"
>> +
>> +#define AIE2_CLK_GATING_ENABLE    1
>> +#define AIE2_CLK_GATING_DISABLE    0
>> +
>> +static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 
>> val)
>> +{
>> +    int ret;
>> +
>> +    ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
>> +    if (ret)
>> +        return ret;
>> +
>> +    ndev->clk_gating = val;
>> +    return 0;
>> +}
>> +
>> +int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
>> +{
>> +    int ret;
>> +
>> +    if (ndev->dev_status != AIE2_DEV_UNINIT) {
>> +        /* Resume device */
>> +        ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
>> +        if (ret)
>> +            return ret;
>> +
>> +        ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
>> +        if (ret)
>> +            return ret;
>> +
>> +        return 0;
>> +    }
>> +
>> +    while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
>> +        ndev->max_dpm_level++;
>> +    ndev->max_dpm_level--;
>> +
>> +    ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
>> +    if (ret)
>> +        return ret;
>> +
>> +    ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
>> +    if (ret)
>> +        return ret;
>
> In the event of a failure do you want to try to restore dpm where it was?

This is an initialization routine. If it fails, that indicates a 
firmware/hardware issue. There is little point in doing more on broken fw/hw.

And the driver will not be probed in this case.


Thanks,

Lizhi

>
>> +
>> +    ndev->pw_mode = POWER_MODE_DEFAULT;
>> +    ndev->dft_dpm_level = ndev->max_dpm_level;
>> +
>> +    return 0;
>> +}
>> +
>> +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum 
>> amdxdna_power_mode_type target)
>> +{
>> +    struct amdxdna_dev *xdna = ndev->xdna;
>> +    u32 clk_gating, dpm_level;
>> +    int ret;
>> +
>> +    drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>
> lockdep_assert_held()
>
>> +
>> +    if (ndev->pw_mode == target)
>> +        return 0;
>> +
>> +    switch (target) {
>> +    case POWER_MODE_TURBO:
>> +        if (ndev->hwctx_num) {
>> +            XDNA_ERR(xdna, "Can not set turbo when there is active 
>> hwctx");
>> +            return -EINVAL;
>> +        }
>> +
>> +        clk_gating = AIE2_CLK_GATING_DISABLE;
>> +        dpm_level = ndev->max_dpm_level;
>> +        break;
>> +    case POWER_MODE_HIGH:
>> +        clk_gating = AIE2_CLK_GATING_ENABLE;
>> +        dpm_level = ndev->max_dpm_level;
>> +        break;
>> +    case POWER_MODE_DEFAULT:
>> +        clk_gating = AIE2_CLK_GATING_ENABLE;
>> +        dpm_level = ndev->dft_dpm_level;
>> +        break;
>> +    default:
>> +        return -EOPNOTSUPP;
>> +    }
>> +
>> +    ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
>> +    if (ret)
>> +        return ret;
>> +
>> +    ret = aie2_pm_set_clk_gating(ndev, clk_gating);
>> +    if (ret)
>> +        return ret;
>> +
>> +    ndev->pw_mode = target;
>> +
>> +    return 0;
>> +}
>> diff --git a/drivers/accel/amdxdna/aie2_smu.c 
>> b/drivers/accel/amdxdna/aie2_smu.c
>> index 91893d438da7..73388443c676 100644
>> --- a/drivers/accel/amdxdna/aie2_smu.c
>> +++ b/drivers/accel/amdxdna/aie2_smu.c
>> @@ -19,8 +19,11 @@
>>   #define AIE2_SMU_POWER_OFF        0x4
>>   #define AIE2_SMU_SET_MPNPUCLK_FREQ    0x5
>>   #define AIE2_SMU_SET_HCLK_FREQ        0x6
>> +#define AIE2_SMU_SET_SOFT_DPMLEVEL    0x7
>> +#define AIE2_SMU_SET_HARD_DPMLEVEL    0x8
>>   -static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 
>> reg_cmd, u32 reg_arg)
>> +static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
>> +             u32 reg_arg, u32 *out)
>>   {
>>       u32 resp;
>>       int ret;
>> @@ -40,6 +43,9 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl 
>> *ndev, u32 reg_cmd, u32 reg_arg)
>>           return ret;
>>       }
>>   +    if (out)
>> +        *out = readl(SMU_REG(ndev, SMU_OUT_REG));
>> +
>>       if (resp != SMU_RESULT_OK) {
>>           XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, 
>> resp);
>>           return -EINVAL;
>> @@ -48,63 +54,71 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl 
>> *ndev, u32 reg_cmd, u32 reg_arg)
>>       return 0;
>>   }
>>   -static int aie2_smu_set_mpnpu_clock_freq(struct amdxdna_dev_hdl 
>> *ndev, u32 freq_mhz)
>> +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>   {
>> +    u32 freq;
>>       int ret;
>>   -    if (!freq_mhz || freq_mhz > SMU_MPNPUCLK_FREQ_MAX(ndev)) {
>> -        XDNA_ERR(ndev->xdna, "invalid mpnpu clock freq %d", freq_mhz);
>> -        return -EINVAL;
>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
>> + ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
>> +    if (ret) {
>> +        XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
>> +             ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
>>       }
>> +    ndev->npuclk_freq = freq;
>>   -    ndev->mp_npu_clock.freq_mhz = freq_mhz;
>> -    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, freq_mhz);
>> -    if (!ret)
>> -        XDNA_INFO_ONCE(ndev->xdna, "set mpnpu_clock = %d mhz", 
>> freq_mhz);
>> -
>> -    return ret;
>> -}
>> -
>> -static int aie2_smu_set_hclock_freq(struct amdxdna_dev_hdl *ndev, 
>> u32 freq_mhz)
>> -{
>> -    int ret;
>> -
>> -    if (!freq_mhz || freq_mhz > SMU_HCLK_FREQ_MAX(ndev)) {
>> -        XDNA_ERR(ndev->xdna, "invalid hclock freq %d", freq_mhz);
>> -        return -EINVAL;
>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ,
>> +                ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq);
>> +    if (ret) {
>> +        XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
>> +             ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
>>       }
>> +    ndev->hclk_freq = freq;
>> +    ndev->dpm_level = dpm_level;
>>   -    ndev->h_clock.freq_mhz = freq_mhz;
>> -    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, freq_mhz);
>> -    if (!ret)
>> -        XDNA_INFO_ONCE(ndev->xdna, "set npu_hclock = %d mhz", 
>> freq_mhz);
>> +    XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
>> +         ndev->npuclk_freq, ndev->hclk_freq);
>>   -    return ret;
>> +    return 0;
>>   }
>>   -int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>> +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>   {
>>       int ret;
>>   -    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0);
>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, 
>> NULL);
>>       if (ret) {
>> -        XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
>> +        XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
>> +             dpm_level, ret);
>>           return ret;
>>       }
>>   -    ret = aie2_smu_set_mpnpu_clock_freq(ndev, 
>> SMU_MPNPUCLK_FREQ_MAX(ndev));
>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, 
>> NULL);
>>       if (ret) {
>> -        XDNA_ERR(ndev->xdna, "Set mpnpu clk freq failed, ret %d", ret);
>> +        XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
>> +             dpm_level, ret);
>>           return ret;
>>       }
>> -    snprintf(ndev->mp_npu_clock.name, 
>> sizeof(ndev->mp_npu_clock.name), "MP-NPU Clock");
>>   -    ret = aie2_smu_set_hclock_freq(ndev, SMU_HCLK_FREQ_MAX(ndev));
>> +    ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>> +    ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
>> +    ndev->dpm_level = dpm_level;
>> +
>> +    XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
>> +         ndev->npuclk_freq, ndev->hclk_freq);
>> +
>> +    return 0;
>> +}
>> +
>> +int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>> +{
>> +    int ret;
>> +
>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
>>       if (ret) {
>> -        XDNA_ERR(ndev->xdna, "Set hclk freq failed, ret %d", ret);
>> +        XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
>>           return ret;
>>       }
>> -    snprintf(ndev->h_clock.name, sizeof(ndev->h_clock.name), "H 
>> Clock");
>>         return 0;
>>   }
>> @@ -113,7 +127,8 @@ void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
>>   {
>>       int ret;
>>   -    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0);
>> +    ndev->priv->hw_ops.set_dpm(ndev, 0);
>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
>>       if (ret)
>>           XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
>>   }
>> diff --git a/drivers/accel/amdxdna/aie2_solver.c 
>> b/drivers/accel/amdxdna/aie2_solver.c
>> index a537c66589a4..1939625d6027 100644
>> --- a/drivers/accel/amdxdna/aie2_solver.c
>> +++ b/drivers/accel/amdxdna/aie2_solver.c
>> @@ -25,6 +25,7 @@ struct solver_node {
>>         struct partition_node    *pt_node;
>>       void            *cb_arg;
>> +    u32            dpm_level;
>>       u32            cols_len;
>>       u32            start_cols[] __counted_by(cols_len);
>>   };
>> @@ -95,6 +96,51 @@ static int sanity_check(struct solver_state *xrs, 
>> struct alloc_requests *req)
>>       return 0;
>>   }
>>   +static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
>> +{
>> +    /*
>> +     * gops is retrieved from the xmodel, so it's always set
>> +     * fps and latency are the configurable params from the application
>> +     */
>> +    if (rqos->gops > 0 && (rqos->fps > 0 ||  rqos->latency > 0))
>> +        return true;
>> +
>> +    return false;
>> +}
>> +
>> +static int set_dpm_level(struct solver_state *xrs, struct 
>> alloc_requests *req, u32 *dpm_level)
>> +{
>> +    struct solver_rgroup *rgp = &xrs->rgp;
>> +    struct cdo_parts *cdop = &req->cdo;
>> +    struct aie_qos *rqos = &req->rqos;
>> +    u32 freq, max_dpm_level, level;
>> +    struct solver_node *node;
>> +
>> +    max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
>> +    /* If no QoS parameters are passed, set it to the max DPM level */
>> +    if (!is_valid_qos_dpm_params(rqos)) {
>> +        level = max_dpm_level;
>> +        goto set_dpm;
>> +    }
>> +
>> +    /* Find one CDO group that meet the GOPs requirement. */
>> +    for (level = 0; level < max_dpm_level; level++) {
>> +        freq = xrs->cfg.clk_list.cu_clk_list[level];
>> +        if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
>> +            break;
>> +    }
>> +
>> +    /* set the dpm level which fits all the sessions */
>> +    list_for_each_entry(node, &rgp->node_list, list) {
>> +        if (node->dpm_level > level)
>> +            level = node->dpm_level;
>> +    }
>> +
>> +set_dpm:
>> +    *dpm_level = level;
>> +    return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
>> +}
>> +
>>   static struct solver_node *rg_search_node(struct solver_rgroup 
>> *rgp, u64 rid)
>>   {
>>       struct solver_node *node;
>> @@ -159,12 +205,9 @@ static int get_free_partition(struct 
>> solver_state *xrs,
>>       pt_node->ncols = ncols;
>>         /*
>> -     * Before fully support latency in QoS, if a request
>> -     * specifies a non-zero latency value, it will not share
>> -     * the partition with other requests.
>> +     * Always set exclusive to false for now.
>>        */
>> -    if (req->rqos.latency)
>> -        pt_node->exclusive = true;
>> +    pt_node->exclusive = false;
>>         list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
>>       xrs->rgp.npartition_node++;
>> @@ -257,6 +300,7 @@ int xrs_allocate_resource(void *hdl, struct 
>> alloc_requests *req, void *cb_arg)
>>       struct xrs_action_load load_act;
>>       struct solver_node *snode;
>>       struct solver_state *xrs;
>> +    u32 dpm_level;
>>       int ret;
>>         xrs = (struct solver_state *)hdl;
>> @@ -281,6 +325,11 @@ int xrs_allocate_resource(void *hdl, struct 
>> alloc_requests *req, void *cb_arg)
>>       if (ret)
>>           goto free_node;
>>   +    ret = set_dpm_level(xrs, req, &dpm_level);
>> +    if (ret)
>> +        goto free_node;
>> +
>> +    snode->dpm_level = dpm_level;
>>       snode->cb_arg = cb_arg;
>>         drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
>> diff --git a/drivers/accel/amdxdna/aie2_solver.h 
>> b/drivers/accel/amdxdna/aie2_solver.h
>> index 9b1847bb46a6..a2e3c52229e9 100644
>> --- a/drivers/accel/amdxdna/aie2_solver.h
>> +++ b/drivers/accel/amdxdna/aie2_solver.h
>> @@ -99,6 +99,7 @@ struct clk_list_info {
>>   struct xrs_action_ops {
>>       int (*load)(void *cb_arg, struct xrs_action_load *action);
>>       int (*unload)(void *cb_arg);
>> +    int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
>>   };
>>     /*
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> index c3541796d189..6bbd437d48d8 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -160,6 +160,24 @@ static int amdxdna_drm_get_info_ioctl(struct 
>> drm_device *dev, void *data, struct
>>       return ret;
>>   }
>>   +static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, 
>> void *data, struct drm_file *filp)
>> +{
>> +    struct amdxdna_client *client = filp->driver_priv;
>> +    struct amdxdna_dev *xdna = to_xdna_dev(dev);
>> +    struct amdxdna_drm_set_state *args = data;
>> +    int ret;
>> +
>> +    if (!xdna->dev_info->ops->set_aie_state)
>> +        return -EOPNOTSUPP;
>> +
>> +    XDNA_DBG(xdna, "Request parameter %u", args->param);
>> +    mutex_lock(&xdna->dev_lock);
>> +    ret = xdna->dev_info->ops->set_aie_state(client, args);
>> +    mutex_unlock(&xdna->dev_lock);
>> +
>> +    return ret;
>> +}
>> +
>>   static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
>>       /* Context */
>>       DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, 
>> amdxdna_drm_create_hwctx_ioctl, 0),
>> @@ -173,6 +191,7 @@ static const struct drm_ioctl_desc 
>> amdxdna_drm_ioctls[] = {
>>       DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, 
>> amdxdna_drm_submit_cmd_ioctl, 0),
>>       /* AIE hardware */
>>       DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 
>> 0),
>> +    DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, 
>> amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
>>   };
>>     static const struct file_operations amdxdna_fops = {
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> index f5b830fb14bb..e2071e31d949 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> @@ -20,6 +20,7 @@ extern const struct drm_driver amdxdna_drm_drv;
>>   struct amdxdna_client;
>>   struct amdxdna_dev;
>>   struct amdxdna_drm_get_info;
>> +struct amdxdna_drm_set_state;
>>   struct amdxdna_gem_obj;
>>   struct amdxdna_hwctx;
>>   struct amdxdna_sched_job;
>> @@ -40,6 +41,7 @@ struct amdxdna_dev_ops {
>>       void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
>>       int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct 
>> amdxdna_sched_job *job, u64 *seq);
>>       int (*get_aie_info)(struct amdxdna_client *client, struct 
>> amdxdna_drm_get_info *args);
>> +    int (*set_aie_state)(struct amdxdna_client *client, struct 
>> amdxdna_drm_set_state *args);
>>   };
>>     /*
>> diff --git a/drivers/accel/amdxdna/npu1_regs.c 
>> b/drivers/accel/amdxdna/npu1_regs.c
>> index f00c50461b09..c8f4d1cac65d 100644
>> --- a/drivers/accel/amdxdna/npu1_regs.c
>> +++ b/drivers/accel/amdxdna/npu1_regs.c
>> @@ -44,18 +44,30 @@
>>   #define NPU1_SMU_BAR_BASE  MPNPU_APERTURE0_BASE
>>   #define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
>>   -#define NPU1_RT_CFG_TYPE_PDI_LOAD 2
>> -#define NPU1_RT_CFG_VAL_PDI_LOAD_MGMT 0
>> -#define NPU1_RT_CFG_VAL_PDI_LOAD_APP 1
>> +const struct rt_config npu1_default_rt_cfg[] = {
>> +    { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
>> +    { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>> +    { 0 },
>> +};
>>   -#define NPU1_MPNPUCLK_FREQ_MAX  600
>> -#define NPU1_HCLK_FREQ_MAX      1024
>> +const struct dpm_clk_freq npu1_dpm_clk_table[] = {
>> +    {400, 800},
>> +    {600, 1024},
>> +    {600, 1024},
>> +    {600, 1024},
>> +    {600, 1024},
>> +    {720, 1309},
>> +    {720, 1309},
>> +    {847, 1600},
>> +    { 0 }
>> +};
>>     const struct amdxdna_dev_priv npu1_dev_priv = {
>>       .fw_path        = "amdnpu/1502_00/npu.sbin",
>>       .protocol_major = 0x5,
>>       .protocol_minor = 0x1,
>> -    .rt_config    = {NPU1_RT_CFG_TYPE_PDI_LOAD, 
>> NPU1_RT_CFG_VAL_PDI_LOAD_APP},
>> +    .rt_config    = npu1_default_rt_cfg,
>> +    .dpm_clk_tbl    = npu1_dpm_clk_table,
>>       .col_align    = COL_ALIGN_NONE,
>>       .mbox_dev_addr  = NPU1_MBOX_BAR_BASE,
>>       .mbox_size      = 0, /* Use BAR size */
>> @@ -80,8 +92,9 @@ const struct amdxdna_dev_priv npu1_dev_priv = {
>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
>>       },
>> -    .smu_mpnpuclk_freq_max = NPU1_MPNPUCLK_FREQ_MAX,
>> -    .smu_hclk_freq_max     = NPU1_HCLK_FREQ_MAX,
>> +    .hw_ops        = {
>> +        .set_dpm = npu1_set_dpm,
>> +    },
>>   };
>>     const struct amdxdna_dev_info dev_npu1_info = {
>> diff --git a/drivers/accel/amdxdna/npu2_regs.c 
>> b/drivers/accel/amdxdna/npu2_regs.c
>> index 00cb381031d2..ac63131f9c7c 100644
>> --- a/drivers/accel/amdxdna/npu2_regs.c
>> +++ b/drivers/accel/amdxdna/npu2_regs.c
>> @@ -61,18 +61,12 @@
>>   #define NPU2_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>   #define NPU2_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>   -#define NPU2_RT_CFG_TYPE_PDI_LOAD 5
>> -#define NPU2_RT_CFG_VAL_PDI_LOAD_MGMT 0
>> -#define NPU2_RT_CFG_VAL_PDI_LOAD_APP 1
>> -
>> -#define NPU2_MPNPUCLK_FREQ_MAX  1267
>> -#define NPU2_HCLK_FREQ_MAX      1800
>> -
>>   const struct amdxdna_dev_priv npu2_dev_priv = {
>>       .fw_path        = "amdnpu/17f0_00/npu.sbin",
>>       .protocol_major = 0x6,
>>       .protocol_minor = 0x1,
>> -    .rt_config    = {NPU2_RT_CFG_TYPE_PDI_LOAD, 
>> NPU2_RT_CFG_VAL_PDI_LOAD_APP},
>> +    .rt_config    = npu4_default_rt_cfg,
>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>       .col_align    = COL_ALIGN_NATURE,
>>       .mbox_dev_addr  = NPU2_MBOX_BAR_BASE,
>>       .mbox_size      = 0, /* Use BAR size */
>> @@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu2_dev_priv = {
>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU2_SMU, MP1_C2PMSG_60),
>>       },
>> -    .smu_mpnpuclk_freq_max = NPU2_MPNPUCLK_FREQ_MAX,
>> -    .smu_hclk_freq_max     = NPU2_HCLK_FREQ_MAX,
>> +    .hw_ops    =     {
>> +        .set_dpm = npu4_set_dpm,
>> +    },
>>   };
>>     const struct amdxdna_dev_info dev_npu2_info = {
>> diff --git a/drivers/accel/amdxdna/npu4_regs.c 
>> b/drivers/accel/amdxdna/npu4_regs.c
>> index b6dae9667cca..a713ac18adfc 100644
>> --- a/drivers/accel/amdxdna/npu4_regs.c
>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>> @@ -61,18 +61,33 @@
>>   #define NPU4_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>   #define NPU4_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>   -#define NPU4_RT_CFG_TYPE_PDI_LOAD 5
>> -#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT 0
>> -#define NPU4_RT_CFG_VAL_PDI_LOAD_APP 1
>> +const struct rt_config npu4_default_rt_cfg[] = {
>> +    { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
>> +    { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>> +    { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>> +    { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>> +    { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>> +    { 0 },
>> +};
>>   -#define NPU4_MPNPUCLK_FREQ_MAX  1267
>> -#define NPU4_HCLK_FREQ_MAX      1800
>> +const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>> +    {396, 792},
>> +    {600, 1056},
>> +    {792, 1152},
>> +    {975, 1267},
>> +    {975, 1267},
>> +    {1056, 1408},
>> +    {1152, 1584},
>> +    {1267, 1800},
>> +    { 0 }
>> +};
>>     const struct amdxdna_dev_priv npu4_dev_priv = {
>>       .fw_path        = "amdnpu/17f0_10/npu.sbin",
>>       .protocol_major = 0x6,
>>       .protocol_minor = 0x1,
>> -    .rt_config    = {NPU4_RT_CFG_TYPE_PDI_LOAD, 
>> NPU4_RT_CFG_VAL_PDI_LOAD_APP},
>> +    .rt_config    = npu4_default_rt_cfg,
>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>       .col_align    = COL_ALIGN_NATURE,
>>       .mbox_dev_addr  = NPU4_MBOX_BAR_BASE,
>>       .mbox_size      = 0, /* Use BAR size */
>> @@ -97,8 +112,9 @@ const struct amdxdna_dev_priv npu4_dev_priv = {
>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
>>       },
>> -    .smu_mpnpuclk_freq_max = NPU4_MPNPUCLK_FREQ_MAX,
>> -    .smu_hclk_freq_max     = NPU4_HCLK_FREQ_MAX,
>> +    .hw_ops        = {
>> +        .set_dpm = npu4_set_dpm,
>> +    },
>>   };
>>     const struct amdxdna_dev_info dev_npu4_info = {
>> diff --git a/drivers/accel/amdxdna/npu5_regs.c 
>> b/drivers/accel/amdxdna/npu5_regs.c
>> index bed1baf8e160..67a5d5bc8a49 100644
>> --- a/drivers/accel/amdxdna/npu5_regs.c
>> +++ b/drivers/accel/amdxdna/npu5_regs.c
>> @@ -61,18 +61,12 @@
>>   #define NPU5_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>   #define NPU5_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>   -#define NPU5_RT_CFG_TYPE_PDI_LOAD 5
>> -#define NPU5_RT_CFG_VAL_PDI_LOAD_MGMT 0
>> -#define NPU5_RT_CFG_VAL_PDI_LOAD_APP 1
>> -
>> -#define NPU5_MPNPUCLK_FREQ_MAX  1267
>> -#define NPU5_HCLK_FREQ_MAX      1800
>> -
>>   const struct amdxdna_dev_priv npu5_dev_priv = {
>>       .fw_path        = "amdnpu/17f0_11/npu.sbin",
>>       .protocol_major = 0x6,
>>       .protocol_minor = 0x1,
>> -    .rt_config    = {NPU5_RT_CFG_TYPE_PDI_LOAD, 
>> NPU5_RT_CFG_VAL_PDI_LOAD_APP},
>> +    .rt_config    = npu4_default_rt_cfg,
>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>       .col_align    = COL_ALIGN_NATURE,
>>       .mbox_dev_addr  = NPU5_MBOX_BAR_BASE,
>>       .mbox_size      = 0, /* Use BAR size */
>> @@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu5_dev_priv = {
>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
>>       },
>> -    .smu_mpnpuclk_freq_max = NPU5_MPNPUCLK_FREQ_MAX,
>> -    .smu_hclk_freq_max     = NPU5_HCLK_FREQ_MAX,
>> +    .hw_ops        = {
>> +        .set_dpm = npu4_set_dpm,
>> +    },
>>   };
>>     const struct amdxdna_dev_info dev_npu5_info = {
>> diff --git a/drivers/accel/amdxdna/npu6_regs.c 
>> b/drivers/accel/amdxdna/npu6_regs.c
>> index d1168fc55533..f46c760cefc7 100644
>> --- a/drivers/accel/amdxdna/npu6_regs.c
>> +++ b/drivers/accel/amdxdna/npu6_regs.c
>> @@ -61,23 +61,12 @@
>>   #define NPU6_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>   #define NPU6_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>   -#define NPU6_RT_CFG_TYPE_PDI_LOAD 5
>> -#define NPU6_RT_CFG_TYPE_DEBUG_BO 10
>> -
>> -#define NPU6_RT_CFG_VAL_PDI_LOAD_MGMT 0
>> -#define NPU6_RT_CFG_VAL_PDI_LOAD_APP 1
>> -
>> -#define NPU6_RT_CFG_VAL_DEBUG_BO_DEFAULT 0
>> -#define NPU6_RT_CFG_VAL_DEBUG_BO_LARGE   1
>> -
>> -#define NPU6_MPNPUCLK_FREQ_MAX  1267
>> -#define NPU6_HCLK_FREQ_MAX      1800
>> -
>>   const struct amdxdna_dev_priv npu6_dev_priv = {
>>       .fw_path        = "amdnpu/17f0_10/npu.sbin",
>>       .protocol_major = 0x6,
>>       .protocol_minor = 12,
>> -    .rt_config    = {NPU6_RT_CFG_TYPE_PDI_LOAD, 
>> NPU6_RT_CFG_VAL_PDI_LOAD_APP},
>> +    .rt_config    = npu4_default_rt_cfg,
>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>       .col_align    = COL_ALIGN_NATURE,
>>       .mbox_dev_addr  = NPU6_MBOX_BAR_BASE,
>>       .mbox_size      = 0, /* Use BAR size */
>> @@ -102,6 +91,10 @@ const struct amdxdna_dev_priv npu6_dev_priv = {
>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
>>       },
>> +    .hw_ops         = {
>> +        .set_dpm = npu4_set_dpm,
>> +    },
>> +
>>   };
>>     const struct amdxdna_dev_info dev_npu6_info = {
>> diff --git a/include/uapi/drm/amdxdna_accel.h 
>> b/include/uapi/drm/amdxdna_accel.h
>> index 4f15e53a548d..9af9302baf90 100644
>> --- a/include/uapi/drm/amdxdna_accel.h
>> +++ b/include/uapi/drm/amdxdna_accel.h
>> @@ -33,6 +33,7 @@ enum amdxdna_drm_ioctl_id {
>>       DRM_AMDXDNA_SYNC_BO,
>>       DRM_AMDXDNA_EXEC_CMD,
>>       DRM_AMDXDNA_GET_INFO,
>> +    DRM_AMDXDNA_SET_STATE,
>>   };
>>     /**
>> @@ -375,6 +376,24 @@ struct amdxdna_drm_query_hwctx {
>>       __u64 errors;
>>   };
>>   +enum amdxdna_power_mode_type {
>> +    POWER_MODE_DEFAULT, /* Fallback to calculated DPM */
>> +    POWER_MODE_LOW,     /* Set frequency to lowest DPM */
>> +    POWER_MODE_MEDIUM,  /* Set frequency to medium DPM */
>> +    POWER_MODE_HIGH,    /* Set frequency to highest DPM */
>> +    POWER_MODE_TURBO,   /* Maximum power */
>> +};
>> +
>> +/**
>> + * struct amdxdna_drm_get_power_mode - Get the configured power mode
>> + * @power_mode: The mode type from enum amdxdna_power_mode_type
>> + * @pad: MBZ.
>> + */
>> +struct amdxdna_drm_get_power_mode {
>> +    __u8 power_mode;
>> +    __u8 pad[7];
>> +};
>> +
>>   /**
>>    * struct amdxdna_drm_query_firmware_version - Query the firmware 
>> version
>>    * @major: The major version number
>> @@ -397,6 +416,7 @@ enum amdxdna_drm_get_param {
>>       DRM_AMDXDNA_QUERY_SENSORS,
>>       DRM_AMDXDNA_QUERY_HW_CONTEXTS,
>>       DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
>> +    DRM_AMDXDNA_GET_POWER_MODE,
>>   };
>>     /**
>> @@ -411,6 +431,34 @@ struct amdxdna_drm_get_info {
>>       __u64 buffer; /* in/out */
>>   };
>>   +enum amdxdna_drm_set_param {
>> +    DRM_AMDXDNA_SET_POWER_MODE,
>> +    DRM_AMDXDNA_WRITE_AIE_MEM,
>> +    DRM_AMDXDNA_WRITE_AIE_REG,
>> +};
>> +
>> +/**
>> + * struct amdxdna_drm_set_state - Set the state of the AIE hardware.
>> + * @param: Value in enum amdxdna_drm_set_param.
>> + * @buffer_size: Size of the input param.
>> + * @buffer: Input param.
>> + */
>> +struct amdxdna_drm_set_state {
>> +    __u32 param; /* in */
>> +    __u32 buffer_size; /* in */
>> +    __u64 buffer; /* in */
>> +};
>> +
>> +/**
>> + * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE 
>> hardware
>> + * @power_mode: The power mode from enum amdxdna_power_mode_type
>> + * @pad: MBZ.
>> + */
>> +struct amdxdna_drm_set_power_mode {
>> +    __u8 power_mode;
>> +    __u8 pad[7];
>> +};
>> +
>>   #define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \
>>       DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \
>>            struct amdxdna_drm_create_hwctx)
>> @@ -443,6 +491,10 @@ struct amdxdna_drm_get_info {
>>       DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, \
>>            struct amdxdna_drm_get_info)
>>   +#define DRM_IOCTL_AMDXDNA_SET_STATE \
>> +    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \
>> +         struct amdxdna_drm_set_state)
>> +
>>   #if defined(__cplusplus)
>>   } /* extern c end */
>>   #endif
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 7/8] accel/amdxdna: Read firmware interface version from registers
  2024-12-11  0:20   ` Mario Limonciello
@ 2024-12-11  5:32     ` Lizhi Hou
  0 siblings, 0 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-11  5:32 UTC (permalink / raw)
  To: Mario Limonciello, ogabbay, quic_jhugo, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam


On 12/10/24 16:20, Mario Limonciello wrote:
> On 12/6/2024 16:00, Lizhi Hou wrote:
>> The latest released firmware supports reading firmware interface version
>> from registers directly. The driver's probe routine reads the major and
>> minor version numbers. If the firmware interface does not compatible 
>> with
> s/does/is/
Thanks. I will fix this.
>> the driver, the driver's probe routine returns failure.
>>
>> Co-developed-by: Min Ma <min.ma@amd.com>
>> Signed-off-by: Min Ma <min.ma@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>
> Just to confirm you're not backing yourself into a corner the plan is 
> not to bump this major version any time soon for anything already 
> supported by the driver; right?

That is correct.


Thanks,

Lizhi

>
>
> Because once you do that this is going to get messy quickly.
>
> Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
>> ---
>>   drivers/accel/amdxdna/aie2_message.c | 26 ----------
>>   drivers/accel/amdxdna/aie2_pci.c     | 74 ++++++++++++++++++++++------
>>   drivers/accel/amdxdna/aie2_pci.h     |  6 +--
>>   drivers/accel/amdxdna/npu1_regs.c    |  2 +-
>>   drivers/accel/amdxdna/npu2_regs.c    |  2 +-
>>   drivers/accel/amdxdna/npu4_regs.c    |  2 +-
>>   drivers/accel/amdxdna/npu5_regs.c    |  2 +-
>>   7 files changed, 64 insertions(+), 50 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_message.c 
>> b/drivers/accel/amdxdna/aie2_message.c
>> index 13b5a96f8d25..f6d46e1e5086 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -100,32 +100,6 @@ int aie2_get_runtime_cfg(struct amdxdna_dev_hdl 
>> *ndev, u32 type, u64 *value)
>>       return 0;
>>   }
>>   -int aie2_check_protocol_version(struct amdxdna_dev_hdl *ndev)
>> -{
>> -    DECLARE_AIE2_MSG(protocol_version, MSG_OP_GET_PROTOCOL_VERSION);
>> -    struct amdxdna_dev *xdna = ndev->xdna;
>> -    int ret;
>> -
>> -    ret = aie2_send_mgmt_msg_wait(ndev, &msg);
>> -    if (ret) {
>> -        XDNA_ERR(xdna, "Failed to get protocol version, ret %d", ret);
>> -        return ret;
>> -    }
>> -
>> -    if (resp.major != ndev->priv->protocol_major) {
>> -        XDNA_ERR(xdna, "Incompatible firmware protocol version major 
>> %d minor %d",
>> -             resp.major, resp.minor);
>> -        return -EINVAL;
>> -    }
>> -
>> -    if (resp.minor < ndev->priv->protocol_minor) {
>> -        XDNA_ERR(xdna, "Firmware minor version smaller than 
>> supported");
>> -        return -EINVAL;
>> -    }
>> -
>> -    return 0;
>> -}
>> -
>>   int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid)
>>   {
>>       DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID);
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index 489744a2e226..2d2b6b66617a 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -33,17 +33,51 @@ MODULE_PARM_DESC(aie2_max_col, "Maximum column 
>> could be used");
>>    * The related register and ring buffer information is on SRAM BAR.
>>    * This struct is the register layout.
>>    */
>> +#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */
>>   struct mgmt_mbox_chann_info {
>> -    u32    x2i_tail;
>> -    u32    x2i_head;
>> -    u32    x2i_buf;
>> -    u32    x2i_buf_sz;
>> -    u32    i2x_tail;
>> -    u32    i2x_head;
>> -    u32    i2x_buf;
>> -    u32    i2x_buf_sz;
>> +    __u32    x2i_tail;
>> +    __u32    x2i_head;
>> +    __u32    x2i_buf;
>> +    __u32    x2i_buf_sz;
>> +    __u32    i2x_tail;
>> +    __u32    i2x_head;
>> +    __u32    i2x_buf;
>> +    __u32    i2x_buf_sz;
>> +    __u32    magic;
>> +    __u32    msi_id;
>> +    __u32    prot_major;
>> +    __u32    prot_minor;
>> +    __u32    rsvd[4];
>>   };
>>   +static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 
>> fw_major, u32 fw_minor)
>> +{
>> +    struct amdxdna_dev *xdna = ndev->xdna;
>> +
>> +    /*
>> +     * The mailbox behavior supported by the driver is defined by
>> +     * ndev->priv->protocol_major and protocol_minor.
>> +     *
>> +     * When protocol_major and fw_major are different, it means driver
>> +     * and firmware are incompatible.
>> +     */
>> +    if (ndev->priv->protocol_major != fw_major) {
>> +        XDNA_ERR(xdna, "Incompatible firmware protocol major %d 
>> minor %d",
>> +             fw_major, fw_minor);
>> +        return -EINVAL;
>> +    }
>> +
>> +    /*
>> +     * When protocol_minor is greater than fw_minor, that means the driver
>> +     * relies on an operation the installed firmware does not support.
>> +     */
>> +    if (ndev->priv->protocol_minor > fw_minor) {
>> +        XDNA_ERR(xdna, "Firmware minor version smaller than 
>> supported");
>> +        return -EINVAL;
>> +    }
>> +    return 0;
>> +}
>> +
>>   static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
>>   {
>>       struct amdxdna_dev *xdna = ndev->xdna;
>> @@ -57,6 +91,8 @@ static void aie2_dump_chann_info_debug(struct 
>> amdxdna_dev_hdl *ndev)
>>       XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr);
>>       XDNA_DBG(xdna, "x2i rsize   0x%x", ndev->mgmt_x2i.rb_size);
>>       XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx);
>> +    XDNA_DBG(xdna, "mailbox protocol major 0x%x", 
>> ndev->mgmt_prot_major);
>> +    XDNA_DBG(xdna, "mailbox protocol minor 0x%x", 
>> ndev->mgmt_prot_minor);
>>   }
>>     static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
>> @@ -87,6 +123,12 @@ static int aie2_get_mgmt_chann_info(struct 
>> amdxdna_dev_hdl *ndev)
>>       for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++)
>>           reg[i] = readl(ndev->sram_base + off + i * sizeof(u32));
>>   +    if (info_regs.magic != MGMT_MBOX_MAGIC) {
>> +        XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", 
>> info_regs.magic);
>> +        ret = -EINVAL;
>> +        goto done;
>> +    }
>> +
>>       i2x = &ndev->mgmt_i2x;
>>       x2i = &ndev->mgmt_x2i;
>>   @@ -99,14 +141,20 @@ static int aie2_get_mgmt_chann_info(struct 
>> amdxdna_dev_hdl *ndev)
>>       x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail);
>>       x2i->rb_start_addr   = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf);
>>       x2i->rb_size         = info_regs.x2i_buf_sz;
>> -    ndev->mgmt_chan_idx  = CHANN_INDEX(ndev, x2i->rb_start_addr);
>>   +    ndev->mgmt_chan_idx  = info_regs.msi_id;
>> +    ndev->mgmt_prot_major = info_regs.prot_major;
>> +    ndev->mgmt_prot_minor = info_regs.prot_minor;
>> +
>> +    ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, 
>> ndev->mgmt_prot_minor);
>> +
>> +done:
>>       aie2_dump_chann_info_debug(ndev);
>>         /* Must clear address at FW_ALIVE_OFF */
>>       writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF));
>>   -    return 0;
>> +    return ret;
>>   }
>>     int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
>> @@ -155,12 +203,6 @@ static int aie2_mgmt_fw_init(struct 
>> amdxdna_dev_hdl *ndev)
>>   {
>>       int ret;
>>   -    ret = aie2_check_protocol_version(ndev);
>> -    if (ret) {
>> -        XDNA_ERR(ndev->xdna, "Check header hash failed");
>> -        return ret;
>> -    }
>> -
>>       ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
>>       if (ret) {
>>           XDNA_ERR(ndev->xdna, "Runtime config failed");
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h 
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index 8c17b74654ce..cc159cadff9f 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -39,9 +39,6 @@
>>   })
>>     #define CHAN_SLOT_SZ SZ_8K
>> -#define CHANN_INDEX(ndev, rbuf_off) \
>> -    (((rbuf_off) - SRAM_REG_OFF((ndev), MBOX_CHANN_OFF)) / 
>> CHAN_SLOT_SZ)
>> -
>>   #define MBOX_SIZE(ndev) \
>>   ({ \
>>       typeof(ndev) _ndev = (ndev); \
>> @@ -170,6 +167,8 @@ struct amdxdna_dev_hdl {
>>       struct xdna_mailbox_chann_res    mgmt_x2i;
>>       struct xdna_mailbox_chann_res    mgmt_i2x;
>>       u32                mgmt_chan_idx;
>> +    u32                mgmt_prot_major;
>> +    u32                mgmt_prot_minor;
>>         u32                total_col;
>>       struct aie_version        version;
>> @@ -262,7 +261,6 @@ int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
>>   int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
>>   int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, 
>> u64 value);
>>   int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, 
>> u64 *value);
>> -int aie2_check_protocol_version(struct amdxdna_dev_hdl *ndev);
>>   int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
>>   int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct 
>> aie_version *version);
>>   int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct 
>> aie_metadata *metadata);
>> diff --git a/drivers/accel/amdxdna/npu1_regs.c 
>> b/drivers/accel/amdxdna/npu1_regs.c
>> index c8f4d1cac65d..e408af57e378 100644
>> --- a/drivers/accel/amdxdna/npu1_regs.c
>> +++ b/drivers/accel/amdxdna/npu1_regs.c
>> @@ -65,7 +65,7 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = {
>>   const struct amdxdna_dev_priv npu1_dev_priv = {
>>       .fw_path        = "amdnpu/1502_00/npu.sbin",
>>       .protocol_major = 0x5,
>> -    .protocol_minor = 0x1,
>> +    .protocol_minor = 0x7,
>>       .rt_config    = npu1_default_rt_cfg,
>>       .dpm_clk_tbl    = npu1_dpm_clk_table,
>>       .col_align    = COL_ALIGN_NONE,
>> diff --git a/drivers/accel/amdxdna/npu2_regs.c 
>> b/drivers/accel/amdxdna/npu2_regs.c
>> index ac63131f9c7c..286bd0d475e2 100644
>> --- a/drivers/accel/amdxdna/npu2_regs.c
>> +++ b/drivers/accel/amdxdna/npu2_regs.c
>> @@ -64,7 +64,7 @@
>>   const struct amdxdna_dev_priv npu2_dev_priv = {
>>       .fw_path        = "amdnpu/17f0_00/npu.sbin",
>>       .protocol_major = 0x6,
>> -    .protocol_minor = 0x1,
>> +    .protocol_minor = 0x6,
>>       .rt_config    = npu4_default_rt_cfg,
>>       .dpm_clk_tbl    = npu4_dpm_clk_table,
>>       .col_align    = COL_ALIGN_NATURE,
>> diff --git a/drivers/accel/amdxdna/npu4_regs.c 
>> b/drivers/accel/amdxdna/npu4_regs.c
>> index a713ac18adfc..00c52833ce89 100644
>> --- a/drivers/accel/amdxdna/npu4_regs.c
>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>> @@ -85,7 +85,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>>   const struct amdxdna_dev_priv npu4_dev_priv = {
>>       .fw_path        = "amdnpu/17f0_10/npu.sbin",
>>       .protocol_major = 0x6,
>> -    .protocol_minor = 0x1,
>> +    .protocol_minor = 12,
>>       .rt_config    = npu4_default_rt_cfg,
>>       .dpm_clk_tbl    = npu4_dpm_clk_table,
>>       .col_align    = COL_ALIGN_NATURE,
>> diff --git a/drivers/accel/amdxdna/npu5_regs.c 
>> b/drivers/accel/amdxdna/npu5_regs.c
>> index 67a5d5bc8a49..118849272f27 100644
>> --- a/drivers/accel/amdxdna/npu5_regs.c
>> +++ b/drivers/accel/amdxdna/npu5_regs.c
>> @@ -64,7 +64,7 @@
>>   const struct amdxdna_dev_priv npu5_dev_priv = {
>>       .fw_path        = "amdnpu/17f0_11/npu.sbin",
>>       .protocol_major = 0x6,
>> -    .protocol_minor = 0x1,
>> +    .protocol_minor = 12,
>>       .rt_config    = npu4_default_rt_cfg,
>>       .dpm_clk_tbl    = npu4_dpm_clk_table,
>>       .col_align    = COL_ALIGN_NATURE,
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 6/8] accel/amdxdna: Enhance power management settings
  2024-12-11  5:28     ` Lizhi Hou
@ 2024-12-11 20:55       ` Mario Limonciello
  0 siblings, 0 replies; 33+ messages in thread
From: Mario Limonciello @ 2024-12-11 20:55 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	Narendra Gutta, George Yang

On 12/10/2024 23:28, Lizhi Hou wrote:
> 
> On 12/10/24 16:28, Mario Limonciello wrote:
>> On 12/6/2024 15:59, Lizhi Hou wrote:
>>> Add SET_STATE ioctl to configure device power mode for aie2 device.
>>> Three modes are supported initially.
>>>
>>> POWER_MODE_DEFAULT: Enable clock gating and set DPM (Dynamic Power
>>> Management) level to value which has been set by resource solver or
>>> maximum DPM level the device supports.
>>>
>>> POWER_MODE_HIGH: Enable clock gating and set DPM level to maximum DPM
>>> level the device supports.
>>>
>>> POWER_MODE_TURBO: Disable clock gating and set DPM level to maximum DPM
>>> level the device supports.
>>>
>>> Disabling clock gating means all clocks always run at full speed, and
>>> different clock frequencies are used based on the DPM level that has
>>> been set. Initially, the driver sets the power mode to default mode.
>>>
>>> Co-developed-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
>>> Signed-off-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
>>> Co-developed-by: George Yang <George.Yang@amd.com>
>>> Signed-off-by: George Yang <George.Yang@amd.com>
>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>> ---
>>>   drivers/accel/amdxdna/Makefile          |   1 +
>>>   drivers/accel/amdxdna/TODO              |   1 -
>>>   drivers/accel/amdxdna/aie2_ctx.c        |   6 ++
>>>   drivers/accel/amdxdna/aie2_message.c    |   9 +-
>>>   drivers/accel/amdxdna/aie2_pci.c        | 136 +++++++++++++++++++-----
>>>   drivers/accel/amdxdna/aie2_pci.h        |  55 ++++++++--
>>>   drivers/accel/amdxdna/aie2_pm.c         | 108 +++++++++++++++++++
>>>   drivers/accel/amdxdna/aie2_smu.c        |  85 +++++++++------
>>>   drivers/accel/amdxdna/aie2_solver.c     |  59 +++++++++-
>>>   drivers/accel/amdxdna/aie2_solver.h     |   1 +
>>>   drivers/accel/amdxdna/amdxdna_pci_drv.c |  19 ++++
>>>   drivers/accel/amdxdna/amdxdna_pci_drv.h |   2 +
>>>   drivers/accel/amdxdna/npu1_regs.c       |  29 +++--
>>>   drivers/accel/amdxdna/npu2_regs.c       |  15 +--
>>>   drivers/accel/amdxdna/npu4_regs.c       |  32 ++++--
>>>   drivers/accel/amdxdna/npu5_regs.c       |  15 +--
>>>   drivers/accel/amdxdna/npu6_regs.c       |  19 ++--
>>>   include/uapi/drm/amdxdna_accel.h        |  52 +++++++++
>>>   18 files changed, 516 insertions(+), 128 deletions(-)
>>>   create mode 100644 drivers/accel/amdxdna/aie2_pm.c
>>>
>>> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/ 
>>> Makefile
>>> index 6baf181298de..0e9adf6890a0 100644
>>> --- a/drivers/accel/amdxdna/Makefile
>>> +++ b/drivers/accel/amdxdna/Makefile
>>> @@ -5,6 +5,7 @@ amdxdna-y := \
>>>       aie2_error.o \
>>>       aie2_message.o \
>>>       aie2_pci.o \
>>> +    aie2_pm.o \
>>>       aie2_psp.o \
>>>       aie2_smu.o \
>>>       aie2_solver.o \
>>> diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
>>> index de4e1dbc8868..5119bccd1917 100644
>>> --- a/drivers/accel/amdxdna/TODO
>>> +++ b/drivers/accel/amdxdna/TODO
>>> @@ -1,4 +1,3 @@
>>>   - Add import and export BO support
>>>   - Add debugfs support
>>>   - Add debug BO support
>>> -- Improve power management
>>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/ 
>>> amdxdna/aie2_ctx.c
>>> index 07eecb40767f..6b4e6fcb7794 100644
>>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>>> @@ -518,6 +518,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>>       struct drm_gpu_scheduler *sched;
>>>       struct amdxdna_hwctx_priv *priv;
>>>       struct amdxdna_gem_obj *heap;
>>> +    struct amdxdna_dev_hdl *ndev;
>>>       int i, ret;
>>>         priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
>>> @@ -612,6 +613,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>>       }
>>>         hwctx->status = HWCTX_STAT_INIT;
>>> +    ndev = xdna->dev_handle;
>>> +    ndev->hwctx_num++;
>>>         XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
>>>   @@ -641,10 +644,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>>     void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
>>>   {
>>> +    struct amdxdna_dev_hdl *ndev;
>>>       struct amdxdna_dev *xdna;
>>>       int idx;
>>>         xdna = hwctx->client->xdna;
>>> +    ndev = xdna->dev_handle;
>>> +    ndev->hwctx_num--;
>>>       drm_sched_wqueue_stop(&hwctx->priv->sched);
>>>         /* Now, scheduler will not send command to device. */
>>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/ 
>>> amdxdna/aie2_message.c
>>> index fc33a158d223..13b5a96f8d25 100644
>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>> @@ -70,11 +70,18 @@ int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
>>>   int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, 
>>> u64 value)
>>>   {
>>>       DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
>>> +    int ret;
>>>         req.type = type;
>>>       req.value = value;
>>>   -    return aie2_send_mgmt_msg_wait(ndev, &msg);
>>> +    ret = aie2_send_mgmt_msg_wait(ndev, &msg);
>>> +    if (ret) {
>>> +        XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", 
>>> ret);
>>> +        return ret;
>>> +    }
>>> +
>>> +    return 0;
>>>   }
>>>     int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, 
>>> u64 *value)
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/ 
>>> amdxdna/aie2_pci.c
>>> index 83abd16ade11..489744a2e226 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>> @@ -109,28 +109,26 @@ static int aie2_get_mgmt_chann_info(struct 
>>> amdxdna_dev_hdl *ndev)
>>>       return 0;
>>>   }
>>>   -static int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev)
>>> +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
>>> +             enum rt_config_category category, u32 *val)
>>>   {
>>> -    const struct rt_config *cfg = &ndev->priv->rt_config;
>>> -    u64 value;
>>> +    const struct rt_config *cfg;
>>> +    u32 value;
>>>       int ret;
>>>   -    ret = aie2_set_runtime_cfg(ndev, cfg->type, cfg->value);
>>> -    if (ret) {
>>> -        XDNA_ERR(ndev->xdna, "Set runtime type %d value %d failed",
>>> -             cfg->type, cfg->value);
>>> -        return ret;
>>> -    }
>>> +    for (cfg = ndev->priv->rt_config; cfg->type; cfg++) {
>>> +        if (cfg->category != category)
>>> +            continue;
>>>   -    ret = aie2_get_runtime_cfg(ndev, cfg->type, &value);
>>> -    if (ret) {
>>> -        XDNA_ERR(ndev->xdna, "Get runtime cfg failed");
>>> -        return ret;
>>> +        value = val ? *val : cfg->value;
>>> +        ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
>>> +        if (ret) {
>>> +            XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
>>> +                 cfg->type, value);
>>> +            return ret;
>>> +        }
>>>       }
>>>   -    if (value != cfg->value)
>>> -        return -EINVAL;
>>> -
>>>       return 0;
>>>   }
>>>   @@ -163,7 +161,7 @@ static int aie2_mgmt_fw_init(struct 
>>> amdxdna_dev_hdl *ndev)
>>>           return ret;
>>>       }
>>>   -    ret = aie2_runtime_cfg(ndev);
>>> +    ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
>>>       if (ret) {
>>>           XDNA_ERR(ndev->xdna, "Runtime config failed");
>>>           return ret;
>>> @@ -257,9 +255,25 @@ static int aie2_xrs_unload(void *cb_arg)
>>>       return ret;
>>>   }
>>>   +static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 
>>> dpm_level)
>>> +{
>>> +    struct amdxdna_dev *xdna = to_xdna_dev(ddev);
>>> +    struct amdxdna_dev_hdl *ndev;
>>> +
>>> +    drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>
>> This is a reinvented lockdep_assert_held() no?
>> Or is there some nuance I'm missing?
>>
>> I would suggest switching to lockdep_assert_held().
> 
> lockdep_assert_held() relies on CONFIG_LOCKDEP which might be off.
> 
> And there are similar use cases in drm, e.g.
> 
> In drm_probe_helper.c:
> 
>     drm_WARN_ON(dev, !mutex_is_locked(&dev->mode_config.mutex));
> 

OK thanks. In that case I don't have any concerns.

Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>

>>
>>> +
>>> +    ndev = xdna->dev_handle;
>>> +    ndev->dft_dpm_level = dpm_level;
>>> +    if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == 
>>> dpm_level)
>>> +        return 0;
>>> +
>>> +    return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
>>> +}
>>> +
>>>   static struct xrs_action_ops aie2_xrs_actions = {
>>>       .load = aie2_xrs_load,
>>>       .unload = aie2_xrs_unload,
>>> +    .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
>>>   };
>>>     static void aie2_hw_stop(struct amdxdna_dev *xdna)
>>> @@ -354,6 +368,12 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>>>           goto stop_psp;
>>>       }
>>>   +    ret = aie2_pm_init(ndev);
>>> +    if (ret) {
>>> +        XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
>>> +        goto destroy_mgmt_chann;
>>> +    }
>>> +
>>>       ret = aie2_mgmt_fw_init(ndev);
>>>       if (ret) {
>>>           XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
>>> @@ -480,10 +500,9 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>>       }
>>>       ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>>>   -    xrs_cfg.clk_list.num_levels = 3;
>>> -    xrs_cfg.clk_list.cu_clk_list[0] = 0;
>>> -    xrs_cfg.clk_list.cu_clk_list[1] = 800;
>>> -    xrs_cfg.clk_list.cu_clk_list[2] = 1000;
>>> +    xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
>>> +    for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
>>> +        xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv- 
>>> >dpm_clk_tbl[i].hclk;
>>>       xrs_cfg.sys_eff_factor = 1;
>>>       xrs_cfg.ddev = &xdna->ddev;
>>>       xrs_cfg.actions = &aie2_xrs_actions;
>>> @@ -657,6 +676,22 @@ static int aie2_get_firmware_version(struct 
>>> amdxdna_client *client,
>>>       return 0;
>>>   }
>>>   +static int aie2_get_power_mode(struct amdxdna_client *client,
>>> +                   struct amdxdna_drm_get_info *args)
>>> +{
>>> +    struct amdxdna_drm_get_power_mode mode = {};
>>> +    struct amdxdna_dev *xdna = client->xdna;
>>> +    struct amdxdna_dev_hdl *ndev;
>>> +
>>> +    ndev = xdna->dev_handle;
>>> +    mode.power_mode = ndev->pw_mode;
>>> +
>>> +    if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, 
>>> sizeof(mode)))
>>> +        return -EFAULT;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   static int aie2_get_clock_metadata(struct amdxdna_client *client,
>>>                      struct amdxdna_drm_get_info *args)
>>>   {
>>> @@ -670,11 +705,11 @@ static int aie2_get_clock_metadata(struct 
>>> amdxdna_client *client,
>>>       if (!clock)
>>>           return -ENOMEM;
>>>   -    memcpy(clock->mp_npu_clock.name, ndev->mp_npu_clock.name,
>>> -           sizeof(clock->mp_npu_clock.name));
>>> -    clock->mp_npu_clock.freq_mhz = ndev->mp_npu_clock.freq_mhz;
>>> -    memcpy(clock->h_clock.name, ndev->h_clock.name, sizeof(clock- 
>>> >h_clock.name));
>>> -    clock->h_clock.freq_mhz = ndev->h_clock.freq_mhz;
>>> +    snprintf(clock->mp_npu_clock.name, sizeof(clock- 
>>> >mp_npu_clock.name),
>>> +         "MP-NPU Clock");
>>> +    clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
>>> +    snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H 
>>> Clock");
>>> +    clock->h_clock.freq_mhz = ndev->hclk_freq;
>>>         if (copy_to_user(u64_to_user_ptr(args->buffer), clock, 
>>> sizeof(*clock)))
>>>           ret = -EFAULT;
>>> @@ -772,6 +807,9 @@ static int aie2_get_info(struct amdxdna_client 
>>> *client, struct amdxdna_drm_get_i
>>>       case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
>>>           ret = aie2_get_firmware_version(client, args);
>>>           break;
>>> +    case DRM_AMDXDNA_GET_POWER_MODE:
>>> +        ret = aie2_get_power_mode(client, args);
>>> +        break;
>>>       default:
>>>           XDNA_ERR(xdna, "Not supported request parameter %u", args- 
>>> >param);
>>>           ret = -EOPNOTSUPP;
>>> @@ -782,12 +820,58 @@ static int aie2_get_info(struct amdxdna_client 
>>> *client, struct amdxdna_drm_get_i
>>>       return ret;
>>>   }
>>>   +static int aie2_set_power_mode(struct amdxdna_client *client,
>>> +                   struct amdxdna_drm_set_state *args)
>>> +{
>>> +    struct amdxdna_drm_set_power_mode power_state;
>>> +    enum amdxdna_power_mode_type power_mode;
>>> +    struct amdxdna_dev *xdna = client->xdna;
>>> +
>>> +    if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
>>> +               sizeof(power_state))) {
>>> +        XDNA_ERR(xdna, "Failed to copy power mode request into 
>>> kernel");
>>> +        return -EFAULT;
>>> +    }
>>> +
>>> +    power_mode = power_state.power_mode;
>>> +    if (power_mode > POWER_MODE_TURBO) {
>>> +        XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    return aie2_pm_set_mode(xdna->dev_handle, power_mode);
>>> +}
>>> +
>>> +static int aie2_set_state(struct amdxdna_client *client,
>>> +              struct amdxdna_drm_set_state *args)
>>> +{
>>> +    struct amdxdna_dev *xdna = client->xdna;
>>> +    int ret, idx;
>>> +
>>> +    if (!drm_dev_enter(&xdna->ddev, &idx))
>>> +        return -ENODEV;
>>> +
>>> +    switch (args->param) {
>>> +    case DRM_AMDXDNA_SET_POWER_MODE:
>>> +        ret = aie2_set_power_mode(client, args);
>>> +        break;
>>> +    default:
>>> +        XDNA_ERR(xdna, "Not supported request parameter %u", args- 
>>> >param);
>>> +        ret = -EOPNOTSUPP;
>>> +        break;
>>> +    }
>>> +
>>> +    drm_dev_exit(idx);
>>> +    return ret;
>>> +}
>>> +
>>>   const struct amdxdna_dev_ops aie2_ops = {
>>>       .init           = aie2_init,
>>>       .fini           = aie2_fini,
>>>       .resume         = aie2_hw_start,
>>>       .suspend        = aie2_hw_stop,
>>>       .get_aie_info   = aie2_get_info,
>>> +    .set_aie_state    = aie2_set_state,
>>>       .hwctx_init     = aie2_hwctx_init,
>>>       .hwctx_fini     = aie2_hwctx_fini,
>>>       .hwctx_config   = aie2_hwctx_config,
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/ 
>>> amdxdna/aie2_pci.h
>>> index 1c6f07d9b805..8c17b74654ce 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>> @@ -6,6 +6,7 @@
>>>   #ifndef _AIE2_PCI_H_
>>>   #define _AIE2_PCI_H_
>>>   +#include <drm/amdxdna_accel.h>
>>>   #include <linux/semaphore.h>
>>>     #include "amdxdna_mailbox.h"
>>> @@ -48,9 +49,6 @@
>>>       pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info- 
>>> >mbox_bar); \
>>>   })
>>>   -#define SMU_MPNPUCLK_FREQ_MAX(ndev) ((ndev)->priv- 
>>> >smu_mpnpuclk_freq_max)
>>> -#define SMU_HCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_hclk_freq_max)
>>> -
>>>   enum aie2_smu_reg_idx {
>>>       SMU_CMD_REG = 0,
>>>       SMU_ARG_REG,
>>> @@ -112,14 +110,20 @@ struct aie_metadata {
>>>       struct aie_tile_metadata shim;
>>>   };
>>>   -struct clock_entry {
>>> -    char name[16];
>>> -    u32 freq_mhz;
>>> +enum rt_config_category {
>>> +    AIE2_RT_CFG_INIT,
>>> +    AIE2_RT_CFG_CLK_GATING,
>>>   };
>>>     struct rt_config {
>>>       u32    type;
>>>       u32    value;
>>> +    u32    category;
>>> +};
>>> +
>>> +struct dpm_clk_freq {
>>> +    u32    npuclk;
>>> +    u32    hclk;
>>>   };
>>>     /*
>>> @@ -150,6 +154,7 @@ struct amdxdna_hwctx_priv {
>>>   };
>>>     enum aie2_dev_status {
>>> +    AIE2_DEV_UNINIT,
>>>       AIE2_DEV_INIT,
>>>       AIE2_DEV_START,
>>>   };
>>> @@ -169,8 +174,15 @@ struct amdxdna_dev_hdl {
>>>       u32                total_col;
>>>       struct aie_version        version;
>>>       struct aie_metadata        metadata;
>>> -    struct clock_entry        mp_npu_clock;
>>> -    struct clock_entry        h_clock;
>>> +
>>> +    /* power management and clock*/
>>> +    enum amdxdna_power_mode_type    pw_mode;
>>> +    u32                dpm_level;
>>> +    u32                dft_dpm_level;
>>> +    u32                max_dpm_level;
>>> +    u32                clk_gating;
>>> +    u32                npuclk_freq;
>>> +    u32                hclk_freq;
>>>         /* Mailbox and the management channel */
>>>       struct mailbox            *mbox;
>>> @@ -178,6 +190,7 @@ struct amdxdna_dev_hdl {
>>>       struct async_events        *async_events;
>>>         enum aie2_dev_status        dev_status;
>>> +    u32                hwctx_num;
>>>   };
>>>     #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
>>> @@ -188,11 +201,17 @@ struct aie2_bar_off_pair {
>>>       u32    offset;
>>>   };
>>>   +struct aie2_hw_ops {
>>> +    int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>>> +};
>>> +
>>>   struct amdxdna_dev_priv {
>>>       const char            *fw_path;
>>>       u64                protocol_major;
>>>       u64                protocol_minor;
>>> -    struct rt_config        rt_config;
>>> +    const struct rt_config        *rt_config;
>>> +    const struct dpm_clk_freq    *dpm_clk_tbl;
>>> +
>>>   #define COL_ALIGN_NONE   0
>>>   #define COL_ALIGN_NATURE 1
>>>       u32                col_align;
>>> @@ -203,15 +222,29 @@ struct amdxdna_dev_priv {
>>>       struct aie2_bar_off_pair    sram_offs[SRAM_MAX_INDEX];
>>>       struct aie2_bar_off_pair    psp_regs_off[PSP_MAX_REGS];
>>>       struct aie2_bar_off_pair    smu_regs_off[SMU_MAX_REGS];
>>> -    u32                smu_mpnpuclk_freq_max;
>>> -    u32                smu_hclk_freq_max;
>>> +    struct aie2_hw_ops        hw_ops;
>>>   };
>>>     extern const struct amdxdna_dev_ops aie2_ops;
>>>   +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
>>> +             enum rt_config_category category, u32 *val);
>>> +
>>> +/* aie2 npu hw config */
>>> +extern const struct dpm_clk_freq npu1_dpm_clk_table[];
>>> +extern const struct dpm_clk_freq npu4_dpm_clk_table[];
>>> +extern const struct rt_config npu1_default_rt_cfg[];
>>> +extern const struct rt_config npu4_default_rt_cfg[];
>>> +
>>>   /* aie2_smu.c */
>>>   int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
>>>   void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
>>> +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>>> +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>>> +
>>> +/* aie2_pm.c */
>>> +int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
>>> +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum 
>>> amdxdna_power_mode_type target);
>>>     /* aie2_psp.c */
>>>   struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct 
>>> psp_config *conf);
>>> diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/ 
>>> aie2_pm.c
>>> new file mode 100644
>>> index 000000000000..426c38fce848
>>> --- /dev/null
>>> +++ b/drivers/accel/amdxdna/aie2_pm.c
>>> @@ -0,0 +1,108 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Copyright (C) 2024, Advanced Micro Devices, Inc.
>>> + */
>>> +
>>> +#include <drm/amdxdna_accel.h>
>>> +#include <drm/drm_device.h>
>>> +#include <drm/drm_print.h>
>>> +#include <drm/gpu_scheduler.h>
>>> +
>>> +#include "aie2_pci.h"
>>> +#include "amdxdna_pci_drv.h"
>>> +
>>> +#define AIE2_CLK_GATING_ENABLE    1
>>> +#define AIE2_CLK_GATING_DISABLE    0
>>> +
>>> +static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 
>>> val)
>>> +{
>>> +    int ret;
>>> +
>>> +    ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    ndev->clk_gating = val;
>>> +    return 0;
>>> +}
>>> +
>>> +int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
>>> +{
>>> +    int ret;
>>> +
>>> +    if (ndev->dev_status != AIE2_DEV_UNINIT) {
>>> +        /* Resume device */
>>> +        ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
>>> +        if (ret)
>>> +            return ret;
>>> +
>>> +        ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
>>> +        if (ret)
>>> +            return ret;
>>> +
>>> +        return 0;
>>> +    }
>>> +
>>> +    while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
>>> +        ndev->max_dpm_level++;
>>> +    ndev->max_dpm_level--;
>>> +
>>> +    ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
>>> +    if (ret)
>>> +        return ret;
>>
>> In the event of a failure do you want to try to restore dpm where it was?
> 
> This is initialization routine. If it fails, that indicates a firmware/ 
> hardware issue. It might not need to do more on a broken fw/hw.
> 
> And the driver will not be probed in this case.
> 
> 
> Thanks,
> 
> Lizhi
> 
>>
>>> +
>>> +    ndev->pw_mode = POWER_MODE_DEFAULT;
>>> +    ndev->dft_dpm_level = ndev->max_dpm_level;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum 
>>> amdxdna_power_mode_type target)
>>> +{
>>> +    struct amdxdna_dev *xdna = ndev->xdna;
>>> +    u32 clk_gating, dpm_level;
>>> +    int ret;
>>> +
>>> +    drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>
>> lockdep_assert_held()
>>
>>> +
>>> +    if (ndev->pw_mode == target)
>>> +        return 0;
>>> +
>>> +    switch (target) {
>>> +    case POWER_MODE_TURBO:
>>> +        if (ndev->hwctx_num) {
>>> +            XDNA_ERR(xdna, "Can not set turbo when there is active 
>>> hwctx");
>>> +            return -EINVAL;
>>> +        }
>>> +
>>> +        clk_gating = AIE2_CLK_GATING_DISABLE;
>>> +        dpm_level = ndev->max_dpm_level;
>>> +        break;
>>> +    case POWER_MODE_HIGH:
>>> +        clk_gating = AIE2_CLK_GATING_ENABLE;
>>> +        dpm_level = ndev->max_dpm_level;
>>> +        break;
>>> +    case POWER_MODE_DEFAULT:
>>> +        clk_gating = AIE2_CLK_GATING_ENABLE;
>>> +        dpm_level = ndev->dft_dpm_level;
>>> +        break;
>>> +    default:
>>> +        return -EOPNOTSUPP;
>>> +    }
>>> +
>>> +    ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    ret = aie2_pm_set_clk_gating(ndev, clk_gating);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    ndev->pw_mode = target;
>>> +
>>> +    return 0;
>>> +}
>>> diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/ 
>>> amdxdna/aie2_smu.c
>>> index 91893d438da7..73388443c676 100644
>>> --- a/drivers/accel/amdxdna/aie2_smu.c
>>> +++ b/drivers/accel/amdxdna/aie2_smu.c
>>> @@ -19,8 +19,11 @@
>>>   #define AIE2_SMU_POWER_OFF        0x4
>>>   #define AIE2_SMU_SET_MPNPUCLK_FREQ    0x5
>>>   #define AIE2_SMU_SET_HCLK_FREQ        0x6
>>> +#define AIE2_SMU_SET_SOFT_DPMLEVEL    0x7
>>> +#define AIE2_SMU_SET_HARD_DPMLEVEL    0x8
>>>   -static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 
>>> reg_cmd, u32 reg_arg)
>>> +static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
>>> +             u32 reg_arg, u32 *out)
>>>   {
>>>       u32 resp;
>>>       int ret;
>>> @@ -40,6 +43,9 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl 
>>> *ndev, u32 reg_cmd, u32 reg_arg)
>>>           return ret;
>>>       }
>>>   +    if (out)
>>> +        *out = readl(SMU_REG(ndev, SMU_OUT_REG));
>>> +
>>>       if (resp != SMU_RESULT_OK) {
>>>           XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, 
>>> resp);
>>>           return -EINVAL;
>>> @@ -48,63 +54,71 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl 
>>> *ndev, u32 reg_cmd, u32 reg_arg)
>>>       return 0;
>>>   }
>>>   -static int aie2_smu_set_mpnpu_clock_freq(struct amdxdna_dev_hdl 
>>> *ndev, u32 freq_mhz)
>>> +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>>   {
>>> +    u32 freq;
>>>       int ret;
>>>   -    if (!freq_mhz || freq_mhz > SMU_MPNPUCLK_FREQ_MAX(ndev)) {
>>> -        XDNA_ERR(ndev->xdna, "invalid mpnpu clock freq %d", freq_mhz);
>>> -        return -EINVAL;
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
>>> + ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
>>> +    if (ret) {
>>> +        XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
>>> +             ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
>>>       }
>>> +    ndev->npuclk_freq = freq;
>>>   -    ndev->mp_npu_clock.freq_mhz = freq_mhz;
>>> -    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, freq_mhz);
>>> -    if (!ret)
>>> -        XDNA_INFO_ONCE(ndev->xdna, "set mpnpu_clock = %d mhz", 
>>> freq_mhz);
>>> -
>>> -    return ret;
>>> -}
>>> -
>>> -static int aie2_smu_set_hclock_freq(struct amdxdna_dev_hdl *ndev, 
>>> u32 freq_mhz)
>>> -{
>>> -    int ret;
>>> -
>>> -    if (!freq_mhz || freq_mhz > SMU_HCLK_FREQ_MAX(ndev)) {
>>> -        XDNA_ERR(ndev->xdna, "invalid hclock freq %d", freq_mhz);
>>> -        return -EINVAL;
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ,
>>> +                ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq);
>>> +    if (ret) {
>>> +        XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
>>> +             ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
>>>       }
>>> +    ndev->hclk_freq = freq;
>>> +    ndev->dpm_level = dpm_level;
>>>   -    ndev->h_clock.freq_mhz = freq_mhz;
>>> -    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, freq_mhz);
>>> -    if (!ret)
>>> -        XDNA_INFO_ONCE(ndev->xdna, "set npu_hclock = %d mhz", 
>>> freq_mhz);
>>> +    XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
>>> +         ndev->npuclk_freq, ndev->hclk_freq);
>>>   -    return ret;
>>> +    return 0;
>>>   }
>>>   -int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>>> +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>>   {
>>>       int ret;
>>>   -    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0);
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, 
>>> NULL);
>>>       if (ret) {
>>> -        XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
>>> +        XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
>>> +             dpm_level, ret);
>>>           return ret;
>>>       }
>>>   -    ret = aie2_smu_set_mpnpu_clock_freq(ndev, 
>>> SMU_MPNPUCLK_FREQ_MAX(ndev));
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, 
>>> NULL);
>>>       if (ret) {
>>> -        XDNA_ERR(ndev->xdna, "Set mpnpu clk freq failed, ret %d", ret);
>>> +        XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
>>> +             dpm_level, ret);
>>>           return ret;
>>>       }
>>> -    snprintf(ndev->mp_npu_clock.name, sizeof(ndev- 
>>> >mp_npu_clock.name), "MP-NPU Clock");
>>>   -    ret = aie2_smu_set_hclock_freq(ndev, SMU_HCLK_FREQ_MAX(ndev));
>>> +    ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>>> +    ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
>>> +    ndev->dpm_level = dpm_level;
>>> +
>>> +    XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
>>> +         ndev->npuclk_freq, ndev->hclk_freq);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>>> +{
>>> +    int ret;
>>> +
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
>>>       if (ret) {
>>> -        XDNA_ERR(ndev->xdna, "Set hclk freq failed, ret %d", ret);
>>> +        XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
>>>           return ret;
>>>       }
>>> -    snprintf(ndev->h_clock.name, sizeof(ndev->h_clock.name), "H 
>>> Clock");
>>>         return 0;
>>>   }
>>> @@ -113,7 +127,8 @@ void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
>>>   {
>>>       int ret;
>>>   -    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0);
>>> +    ndev->priv->hw_ops.set_dpm(ndev, 0);
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
>>>       if (ret)
>>>           XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
>>>   }
>>> diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/ 
>>> amdxdna/aie2_solver.c
>>> index a537c66589a4..1939625d6027 100644
>>> --- a/drivers/accel/amdxdna/aie2_solver.c
>>> +++ b/drivers/accel/amdxdna/aie2_solver.c
>>> @@ -25,6 +25,7 @@ struct solver_node {
>>>         struct partition_node    *pt_node;
>>>       void            *cb_arg;
>>> +    u32            dpm_level;
>>>       u32            cols_len;
>>>       u32            start_cols[] __counted_by(cols_len);
>>>   };
>>> @@ -95,6 +96,51 @@ static int sanity_check(struct solver_state *xrs, 
>>> struct alloc_requests *req)
>>>       return 0;
>>>   }
>>>   +static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
>>> +{
>>> +    /*
>>> +     * gops is retrieved from the xmodel, so it's always set
>>> +     * fps and latency are the configurable params from the application
>>> +     */
>>> +    if (rqos->gops > 0 && (rqos->fps > 0 ||  rqos->latency > 0))
>>> +        return true;
>>> +
>>> +    return false;
>>> +}
>>> +
>>> +static int set_dpm_level(struct solver_state *xrs, struct 
>>> alloc_requests *req, u32 *dpm_level)
>>> +{
>>> +    struct solver_rgroup *rgp = &xrs->rgp;
>>> +    struct cdo_parts *cdop = &req->cdo;
>>> +    struct aie_qos *rqos = &req->rqos;
>>> +    u32 freq, max_dpm_level, level;
>>> +    struct solver_node *node;
>>> +
>>> +    max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
>>> +    /* If no QoS parameters are passed, set it to the max DPM level */
>>> +    if (!is_valid_qos_dpm_params(rqos)) {
>>> +        level = max_dpm_level;
>>> +        goto set_dpm;
>>> +    }
>>> +
>>> +    /* Find one CDO group that meet the GOPs requirement. */
>>> +    for (level = 0; level < max_dpm_level; level++) {
>>> +        freq = xrs->cfg.clk_list.cu_clk_list[level];
>>> +        if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
>>> +            break;
>>> +    }
>>> +
>>> +    /* set the dpm level which fits all the sessions */
>>> +    list_for_each_entry(node, &rgp->node_list, list) {
>>> +        if (node->dpm_level > level)
>>> +            level = node->dpm_level;
>>> +    }
>>> +
>>> +set_dpm:
>>> +    *dpm_level = level;
>>> +    return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
>>> +}
>>> +
>>>   static struct solver_node *rg_search_node(struct solver_rgroup 
>>> *rgp, u64 rid)
>>>   {
>>>       struct solver_node *node;
>>> @@ -159,12 +205,9 @@ static int get_free_partition(struct 
>>> solver_state *xrs,
>>>       pt_node->ncols = ncols;
>>>         /*
>>> -     * Before fully support latency in QoS, if a request
>>> -     * specifies a non-zero latency value, it will not share
>>> -     * the partition with other requests.
>>> +     * Always set exclusive to false for now.
>>>        */
>>> -    if (req->rqos.latency)
>>> -        pt_node->exclusive = true;
>>> +    pt_node->exclusive = false;
>>>         list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
>>>       xrs->rgp.npartition_node++;
>>> @@ -257,6 +300,7 @@ int xrs_allocate_resource(void *hdl, struct 
>>> alloc_requests *req, void *cb_arg)
>>>       struct xrs_action_load load_act;
>>>       struct solver_node *snode;
>>>       struct solver_state *xrs;
>>> +    u32 dpm_level;
>>>       int ret;
>>>         xrs = (struct solver_state *)hdl;
>>> @@ -281,6 +325,11 @@ int xrs_allocate_resource(void *hdl, struct 
>>> alloc_requests *req, void *cb_arg)
>>>       if (ret)
>>>           goto free_node;
>>>   +    ret = set_dpm_level(xrs, req, &dpm_level);
>>> +    if (ret)
>>> +        goto free_node;
>>> +
>>> +    snode->dpm_level = dpm_level;
>>>       snode->cb_arg = cb_arg;
>>>         drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
>>> diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/ 
>>> amdxdna/aie2_solver.h
>>> index 9b1847bb46a6..a2e3c52229e9 100644
>>> --- a/drivers/accel/amdxdna/aie2_solver.h
>>> +++ b/drivers/accel/amdxdna/aie2_solver.h
>>> @@ -99,6 +99,7 @@ struct clk_list_info {
>>>   struct xrs_action_ops {
>>>       int (*load)(void *cb_arg, struct xrs_action_load *action);
>>>       int (*unload)(void *cb_arg);
>>> +    int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
>>>   };
>>>     /*
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/ 
>>> amdxdna/amdxdna_pci_drv.c
>>> index c3541796d189..6bbd437d48d8 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> @@ -160,6 +160,24 @@ static int amdxdna_drm_get_info_ioctl(struct 
>>> drm_device *dev, void *data, struct
>>>       return ret;
>>>   }
>>>   +static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, 
>>> void *data, struct drm_file *filp)
>>> +{
>>> +    struct amdxdna_client *client = filp->driver_priv;
>>> +    struct amdxdna_dev *xdna = to_xdna_dev(dev);
>>> +    struct amdxdna_drm_set_state *args = data;
>>> +    int ret;
>>> +
>>> +    if (!xdna->dev_info->ops->set_aie_state)
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    XDNA_DBG(xdna, "Request parameter %u", args->param);
>>> +    mutex_lock(&xdna->dev_lock);
>>> +    ret = xdna->dev_info->ops->set_aie_state(client, args);
>>> +    mutex_unlock(&xdna->dev_lock);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>   static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
>>>       /* Context */
>>>       DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, 
>>> amdxdna_drm_create_hwctx_ioctl, 0),
>>> @@ -173,6 +191,7 @@ static const struct drm_ioctl_desc 
>>> amdxdna_drm_ioctls[] = {
>>>       DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, 
>>> amdxdna_drm_submit_cmd_ioctl, 0),
>>>       /* AIE hardware */
>>>       DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 
>>> 0),
>>> +    DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, 
>>> amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
>>>   };
>>>     static const struct file_operations amdxdna_fops = {
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/ 
>>> amdxdna/amdxdna_pci_drv.h
>>> index f5b830fb14bb..e2071e31d949 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>> @@ -20,6 +20,7 @@ extern const struct drm_driver amdxdna_drm_drv;
>>>   struct amdxdna_client;
>>>   struct amdxdna_dev;
>>>   struct amdxdna_drm_get_info;
>>> +struct amdxdna_drm_set_state;
>>>   struct amdxdna_gem_obj;
>>>   struct amdxdna_hwctx;
>>>   struct amdxdna_sched_job;
>>> @@ -40,6 +41,7 @@ struct amdxdna_dev_ops {
>>>       void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
>>>       int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct 
>>> amdxdna_sched_job *job, u64 *seq);
>>>       int (*get_aie_info)(struct amdxdna_client *client, struct 
>>> amdxdna_drm_get_info *args);
>>> +    int (*set_aie_state)(struct amdxdna_client *client, struct 
>>> amdxdna_drm_set_state *args);
>>>   };
>>>     /*
>>> diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/ 
>>> amdxdna/npu1_regs.c
>>> index f00c50461b09..c8f4d1cac65d 100644
>>> --- a/drivers/accel/amdxdna/npu1_regs.c
>>> +++ b/drivers/accel/amdxdna/npu1_regs.c
>>> @@ -44,18 +44,30 @@
>>>   #define NPU1_SMU_BAR_BASE  MPNPU_APERTURE0_BASE
>>>   #define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
>>>   -#define NPU1_RT_CFG_TYPE_PDI_LOAD 2
>>> -#define NPU1_RT_CFG_VAL_PDI_LOAD_MGMT 0
>>> -#define NPU1_RT_CFG_VAL_PDI_LOAD_APP 1
>>> +const struct rt_config npu1_default_rt_cfg[] = {
>>> +    { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
>>> +    { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>>> +    { 0 },
>>> +};
>>>   -#define NPU1_MPNPUCLK_FREQ_MAX  600
>>> -#define NPU1_HCLK_FREQ_MAX      1024
>>> +const struct dpm_clk_freq npu1_dpm_clk_table[] = {
>>> +    {400, 800},
>>> +    {600, 1024},
>>> +    {600, 1024},
>>> +    {600, 1024},
>>> +    {600, 1024},
>>> +    {720, 1309},
>>> +    {720, 1309},
>>> +    {847, 1600},
>>> +    { 0 }
>>> +};
>>>     const struct amdxdna_dev_priv npu1_dev_priv = {
>>>       .fw_path        = "amdnpu/1502_00/npu.sbin",
>>>       .protocol_major = 0x5,
>>>       .protocol_minor = 0x1,
>>> -    .rt_config    = {NPU1_RT_CFG_TYPE_PDI_LOAD, 
>>> NPU1_RT_CFG_VAL_PDI_LOAD_APP},
>>> +    .rt_config    = npu1_default_rt_cfg,
>>> +    .dpm_clk_tbl    = npu1_dpm_clk_table,
>>>       .col_align    = COL_ALIGN_NONE,
>>>       .mbox_dev_addr  = NPU1_MBOX_BAR_BASE,
>>>       .mbox_size      = 0, /* Use BAR size */
>>> @@ -80,8 +92,9 @@ const struct amdxdna_dev_priv npu1_dev_priv = {
>>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
>>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
>>>       },
>>> -    .smu_mpnpuclk_freq_max = NPU1_MPNPUCLK_FREQ_MAX,
>>> -    .smu_hclk_freq_max     = NPU1_HCLK_FREQ_MAX,
>>> +    .hw_ops        = {
>>> +        .set_dpm = npu1_set_dpm,
>>> +    },
>>>   };
>>>     const struct amdxdna_dev_info dev_npu1_info = {
>>> diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/ 
>>> amdxdna/npu2_regs.c
>>> index 00cb381031d2..ac63131f9c7c 100644
>>> --- a/drivers/accel/amdxdna/npu2_regs.c
>>> +++ b/drivers/accel/amdxdna/npu2_regs.c
>>> @@ -61,18 +61,12 @@
>>>   #define NPU2_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>>   #define NPU2_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>>   -#define NPU2_RT_CFG_TYPE_PDI_LOAD 5
>>> -#define NPU2_RT_CFG_VAL_PDI_LOAD_MGMT 0
>>> -#define NPU2_RT_CFG_VAL_PDI_LOAD_APP 1
>>> -
>>> -#define NPU2_MPNPUCLK_FREQ_MAX  1267
>>> -#define NPU2_HCLK_FREQ_MAX      1800
>>> -
>>>   const struct amdxdna_dev_priv npu2_dev_priv = {
>>>       .fw_path        = "amdnpu/17f0_00/npu.sbin",
>>>       .protocol_major = 0x6,
>>>       .protocol_minor = 0x1,
>>> -    .rt_config    = {NPU2_RT_CFG_TYPE_PDI_LOAD, 
>>> NPU2_RT_CFG_VAL_PDI_LOAD_APP},
>>> +    .rt_config    = npu4_default_rt_cfg,
>>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>>       .col_align    = COL_ALIGN_NATURE,
>>>       .mbox_dev_addr  = NPU2_MBOX_BAR_BASE,
>>>       .mbox_size      = 0, /* Use BAR size */
>>> @@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu2_dev_priv = {
>>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
>>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU2_SMU, MP1_C2PMSG_60),
>>>       },
>>> -    .smu_mpnpuclk_freq_max = NPU2_MPNPUCLK_FREQ_MAX,
>>> -    .smu_hclk_freq_max     = NPU2_HCLK_FREQ_MAX,
>>> +    .hw_ops    =     {
>>> +        .set_dpm = npu4_set_dpm,
>>> +    },
>>>   };
>>>     const struct amdxdna_dev_info dev_npu2_info = {
>>> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/ 
>>> amdxdna/npu4_regs.c
>>> index b6dae9667cca..a713ac18adfc 100644
>>> --- a/drivers/accel/amdxdna/npu4_regs.c
>>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>>> @@ -61,18 +61,33 @@
>>>   #define NPU4_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>>   #define NPU4_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>>   -#define NPU4_RT_CFG_TYPE_PDI_LOAD 5
>>> -#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT 0
>>> -#define NPU4_RT_CFG_VAL_PDI_LOAD_APP 1
>>> +const struct rt_config npu4_default_rt_cfg[] = {
>>> +    { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
>>> +    { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>>> +    { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>>> +    { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>>> +    { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>>> +    { 0 },
>>> +};
>>>   -#define NPU4_MPNPUCLK_FREQ_MAX  1267
>>> -#define NPU4_HCLK_FREQ_MAX      1800
>>> +const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>>> +    {396, 792},
>>> +    {600, 1056},
>>> +    {792, 1152},
>>> +    {975, 1267},
>>> +    {975, 1267},
>>> +    {1056, 1408},
>>> +    {1152, 1584},
>>> +    {1267, 1800},
>>> +    { 0 }
>>> +};
>>>     const struct amdxdna_dev_priv npu4_dev_priv = {
>>>       .fw_path        = "amdnpu/17f0_10/npu.sbin",
>>>       .protocol_major = 0x6,
>>>       .protocol_minor = 0x1,
>>> -    .rt_config    = {NPU4_RT_CFG_TYPE_PDI_LOAD, 
>>> NPU4_RT_CFG_VAL_PDI_LOAD_APP},
>>> +    .rt_config    = npu4_default_rt_cfg,
>>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>>       .col_align    = COL_ALIGN_NATURE,
>>>       .mbox_dev_addr  = NPU4_MBOX_BAR_BASE,
>>>       .mbox_size      = 0, /* Use BAR size */
>>> @@ -97,8 +112,9 @@ const struct amdxdna_dev_priv npu4_dev_priv = {
>>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
>>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
>>>       },
>>> -    .smu_mpnpuclk_freq_max = NPU4_MPNPUCLK_FREQ_MAX,
>>> -    .smu_hclk_freq_max     = NPU4_HCLK_FREQ_MAX,
>>> +    .hw_ops        = {
>>> +        .set_dpm = npu4_set_dpm,
>>> +    },
>>>   };
>>>     const struct amdxdna_dev_info dev_npu4_info = {
>>> diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/ 
>>> amdxdna/npu5_regs.c
>>> index bed1baf8e160..67a5d5bc8a49 100644
>>> --- a/drivers/accel/amdxdna/npu5_regs.c
>>> +++ b/drivers/accel/amdxdna/npu5_regs.c
>>> @@ -61,18 +61,12 @@
>>>   #define NPU5_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>>   #define NPU5_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>>   -#define NPU5_RT_CFG_TYPE_PDI_LOAD 5
>>> -#define NPU5_RT_CFG_VAL_PDI_LOAD_MGMT 0
>>> -#define NPU5_RT_CFG_VAL_PDI_LOAD_APP 1
>>> -
>>> -#define NPU5_MPNPUCLK_FREQ_MAX  1267
>>> -#define NPU5_HCLK_FREQ_MAX      1800
>>> -
>>>   const struct amdxdna_dev_priv npu5_dev_priv = {
>>>       .fw_path        = "amdnpu/17f0_11/npu.sbin",
>>>       .protocol_major = 0x6,
>>>       .protocol_minor = 0x1,
>>> -    .rt_config    = {NPU5_RT_CFG_TYPE_PDI_LOAD, 
>>> NPU5_RT_CFG_VAL_PDI_LOAD_APP},
>>> +    .rt_config    = npu4_default_rt_cfg,
>>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>>       .col_align    = COL_ALIGN_NATURE,
>>>       .mbox_dev_addr  = NPU5_MBOX_BAR_BASE,
>>>       .mbox_size      = 0, /* Use BAR size */
>>> @@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu5_dev_priv = {
>>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
>>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
>>>       },
>>> -    .smu_mpnpuclk_freq_max = NPU5_MPNPUCLK_FREQ_MAX,
>>> -    .smu_hclk_freq_max     = NPU5_HCLK_FREQ_MAX,
>>> +    .hw_ops        = {
>>> +        .set_dpm = npu4_set_dpm,
>>> +    },
>>>   };
>>>     const struct amdxdna_dev_info dev_npu5_info = {
>>> diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/ 
>>> amdxdna/npu6_regs.c
>>> index d1168fc55533..f46c760cefc7 100644
>>> --- a/drivers/accel/amdxdna/npu6_regs.c
>>> +++ b/drivers/accel/amdxdna/npu6_regs.c
>>> @@ -61,23 +61,12 @@
>>>   #define NPU6_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>>   #define NPU6_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>>   -#define NPU6_RT_CFG_TYPE_PDI_LOAD 5
>>> -#define NPU6_RT_CFG_TYPE_DEBUG_BO 10
>>> -
>>> -#define NPU6_RT_CFG_VAL_PDI_LOAD_MGMT 0
>>> -#define NPU6_RT_CFG_VAL_PDI_LOAD_APP 1
>>> -
>>> -#define NPU6_RT_CFG_VAL_DEBUG_BO_DEFAULT 0
>>> -#define NPU6_RT_CFG_VAL_DEBUG_BO_LARGE   1
>>> -
>>> -#define NPU6_MPNPUCLK_FREQ_MAX  1267
>>> -#define NPU6_HCLK_FREQ_MAX      1800
>>> -
>>>   const struct amdxdna_dev_priv npu6_dev_priv = {
>>>       .fw_path        = "amdnpu/17f0_10/npu.sbin",
>>>       .protocol_major = 0x6,
>>>       .protocol_minor = 12,
>>> -    .rt_config    = {NPU6_RT_CFG_TYPE_PDI_LOAD, 
>>> NPU6_RT_CFG_VAL_PDI_LOAD_APP},
>>> +    .rt_config    = npu4_default_rt_cfg,
>>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>>       .col_align    = COL_ALIGN_NATURE,
>>>       .mbox_dev_addr  = NPU6_MBOX_BAR_BASE,
>>>       .mbox_size      = 0, /* Use BAR size */
>>> @@ -102,6 +91,10 @@ const struct amdxdna_dev_priv npu6_dev_priv = {
>>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
>>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
>>>       },
>>> +    .hw_ops         = {
>>> +        .set_dpm = npu4_set_dpm,
>>> +    },
>>> +
>>>   };
>>>     const struct amdxdna_dev_info dev_npu6_info = {
>>> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/ 
>>> amdxdna_accel.h
>>> index 4f15e53a548d..9af9302baf90 100644
>>> --- a/include/uapi/drm/amdxdna_accel.h
>>> +++ b/include/uapi/drm/amdxdna_accel.h
>>> @@ -33,6 +33,7 @@ enum amdxdna_drm_ioctl_id {
>>>       DRM_AMDXDNA_SYNC_BO,
>>>       DRM_AMDXDNA_EXEC_CMD,
>>>       DRM_AMDXDNA_GET_INFO,
>>> +    DRM_AMDXDNA_SET_STATE,
>>>   };
>>>     /**
>>> @@ -375,6 +376,24 @@ struct amdxdna_drm_query_hwctx {
>>>       __u64 errors;
>>>   };
>>>   +enum amdxdna_power_mode_type {
>>> +    POWER_MODE_DEFAULT, /* Fallback to calculated DPM */
>>> +    POWER_MODE_LOW,     /* Set frequency to lowest DPM */
>>> +    POWER_MODE_MEDIUM,  /* Set frequency to medium DPM */
>>> +    POWER_MODE_HIGH,    /* Set frequency to highest DPM */
>>> +    POWER_MODE_TURBO,   /* Maximum power */
>>> +};
>>> +
>>> +/**
>>> + * struct amdxdna_drm_get_power_mode - Get the configured power mode
>>> + * @power_mode: The mode type from enum amdxdna_power_mode_type
>>> + * @pad: MBZ.
>>> + */
>>> +struct amdxdna_drm_get_power_mode {
>>> +    __u8 power_mode;
>>> +    __u8 pad[7];
>>> +};
>>> +
>>>   /**
>>>    * struct amdxdna_drm_query_firmware_version - Query the firmware 
>>> version
>>>    * @major: The major version number
>>> @@ -397,6 +416,7 @@ enum amdxdna_drm_get_param {
>>>       DRM_AMDXDNA_QUERY_SENSORS,
>>>       DRM_AMDXDNA_QUERY_HW_CONTEXTS,
>>>       DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
>>> +    DRM_AMDXDNA_GET_POWER_MODE,
>>>   };
>>>     /**
>>> @@ -411,6 +431,34 @@ struct amdxdna_drm_get_info {
>>>       __u64 buffer; /* in/out */
>>>   };
>>>   +enum amdxdna_drm_set_param {
>>> +    DRM_AMDXDNA_SET_POWER_MODE,
>>> +    DRM_AMDXDNA_WRITE_AIE_MEM,
>>> +    DRM_AMDXDNA_WRITE_AIE_REG,
>>> +};
>>> +
>>> +/**
>>> + * struct amdxdna_drm_set_state - Set the state of the AIE hardware.
>>> + * @param: Value in enum amdxdna_drm_set_param.
>>> + * @buffer_size: Size of the input param.
>>> + * @buffer: Input param.
>>> + */
>>> +struct amdxdna_drm_set_state {
>>> +    __u32 param; /* in */
>>> +    __u32 buffer_size; /* in */
>>> +    __u64 buffer; /* in */
>>> +};
>>> +
>>> +/**
>>> + * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE 
>>> hardware
>>> + * @power_mode: The sensor type from enum amdxdna_power_mode_type
>>> + * @pad: MBZ.
>>> + */
>>> +struct amdxdna_drm_set_power_mode {
>>> +    __u8 power_mode;
>>> +    __u8 pad[7];
>>> +};
>>> +
>>>   #define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \
>>>       DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \
>>>            struct amdxdna_drm_create_hwctx)
>>> @@ -443,6 +491,10 @@ struct amdxdna_drm_get_info {
>>>       DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, \
>>>            struct amdxdna_drm_get_info)
>>>   +#define DRM_IOCTL_AMDXDNA_SET_STATE \
>>> +    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \
>>> +         struct amdxdna_drm_set_state)
>>> +
>>>   #if defined(__cplusplus)
>>>   } /* extern c end */
>>>   #endif
>>


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 8/8] accel/amdxdna: Add include interrupt.h to amdxdna_mailbox.c
  2024-12-06 22:00 ` [PATCH V2 8/8] accel/amdxdna: Add include interrupt.h to amdxdna_mailbox.c Lizhi Hou
  2024-12-10 20:54   ` Mario Limonciello
@ 2024-12-13 16:20   ` Jeffrey Hugo
  2024-12-13 16:42     ` Lizhi Hou
  1 sibling, 1 reply; 33+ messages in thread
From: Jeffrey Hugo @ 2024-12-13 16:20 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello, Mike Lothian

On 12/6/2024 3:00 PM, Lizhi Hou wrote:
> For amdxdna_mailbox.c, linux/interrupt.h is indirectly included by
> trace/events/amdxdna.h. So if TRACING is disabled, driver compiling will
> fail.
> 
> Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
> Reported-by: Mike Lothian <mike@fireburn.co.uk>
> Closes: https://lore.kernel.org/dri-devel/CAHbf0-E+Z2O7rW-x+-EKNQ-nLbf=_ohaNzXxE7WD2cj9kFJERQ@mail.gmail.com/
> Signed-off-by: Mike Lothian <mike@fireburn.co.uk>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>

Slight problem with the SOB chain which prevents me from applying this.

Mike comes first, which indicates he authored this patch.  Then you, 
which says you took Mike's patch and are moving it forward.  However, 
since you sent it and it doesn't have "From: Mike", you get listed as 
author.

Either Mike needs to be the listed author, or a Co-developed-by tag for 
Mike needs to be listed immediately before his SOB.

With either of those:

Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 1/8] accel/amdxdna: Add device status for aie2 devices
  2024-12-06 21:59 ` [PATCH V2 1/8] accel/amdxdna: Add device status for aie2 devices Lizhi Hou
  2024-12-11  0:31   ` Mario Limonciello
@ 2024-12-13 16:31   ` Jeffrey Hugo
  1 sibling, 0 replies; 33+ messages in thread
From: Jeffrey Hugo @ 2024-12-13 16:31 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

On 12/6/2024 2:59 PM, Lizhi Hou wrote:
> Add device status to track if aie2_hw_start() or aie2_hw_stop() is
> re-entered. In aie2_hw_stop(), call drmm_kfree to free mbox.
> 
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>

Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 2/8] accel/amdxdna: Replace mmput with mmput_async to avoid dead lock
  2024-12-06 21:59 ` [PATCH V2 2/8] accel/amdxdna: Replace mmput with mmput_async to avoid dead lock Lizhi Hou
@ 2024-12-13 16:33   ` Jeffrey Hugo
  0 siblings, 0 replies; 33+ messages in thread
From: Jeffrey Hugo @ 2024-12-13 16:33 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

On 12/6/2024 2:59 PM, Lizhi Hou wrote:
> Hardware mailbox message receiving handler calls mmput to release the
> process mm. If the process has already exited, the mmput here may call mmu
> notifier handler, amdxdna_hmm_invalidate, which will cause a dead lock.
> Using mmput_async instead prevents this dead lock.
> 
> Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>

Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 3/8] accel/amdxdna: Add RyzenAI-npu6 support
  2024-12-06 21:59 ` [PATCH V2 3/8] accel/amdxdna: Add RyzenAI-npu6 support Lizhi Hou
  2024-12-11  0:30   ` Mario Limonciello
@ 2024-12-13 16:37   ` Jeffrey Hugo
  2024-12-13 16:47     ` Lizhi Hou
  1 sibling, 1 reply; 33+ messages in thread
From: Jeffrey Hugo @ 2024-12-13 16:37 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello, Xiaoming Ren

On 12/6/2024 2:59 PM, Lizhi Hou wrote:
> Add NPU6 registers and other private configurations.
> 
> Co-developed-by: Xiaoming Ren <xiaoming.ren@amd.com>
> Signed-off-by: Xiaoming Ren <xiaoming.ren@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
>   drivers/accel/amdxdna/Makefile    |   3 +-
>   drivers/accel/amdxdna/npu6_regs.c | 121 ++++++++++++++++++++++++++++++

This looks like dead code to me. I would expect somewhere else in the 
driver, dev_npu6_info would be used, but that is not the case.  What am 
I missing?

-Jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 8/8] accel/amdxdna: Add include interrupt.h to amdxdna_mailbox.c
  2024-12-13 16:20   ` Jeffrey Hugo
@ 2024-12-13 16:42     ` Lizhi Hou
  0 siblings, 0 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-13 16:42 UTC (permalink / raw)
  To: Jeffrey Hugo, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello, Mike Lothian


On 12/13/24 08:20, Jeffrey Hugo wrote:
> On 12/6/2024 3:00 PM, Lizhi Hou wrote:
>> For amdxdna_mailbox.c, linux/interrupt.h is indirectly included by
>> trace/events/amdxdna.h. So if TRACING is disabled, driver compiling will
>> fail.
>>
>> Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
>> Reported-by: Mike Lothian <mike@fireburn.co.uk>
>> Closes: 
>> https://lore.kernel.org/dri-devel/CAHbf0-E+Z2O7rW-x+-EKNQ-nLbf=_ohaNzXxE7WD2cj9kFJERQ@mail.gmail.com/
>> Signed-off-by: Mike Lothian <mike@fireburn.co.uk>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>
> Slight problem with the SOB chain which prevents me from applying this.
>
> Mike comes first, which indicates he authored this patch.  Then you, 
> which says you took Mike's patch and are moving it forward. However, 
> since you sent it and it doesn't have "From: Mike", you get listed as 
> author.
>
> Either Mike needs to be the listed author, or a Co-developed-by tag 
> for Mike needs to be listed immediately before his SOB.
>
> With either of those:
>
> Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>

Got it. I added "From: Mike" and created a standalone patch for this 
fix. 
https://lore.kernel.org/dri-devel/20241213163856.1472207-1-lizhi.hou@amd.com/


Thanks,

Lizhi


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 4/8] accel/amdxdna: Replace idr api with xarray
  2024-12-06 21:59 ` [PATCH V2 4/8] accel/amdxdna: Replace idr api with xarray Lizhi Hou
@ 2024-12-13 16:42   ` Jeffrey Hugo
  2024-12-13 16:50     ` Lizhi Hou
  0 siblings, 1 reply; 33+ messages in thread
From: Jeffrey Hugo @ 2024-12-13 16:42 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

On 12/6/2024 2:59 PM, Lizhi Hou wrote:
> Switch mailbox message id and hardware context id management over from
> the idr api to the xarray api.
> 
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>

Implementation looks sane, however you appear to be missing several 
instances of #include <linux/xarray.h>

-Jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 3/8] accel/amdxdna: Add RyzenAI-npu6 support
  2024-12-13 16:37   ` Jeffrey Hugo
@ 2024-12-13 16:47     ` Lizhi Hou
  0 siblings, 0 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-13 16:47 UTC (permalink / raw)
  To: Jeffrey Hugo, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello, Xiaoming Ren


On 12/13/24 08:37, Jeffrey Hugo wrote:
> On 12/6/2024 2:59 PM, Lizhi Hou wrote:
>> Add NPU6 registers and other private configurations.
>>
>> Co-developed-by: Xiaoming Ren <xiaoming.ren@amd.com>
>> Signed-off-by: Xiaoming Ren <xiaoming.ren@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>>   drivers/accel/amdxdna/Makefile    |   3 +-
>>   drivers/accel/amdxdna/npu6_regs.c | 121 ++++++++++++++++++++++++++++++
>
> This looks like dead code to me. I would expect somewhere else in the 
> driver, dev_npu6_info would be used, but that is not the case.  What 
> am I missing?

You are correct. I mis-merged one line of the patch in amdxdna_idx[].
Thanks a lot.


Lizhi

>
> -Jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 5/8] accel/amdxdna: Add query firmware version
  2024-12-06 21:59 ` [PATCH V2 5/8] accel/amdxdna: Add query firmware version Lizhi Hou
  2024-12-11  0:28   ` Mario Limonciello
@ 2024-12-13 16:48   ` Jeffrey Hugo
  1 sibling, 0 replies; 33+ messages in thread
From: Jeffrey Hugo @ 2024-12-13 16:48 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

On 12/6/2024 2:59 PM, Lizhi Hou wrote:
> Enhance GET_INFO ioctl to support retrieving firmware version.
> 
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>

Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>

Warning below, though.

>   enum amdxdna_drm_get_param {
>   	DRM_AMDXDNA_QUERY_AIE_STATUS,
>   	DRM_AMDXDNA_QUERY_AIE_METADATA,
> @@ -382,7 +396,7 @@ enum amdxdna_drm_get_param {
>   	DRM_AMDXDNA_QUERY_CLOCK_METADATA,
>   	DRM_AMDXDNA_QUERY_SENSORS,
>   	DRM_AMDXDNA_QUERY_HW_CONTEXTS,
> -	DRM_AMDXDNA_NUM_GET_PARAM,
> +	DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,

This would normally be considered breaking the uAPI, but since this 
driver is not yet in a released kernel, this is allowed. Keep this in 
mind for the future (can add things, but not remove them).

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 4/8] accel/amdxdna: Replace idr api with xarray
  2024-12-13 16:42   ` Jeffrey Hugo
@ 2024-12-13 16:50     ` Lizhi Hou
  0 siblings, 0 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-13 16:50 UTC (permalink / raw)
  To: Jeffrey Hugo, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello


On 12/13/24 08:42, Jeffrey Hugo wrote:
> On 12/6/2024 2:59 PM, Lizhi Hou wrote:
>> Switch mailbox message id and hardware context id management over from
>> the idr api to the xarray api.
>>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>
> Implementation looks sane, however you appear to be missing several 
> instances of #include <linux/xarray.h>

Sure. I will add them.


Thanks,

Lizhi

>
> -Jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 6/8] accel/amdxdna: Enhance power management settings
  2024-12-06 21:59 ` [PATCH V2 6/8] accel/amdxdna: Enhance power management settings Lizhi Hou
  2024-12-11  0:28   ` Mario Limonciello
@ 2024-12-13 16:55   ` Jeffrey Hugo
  2024-12-13 17:31     ` Lizhi Hou
  1 sibling, 1 reply; 33+ messages in thread
From: Jeffrey Hugo @ 2024-12-13 16:55 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello, Narendra Gutta, George Yang

On 12/6/2024 2:59 PM, Lizhi Hou wrote:
> +/**
> + * struct amdxdna_drm_get_power_mode - Get the configured power mode
> + * @power_mode: The mode type from enum amdxdna_power_mode_type
> + * @pad: MBZ.

I don't see a check for zero in the implementation

> + */
> +struct amdxdna_drm_get_power_mode {
> +	__u8 power_mode;
> +	__u8 pad[7];
> +};
> +
>   /**
>    * struct amdxdna_drm_query_firmware_version - Query the firmware version
>    * @major: The major version number
> @@ -397,6 +416,7 @@ enum amdxdna_drm_get_param {
>   	DRM_AMDXDNA_QUERY_SENSORS,
>   	DRM_AMDXDNA_QUERY_HW_CONTEXTS,
>   	DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
> +	DRM_AMDXDNA_GET_POWER_MODE,
>   };
>   
>   /**
> @@ -411,6 +431,34 @@ struct amdxdna_drm_get_info {
>   	__u64 buffer; /* in/out */
>   };
>   
> +enum amdxdna_drm_set_param {
> +	DRM_AMDXDNA_SET_POWER_MODE,
> +	DRM_AMDXDNA_WRITE_AIE_MEM,
> +	DRM_AMDXDNA_WRITE_AIE_REG,
> +};
> +
> +/**
> + * struct amdxdna_drm_set_state - Set the state of the AIE hardware.
> + * @param: Value in enum amdxdna_drm_set_param.
> + * @buffer_size: Size of the input param.
> + * @buffer: Input param.

Is this a pointer address?  Maybe clarify that?

> + */
> +struct amdxdna_drm_set_state {
> +	__u32 param; /* in */
> +	__u32 buffer_size; /* in */
> +	__u64 buffer; /* in */
> +};
> +
> +/**
> + * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE hardware
> + * @power_mode: The sensor type from enum amdxdna_power_mode_type
> + * @pad: MBZ.

I don't see a check for zero in the implementation.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 7/8] accel/amdxdna: Read firmware interface version from registers
  2024-12-06 22:00 ` [PATCH V2 7/8] accel/amdxdna: Read firmware interface version from registers Lizhi Hou
  2024-12-11  0:20   ` Mario Limonciello
@ 2024-12-13 16:58   ` Jeffrey Hugo
  2024-12-13 17:02     ` Lizhi Hou
  1 sibling, 1 reply; 33+ messages in thread
From: Jeffrey Hugo @ 2024-12-13 16:58 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

On 12/6/2024 3:00 PM, Lizhi Hou wrote:
> The latest released firmware supports reading firmware interface version
> from registers directly. The driver's probe routine reads the major and
> minor version numbers. If the firmware interface is not compatible with
> the driver, the driver's probe routine returns failure.
> 
> Co-developed-by: Min Ma <min.ma@amd.com>
> Signed-off-by: Min Ma <min.ma@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
>   drivers/accel/amdxdna/aie2_message.c | 26 ----------
>   drivers/accel/amdxdna/aie2_pci.c     | 74 ++++++++++++++++++++++------
>   drivers/accel/amdxdna/aie2_pci.h     |  6 +--
>   drivers/accel/amdxdna/npu1_regs.c    |  2 +-
>   drivers/accel/amdxdna/npu2_regs.c    |  2 +-
>   drivers/accel/amdxdna/npu4_regs.c    |  2 +-
>   drivers/accel/amdxdna/npu5_regs.c    |  2 +-

Do you need an update to npu6_regs?

-Jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 0/8] AMD NPU driver improvements
  2024-12-06 21:59 [PATCH V2 0/8] AMD NPU driver improvements Lizhi Hou
                   ` (7 preceding siblings ...)
  2024-12-06 22:00 ` [PATCH V2 8/8] accel/amdxdna: Add include interrupt.h to amdxdna_mailbox.c Lizhi Hou
@ 2024-12-13 17:00 ` Jeffrey Hugo
  8 siblings, 0 replies; 33+ messages in thread
From: Jeffrey Hugo @ 2024-12-13 17:00 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

On 12/6/2024 2:59 PM, Lizhi Hou wrote:
> Add recent improvements and bug fixes for amdxdna driver (depends on [1])
> 1. Support recent hardware and firmware.
> 2. Replace idr APIs with xarray.
> 3. Fix the bugs been found.
> 
> [1]: https://lore.kernel.org/all/20241118172942.2014541-1-lizhi.hou@amd.com/
> 
> Changes since v1:
> - Add one patch to fix possible compiling failure
> - Minor fixes for code review comments

Patch 1 and 2 applied to drm-misc-next.  You can drop them from the 
series when you post v3.

-Jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 7/8] accel/amdxdna: Read firmware interface version from registers
  2024-12-13 16:58   ` Jeffrey Hugo
@ 2024-12-13 17:02     ` Lizhi Hou
  2024-12-13 17:11       ` Jeffrey Hugo
  0 siblings, 1 reply; 33+ messages in thread
From: Lizhi Hou @ 2024-12-13 17:02 UTC (permalink / raw)
  To: Jeffrey Hugo, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello


On 12/13/24 08:58, Jeffrey Hugo wrote:
> On 12/6/2024 3:00 PM, Lizhi Hou wrote:
>> The latest released firmware supports reading firmware interface version
>> from registers directly. The driver's probe routine reads the major and
>> minor version numbers. If the firmware interface is not compatible 
>> with
>> the driver, the driver's probe routine returns failure.
>>
>> Co-developed-by: Min Ma <min.ma@amd.com>
>> Signed-off-by: Min Ma <min.ma@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>>   drivers/accel/amdxdna/aie2_message.c | 26 ----------
>>   drivers/accel/amdxdna/aie2_pci.c     | 74 ++++++++++++++++++++++------
>>   drivers/accel/amdxdna/aie2_pci.h     |  6 +--
>>   drivers/accel/amdxdna/npu1_regs.c    |  2 +-
>>   drivers/accel/amdxdna/npu2_regs.c    |  2 +-
>>   drivers/accel/amdxdna/npu4_regs.c    |  2 +-
>>   drivers/accel/amdxdna/npu5_regs.c    |  2 +-
>
> Do you need an update to npu6_regs?

npu6_regs is added with upstream fw minor already. Thus, it does not 
need an update.


Lizhi

>
> -Jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 7/8] accel/amdxdna: Read firmware interface version from registers
  2024-12-13 17:02     ` Lizhi Hou
@ 2024-12-13 17:11       ` Jeffrey Hugo
  0 siblings, 0 replies; 33+ messages in thread
From: Jeffrey Hugo @ 2024-12-13 17:11 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello

On 12/13/2024 10:02 AM, Lizhi Hou wrote:
> 
> On 12/13/24 08:58, Jeffrey Hugo wrote:
>> On 12/6/2024 3:00 PM, Lizhi Hou wrote:
>>> The latest released firmware supports reading firmware interface version
>>> from registers directly. The driver's probe routine reads the major and
>>> minor version numbers. If the firmware interface is not compatible 
>>> with
>>> the driver, the driver's probe routine returns failure.
>>>
>>> Co-developed-by: Min Ma <min.ma@amd.com>
>>> Signed-off-by: Min Ma <min.ma@amd.com>
>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>> ---
>>>   drivers/accel/amdxdna/aie2_message.c | 26 ----------
>>>   drivers/accel/amdxdna/aie2_pci.c     | 74 ++++++++++++++++++++++------
>>>   drivers/accel/amdxdna/aie2_pci.h     |  6 +--
>>>   drivers/accel/amdxdna/npu1_regs.c    |  2 +-
>>>   drivers/accel/amdxdna/npu2_regs.c    |  2 +-
>>>   drivers/accel/amdxdna/npu4_regs.c    |  2 +-
>>>   drivers/accel/amdxdna/npu5_regs.c    |  2 +-
>>
>> Do you need an update to npu6_regs?
> 
> npu6_regs is added with upstream fw minor already. Thus, it does not 
> need an update.

Ok.

Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>

I'm expecting to see a v3 based on the other review feedback. Let me 
know if this is incorrect.

-Jeff


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH V2 6/8] accel/amdxdna: Enhance power management settings
  2024-12-13 16:55   ` Jeffrey Hugo
@ 2024-12-13 17:31     ` Lizhi Hou
  0 siblings, 0 replies; 33+ messages in thread
From: Lizhi Hou @ 2024-12-13 17:31 UTC (permalink / raw)
  To: Jeffrey Hugo, ogabbay, dri-devel
  Cc: linux-kernel, min.ma, max.zhen, sonal.santan, king.tam,
	mario.limonciello, Narendra Gutta, George Yang


On 12/13/24 08:55, Jeffrey Hugo wrote:
> On 12/6/2024 2:59 PM, Lizhi Hou wrote:
>> +/**
>> + * struct amdxdna_drm_get_power_mode - Get the configured power mode
>> + * @power_mode: The mode type from enum amdxdna_power_mode_type
>> + * @pad: MBZ.
>
> I don't see a check for zero in the implementation
We discussed the 'pad' field in uapi structures. Because the driver is 
not released yet, we would create a patch to change all 'pad' to MBZ and 
check them in the ioctl.
>
>> + */
>> +struct amdxdna_drm_get_power_mode {
>> +    __u8 power_mode;
>> +    __u8 pad[7];
>> +};
>> +
>>   /**
>>    * struct amdxdna_drm_query_firmware_version - Query the firmware 
>> version
>>    * @major: The major version number
>> @@ -397,6 +416,7 @@ enum amdxdna_drm_get_param {
>>       DRM_AMDXDNA_QUERY_SENSORS,
>>       DRM_AMDXDNA_QUERY_HW_CONTEXTS,
>>       DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
>> +    DRM_AMDXDNA_GET_POWER_MODE,
>>   };
>>     /**
>> @@ -411,6 +431,34 @@ struct amdxdna_drm_get_info {
>>       __u64 buffer; /* in/out */
>>   };
>>   +enum amdxdna_drm_set_param {
>> +    DRM_AMDXDNA_SET_POWER_MODE,
>> +    DRM_AMDXDNA_WRITE_AIE_MEM,
>> +    DRM_AMDXDNA_WRITE_AIE_REG,
>> +};
>> +
>> +/**
>> + * struct amdxdna_drm_set_state - Set the state of the AIE hardware.
>> + * @param: Value in enum amdxdna_drm_set_param.
>> + * @buffer_size: Size of the input param.
>> + * @buffer: Input param.
>
> Is this a pointer address?  Maybe clarify that?

Yes, it is. I will add comment for it.


Thanks,

Lizhi

>
>> + */
>> +struct amdxdna_drm_set_state {
>> +    __u32 param; /* in */
>> +    __u32 buffer_size; /* in */
>> +    __u64 buffer; /* in */
>> +};
>> +
>> +/**
>> + * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE 
>> hardware
>> + * @power_mode: The sensor type from enum amdxdna_power_mode_type
>> + * @pad: MBZ.
>
> I don't see a check for zero in the implementation.
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2024-12-13 17:31 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-06 21:59 [PATCH V2 0/8] AMD NPU driver improvements Lizhi Hou
2024-12-06 21:59 ` [PATCH V2 1/8] accel/amdxdna: Add device status for aie2 devices Lizhi Hou
2024-12-11  0:31   ` Mario Limonciello
2024-12-13 16:31   ` Jeffrey Hugo
2024-12-06 21:59 ` [PATCH V2 2/8] accel/amdxdna: Replace mmput with mmput_async to avoid dead lock Lizhi Hou
2024-12-13 16:33   ` Jeffrey Hugo
2024-12-06 21:59 ` [PATCH V2 3/8] accel/amdxdna: Add RyzenAI-npu6 support Lizhi Hou
2024-12-11  0:30   ` Mario Limonciello
2024-12-13 16:37   ` Jeffrey Hugo
2024-12-13 16:47     ` Lizhi Hou
2024-12-06 21:59 ` [PATCH V2 4/8] accel/amdxdna: Replace idr api with xarray Lizhi Hou
2024-12-13 16:42   ` Jeffrey Hugo
2024-12-13 16:50     ` Lizhi Hou
2024-12-06 21:59 ` [PATCH V2 5/8] accel/amdxdna: Add query firmware version Lizhi Hou
2024-12-11  0:28   ` Mario Limonciello
2024-12-13 16:48   ` Jeffrey Hugo
2024-12-06 21:59 ` [PATCH V2 6/8] accel/amdxdna: Enhance power management settings Lizhi Hou
2024-12-11  0:28   ` Mario Limonciello
2024-12-11  5:28     ` Lizhi Hou
2024-12-11 20:55       ` Mario Limonciello
2024-12-13 16:55   ` Jeffrey Hugo
2024-12-13 17:31     ` Lizhi Hou
2024-12-06 22:00 ` [PATCH V2 7/8] accel/amdxdna: Read firmware interface version from registers Lizhi Hou
2024-12-11  0:20   ` Mario Limonciello
2024-12-11  5:32     ` Lizhi Hou
2024-12-13 16:58   ` Jeffrey Hugo
2024-12-13 17:02     ` Lizhi Hou
2024-12-13 17:11       ` Jeffrey Hugo
2024-12-06 22:00 ` [PATCH V2 8/8] accel/amdxdna: Add include interrupt.h to amdxdna_mailbox.c Lizhi Hou
2024-12-10 20:54   ` Mario Limonciello
2024-12-13 16:20   ` Jeffrey Hugo
2024-12-13 16:42     ` Lizhi Hou
2024-12-13 17:00 ` [PATCH V2 0/8] AMD NPU driver improvements Jeffrey Hugo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox