[PATCH] accel/ivpu: Add turbo flag to the DRM_IVPU_CMDQ

dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] accel/ivpu: Add turbo flag to the DRM_IVPU_CMDQ_CREATE ioctl
@ 2025-06-05 16:20 Maciej Falkowski
  2025-06-06 16:30 ` Jeff Hugo
  2025-06-16 11:39 ` Jacek Lawrynowicz
  0 siblings, 2 replies; 6+ messages in thread
From: Maciej Falkowski @ 2025-06-05 16:20 UTC (permalink / raw)
  To: dri-devel
  Cc: oded.gabbay, jeff.hugo, jacek.lawrynowicz, lizhi.hou,
	Andrzej Kacprowski, Maciej Falkowski

From: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>

Introduces a new parameter to the DRM_IVPU_CMDQ_CREATE ioctl,
enabling turbo mode for jobs submitted via the command queue.
Turbo mode allows jobs to run at higher frequencies,
potentially improving performance for demanding workloads.

The change also adds the IVPU_TEST_MODE_TURBO_DISABLE flag
to allow test mode to explicitly disable turbo mode
requested by the application.
The IVPU_TEST_MODE_TURBO mode has been renamed to
IVPU_TEST_MODE_TURBO_ENABLE for clarity and consistency.

Signed-off-by: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
---
 drivers/accel/ivpu/ivpu_drv.h | 11 ++---
 drivers/accel/ivpu/ivpu_job.c | 81 +++++++++++++++++++++++------------
 include/uapi/drm/ivpu_accel.h | 14 ++++++
 3 files changed, 73 insertions(+), 33 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 17aa3532c76d..62ab1c654e63 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
 #ifndef __IVPU_DRV_H__
@@ -209,10 +209,11 @@ extern bool ivpu_force_snoop;
 #define IVPU_TEST_MODE_D0I3_MSG_ENABLE    BIT(5)
 #define IVPU_TEST_MODE_MIP_DISABLE        BIT(6)
 #define IVPU_TEST_MODE_DISABLE_TIMEOUTS   BIT(8)
-#define IVPU_TEST_MODE_TURBO		  BIT(9)
-#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(10)
-#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE  BIT(11)
-#define IVPU_TEST_MODE_D0I2_DISABLE       BIT(12)
+#define IVPU_TEST_MODE_TURBO_ENABLE       BIT(9)
+#define IVPU_TEST_MODE_TURBO_DISABLE      BIT(10)
+#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(11)
+#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE  BIT(12)
+#define IVPU_TEST_MODE_D0I2_DISABLE       BIT(13)
 extern int ivpu_test_mode;
 
 struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index fae8351aa330..060f1fc031d3 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
 #include <drm/drm_file.h>
@@ -100,6 +100,43 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
 	return NULL;
 }
 
+/**
+ * ivpu_cmdq_get_entry_count - Calculate the number of entries in the command queue.
+ * @cmdq: Pointer to the command queue structure.
+ *
+ * Returns the number of entries that can fit in the command queue memory.
+ */
+static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
+{
+	size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);
+
+	return size / sizeof(struct vpu_job_queue_entry);
+}
+
+/**
+ * ivpu_cmdq_get_flags - Get command queue flags based on input flags and test mode.
+ * @vdev: Pointer to the ivpu device structure.
+ * @flags: Input flags to determine the command queue flags.
+ *
+ * Returns the calculated command queue flags, considering both the input flags
+ * and the current test mode settings.
+ */
+static u32 ivpu_cmdq_get_flags(struct ivpu_device *vdev, u32 flags)
+{
+	u32 cmdq_flags = 0;
+
+	if ((flags & DRM_IVPU_CMDQ_FLAG_TURBO) && (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX))
+		cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+
+	/* Test mode can override the TURBO flag coming from the application */
+	if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_ENABLE)
+		cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+	if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_DISABLE)
+		cmdq_flags &= ~VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+
+	return cmdq_flags;
+}
+
 static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
 {
 	ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq);
@@ -107,8 +144,7 @@ static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *c
 	kfree(cmdq);
 }
 
-static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority,
-					  bool is_legacy)
+static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority, u32 flags)
 {
 	struct ivpu_device *vdev = file_priv->vdev;
 	struct ivpu_cmdq *cmdq = NULL;
@@ -121,10 +157,6 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 p
 		ivpu_err(vdev, "Failed to allocate command queue\n");
 		return NULL;
 	}
-
-	cmdq->priority = priority;
-	cmdq->is_legacy = is_legacy;
-
 	ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit,
 			      &file_priv->cmdq_id_next, GFP_KERNEL);
 	if (ret < 0) {
@@ -132,7 +164,15 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 p
 		goto err_free_cmdq;
 	}
 
-	ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d\n", cmdq->id, file_priv->ctx.id);
+	cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
+	cmdq->priority = priority;
+
+	cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
+	cmdq->jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
+	cmdq->jobq->header.flags = ivpu_cmdq_get_flags(vdev, flags);
+
+	ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d, flags 0x%08x\n",
+		 cmdq->id, file_priv->ctx.id, cmdq->jobq->header.flags);
 	return cmdq;
 
 err_free_cmdq:
@@ -188,27 +228,14 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *
 	return ret;
 }
 
-static void ivpu_cmdq_jobq_init(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
+static void ivpu_cmdq_jobq_reset(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
 {
-	jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
 	jobq->header.head = 0;
 	jobq->header.tail = 0;
 
-	if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) {
-		ivpu_dbg(vdev, JOB, "Turbo mode enabled");
-		jobq->header.flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
-	}
-
 	wmb(); /* Flush WC buffer for jobq->header */
 }
 
-static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
-{
-	size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);
-
-	return size / sizeof(struct vpu_job_queue_entry);
-}
-
 static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
 {
 	struct ivpu_device *vdev = file_priv->vdev;
@@ -219,10 +246,7 @@ static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq
 	if (cmdq->db_id)
 		return 0;
 
-	cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
-	cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
-
-	ivpu_cmdq_jobq_init(vdev, cmdq->jobq);
+	ivpu_cmdq_jobq_reset(vdev, cmdq->jobq);
 
 	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
 		ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, cmdq->priority);
@@ -291,9 +315,10 @@ static struct ivpu_cmdq *ivpu_cmdq_acquire_legacy(struct ivpu_file_priv *file_pr
 			break;
 
 	if (!cmdq) {
-		cmdq = ivpu_cmdq_create(file_priv, priority, true);
+		cmdq = ivpu_cmdq_create(file_priv, priority, 0);
 		if (!cmdq)
 			return NULL;
+		cmdq->is_legacy = true;
 	}
 
 	return cmdq;
@@ -891,7 +916,7 @@ int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *
 
 	mutex_lock(&file_priv->lock);
 
-	cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false);
+	cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), args->flags);
 	if (cmdq)
 		args->cmdq_id = cmdq->id;
 
diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h
index 2f24103f4533..160ee1411d4a 100644
--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h
@@ -445,6 +445,9 @@ struct drm_ivpu_metric_streamer_get_data {
 	__u64 data_size;
 };
 
+/* Command queue flags */
+#define DRM_IVPU_CMDQ_FLAG_TURBO 0x00000001
+
 /**
  * struct drm_ivpu_cmdq_create - Create command queue for job submission
  */
@@ -462,6 +465,17 @@ struct drm_ivpu_cmdq_create {
 	 * %DRM_IVPU_JOB_PRIORITY_REALTIME
 	 */
 	__u32 priority;
+	/**
+	 * @flags:
+	 *
+	 * Supported flags:
+	 *
+	 * %DRM_IVPU_CMDQ_FLAG_TURBO
+	 *
+	 * Enable low-latency mode for the command queue. The NPU will maximize performance
+	 * when executing jobs from such queue at the cost of increased power usage.
+	 */
+	__u32 flags;
 };
 
 /**
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] accel/ivpu: Add turbo flag to the DRM_IVPU_CMDQ_CREATE ioctl
  2025-06-05 16:20 [PATCH] accel/ivpu: Add turbo flag to the DRM_IVPU_CMDQ_CREATE ioctl Maciej Falkowski
@ 2025-06-06 16:30 ` Jeff Hugo
  2025-06-12 13:31   ` Falkowski, Maciej
  2025-06-16 11:39 ` Jacek Lawrynowicz
  1 sibling, 1 reply; 6+ messages in thread
From: Jeff Hugo @ 2025-06-06 16:30 UTC (permalink / raw)
  To: Maciej Falkowski, dri-devel
  Cc: oded.gabbay, jacek.lawrynowicz, lizhi.hou, Andrzej Kacprowski

On 6/5/2025 10:20 AM, Maciej Falkowski wrote:
> From: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
> 
> Introduces a new parameter to the DRM_IVPU_CMDQ_CREATE ioctl,

Introduce

> enabling turbo mode for jobs submitted via the command queue.
> Turbo mode allows jobs to run at higher frequencies,
> potentially improving performance for demanding workloads.
> 
> The change also adds the IVPU_TEST_MODE_TURBO_DISABLE flag

"This change" is redundant. Just start with "Also add the..."

> to allow test mode to explicitly disable turbo mode
> requested by the application.
> The IVPU_TEST_MODE_TURBO mode has been renamed to
> IVPU_TEST_MODE_TURBO_ENABLE for clarity and consistency.
> 
> +/* Command queue flags */
> +#define DRM_IVPU_CMDQ_FLAG_TURBO 0x00000001
> +
>   /**
>    * struct drm_ivpu_cmdq_create - Create command queue for job submission
>    */
> @@ -462,6 +465,17 @@ struct drm_ivpu_cmdq_create {
>   	 * %DRM_IVPU_JOB_PRIORITY_REALTIME
>   	 */
>   	__u32 priority;
> +	/**
> +	 * @flags:
> +	 *
> +	 * Supported flags:
> +	 *
> +	 * %DRM_IVPU_CMDQ_FLAG_TURBO
> +	 *
> +	 * Enable low-latency mode for the command queue. The NPU will maximize performance
> +	 * when executing jobs from such queue at the cost of increased power usage.
> +	 */
> +	__u32 flags;

This is going to break the struct size on compat.  You probably need a 
__u32 reserved to maintain 64-bit alignment.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] accel/ivpu: Add turbo flag to the DRM_IVPU_CMDQ_CREATE ioctl
  2025-06-06 16:30 ` Jeff Hugo
@ 2025-06-12 13:31   ` Falkowski, Maciej
  2025-06-12 13:42     ` Jeff Hugo
  0 siblings, 1 reply; 6+ messages in thread
From: Falkowski, Maciej @ 2025-06-12 13:31 UTC (permalink / raw)
  To: Jeff Hugo, dri-devel
  Cc: oded.gabbay, jacek.lawrynowicz, lizhi.hou, Andrzej Kacprowski

On 6/6/2025 6:30 PM, Jeff Hugo wrote:

> On 6/5/2025 10:20 AM, Maciej Falkowski wrote:
>> From: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
>>
>> Introduces a new parameter to the DRM_IVPU_CMDQ_CREATE ioctl,
>
> Introduce
Ack, thanks.
>
>> enabling turbo mode for jobs submitted via the command queue.
>> Turbo mode allows jobs to run at higher frequencies,
>> potentially improving performance for demanding workloads.
>>
>> The change also adds the IVPU_TEST_MODE_TURBO_DISABLE flag
>
> "This change" is redundant. Just start with "Also add the..."
Ack, thanks.
>
>> to allow test mode to explicitly disable turbo mode
>> requested by the application.
>> The IVPU_TEST_MODE_TURBO mode has been renamed to
>> IVPU_TEST_MODE_TURBO_ENABLE for clarity and consistency.
>>
>> +/* Command queue flags */
>> +#define DRM_IVPU_CMDQ_FLAG_TURBO 0x00000001
>> +
>>   /**
>>    * struct drm_ivpu_cmdq_create - Create command queue for job 
>> submission
>>    */
>> @@ -462,6 +465,17 @@ struct drm_ivpu_cmdq_create {
>>        * %DRM_IVPU_JOB_PRIORITY_REALTIME
>>        */
>>       __u32 priority;
>> +    /**
>> +     * @flags:
>> +     *
>> +     * Supported flags:
>> +     *
>> +     * %DRM_IVPU_CMDQ_FLAG_TURBO
>> +     *
>> +     * Enable low-latency mode for the command queue. The NPU will 
>> maximize performance
>> +     * when executing jobs from such queue at the cost of increased 
>> power usage.
>> +     */
>> +    __u32 flags;
>
> This is going to break the struct size on compat.  You probably need a 
> __u32 reserved to maintain 64-bit alignment. 

Thank you for suggestion,
I think compat is preserved here as u32 imposes 4 byte alignment on 64bit
so the alignment is going to be 12 bytes on both 32bit and 64bit, I 
tested this manually.
Please correct me if I am wrong.

Best regards,
Maciej

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] accel/ivpu: Add turbo flag to the DRM_IVPU_CMDQ_CREATE ioctl
  2025-06-12 13:31   ` Falkowski, Maciej
@ 2025-06-12 13:42     ` Jeff Hugo
  2025-06-13  8:01       ` Jacek Lawrynowicz
  0 siblings, 1 reply; 6+ messages in thread
From: Jeff Hugo @ 2025-06-12 13:42 UTC (permalink / raw)
  To: Falkowski, Maciej, dri-devel
  Cc: oded.gabbay, jacek.lawrynowicz, lizhi.hou, Andrzej Kacprowski

On 6/12/2025 7:31 AM, Falkowski, Maciej wrote:
> On 6/6/2025 6:30 PM, Jeff Hugo wrote:
> 
>> On 6/5/2025 10:20 AM, Maciej Falkowski wrote:
>>> From: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
>>>
>>> Introduces a new parameter to the DRM_IVPU_CMDQ_CREATE ioctl,
>>
>> Introduce
> Ack, thanks.
>>
>>> enabling turbo mode for jobs submitted via the command queue.
>>> Turbo mode allows jobs to run at higher frequencies,
>>> potentially improving performance for demanding workloads.
>>>
>>> The change also adds the IVPU_TEST_MODE_TURBO_DISABLE flag
>>
>> "This change" is redundant. Just start with "Also add the..."
> Ack, thanks.
>>
>>> to allow test mode to explicitly disable turbo mode
>>> requested by the application.
>>> The IVPU_TEST_MODE_TURBO mode has been renamed to
>>> IVPU_TEST_MODE_TURBO_ENABLE for clarity and consistency.
>>>
>>> +/* Command queue flags */
>>> +#define DRM_IVPU_CMDQ_FLAG_TURBO 0x00000001
>>> +
>>>   /**
>>>    * struct drm_ivpu_cmdq_create - Create command queue for job 
>>> submission
>>>    */
>>> @@ -462,6 +465,17 @@ struct drm_ivpu_cmdq_create {
>>>        * %DRM_IVPU_JOB_PRIORITY_REALTIME
>>>        */
>>>       __u32 priority;
>>> +    /**
>>> +     * @flags:
>>> +     *
>>> +     * Supported flags:
>>> +     *
>>> +     * %DRM_IVPU_CMDQ_FLAG_TURBO
>>> +     *
>>> +     * Enable low-latency mode for the command queue. The NPU will 
>>> maximize performance
>>> +     * when executing jobs from such queue at the cost of increased 
>>> power usage.
>>> +     */
>>> +    __u32 flags;
>>
>> This is going to break the struct size on compat.  You probably need a 
>> __u32 reserved to maintain 64-bit alignment. 
> 
> Thank you for suggestion,
> I think compat is preserved here as u32 imposes 4 byte alignment on 64bit
> so the alignment is going to be 12 bytes on both 32bit and 64bit, I 
> tested this manually.
> Please correct me if I am wrong.

Looks like I'm wrong.  Majority of the structures have 64-bit values, 
and I didn't clearly see that this specific one is only 32-bit values.

My initial comment was based on 
https://docs.kernel.org/process/botching-up-ioctls.html - specifically:

Pad the entire struct to a multiple of 64-bits if the structure contains 
64-bit types - the structure size will otherwise differ on 32-bit versus 
64-bit. Having a different structure size hurts when passing arrays of 
structures to the kernel, or if the kernel checks the structure size, 
which e.g. the drm core does.

Ok. This was the only functional comment, and it is resolved. The other 
two are trivial fixups, so I think with those -

Reviewed-by: Jeff Hugo <jeff.hugo@oss.qualcomm.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] accel/ivpu: Add turbo flag to the DRM_IVPU_CMDQ_CREATE ioctl
  2025-06-12 13:42     ` Jeff Hugo
@ 2025-06-13  8:01       ` Jacek Lawrynowicz
  0 siblings, 0 replies; 6+ messages in thread
From: Jacek Lawrynowicz @ 2025-06-13  8:01 UTC (permalink / raw)
  To: Jeff Hugo, Falkowski, Maciej, dri-devel
  Cc: oded.gabbay, lizhi.hou, Andrzej Kacprowski

Hi,

On 6/12/2025 3:42 PM, Jeff Hugo wrote:
> On 6/12/2025 7:31 AM, Falkowski, Maciej wrote:
>> On 6/6/2025 6:30 PM, Jeff Hugo wrote:
>>
>>> On 6/5/2025 10:20 AM, Maciej Falkowski wrote:
>>>> From: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
>>>>
>>>> Introduces a new parameter to the DRM_IVPU_CMDQ_CREATE ioctl,
>>>
>>> Introduce
>> Ack, thanks.
>>>
>>>> enabling turbo mode for jobs submitted via the command queue.
>>>> Turbo mode allows jobs to run at higher frequencies,
>>>> potentially improving performance for demanding workloads.
>>>>
>>>> The change also adds the IVPU_TEST_MODE_TURBO_DISABLE flag
>>>
>>> "This change" is redundant. Just start with "Also add the..."
>> Ack, thanks.
>>>
>>>> to allow test mode to explicitly disable turbo mode
>>>> requested by the application.
>>>> The IVPU_TEST_MODE_TURBO mode has been renamed to
>>>> IVPU_TEST_MODE_TURBO_ENABLE for clarity and consistency.
>>>>
>>>> +/* Command queue flags */
>>>> +#define DRM_IVPU_CMDQ_FLAG_TURBO 0x00000001
>>>> +
>>>>   /**
>>>>    * struct drm_ivpu_cmdq_create - Create command queue for job submission
>>>>    */
>>>> @@ -462,6 +465,17 @@ struct drm_ivpu_cmdq_create {
>>>>        * %DRM_IVPU_JOB_PRIORITY_REALTIME
>>>>        */
>>>>       __u32 priority;
>>>> +    /**
>>>> +     * @flags:
>>>> +     *
>>>> +     * Supported flags:
>>>> +     *
>>>> +     * %DRM_IVPU_CMDQ_FLAG_TURBO
>>>> +     *
>>>> +     * Enable low-latency mode for the command queue. The NPU will maximize performance
>>>> +     * when executing jobs from such queue at the cost of increased power usage.
>>>> +     */
>>>> +    __u32 flags;
>>>
>>> This is going to break the struct size on compat.  You probably need a __u32 reserved to maintain 64-bit alignment. 
>>
>> Thank you for suggestion,
>> I think compat is preserved here as u32 imposes 4 byte alignment on 64bit
>> so the alignment is going to be 12 bytes on both 32bit and 64bit, I tested this manually.
>> Please correct me if I am wrong.
> 
> Looks like I'm wrong.  Majority of the structures have 64-bit values, and I didn't clearly see that this specific one is only 32-bit values.
> 
> My initial comment was based on https://docs.kernel.org/process/botching-up-ioctls.html - specifically:
> 
> Pad the entire struct to a multiple of 64-bits if the structure contains 64-bit types - the structure size will otherwise differ on 32-bit versus 64-bit. Having a different structure size hurts when passing arrays of structures to the kernel, or if the kernel checks the structure size, which e.g. the drm core does.
> 
> Ok. This was the only functional comment, and it is resolved. The other two are trivial fixups, so I think with those -
> 
> Reviewed-by: Jeff Hugo <jeff.hugo@oss.qualcomm.com>

Thanks for pointing this out anyway. It turns out we have alignment problems with couple other structures in UAPI.
We will send a separate fix for these.

Regards,
Jacek

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] accel/ivpu: Add turbo flag to the DRM_IVPU_CMDQ_CREATE ioctl
  2025-06-05 16:20 [PATCH] accel/ivpu: Add turbo flag to the DRM_IVPU_CMDQ_CREATE ioctl Maciej Falkowski
  2025-06-06 16:30 ` Jeff Hugo
@ 2025-06-16 11:39 ` Jacek Lawrynowicz
  1 sibling, 0 replies; 6+ messages in thread
From: Jacek Lawrynowicz @ 2025-06-16 11:39 UTC (permalink / raw)
  To: Maciej Falkowski, dri-devel
  Cc: oded.gabbay, jeff.hugo, lizhi.hou, Andrzej Kacprowski

Applied to drm-misc-next

On 6/5/2025 6:20 PM, Maciej Falkowski wrote:
> From: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
> 
> Introduces a new parameter to the DRM_IVPU_CMDQ_CREATE ioctl,
> enabling turbo mode for jobs submitted via the command queue.
> Turbo mode allows jobs to run at higher frequencies,
> potentially improving performance for demanding workloads.
> 
> The change also adds the IVPU_TEST_MODE_TURBO_DISABLE flag
> to allow test mode to explicitly disable turbo mode
> requested by the application.
> The IVPU_TEST_MODE_TURBO mode has been renamed to
> IVPU_TEST_MODE_TURBO_ENABLE for clarity and consistency.
> 
> Signed-off-by: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
> Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
> ---
>  drivers/accel/ivpu/ivpu_drv.h | 11 ++---
>  drivers/accel/ivpu/ivpu_job.c | 81 +++++++++++++++++++++++------------
>  include/uapi/drm/ivpu_accel.h | 14 ++++++
>  3 files changed, 73 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
> index 17aa3532c76d..62ab1c654e63 100644
> --- a/drivers/accel/ivpu/ivpu_drv.h
> +++ b/drivers/accel/ivpu/ivpu_drv.h
> @@ -1,6 +1,6 @@
>  /* SPDX-License-Identifier: GPL-2.0-only */
>  /*
> - * Copyright (C) 2020-2024 Intel Corporation
> + * Copyright (C) 2020-2025 Intel Corporation
>   */
>  
>  #ifndef __IVPU_DRV_H__
> @@ -209,10 +209,11 @@ extern bool ivpu_force_snoop;
>  #define IVPU_TEST_MODE_D0I3_MSG_ENABLE    BIT(5)
>  #define IVPU_TEST_MODE_MIP_DISABLE        BIT(6)
>  #define IVPU_TEST_MODE_DISABLE_TIMEOUTS   BIT(8)
> -#define IVPU_TEST_MODE_TURBO		  BIT(9)
> -#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(10)
> -#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE  BIT(11)
> -#define IVPU_TEST_MODE_D0I2_DISABLE       BIT(12)
> +#define IVPU_TEST_MODE_TURBO_ENABLE       BIT(9)
> +#define IVPU_TEST_MODE_TURBO_DISABLE      BIT(10)
> +#define IVPU_TEST_MODE_CLK_RELINQ_DISABLE BIT(11)
> +#define IVPU_TEST_MODE_CLK_RELINQ_ENABLE  BIT(12)
> +#define IVPU_TEST_MODE_D0I2_DISABLE       BIT(13)
>  extern int ivpu_test_mode;
>  
>  struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
> diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
> index fae8351aa330..060f1fc031d3 100644
> --- a/drivers/accel/ivpu/ivpu_job.c
> +++ b/drivers/accel/ivpu/ivpu_job.c
> @@ -1,6 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  /*
> - * Copyright (C) 2020-2024 Intel Corporation
> + * Copyright (C) 2020-2025 Intel Corporation
>   */
>  
>  #include <drm/drm_file.h>
> @@ -100,6 +100,43 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
>  	return NULL;
>  }
>  
> +/**
> + * ivpu_cmdq_get_entry_count - Calculate the number of entries in the command queue.
> + * @cmdq: Pointer to the command queue structure.
> + *
> + * Returns the number of entries that can fit in the command queue memory.
> + */
> +static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
> +{
> +	size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);
> +
> +	return size / sizeof(struct vpu_job_queue_entry);
> +}
> +
> +/**
> + * ivpu_cmdq_get_flags - Get command queue flags based on input flags and test mode.
> + * @vdev: Pointer to the ivpu device structure.
> + * @flags: Input flags to determine the command queue flags.
> + *
> + * Returns the calculated command queue flags, considering both the input flags
> + * and the current test mode settings.
> + */
> +static u32 ivpu_cmdq_get_flags(struct ivpu_device *vdev, u32 flags)
> +{
> +	u32 cmdq_flags = 0;
> +
> +	if ((flags & DRM_IVPU_CMDQ_FLAG_TURBO) && (ivpu_hw_ip_gen(vdev) >= IVPU_HW_IP_40XX))
> +		cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
> +
> +	/* Test mode can override the TURBO flag coming from the application */
> +	if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_ENABLE)
> +		cmdq_flags |= VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
> +	if (ivpu_test_mode & IVPU_TEST_MODE_TURBO_DISABLE)
> +		cmdq_flags &= ~VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
> +
> +	return cmdq_flags;
> +}
> +
>  static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
>  {
>  	ivpu_preemption_buffers_free(file_priv->vdev, file_priv, cmdq);
> @@ -107,8 +144,7 @@ static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *c
>  	kfree(cmdq);
>  }
>  
> -static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority,
> -					  bool is_legacy)
> +static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 priority, u32 flags)
>  {
>  	struct ivpu_device *vdev = file_priv->vdev;
>  	struct ivpu_cmdq *cmdq = NULL;
> @@ -121,10 +157,6 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 p
>  		ivpu_err(vdev, "Failed to allocate command queue\n");
>  		return NULL;
>  	}
> -
> -	cmdq->priority = priority;
> -	cmdq->is_legacy = is_legacy;
> -
>  	ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit,
>  			      &file_priv->cmdq_id_next, GFP_KERNEL);
>  	if (ret < 0) {
> @@ -132,7 +164,15 @@ static struct ivpu_cmdq *ivpu_cmdq_create(struct ivpu_file_priv *file_priv, u8 p
>  		goto err_free_cmdq;
>  	}
>  
> -	ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d\n", cmdq->id, file_priv->ctx.id);
> +	cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
> +	cmdq->priority = priority;
> +
> +	cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
> +	cmdq->jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
> +	cmdq->jobq->header.flags = ivpu_cmdq_get_flags(vdev, flags);
> +
> +	ivpu_dbg(vdev, JOB, "Command queue %d created, ctx %d, flags 0x%08x\n",
> +		 cmdq->id, file_priv->ctx.id, cmdq->jobq->header.flags);
>  	return cmdq;
>  
>  err_free_cmdq:
> @@ -188,27 +228,14 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *
>  	return ret;
>  }
>  
> -static void ivpu_cmdq_jobq_init(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
> +static void ivpu_cmdq_jobq_reset(struct ivpu_device *vdev, struct vpu_job_queue *jobq)
>  {
> -	jobq->header.engine_idx = VPU_ENGINE_COMPUTE;
>  	jobq->header.head = 0;
>  	jobq->header.tail = 0;
>  
> -	if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) {
> -		ivpu_dbg(vdev, JOB, "Turbo mode enabled");
> -		jobq->header.flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
> -	}
> -
>  	wmb(); /* Flush WC buffer for jobq->header */
>  }
>  
> -static inline u32 ivpu_cmdq_get_entry_count(struct ivpu_cmdq *cmdq)
> -{
> -	size_t size = ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header);
> -
> -	return size / sizeof(struct vpu_job_queue_entry);
> -}
> -
>  static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
>  {
>  	struct ivpu_device *vdev = file_priv->vdev;
> @@ -219,10 +246,7 @@ static int ivpu_cmdq_register(struct ivpu_file_priv *file_priv, struct ivpu_cmdq
>  	if (cmdq->db_id)
>  		return 0;
>  
> -	cmdq->entry_count = ivpu_cmdq_get_entry_count(cmdq);
> -	cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
> -
> -	ivpu_cmdq_jobq_init(vdev, cmdq->jobq);
> +	ivpu_cmdq_jobq_reset(vdev, cmdq->jobq);
>  
>  	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
>  		ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, cmdq->priority);
> @@ -291,9 +315,10 @@ static struct ivpu_cmdq *ivpu_cmdq_acquire_legacy(struct ivpu_file_priv *file_pr
>  			break;
>  
>  	if (!cmdq) {
> -		cmdq = ivpu_cmdq_create(file_priv, priority, true);
> +		cmdq = ivpu_cmdq_create(file_priv, priority, 0);
>  		if (!cmdq)
>  			return NULL;
> +		cmdq->is_legacy = true;
>  	}
>  
>  	return cmdq;
> @@ -891,7 +916,7 @@ int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *
>  
>  	mutex_lock(&file_priv->lock);
>  
> -	cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false);
> +	cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), args->flags);
>  	if (cmdq)
>  		args->cmdq_id = cmdq->id;
>  
> diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h
> index 2f24103f4533..160ee1411d4a 100644
> --- a/include/uapi/drm/ivpu_accel.h
> +++ b/include/uapi/drm/ivpu_accel.h
> @@ -445,6 +445,9 @@ struct drm_ivpu_metric_streamer_get_data {
>  	__u64 data_size;
>  };
>  
> +/* Command queue flags */
> +#define DRM_IVPU_CMDQ_FLAG_TURBO 0x00000001
> +
>  /**
>   * struct drm_ivpu_cmdq_create - Create command queue for job submission
>   */
> @@ -462,6 +465,17 @@ struct drm_ivpu_cmdq_create {
>  	 * %DRM_IVPU_JOB_PRIORITY_REALTIME
>  	 */
>  	__u32 priority;
> +	/**
> +	 * @flags:
> +	 *
> +	 * Supported flags:
> +	 *
> +	 * %DRM_IVPU_CMDQ_FLAG_TURBO
> +	 *
> +	 * Enable low-latency mode for the command queue. The NPU will maximize performance
> +	 * when executing jobs from such queue at the cost of increased power usage.
> +	 */
> +	__u32 flags;
>  };
>  
>  /**


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-06-16 11:39 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-05 16:20 [PATCH] accel/ivpu: Add turbo flag to the DRM_IVPU_CMDQ_CREATE ioctl Maciej Falkowski
2025-06-06 16:30 ` Jeff Hugo
2025-06-12 13:31   ` Falkowski, Maciej
2025-06-12 13:42     ` Jeff Hugo
2025-06-13  8:01       ` Jacek Lawrynowicz
2025-06-16 11:39 ` Jacek Lawrynowicz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).